I'm having problems loading my data for training. I think I haven't converted the data types correctly; I've been following a tutorial and using Copilot, and I may have gotten myself confused.
Here is the error message:
Epoch 1/10
---------------------------------------------------------------------------
UnimplementedError Traceback (most recent call last)
<ipython-input-30-5f6b1199e80a> in <cell line: 2>()
1 # Train the model
----> 2 model.fit(train_dataset, epochs=10, validation_data=test_dataset)
1 frames
/usr/local/lib/python3.10/dist-packages/tensorflow/python/eager/execute.py in quick_execute(op_name, num_outputs, inputs, attrs, ctx, name)
51 try:
52 ctx.ensure_initialized()
---> 53 tensors = pywrap_tfe.TFE_Py_Execute(ctx._handle, device_name, op_name,
54 inputs, attrs, num_outputs)
55 except core._NotOkStatusException as e:
UnimplementedError: Graph execution error:
Detected at node functional_1/Cast defined at (most recent call last):
File "/usr/lib/python3.10/runpy.py", line 196, in _run_module_as_main
File "/usr/lib/python3.10/runpy.py", line 86, in _run_code
File "/usr/local/lib/python3.10/dist-packages/colab_kernel_launcher.py", line 37, in <module>
File "/usr/local/lib/python3.10/dist-packages/traitlets/config/application.py", line 992, in launch_instance
File "/usr/local/lib/python3.10/dist-packages/ipykernel/kernelapp.py", line 619, in start
File "/usr/local/lib/python3.10/dist-packages/tornado/platform/asyncio.py", line 195, in start
File "/usr/lib/python3.10/asyncio/base_events.py", line 603, in run_forever
File "/usr/lib/python3.10/asyncio/base_events.py", line 1909, in _run_once
File "/usr/lib/python3.10/asyncio/events.py", line 80, in _run
File "/usr/local/lib/python3.10/dist-packages/tornado/ioloop.py", line 685, in <lambda>
File "/usr/local/lib/python3.10/dist-packages/tornado/ioloop.py", line 738, in _run_callback
File "/usr/local/lib/python3.10/dist-packages/tornado/gen.py", line 825, in inner
File "/usr/local/lib/python3.10/dist-packages/tornado/gen.py", line 786, in run
File "/usr/local/lib/python3.10/dist-packages/ipykernel/kernelbase.py", line 361, in process_one
File "/usr/local/lib/python3.10/dist-packages/tornado/gen.py", line 234, in wrapper
File "/usr/local/lib/python3.10/dist-packages/ipykernel/kernelbase.py", line 261, in dispatch_shell
File "/usr/local/lib/python3.10/dist-packages/tornado/gen.py", line 234, in wrapper
File "/usr/local/lib/python3.10/dist-packages/ipykernel/kernelbase.py", line 539, in execute_request
File "/usr/local/lib/python3.10/dist-packages/tornado/gen.py", line 234, in wrapper
File "/usr/local/lib/python3.10/dist-packages/ipykernel/ipkernel.py", line 302, in do_execute
File "/usr/local/lib/python3.10/dist-packages/ipykernel/zmqshell.py", line 539, in run_cell
File "/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py", line 2975, in run_cell
File "/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py", line 3030, in _run_cell
File "/usr/local/lib/python3.10/dist-packages/IPython/core/async_helpers.py", line 78, in _pseudo_sync_runner
File "/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py", line 3257, in run_cell_async
File "/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py", line 3473, in run_ast_nodes
File "/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py", line 3553, in run_code
File "<ipython-input-30-5f6b1199e80a>", line 2, in <cell line: 2>
File "/usr/local/lib/python3.10/dist-packages/keras/src/utils/traceback_utils.py", line 117, in error_handler
File "/usr/local/lib/python3.10/dist-packages/keras/src/backend/tensorflow/trainer.py", line 318, in fit
File "/usr/local/lib/python3.10/dist-packages/keras/src/backend/tensorflow/trainer.py", line 121, in one_step_on_iterator
File "/usr/local/lib/python3.10/dist-packages/keras/src/backend/tensorflow/trainer.py", line 108, in one_step_on_data
File "/usr/local/lib/python3.10/dist-packages/keras/src/backend/tensorflow/trainer.py", line 51, in train_step
File "/usr/local/lib/python3.10/dist-packages/keras/src/utils/traceback_utils.py", line 117, in error_handler
File "/usr/local/lib/python3.10/dist-packages/keras/src/layers/layer.py", line 882, in __call__
File "/usr/local/lib/python3.10/dist-packages/keras/src/utils/traceback_utils.py", line 117, in error_handler
File "/usr/local/lib/python3.10/dist-packages/keras/src/ops/operation.py", line 46, in __call__
File "/usr/local/lib/python3.10/dist-packages/keras/src/utils/traceback_utils.py", line 156, in error_handler
File "/usr/local/lib/python3.10/dist-packages/keras/src/models/functional.py", line 167, in call
File "/usr/local/lib/python3.10/dist-packages/keras/src/models/functional.py", line 258, in _standardize_inputs
File "/usr/local/lib/python3.10/dist-packages/keras/src/models/functional.py", line 218, in _convert_inputs_to_tensors
File "/usr/local/lib/python3.10/dist-packages/keras/src/ops/core.py", line 743, in convert_to_tensor
File "/usr/local/lib/python3.10/dist-packages/keras/src/backend/tensorflow/core.py", line 120, in convert_to_tensor
Cast string to float is not supported
[[{{node functional_1/Cast}}]] [Op:__inference_one_step_on_iterator_2557]
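If I'm reading the last two lines right, Keras is trying to cast one of my input features from string to float at the functional model's Cast node, which would mean at least one column I treat as numeric is still arriving as strings when it reaches model.fit.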
And here is my code:
!pip install -U scikit-learn
!pip install isodate==0.6.1
!sudo apt install graphviz -y
!pip install tensorflow
import numpy as np
import pandas as pd
import tensorflow as tf
import isodate
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from tensorflow.keras import layers
from tensorflow.keras.layers import CategoryEncoding, StringLookup, IntegerLookup, Normalization
import matplotlib.pyplot as plt
from google.colab import drive
drive.mount('/content/drive')
hips = pd.read_csv('/content/drive/MyDrive/snakeHipsAnal/snakeHips.csv')
hips.isnull().sum()
hips = hips.dropna()  # dropna() returns a new DataFrame, so assign it back
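# Sanity check (sketch): any feature column printed as 'object' here is still
# strings and is a candidate for the Cast error below.
print(hips.dtypes)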
# Parse the ISO 8601 durations into timedeltas, then extract total seconds
hips['durationSecs'] = hips['duration'].apply(lambda x: isodate.parse_duration(x))
hips['duration_seconds'] = hips['durationSecs'].dt.total_seconds()
hips['publishedAt'] = pd.to_datetime(hips['publishedAt'])
hips['publishedAt_timestamp'] = hips['publishedAt'].apply(lambda x: x.timestamp())
hips = hips.replace('\$', '', regex=True)  # strips '$' from every string cell in the whole frame
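# Suspected fix (sketch; I haven't verified which columns are affected):
# columns containing '$' load from the CSV as strings, and replace() above
# strips the character but leaves the dtype as object, so coerce explicitly:
for col in ['viewCount', 'likeCount', 'commentCount', 'tagCount']:
    hips[col] = pd.to_numeric(hips[col], errors='coerce')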
hips['publishDayName'] = hips['pushblishDayName']  # copy the misspelled CSV column to a clean name
columns_to_keep = [
    'title', 'publishedAt_timestamp', 'viewCount', 'likeCount',
    'commentCount', 'tagCount', 'duration_seconds', 'publishDayName', 'definition', 'caption'
]
# Keep only the relevant columns
hips = hips[columns_to_keep]
df = hips  # note: this is an alias of hips, not a copy
df.head()
# Define a composite score for popularity (e.g., a weighted sum of views, likes, and comments)
df['popularity_score'] = df['viewCount'] + df['likeCount'] * 10 + df['commentCount'] * 20
# Optionally, create a binary target column based on the composite score
# For example, videos with a popularity score above a certain threshold are considered popular
threshold = df['popularity_score'].median()
df['target'] = np.where(df['popularity_score'] > threshold, 1, 0)
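# Caveat: if the count columns were still strings here, the arithmetic above
# would not raise an error, since '+' concatenates strings and '*' repeats
# them; the score and target would be built from garbage silently.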
# Drop unused columns
df = df.drop(columns=['title', 'publishedAt_timestamp', 'popularity_score', 'caption'], errors='ignore')
# Split the DataFrame into training and test sets
train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)
# Convert DataFrames to TensorFlow Datasets
def df_to_dataset(dataframe, shuffle=True, batch_size=32):
    dataframe = dataframe.copy()
    labels = dataframe.pop('target')
    ds = tf.data.Dataset.from_tensor_slices((dict(dataframe), labels))
    if shuffle:
        ds = ds.shuffle(buffer_size=len(dataframe))
    ds = ds.batch(batch_size)
    return ds
train_dataset = df_to_dataset(train_df)
test_dataset = df_to_dataset(test_df, shuffle=False)
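# Sanity check (sketch): element_spec lists the dtype of every feature; any
# feature I expect to be numeric that shows up as tf.string here would be the
# one hitting the Cast error.
print(train_dataset.element_spec)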
# Define the normalization layer function
def get_normalization_layer(name, dataset):
    # Create a Normalization layer for the feature
    normalizer = layers.Normalization(axis=None)
    # Prepare a Dataset that only yields the feature
    feature_ds = dataset.map(lambda x, y: tf.expand_dims(tf.cast(x[name], tf.float32), -1))
    # Learn the statistics of the data
    normalizer.adapt(feature_ds)
    return normalizer
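# Note: tf.cast cannot convert tf.string to tf.float32, so if x[name] is a
# string column, the adapt() call above can raise the same UnimplementedError.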
# Define the category encoding layer function
def get_category_encoding_layer(name, dataset, dtype, max_tokens=None):
    if dtype == 'string':
        index = layers.StringLookup(max_tokens=max_tokens, output_mode='int')
    else:
        index = layers.IntegerLookup(max_tokens=max_tokens, output_mode='int')
    feature_ds = dataset.map(lambda x, y: x[name])
    index.adapt(feature_ds)
    encoder = layers.CategoryEncoding(num_tokens=index.vocabulary_size())
    return lambda feature: encoder(index(feature))
# List of numeric features
numeric_features = ['viewCount', 'likeCount', 'commentCount', 'tagCount', 'duration_seconds']
all_inputs = []
encoded_features = []
# Process numeric features
for header in numeric_features:
    numeric_col = tf.keras.Input(shape=(1,), name=header, dtype=tf.float32)
    normalization_layer = get_normalization_layer(header, train_dataset)
    encoded_numeric_col = normalization_layer(numeric_col)
    all_inputs.append(numeric_col)
    encoded_features.append(encoded_numeric_col)
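# Note: these Inputs are declared float32; if the dataset feeds a string
# tensor into one of them, Keras inserts the functional_1/Cast node from the
# traceback above, which is where the UnimplementedError fires.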
# Process categorical feature 'publishDayName'
publish_day_col = tf.keras.Input(shape=(1,), name='publishDayName', dtype=tf.string)
publish_day_encoding_layer = get_category_encoding_layer('publishDayName', train_dataset, dtype='string')
encoded_publish_day_col = publish_day_encoding_layer(publish_day_col)
all_inputs.append(publish_day_col)
encoded_features.append(encoded_publish_day_col)
# Process other categorical features if they exist
# Example: 'definition'
definition_col = tf.keras.Input(shape=(1,), name='definition', dtype=tf.string)
definition_encoding_layer = get_category_encoding_layer('definition', train_dataset, dtype='string')
encoded_definition_col = definition_encoding_layer(definition_col)
all_inputs.append(definition_col)
encoded_features.append(encoded_definition_col)
# Combine all features
all_features = layers.concatenate(encoded_features)
# Define the model
x = layers.Dense(32, activation='relu')(all_features)
x = layers.Dense(16, activation='relu')(x)
output = layers.Dense(1, activation='sigmoid')(x)
model = tf.keras.Model(inputs=all_inputs, outputs=output)
# Compile the model
model.compile(optimizer='adam', loss=tf.keras.losses.BinaryCrossentropy(from_logits=False), metrics=['accuracy'])
# Prepare the datasets for training
train_dataset = train_dataset.prefetch(tf.data.AUTOTUNE)
test_dataset = test_dataset.prefetch(tf.data.AUTOTUNE)
# Train the model
model.fit(train_dataset, epochs=10, validation_data=test_dataset)
thanks