Hello everyone,
I built a model that gets its input from a csv file. Everything works fine when I use the first four features. But as soon as I add the 5th feature, problems arise. The problem with feature 5 is that each entry is a numpy array(shape=(4,288)) stored as a string in the csv file.
The csv file has the following structure:
Feature1,Feature2,Feature3,Feature4,Labels,Feature5
13.37,33.09,-0.08,992.2,nass,“[[1, 160, 246, 255], … ,[1, 160, 246, 255]]”
26.37,33.03,-0.08,992.2,trocken,“[[110, 160, 246, 255], … ,[20, 160, 246, 255]]”
I use pandas to read the csv file.
CODE:
def build_model(num_features: int, num_classes: int) -> Sequential:
model = Sequential()
model.add(Dense(units=100, input_shape=(num_features,)))
model.add(Activation("relu"))
model.add(Dense(units=100, input_shape=(num_features,)))
model.add(Activation("relu"))
model.add(Dense(units=num_classes))
model.add(Activation("softmax"))
model.summary()
return model
df = pd.read_csv(r'D:\pyenv\Daten\gps_time_Regenverlauf2.csv', skiprows=[1])
df = df.replace(['trocken','feucht','nass','Wasser steht','Schnee(decke)','Schnee(matsch)','duenne Schneeschicht'],[0,1,1,1,2,2,2])
df = df.dropna()
for i in range(0,999):
df.iat[i,999] = df.iat[i,999].strip("'")
print("Daten:", df.head())
labels = df.pop('Labels')
labels = np.asarray(labels).astype('float32')
data = np.array(df.loc[:1000, ['Feature1', 'Feature2', 'Feature3', 'Feature4', 'Feature5']])
#data = np.array(df.loc[:1000, ['Feature1', 'Feature2', 'Feature3', 'Feature4']])
x_train, x_test, labels_train, labels_test = train_test_split(data, labels, test_size=0.20, random_state=42)
num_features = 5
num_classes = 3
labels_train = to_categorical(labels_train, num_classes=num_classes, dtype=np.float32)
labels_test = to_categorical(labels_test, num_classes=num_classes, dtype=np.float32)
model = build_model(num_features, num_classes)
model.compile(loss= tf.keras.losses.CategoricalCrossentropy() , optimizer=tf.keras.optimizers.Adam(), metrics=["accuracy"])
model.fit(x_train, labels_train, epochs=50, verbose=1,validation_data=(x_test , labels_test))
model.summary()
model.evaluate(x_test_normalized, labels_test, verbose=2)
x_pred = model.predict(x_test_normalized)
I get the following error.
ERROR:
dense_2 (Dense) (None, 3) 303
activation2 (Activation) (None, 3) 0
=================================================================
Total params: 10,903
Trainable params: 10,903Non-trainable params: 0__
Traceback (most recent call last):
File "classification_DNN.py", line 165, in <module>
model.fit(x_train, labels_train, epochs=10, verbose=1,validation_data=(x_test , labels_test))
File "D:\pyenv.venv\lib\site-packages\keras\utils\traceback_utils.py", line 70, in error_handler
raise e.with_traceback(filtered_tb) from None
File "D:\pyenv.venv\lib\site-packages\tensorflow\python\framework\constant_op.py", line 102, in convert_to_eager_tensor
return ops.EagerTensor(value, ctx.device_name, dtype)
ValueError: Failed to convert a NumPy array to a Tensor (Unsupported object type float).
How can I make the model work with feature 5 ?
I was thinking about to reshape feature 5 but then I still have a sequence. I read that for sequences I should use a RNN with lstm but then I don’t know how to combine that with the other input features.
All help will be much appreciated.
Cheers, Mo