Hello - New to TF/keras and trying to understand why this model is compiling but not training appropriately.
[IN]
def prepare_data_for_keras(df):
"""Prepares a DataFrame for Keras NN modeling
Args:
df: Pandas DataFrame containing the data
Returns:
A tuple containing:
- X_train: Dictionary of numpy arrays for training input features
- y_train: Numpy array of training labels
- X_test: Dictionary of numpy arrays for testing input features
- y_test: Numpy array of testing labels
- tokenizer: Fitted Tokenizer object for Description
- label_encoder: Fitted LabelEncoder for PartClassificationResult
- manufacturer_encoder: Fitted LabelEncoder for Manufacturer
- ordinal_encoder: Fitted OrdinalEncoder for Pins
"""
# Manufacturer (String Encode)
manufacturer_encoder = LabelEncoder()
df['Manufacturer'] = manufacturer_encoder.fit_transform(df['Manufacturer'])
# MountType (Category - One-Hot Encode)
mount_type_dummies = pd.get_dummies(df['MountType'], prefix='MountType')
df = pd.concat([df, mount_type_dummies], axis=1)
df.drop('MountType', axis=1, inplace=True) # Remove original column
# Description (String - Text Vectorization with Tokenizer)
tokenizer = Tokenizer()
tokenizer.fit_on_texts(df['Description'])
sequences = tokenizer.texts_to_sequences(df['Description'])
max_len = max([len(s) for s in sequences]) # Get max sequence length for padding
padded_sequences = pad_sequences(sequences, maxlen=max_len, padding='post')
# Pins (Category - Ordinal Encoding)
pins_values = df['Pins'].astype(str).fillna('UNKNOWN').values.reshape(-1, 1) # Convert to string and handle NaNs
ordinal_encoder = OrdinalEncoder(handle_unknown='use_encoded_value', unknown_value=-1)
df['Pins'] = ordinal_encoder.fit_transform(pins_values)
# PartClassificationResult (Label - Label Encoding)
label_encoder = LabelEncoder()
df['PartClassificationResult'] = label_encoder.fit_transform(df['PartClassificationResult'])
# DescEmbeddings
desc_embeddings = np.stack(df['DescEmbeddings'].values)
# Split data
indices = np.arange(len(df))
train_indices, test_indices = train_test_split(indices, test_size=0.2, random_state=42)
# Create X_train, X_test
X_train = {
'manufacturer': df['Manufacturer'].values[train_indices].reshape(-1, 1),
'mount_type': df['MountType_SMD'].values[train_indices].reshape(-1, 1),
'description': padded_sequences[train_indices],
'pins': df['Pins'].values[train_indices].reshape(-1, 1),
'desc_embeddings': desc_embeddings[train_indices]
}
X_test = {
'manufacturer': df['Manufacturer'].values[test_indices].reshape(-1, 1),
'mount_type': df['MountType_SMD'].values[test_indices].reshape(-1, 1),
'description': padded_sequences[test_indices],
'pins': df['Pins'].values[test_indices].reshape(-1, 1),
'desc_embeddings': desc_embeddings[test_indices]
}
# Create y_train, y_test
y_train = df['PartClassificationResult'].values[train_indices]
y_test = df['PartClassificationResult'].values[test_indices]
return X_train, y_train, X_test, y_test, tokenizer, label_encoder, manufacturer_encoder, ordinal_encoder
def create_keras_model(X_train, y_train, tokenizer, embedding_dim=256): #pass tokenizer
"""
Creates a Keras model by analyzing the training data to determine input parameters.
Args:
X_train: Dictionary of training input features.
y_train: Numpy array of training labels.
embedding_dim: Dimensionality of the embedding layers (default: 100).
Returns:
A compiled Keras model.
"""
# Determine input parameters from training data
num_manufacturers = len(np.unique(X_train['manufacturer']))
num_pins = len(np.unique(X_train['pins']))
num_classes = len(np.unique(y_train))
vocab_size = len(tokenizer.word_index) + 1 # For the description tokenizer
max_len = X_train['description'].shape[1] #Get sequence length
# Input layers
manufacturer_input = Input(shape=(1,), name='manufacturer')
mount_type_input = Input(shape=(X_train['mount_type'].shape[1],), name='mount_type')
description_input = Input(shape=(max_len,), name='description')
pins_input = Input(shape=(1,), name='pins')
embeddings_input = Input(shape=(embedding_dim,), name='desc_embeddings')
# Separate branches for different input types
manufacturer_branch = Embedding(input_dim=num_manufacturers, output_dim=64)(manufacturer_input)
manufacturer_branch = Flatten()(manufacturer_branch)
manufacturer_branch = Dense(32, activation='relu')(manufacturer_branch)
mount_type_branch = Dense(32, activation='relu')(mount_type_input)
description_branch = Embedding(input_dim=vocab_size, output_dim=64)(description_input)
description_branch = Flatten()(description_branch)
description_branch = Dense(32, activation='relu')(description_branch)
pins_branch = Embedding(input_dim=num_pins, output_dim=64)(pins_input)
pins_branch = Flatten()(pins_branch)
pins_branch = Dense(32, activation='relu')(pins_branch)
#embeddings_branch = Flatten()(embeddings_input)
embeddings_branch = Dense(64, activation='relu')(embeddings_input)
embeddings_branch = Dense(32, activation='relu')(embeddings_branch)
# Concatenate branches
combined = concatenate([manufacturer_branch, mount_type_branch, description_branch, pins_branch, embeddings_branch])
# Dense layers
dense1 = Dense(64, activation='relu', name='first_dense')(combined)
output = Dense(num_classes, activation='softmax', name='output')(dense1)
# Create model
model = Model(inputs=[manufacturer_input, mount_type_input, description_input, pins_input, embeddings_input], outputs=output)
# Compile model
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy']) #sparse_categorical since labels are integers
return model
# Prepare data and create the model
X_train, y_train, X_test, y_test, tokenizer, label_encoder, manufacturer_encoder, ordinal_encoder = prepare_data_for_keras(df.copy())
tf.keras.backend.clear_session()
model = create_keras_model(X_train, y_train, tokenizer) #Pass tokenizer
model.summary(expand_nested=True)
[OUT]
Model: "functional"
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┓
┃ Layer (type) ┃ Output Shape ┃ Param # ┃ Connected to ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━┩
│ manufacturer (InputLayer) │ (None, 1) │ 0 │ - │
├───────────────────────────┼────────────────────────┼────────────────┼────────────────────────┤
│ description (InputLayer) │ (None, 10) │ 0 │ - │
├───────────────────────────┼────────────────────────┼────────────────┼────────────────────────┤
│ pins (InputLayer) │ (None, 1) │ 0 │ - │
├───────────────────────────┼────────────────────────┼────────────────┼────────────────────────┤
│ embedding (Embedding) │ (None, 1, 64) │ 1,088 │ manufacturer[0][0] │
├───────────────────────────┼────────────────────────┼────────────────┼────────────────────────┤
│ embedding_1 (Embedding) │ (None, 10, 64) │ 11,136 │ description[0][0] │
├───────────────────────────┼────────────────────────┼────────────────┼────────────────────────┤
│ embedding_2 (Embedding) │ (None, 1, 64) │ 960 │ pins[0][0] │
├───────────────────────────┼────────────────────────┼────────────────┼────────────────────────┤
│ desc_embeddings │ (None, 256) │ 0 │ - │
│ (InputLayer) │ │ │ │
├───────────────────────────┼────────────────────────┼────────────────┼────────────────────────┤
│ flatten (Flatten) │ (None, 64) │ 0 │ embedding[0][0] │
├───────────────────────────┼────────────────────────┼────────────────┼────────────────────────┤
│ mount_type (InputLayer) │ (None, 1) │ 0 │ - │
├───────────────────────────┼────────────────────────┼────────────────┼────────────────────────┤
│ flatten_1 (Flatten) │ (None, 640) │ 0 │ embedding_1[0][0] │
├───────────────────────────┼────────────────────────┼────────────────┼────────────────────────┤
│ flatten_2 (Flatten) │ (None, 64) │ 0 │ embedding_2[0][0] │
├───────────────────────────┼────────────────────────┼────────────────┼────────────────────────┤
│ dense_4 (Dense) │ (None, 64) │ 16,448 │ desc_embeddings[0][0] │
├───────────────────────────┼────────────────────────┼────────────────┼────────────────────────┤
│ dense (Dense) │ (None, 32) │ 2,080 │ flatten[0][0] │
├───────────────────────────┼────────────────────────┼────────────────┼────────────────────────┤
│ dense_1 (Dense) │ (None, 32) │ 64 │ mount_type[0][0] │
├───────────────────────────┼────────────────────────┼────────────────┼────────────────────────┤
│ dense_2 (Dense) │ (None, 32) │ 20,512 │ flatten_1[0][0] │
├───────────────────────────┼────────────────────────┼────────────────┼────────────────────────┤
│ dense_3 (Dense) │ (None, 32) │ 2,080 │ flatten_2[0][0] │
├───────────────────────────┼────────────────────────┼────────────────┼────────────────────────┤
│ dense_5 (Dense) │ (None, 32) │ 2,080 │ dense_4[0][0] │
├───────────────────────────┼────────────────────────┼────────────────┼────────────────────────┤
│ concatenate (Concatenate) │ (None, 160) │ 0 │ dense[0][0], │
│ │ │ │ dense_1[0][0], │
│ │ │ │ dense_2[0][0], │
│ │ │ │ dense_3[0][0], │
│ │ │ │ dense_5[0][0] │
├───────────────────────────┼────────────────────────┼────────────────┼────────────────────────┤
│ first_dense (Dense) │ (None, 64) │ 10,304 │ concatenate[0][0] │
├───────────────────────────┼────────────────────────┼────────────────┼────────────────────────┤
│ output (Dense) │ (None, 12) │ 780 │ first_dense[0][0] │
└───────────────────────────┴────────────────────────┴────────────────┴────────────────────────┘
Total params: 67,532 (263.80 KB)
Trainable params: 67,532 (263.80 KB)
Non-trainable params: 0 (0.00 B)
[IN]
trained_model = model.fit(
X_train,
y_train,
epochs=10,
batch_size=2,
validation_data=(X_test, y_test)
)
[OUT]
Epoch 1/10
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-142-a0316ed7ba4d> in <cell line: 1>()
----> 1 trained_model = model.fit(
2 X_train,
3 y_train,
4 epochs=10,
5 batch_size=2,
1 frames
/usr/local/lib/python3.10/dist-packages/keras/src/layers/input_spec.py in assert_input_compatibility(input_spec, inputs, layer_name)
225 None,
226 }:
--> 227 raise ValueError(
228 f'Input {input_index} of layer "{layer_name}" is '
229 f"incompatible with the layer: expected axis {axis} "
ValueError: Exception encountered when calling Functional.call().
Input 0 of layer "dense_4" is incompatible with the layer: expected axis -1 of input shape to have value 256, but received input with shape (2, 1)
Arguments received by Functional.call():
• inputs={'manufacturer': 'tf.Tensor(shape=(2, 1), dtype=int64)', 'mount_type': 'tf.Tensor(shape=(2, 1), dtype=bool)', 'description': 'tf.Tensor(shape=(2, 10), dtype=int32)', 'pins': 'tf.Tensor(shape=(2, 1), dtype=float32)', 'desc_embeddings': 'tf.Tensor(shape=(2, 256), dtype=float32)'}
• training=True
• mask={'manufacturer': 'None', 'mount_type': 'None', 'description': 'None', 'pins': 'None', 'desc_embeddings': 'None'}