Handling mixed inputs to keras model

Hello - New to TF/keras and trying to understand why this model is compiling but not training appropriately.

[IN]

def prepare_data_for_keras(df):
    """Prepares a DataFrame for Keras NN modeling

    Args:
        df: Pandas DataFrame containing the data

    Returns:
        A tuple containing:
            - X_train: Dictionary of numpy arrays for training input features
            - y_train: Numpy array of training labels
            - X_test: Dictionary of numpy arrays for testing input features
            - y_test: Numpy array of testing labels
            - tokenizer: Fitted Tokenizer object for Description
            - label_encoder: Fitted LabelEncoder for PartClassificationResult
            - manufacturer_encoder: Fitted LabelEncoder for Manufacturer
            - ordinal_encoder: Fitted OrdinalEncoder for Pins
    """

    # Manufacturer (String Encode)
    manufacturer_encoder = LabelEncoder()
    df['Manufacturer'] = manufacturer_encoder.fit_transform(df['Manufacturer'])

    # MountType (Category - One-Hot Encode)
    mount_type_dummies = pd.get_dummies(df['MountType'], prefix='MountType')
    df = pd.concat([df, mount_type_dummies], axis=1)
    df.drop('MountType', axis=1, inplace=True)  # Remove original column

    # Description (String - Text Vectorization with Tokenizer)
    tokenizer = Tokenizer()
    tokenizer.fit_on_texts(df['Description'])
    sequences = tokenizer.texts_to_sequences(df['Description'])
    max_len = max([len(s) for s in sequences])  # Get max sequence length for padding
    padded_sequences = pad_sequences(sequences, maxlen=max_len, padding='post')

    # Pins (Category - Ordinal Encoding)
    pins_values = df['Pins'].astype(str).fillna('UNKNOWN').values.reshape(-1, 1)  # Convert to string and handle NaNs
    ordinal_encoder = OrdinalEncoder(handle_unknown='use_encoded_value', unknown_value=-1)
    df['Pins'] = ordinal_encoder.fit_transform(pins_values)

    # PartClassificationResult (Label - Label Encoding)
    label_encoder = LabelEncoder()
    df['PartClassificationResult'] = label_encoder.fit_transform(df['PartClassificationResult'])

    # DescEmbeddings
    desc_embeddings = np.stack(df['DescEmbeddings'].values)

    # Split data
    indices = np.arange(len(df))
    train_indices, test_indices = train_test_split(indices, test_size=0.2, random_state=42)

    # Create X_train, X_test
    X_train = {
        'manufacturer': df['Manufacturer'].values[train_indices].reshape(-1, 1),
        'mount_type': df['MountType_SMD'].values[train_indices].reshape(-1, 1),
        'description': padded_sequences[train_indices],
        'pins': df['Pins'].values[train_indices].reshape(-1, 1),
        'desc_embeddings': desc_embeddings[train_indices]
    }
    X_test = {
        'manufacturer': df['Manufacturer'].values[test_indices].reshape(-1, 1),
        'mount_type': df['MountType_SMD'].values[test_indices].reshape(-1, 1),
        'description': padded_sequences[test_indices],
        'pins': df['Pins'].values[test_indices].reshape(-1, 1),
        'desc_embeddings': desc_embeddings[test_indices]
    }

    # Create y_train, y_test
    y_train = df['PartClassificationResult'].values[train_indices]
    y_test = df['PartClassificationResult'].values[test_indices]

    return X_train, y_train, X_test, y_test, tokenizer, label_encoder, manufacturer_encoder, ordinal_encoder


def create_keras_model(X_train, y_train, tokenizer, embedding_dim=256): #pass tokenizer
    """
    Creates a Keras model by analyzing the training data to determine input parameters.

    Args:
        X_train: Dictionary of training input features.
        y_train: Numpy array of training labels.
        embedding_dim: Dimensionality of the embedding layers (default: 100).

    Returns:
        A compiled Keras model.
    """

    # Determine input parameters from training data
    num_manufacturers = len(np.unique(X_train['manufacturer']))
    num_pins = len(np.unique(X_train['pins']))
    num_classes = len(np.unique(y_train))
    vocab_size = len(tokenizer.word_index) + 1  # For the description tokenizer
    max_len = X_train['description'].shape[1] #Get sequence length


    # Input layers
    manufacturer_input = Input(shape=(1,), name='manufacturer')
    mount_type_input = Input(shape=(X_train['mount_type'].shape[1],), name='mount_type')
    description_input = Input(shape=(max_len,), name='description')
    pins_input = Input(shape=(1,), name='pins')
    embeddings_input = Input(shape=(embedding_dim,), name='desc_embeddings')


    # Separate branches for different input types
    manufacturer_branch = Embedding(input_dim=num_manufacturers, output_dim=64)(manufacturer_input)
    manufacturer_branch = Flatten()(manufacturer_branch)
    manufacturer_branch = Dense(32, activation='relu')(manufacturer_branch)

    mount_type_branch = Dense(32, activation='relu')(mount_type_input)

    description_branch = Embedding(input_dim=vocab_size, output_dim=64)(description_input)
    description_branch = Flatten()(description_branch)
    description_branch = Dense(32, activation='relu')(description_branch)

    pins_branch = Embedding(input_dim=num_pins, output_dim=64)(pins_input)
    pins_branch = Flatten()(pins_branch)
    pins_branch = Dense(32, activation='relu')(pins_branch)

    #embeddings_branch = Flatten()(embeddings_input)
    embeddings_branch = Dense(64, activation='relu')(embeddings_input)
    embeddings_branch = Dense(32, activation='relu')(embeddings_branch)


    # Concatenate branches
    combined = concatenate([manufacturer_branch, mount_type_branch, description_branch, pins_branch, embeddings_branch])

    # Dense layers
    dense1 = Dense(64, activation='relu', name='first_dense')(combined)
    output = Dense(num_classes, activation='softmax', name='output')(dense1)

    # Create model
    model = Model(inputs=[manufacturer_input, mount_type_input, description_input, pins_input, embeddings_input], outputs=output)

    # Compile model
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy']) #sparse_categorical since labels are integers

    return model

# Prepare data and create the model
X_train, y_train, X_test, y_test, tokenizer, label_encoder, manufacturer_encoder, ordinal_encoder = prepare_data_for_keras(df.copy())

tf.keras.backend.clear_session()
model = create_keras_model(X_train, y_train, tokenizer) #Pass tokenizer
model.summary(expand_nested=True)

[OUT]

Model: "functional"
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┓
┃ Layer (type)              ┃ Output Shape           ┃        Param # ┃ Connected to           ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━┩
│ manufacturer (InputLayer) │ (None, 1)              │              0 │ -                      │
├───────────────────────────┼────────────────────────┼────────────────┼────────────────────────┤
│ description (InputLayer)  │ (None, 10)             │              0 │ -                      │
├───────────────────────────┼────────────────────────┼────────────────┼────────────────────────┤
│ pins (InputLayer)         │ (None, 1)              │              0 │ -                      │
├───────────────────────────┼────────────────────────┼────────────────┼────────────────────────┤
│ embedding (Embedding)     │ (None, 1, 64)          │          1,088 │ manufacturer[0][0]     │
├───────────────────────────┼────────────────────────┼────────────────┼────────────────────────┤
│ embedding_1 (Embedding)   │ (None, 10, 64)         │         11,136 │ description[0][0]      │
├───────────────────────────┼────────────────────────┼────────────────┼────────────────────────┤
│ embedding_2 (Embedding)   │ (None, 1, 64)          │            960 │ pins[0][0]             │
├───────────────────────────┼────────────────────────┼────────────────┼────────────────────────┤
│ desc_embeddings           │ (None, 256)            │              0 │ -                      │
│ (InputLayer)              │                        │                │                        │
├───────────────────────────┼────────────────────────┼────────────────┼────────────────────────┤
│ flatten (Flatten)         │ (None, 64)             │              0 │ embedding[0][0]        │
├───────────────────────────┼────────────────────────┼────────────────┼────────────────────────┤
│ mount_type (InputLayer)   │ (None, 1)              │              0 │ -                      │
├───────────────────────────┼────────────────────────┼────────────────┼────────────────────────┤
│ flatten_1 (Flatten)       │ (None, 640)            │              0 │ embedding_1[0][0]      │
├───────────────────────────┼────────────────────────┼────────────────┼────────────────────────┤
│ flatten_2 (Flatten)       │ (None, 64)             │              0 │ embedding_2[0][0]      │
├───────────────────────────┼────────────────────────┼────────────────┼────────────────────────┤
│ dense_4 (Dense)           │ (None, 64)             │         16,448 │ desc_embeddings[0][0]  │
├───────────────────────────┼────────────────────────┼────────────────┼────────────────────────┤
│ dense (Dense)             │ (None, 32)             │          2,080 │ flatten[0][0]          │
├───────────────────────────┼────────────────────────┼────────────────┼────────────────────────┤
│ dense_1 (Dense)           │ (None, 32)             │             64 │ mount_type[0][0]       │
├───────────────────────────┼────────────────────────┼────────────────┼────────────────────────┤
│ dense_2 (Dense)           │ (None, 32)             │         20,512 │ flatten_1[0][0]        │
├───────────────────────────┼────────────────────────┼────────────────┼────────────────────────┤
│ dense_3 (Dense)           │ (None, 32)             │          2,080 │ flatten_2[0][0]        │
├───────────────────────────┼────────────────────────┼────────────────┼────────────────────────┤
│ dense_5 (Dense)           │ (None, 32)             │          2,080 │ dense_4[0][0]          │
├───────────────────────────┼────────────────────────┼────────────────┼────────────────────────┤
│ concatenate (Concatenate) │ (None, 160)            │              0 │ dense[0][0],           │
│                           │                        │                │ dense_1[0][0],         │
│                           │                        │                │ dense_2[0][0],         │
│                           │                        │                │ dense_3[0][0],         │
│                           │                        │                │ dense_5[0][0]          │
├───────────────────────────┼────────────────────────┼────────────────┼────────────────────────┤
│ first_dense (Dense)       │ (None, 64)             │         10,304 │ concatenate[0][0]      │
├───────────────────────────┼────────────────────────┼────────────────┼────────────────────────┤
│ output (Dense)            │ (None, 12)             │            780 │ first_dense[0][0]      │
└───────────────────────────┴────────────────────────┴────────────────┴────────────────────────┘
 Total params: 67,532 (263.80 KB)
 Trainable params: 67,532 (263.80 KB)
 Non-trainable params: 0 (0.00 B)

[IN]

trained_model = model.fit(
    X_train,
    y_train,
    epochs=10,
    batch_size=2,
    validation_data=(X_test, y_test)
)

[OUT]

Epoch 1/10
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-142-a0316ed7ba4d> in <cell line: 1>()
----> 1 trained_model = model.fit(
      2     X_train,
      3     y_train,
      4     epochs=10,
      5     batch_size=2,

1 frames
/usr/local/lib/python3.10/dist-packages/keras/src/layers/input_spec.py in assert_input_compatibility(input_spec, inputs, layer_name)
    225                     None,
    226                 }:
--> 227                     raise ValueError(
    228                         f'Input {input_index} of layer "{layer_name}" is '
    229                         f"incompatible with the layer: expected axis {axis} "

ValueError: Exception encountered when calling Functional.call().

Input 0 of layer "dense_4" is incompatible with the layer: expected axis -1 of input shape to have value 256, but received input with shape (2, 1)

Arguments received by Functional.call():
  • inputs={'manufacturer': 'tf.Tensor(shape=(2, 1), dtype=int64)', 'mount_type': 'tf.Tensor(shape=(2, 1), dtype=bool)', 'description': 'tf.Tensor(shape=(2, 10), dtype=int32)', 'pins': 'tf.Tensor(shape=(2, 1), dtype=float32)', 'desc_embeddings': 'tf.Tensor(shape=(2, 256), dtype=float32)'}
  • training=True
  • mask={'manufacturer': 'None', 'mount_type': 'None', 'description': 'None', 'pins': 'None', 'desc_embeddings': 'None'}

Hi @Josh_Mcdonald, thanks for posting this issue,

From the error it seems like the desc_embeddings has the shape (num_samples ,256) but it’s receiving incorrect shaped input.

Could you try reshaping it

In X_train
desc_embeddings': desc_embeddings[train_indices].reshape(-1, 256)
In X_test
desc_embeddings': desc_embeddings[test_indices].reshape(-1, 256)

Let me know if that works for you, if it doesn’t please provide some sample dataset to reproduce.

Thanks!