I tried to train a convolutional neural network to predict labels (categorical data) from criteria (free text). This should be a straightforward classification problem: there are 7 labels, so my network has 7 output neurons with sigmoid activation functions.
I encoded the training data in a .txt file using a simple pipe-delimited format: a text descriptor ('criteria') followed by a categorical label ('label'):
'criteria'|'label'
Here's a peek at one entry from the data file:
Headache location: Bilateral (intracranial). Facial pain: Nil. Pain quality: Pulsating. Thunderclap onset: Nil. Pain duration: 11. Pain episodes per month: 26. Chronic pain: No. Remission between episodes: Yes. Remission duration: 25. Pain intensity: Moderate (4-7). Aggravating/triggering factors: Innocuous facial stimuli, Bathing and/or showering, Chocolate, Exertion, Cold stimulus, Emotion, Valsalva maneuvers. Relieving factors: Nil. Headaches worse in the mornings and/or night: Nil. Associated symptoms: Nausea and/or vomiting. Reversible symptoms: Nil. Examination findings: Nil. Aura present: Yes. Reversible aura: Motor, Sensory, Brainstem, Visual. Duration of auras: 47. Aura in relation to headache: Aura proceeds headache. History of CNS disorders: Multiple Sclerosis, Angle-closure glaucoma. Past history: Nil. Temporal association: No. Disease worsening headache: Nil. Improved cause: Nil. Pain ipsilateral: Nil. Medication overuse: Nil. Establish drug overuse: Nil. Investigations: Nil.|Migraine with aura
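As a quick sanity check (my own verification snippet, not part of the training script), the pipe-delimited format should parse into the two columns I expect:

import pandas as pd

# Read a single row to confirm the '|' delimiter splits criteria from label
row = pd.read_csv('Data/ICHD3_Database.txt', names=['criteria', 'label'], sep='|', nrows=1)
print(row['label'].iloc[0])  # prints: Migraine with aura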
Here’s a snippet of the code from the training algorithm:
'''A. IMPORT DATA'''
# Libraries used throughout the snippet
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras import layers
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping

dataset = pd.read_csv('Data/ICHD3_Database.txt', names=['criteria', 'label'], sep='|')
features = dataset['criteria'].values
labels = dataset['label'].values
'''B. DATA PRE-PROCESSING: BAG OF WORDS (BOW) MODEL'''
def BOW_Model(features):
    # Note: labels is read from the enclosing module scope
    features_train, features_test, labels_train, labels_test = train_test_split(features, labels, test_size=0.33, random_state=42)
    vectorizer = CountVectorizer()
    features_train = vectorizer.fit_transform(features_train)  # learn vocabulary on the training split only
    features_test = vectorizer.transform(features_test)
    return features_train, features_test, labels_train, labels_test
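For reference, the BOW variant would be called like this; it is not the path used in the failing run below:

# Alternative pre-processing (unused below): bag-of-words features
features_train, features_test, labels_train, labels_test = BOW_Model(features)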
'''B. DATA PRE-PROCESSING: WORD EMBEDDINGS'''
def word_embeddings(features):
    maxlen = 200
    features_train, features_test, labels_train, labels_test = train_test_split(features, labels, test_size=0.33, random_state=42)
    tokenizer = Tokenizer(num_words=5000)
    tokenizer.fit_on_texts(features_train)
    features_train = pad_sequences(tokenizer.texts_to_sequences(features_train), padding='post', maxlen=maxlen)
    features_test = pad_sequences(tokenizer.texts_to_sequences(features_test), padding='post', maxlen=maxlen)
    vocab_size = len(tokenizer.word_index) + 1  # Adding 1 because of reserved 0 index
    tokenizer.fit_on_texts(labels_train)
    labels_train = pad_sequences(tokenizer.texts_to_sequences(labels_train), padding='post', maxlen=maxlen)
    labels_test = pad_sequences(tokenizer.texts_to_sequences(labels_test), padding='post', maxlen=maxlen)
    vocab_size += len(tokenizer.word_index) + 1  # Adding 1 because of reserved 0 index
    return features_train, features_test, labels_train, labels_test, vocab_size, maxlen
features_train, features_test, labels_train, labels_test, vocab_size, maxlen = word_embeddings(features) # Pre-process text using word embeddings
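Printing the shapes of the tensors that go into model.fit() (a quick diagnostic, consistent with the shapes in the traceback further down):

print(features_train.shape)  # (n_samples, 200) after padding
print(labels_train.shape)    # (n_samples, 200) -- the labels were tokenized and padded too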
'''C. CREATE THE MODEL'''
def design_model(features, hidden_layers=2, number_neurons=128):
    model = Sequential(name="My_Sequential_Model")
    model.add(layers.Embedding(input_dim=vocab_size, output_dim=50, input_length=maxlen))
    model.add(layers.Conv1D(128, 5, activation='relu'))
    model.add(layers.GlobalMaxPool1D())
    for i in range(hidden_layers):
        model.add(Dense(number_neurons, activation='relu'))
        model.add(Dropout(0.2))
    model.add(Dense(7, activation='sigmoid'))  # 7 output neurons, one per label
    opt = Adam(learning_rate=0.01)
    model.compile(loss='binary_crossentropy', metrics=['mae'], optimizer=opt)
    return model
'''E. TRAIN THE MODEL'''
model = design_model(features_train, hidden_layers=2, number_neurons=30)
history = model.fit(features_train, labels_train, epochs=10, batch_size=16, verbose=0, validation_split=0.33, callbacks=[EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=20)])
But when I run the model, I get the following error:
Traceback (most recent call last):
  File "c:\Users\user\Desktop\Deep Learning\deep_learning_headache.py", line 112, in <module>
    history = model.fit(features_train, labels_train, epochs=10, batch_size=16, verbose=0, validation_split=0.33, callbacks=[EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=20)]) # 18. Fit model using optimized epochs & batch size. When the training performance reaches the plateau or starts degrading, the learning stops.
  File "C:\Users\user\AppData\Local\Programs\Python\Python39\lib\site-packages\keras\src\utils\traceback_utils.py", line 70, in error_handler
    raise e.with_traceback(filtered_tb) from None
  File "C:\Users\user\AppData\Local\Temp\__autograph_generated_file6x9w264i.py", line 15, in tf__train_function
    retval_ = ag__.converted_call(ag__.ld(step_function), (ag__.ld(self), ag__.ld(iterator)), None, fscope)
ValueError: in user code:

    File "C:\Users\user\AppData\Local\Programs\Python\Python39\lib\site-packages\keras\src\engine\training.py", line 1401, in train_function *
        return step_function(self, iterator)
    File "C:\Users\user\AppData\Local\Programs\Python\Python39\lib\site-packages\keras\src\engine\training.py", line 1384, in step_function **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "C:\Users\user\AppData\Local\Programs\Python\Python39\lib\site-packages\keras\src\engine\training.py", line 1373, in run_step **
        outputs = model.train_step(data)
    File "C:\Users\user\AppData\Local\Programs\Python\Python39\lib\site-packages\keras\src\engine\training.py", line 1151, in train_step
        loss = self.compute_loss(x, y, y_pred, sample_weight)
    File "C:\Users\user\AppData\Local\Programs\Python\Python39\lib\site-packages\keras\src\engine\training.py", line 1209, in compute_loss
        return self.compiled_loss(
    File "C:\Users\user\AppData\Local\Programs\Python\Python39\lib\site-packages\keras\src\engine\compile_utils.py", line 277, in __call__
        loss_value = loss_obj(y_t, y_p, sample_weight=sw)
    File "C:\Users\user\AppData\Local\Programs\Python\Python39\lib\site-packages\keras\src\losses.py", line 143, in __call__
        losses = call_fn(y_true, y_pred)
    File "C:\Users\user\AppData\Local\Programs\Python\Python39\lib\site-packages\keras\src\losses.py", line 270, in call **
        return ag_fn(y_true, y_pred, **self._fn_kwargs)
    File "C:\Users\user\AppData\Local\Programs\Python\Python39\lib\site-packages\keras\src\losses.py", line 2532, in binary_crossentropy
        backend.binary_crossentropy(y_true, y_pred, from_logits=from_logits),
    File "C:\Users\user\AppData\Local\Programs\Python\Python39\lib\site-packages\keras\src\backend.py", line 5822, in binary_crossentropy
        return tf.nn.sigmoid_cross_entropy_with_logits(

    ValueError: `logits` and `labels` must have the same shape, received ((None, 1) vs (None, 200)).
Where am I going wrong?