I’m working on this project: Jargon Detection
Data was cleaned and padded sequences were formed.
- Language: Python (3.9.x)
- Framework: tensorflow (2.12.x) [tensorflow-deps 2.9.0 0 apple
tensorflow-estimator 2.12.0 pypi_0 pypi
tensorflow-hub 0.13.0 pypi_0 pypi
tensorflow-macos 2.12.0 pypi_0 pypi
tensorflow-metal 0.8.0 pypi_0 pypi]
Model code:
from tensorflow.keras.layers import Input, LSTM, Embedding, Dense, TimeDistributed, Dropout, Bidirectional, Lambda, Layer, Concatenate
from tensorflow.keras.models import Sequential, Model
import tensorflow_hub as hub
elmo_model = hub.load("https://tfhub.dev/google/elmo/3").signatures["default"]
class ElmoEmbeddings(Layer):
def __init__(self, elmo_model, max_len, **kwargs):
super(ElmoEmbeddings, self).__init__(**kwargs)
self.elmo_model = elmo_model
self.max_len = max_len
def call(self, inputs):
embeddings_list = []
# Iterate over input sequences and compute embeddings
for sequence in inputs:
input_tensor = tf.constant(sequence)
embeddings = elmo_model(input_tensor)['elmo']
print(f"embeddings shape: {embeddings.shape}")
# Extract the tensor from the dictionary and append to the list
embeddings_list.append(embeddings)
# Stack the embeddings
stacked_embeddings = tf.stack(embeddings_list)
# Squeeze the extra dimension
stacked_embeddings = tf.squeeze(stacked_embeddings, axis=2)
return stacked_embeddings
def compute_output_shape(self, input_shape):
return (input_shape[0], self.max_len, 1024)
# Define inputs
input_layer = Input(shape=(max_len,), dtype=tf.string, name='input_layer')
elmo_embeddings = ElmoEmbeddings(elmo_model, max_len, name='elmo_embeddings')(input_layer)
lstm_output = Bidirectional(LSTM(units=512, return_sequences=True, recurrent_dropout=0.2, dropout=0.2, name='lstm'))(elmo_embeddings)
output = TimeDistributed(Dense(n_tags, activation='softmax', name='dense'))(lstm_output)
# Define model
tech_jargon_model = Model(inputs=input_layer, outputs=output)
tech_jargon_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
# Train model
history = tech_jargon_model.fit(
tf.constant(X_tr), y_tr,
validation_data=(tf.constant(X_val), y_val),
batch_size=batch_size,
epochs=3,
verbose=1
)
Input:
X_tr : 18916
X_tr[0] : 25
X_tr: <class 'list'>
y_tr : 18916
y_tr[0] : 25
y_tr: <class 'numpy.ndarray'>
Error:
TypeError: Exception encountered when calling layer "elmo_embeddings" (type ElmoEmbeddings).
in user code:
File "/var/folders/yq/qh4ny6ms3kz0yqqj4q2gqkn00000gn/T/ipykernel_4686/880792685.py", line 11, in call *
input_tensor = tf.constant(sequence)
TypeError: Expected any non-tensor type, but got a tensor instead.
Call arguments received by layer "elmo_embeddings" (type ElmoEmbeddings):
• inputs=tf.Tensor(shape=(None, 25), dtype=string)
I tried many different ways to pass the input to elmo_model but every time some error pops up.
I used the same above method in a sample as shown below and got the expected output but was unable to replicate the same when used in a Model context.
Sample and code:
tokens_input = [[“the”, “cat”, “is”, “on”, “the”, “mat”],
[“dogs”, “are”, “in”, “the”, “fog”, “”]]
tokens_input = tf.constant(tokens_input)
print(f"tokens_input: {type(tokens_input)}")
# Initialize an empty list to store embeddings
embeddings_list = []
# Iterate over input sequences and compute embeddings
for sequence in tokens_input:
input_tensor = tf.constant(sequence)
embeddings = elmo_model(input_tensor)['elmo']
print(f"embeddings shape: {embeddings.shape}")
# Extract the tensor from the dictionary and append to the list
embeddings_list.append(embeddings)
# Stack the embeddings
stacked_embeddings = tf.stack(embeddings_list)
# Squeeze the extra dimension
stacked_embeddings = tf.squeeze(stacked_embeddings, axis=2)
# Print the shape of the stacked embeddings
print(f"Stacked embeddings shape: {stacked_embeddings.shape}")
print(f"Stacked embeddings type: {type(stacked_embeddings)}")
**Output:**
tokens_input: <class 'tensorflow.python.framework.ops.EagerTensor'>
embeddings shape: (6, 1, 1024)
embeddings shape: (6, 1, 1024)
Stacked embeddings shape: (2, 6, 1024)
Stacked embeddings type: <class 'tensorflow.python.framework.ops.EagerTensor'>
Given the model definition and problem statement, how can this issue be resolved?