Hi @Guy_Berreby, what type of music data are you working on? For instance, if it’s in a MIDI format, then I can see why you’re using the LSTM architecture. I also noticed you’re using tfio.audio.AudioIOTensor
( tfio.audio.AudioIOTensor | TensorFlow I/O) - maybe your two datasets are waveform-based. Can you please share some info and how you’re loading the data (code)?
I’ve summarized your code and the task below with some formatting, based on the information in the StackOverflow post you shared. @Guy_Berreby please let me know if the spacing and other details are correct — I had to make some minor adjustments:
Your ML task
- Music genre classification with two different genres: `bm` and `dm`
Code
# Genre classifier: embedding -> two stacked LSTMs -> dense softmax head.
# NOTE(review): Embedding's input_dim is set to maxLen (a sequence length),
# but input_dim is supposed to be the vocabulary size — this only works if
# the inputs are integer ids in [0, maxLen). Confirm against the data.
genre_layers = [
    # 2-dim embedding; mask_zero=True treats 0 ids as padding downstream.
    layers.Embedding(input_dim=maxLen, output_dim=2, mask_zero=True),
    layers.LSTM(8, return_sequences=True),
    layers.Dropout(0.2),
    layers.LSTM(8),
    layers.Dropout(0.2),
    layers.Dense(16, activation="relu"),
    layers.Dropout(0.2),
    # Two-way softmax over the genre classes.
    layers.Dense(2, activation="softmax"),
]

model = keras.Sequential()
for genre_layer in genre_layers:
    model.add(genre_layer)

model.compile(loss='categorical_crossentropy', optimizer='adam')
def modelTrainGen(maxLen):
    """Yield (audio_tensor, one_hot_label) training pairs for the two genres.

    Lists the files of both training directories, interleaves and shuffles
    them, then lazily loads each file, collapses its channels, pads/truncates
    it to ``maxLen`` samples, and yields it with its class label.

    Args:
        maxLen: target sequence length every waveform is padded/truncated to.

    Yields:
        (tf.Tensor of shape (maxLen,), tf.Tensor label) pairs.
    """
    # One type of music - training set
    bmTrainDirectory = '/content/drive/.../...train/'
    # Another type of music - training set
    dmTrainDirectory = '/content/drive/.../...train/'
    dmTrainFileNames = os.listdir(dmTrainDirectory)
    bmTrainFileNames = os.listdir(bmTrainDirectory)
    maxAudioLen = maxLen

    # One-hot labels as (2, 1) column tensors.
    # NOTE(review): the model head is Dense(2, softmax), which outputs shape
    # (2,) — these (2, 1) labels may need reshaping for the loss; confirm.
    bmTensor = tf.convert_to_tensor([[1], [0]])
    dmTensor = tf.convert_to_tensor([[0], [1]])

    # Interleave the two listings.  zip() truncates to the shorter listing,
    # which keeps the classes balanced but silently drops surplus files of
    # the larger class.
    allFileNames = []
    for bmFileName, dmFileName in zip(bmTrainFileNames, dmTrainFileNames):
        allFileNames.append((bmFileName, 1))
        allFileNames.append((dmFileName, 0))
    random.shuffle(allFileNames)

    # Map the class flag to (directory, label) once, instead of duplicating
    # the whole load/pad/yield pipeline in two identical branches.
    classInfo = {
        1: (bmTrainDirectory, bmTensor),
        0: (dmTrainDirectory, dmTensor),
    }
    for fileName, val in allFileNames:
        directory, label = classInfo[val]
        yield (_loadPaddedAudio(directory + fileName, maxAudioLen), label)


def _loadPaddedAudio(path, maxAudioLen):
    """Load one audio file and return a (maxAudioLen,) float tensor.

    Channels are collapsed by taking the per-sample maximum; the waveform is
    then zero-padded (or truncated) at the end to ``maxAudioLen`` samples.
    """
    audio = tfio.audio.AudioIOTensor(path)
    # Transpose to (channels, samples), then take the per-sample max to
    # collapse multi-channel audio into a single sequence.
    audio_slice = tf.reduce_max(tf.transpose(audio[0:]), 0)
    del audio  # release the file-backed tensor before the padding copy
    padded_x = tf.keras.preprocessing.sequence.pad_sequences(
        [audio_slice], padding="post", dtype=float, maxlen=maxAudioLen)
    del audio_slice
    return tf.convert_to_tensor(padded_x[0])
(The following TensorFlow docs are for waveform-based data - could be useful in future: