Hello TensorFlow Community!
I am building an ML model that determines direction from stereo audio signals, and I am having quite a bit of trouble setting up my inputs/outputs properly. My first model is just a proof of concept that should be able to tell whether the sound is coming from the right or the left (1 or 0). My input data consists of 318 audio files (159 with sound coming from the right and 159 with sound coming from the left) of varying sounds and lengths, so my input is a list of NumPy arrays of size Nx2, where the two columns are the left and right channels. My output is a NumPy array of 1's and 0's representing right and left, respectively. A minimal sketch of this layout is shown below.
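To make the shapes concrete, here is a toy sketch of the data layout (the clip lengths and values are made up purely for illustration):
import numpy as np

# Two fake stereo clips of different lengths (samples x 2 channels), for illustration only.
clip_right = np.random.randn(33792, 2).astype('float32')  # sound coming from the right
clip_left = np.random.randn(22528, 2).astype('float32')   # sound coming from the left

X = [clip_right, clip_left]                # list of Nx2 arrays; N varies per file
Y = np.array([1.0, 0.0], dtype='float32')  # one label per clip: 1 = right, 0 = left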
Here is my code:
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras import layers
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint
from sklearn.model_selection import train_test_split
from datetime import datetime
from sklearn import metrics
from scipy.io import wavfile
import os
import glob
# Load in Right Side .WAV Data.
X1 = []
count1 = 0
database_path = "C:\\Users\\andre\\OneDrive\\Documents\\ESI2022\\MLDatabases\\Right\\"
for filename in glob.glob(os.path.join(database_path, '*.wav')):
    X1.append(wavfile.read(filename)[1])
    count1 = count1 + 1
# Create Output data that is the same length as the input data.
Y1 = np.ones(len(X1), dtype='float32').tolist()
# Load in Left side .WAV Data.
X2 = []
count2 = 0
database_path2 = "C:\\Users\\andre\\OneDrive\\Documents\\ESI2022\\MLDatabases\\Left\\"
for filename2 in glob.glob(os.path.join(database_path2, '*.wav')):
    X2.append(wavfile.read(filename2)[1])
    count2 = count2 + 1
# Create Output data that is the same length as the input data.
Y2 = np.zeros(len(X2), dtype='float32').tolist()
# Concatenate the Left and Right .WAV data and the output labels.
# The clips all have different lengths, so X stays a Python list of Nx2 arrays.
X = X1 + X2
Y = np.asarray(Y1 + Y2).astype(np.float32)
# Split data into test training data.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=0, shuffle=True)
'''
print(X[1])
time = np.linspace(0.,33792, 33792)
plt.plot(time, X[1][:,1], label="Left channel")
plt.plot(time, X[1][:,0], label="Right channel")
plt.legend()
plt.xlabel("Time [s]")
plt.ylabel("Amplitude")
plt.show()
'''
# Create the Model
model = Sequential()
# Add an LSTM layer with 1 unit and an unspecified input sequence length.
model.add(layers.LSTM(1, batch_input_shape=(len(X_train), None, 1), return_sequences=True))
model.add(layers.LSTM(1, return_sequences=False))
# Compile Model
model.compile(loss='mean_absolute_error', metrics=['accuracy'], optimizer='adam')
model.summary()
# Define Training Parameters
num_epochs = 200
num_batch_size = 1
# Save the most accurate model to file. (Verbosity Gives more information)
checkpointer = ModelCheckpoint(filepath="SavedModels/checkpointModel.hdf5", verbose=1, save_best_only=True)
# Start the timer
start = datetime.now()
# Train the model
model.fit(X_train, Y_train, batch_size=num_batch_size, epochs=num_epochs, validation_data=(X_test, Y_test), callbacks=[checkpointer], verbose=1)
# Get and Print Model Validation Accuracy
test_accuracy = model.evaluate(X_test, Y_test, verbose=0)
print(test_accuracy[1])
And here is the error I am getting:
Data cardinality is ambiguous:
x sizes: 11264, 419840, 30720, 24576, 22528, 25600, 24576, 27648, 55296, 30720, 18432, 72704, 143360, 22528, 22528, 72704, 33792, 17408, 17408, 25600, 27648, 27648, 69632, 29696, 25600, 110592, 21504, 27648, 27648, 10240, 22528, 143360, 9216, 22528, 80896, 361472, 71680, 9216, 20480, 21504, 17408, 30720, 24576, 10240, 59392, 28672, 13312, 27648, 59392, 254976, 7168, 25600, 8192, 18432, 254976, 14336, 66560, 23552, 12288, 58368, 33792, 7168, 419840, 143360, 23552, 59392, 23552, 19456, 27648, 75776, 47104, 114688, 254976, 22528, 19456, 3072, 25600, 25600, 24576, 14336, 14336, 24576, 25600, 41984, 19456, 45056, 19456, 19456, 25600, 21504, 75776, 86016, 10240, 8192, 24576, 25600, 24576, 22528, 6144, 22528, 72704, 12288, 23552, 32768, 21504, 22528, 20480, 41984, 41984, 13312, 21504, 254976, 69632, 24576, 21504, 15360, 19456, 171008, 80896, 26624, 27648, 55296, 66560, 19456, 22528, 89088, 45056, 25600, 21504, 12288, 33792, 30720, 55296, 110592, 177152, 19456, 25600, 80896, 19456, 23552, 23552, 10240, 71680, 9216, 71680, 22528, 220160, 110592, 171008, 41984, 134144, 10240, 12288, 24576, 19456, 143360, 19456, 10240, 8192, 27648, 254976, 20480, 19456, 15360, 96256, 58368, 8192, 21504, 220160, 30720, 24576, 45056, 3072, 80896, 11264, 14336, 23552, 177152, 11264, 13312, 9216, 25600, 21504, 110592, 254976, 29696, 22528, 10240, 143360, 78848, 19456, 62464, 7168, 143360, 14336, 33792, 96256, 28672, 77824, 14336, 25600, 27648, 10240, 27648, 12288, 22528, 27648, 22528, 30720, 27648, 84992, 9216, 13312, 86016, 22528, 14336, 110592, 24576, 24576, 80896, 9216, 67584, 84992, 22528, 72704, 19456, 47104, 10240, 22528, 14336, 27648, 14336, 19456, 9216, 21504, 23552, 39936, 22528, 21504, 27648, 71680, 134144, 110592, 29696, 14336, 24576, 124928, 17408, 19456, 33792, 25600, 19456, 11264, 32768
y sizes: 254
Make sure all arrays contain the same number of samples.
It seems to treat my entire output array as the label for a single input, rather than treating each element of the output array as the label for its corresponding input file. Does anyone know how to fix this? I saw a few similar questions on Stack Overflow, but none of them answered what I was looking for.
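In case it helps, this is the kind of reshaping I suspect I need: zero-padding every clip (the X list built above) to a common length with tf.keras.preprocessing.sequence.pad_sequences, so that X becomes a single 3-D array whose first dimension matches the number of labels. I am not sure this is the right approach:
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Zero-pad every clip to the length of the longest one, so X becomes one array
# of shape (num_files, max_len, 2) -- one sample per file, matching the 318 labels in Y.
X_padded = pad_sequences(X, dtype='float32', padding='post')
print(X_padded.shape, Y.shape)
But I don't know whether padding like this is the expected way to handle variable-length clips, or whether the input shape of the first LSTM layer also has to change.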