I have 8+26 gb memory, 90% free… so there are 30GB free RAM memory… and this happens…
Same:
I did everything they suggest, … can anybody reproduce this=?
Same as:
import h5py
import os
import librosa
import wave
import numpy as np
import tensorflow as tf
import scipy
import numpy
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import Conv3D, MaxPooling3D, Flatten, Dense
import subprocess
import mel_features
import math
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from librosa import display
from tensorflow import keras
from sklearn.model_selection import train_test_split
from tqdm import tqdm
import warnings
warnings.filterwarnings('ignore', category=UserWarning)
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
def samples_to_numpy(file_path):
sr, wav_file = scipy.io.wavfile.read(file_path)#capable of float32
one_array = numpy.full((len(wav_file)), 0, dtype=np.int8)
two_array = numpy.full((len(wav_file)), 0, dtype=np.int8)
three_array = numpy.full((len(wav_file)), 0, dtype=np.int8)
for i in range(len(wav_file)):
float_32 = wav_file[i]
#the trick
one = np.int8(0)
two = np.int8(0)
three = np.int8(0)
if float_32 > 0 or float_32 == 0:
while (float_32 > 1) and (one < 128):
one += np.int8(1) # weight 1
float_32 -= 1
while (float_32 > (1/128) and (two < 128)):
two += np.int8(1) #weight 1/128
float_32 -= (1/128)
while (float_32 > (1/16384) and (three < 128)):
three += np.int8(1) # weight 1/16384
float_32 -= 1/16384
elif float_32 < 0:
while (float_32 < (-1) and (one > (-127))):
one -= np.int8(1)#weight 1
float_32 += 1
while (float_32 < (-(1/128)) and (two > (-127))):
two -= np.int8(1) #weight 1/128
float_32 += (1/128)
while (float_32 < (-(1/16384)) and (three > (-127))):
three -= np.int8(1)#weight 1/16384
float_32 += 1/16384
one_array[i] = one
two_array[i] = two
three_array[i] = three
return one_array, two_array, three_array
def normalize(samples):
#min/max
j = 999
k = -999
for i in samples:
if i < j:
j = i
if i > k:
k = i
if j >= k:
print('ERROR, invalid bounds. Cannot Normalize!!!')
print('what is wrong? +++')
if k == 0 and j == 0:
return
factor1 = 128/k
factor2 = (-127)/j
normal = []
if factor1 > factor2 and factor2 != 0:
for i in samples:
integer1 = round(i*factor2)
normal.append(int(integer1))
elif factor1 != 0:
for i in samples:
integer1 = round(i*factor1)
normal.append(int(integer1))
else:
return
norm = np.asarray(normal, dtype=np.int8)
return norm
def compute_spectrogram(audio_samples, audio_sample_rate_hz):
"""Compute log-mel spectrogram and scale it to uint8."""
samples = audio_samples.flatten() / float(2**15)
#samples = normalize(audio_samples)
spectrogram = 30 * (
mel_features.log_mel_spectrogram(
audio_samples,
audio_sample_rate_hz,
log_offset=0.001,
window_length_secs=0.025,
hop_length_secs=0.010,
num_mel_bins=256,
lower_edge_hertz=10,
upper_edge_hertz=1000) - np.log(1e-3))
return spectrogram
def load_and_preprocess_data(data_dir, classes, target_shape=(128, 128)):
data = []
labels = []
for i, class_name in enumerate(classes):
class_dir = os.path.join(data_dir, class_name)
for filename in os.listdir(class_dir):
if filename.endswith('.wav'):
file_path = os.path.join(class_dir, filename)
audio_data, sample_rate = librosa.load(file_path, sr=None)
# Perform preprocessing (e.g., convert to Mel spectrogram and resize)
mel_spectrogram = librosa.feature.melspectrogram(y=audio_data, sr=sample_rate)
mel_spectrogram = resize(np.expand_dims(mel_spectrogram, axis=-1), target_shape)
data.append(mel_spectrogram)
labels.append(i)
return np.array(data), np.array(labels)
def train2():
feature_list = []
label_list = []
file = 'final0.wav'
audio_path = os.path.join('/home/audio/', file)
audio, sr = librosa.load(path=audio_path, sr=None)
audio = librosa.effects.time_stretch(y=audio, rate=len(audio)/sr)
# Calculate features and get the label from the filename
mels = librosa.feature.melspectrogram(y=audio, sr=sr, n_fft=2048, hop_length=512)
mels_db = librosa.power_to_db(S=mels, ref=1.0)
feature_list.append(mels_db.reshape((256, 256, 1)))
label_list.append(file[0])
features = np.array(feature_list)
labels = np.array(label_list)
model = keras.Sequential(layers=[
keras.layers.InputLayer(input_shape=features[0].shape),
keras.layers.Conv2D(16, 3, padding='same', activation=keras.activations.relu),
keras.layers.MaxPooling2D(),
keras.layers.Conv2D(32, 3, padding='same', activation=keras.activations.relu),
keras.layers.MaxPooling2D(),
keras.layers.Flatten(),
keras.layers.Dropout(0.3),
keras.layers.Dense(64, activation=keras.activations.relu),
keras.layers.Dense(10, activation=keras.activations.softmax)
])
model.compile(optimizer=keras.optimizers.Adam(), loss=keras.losses.categorical_crossentropy, metrics=['accuracy'])
print(model.summary())
# Set parameters for data splitting and training
TEST_SIZE = 0.3
BATCH_SIZE = 64
EPOCHS = 50
# Encode Labels
encoded_labels = tf.one_hot(indices=labels, depth=10)
# Split dataset to train and test data
X_train, X_test, y_train, y_test = train_test_split(features, encoded_labels.numpy(), test_size=TEST_SIZE)
# Train the model
early_stopping = keras.callbacks.EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
history = model.fit(x=X_train, y=y_train, validation_split=TEST_SIZE, epochs=EPOCHS, batch_size=BATCH_SIZE, callbacks=[early_stopping])
# Plot the training history
fig, axs = plt.subplots(2)
fig.set_size_inches(12, 8)
fig.suptitle('Training History', fontsize=16)
axs[0].plot(history.epoch, history.history['loss'], history.history['val_loss'])
axs[0].set(title='Loss', xlabel='Epoch', ylabel='Loss')
axs[0].legend(['loss', 'val_loss'])
axs[1].plot(history.epoch, history.history['accuracy'], history.history['val_accuracy'])
axs[1].set(title='Accuracy', xlabel='Epoch', ylabel='Accuracy')
axs[1].legend(['accuracy', 'val_accuracy'])
plt.show()
def train():
#Split data into training and testing sets
#data, labels = load_and_preprocess_data(data_dir, classes)
no_epochs = 30
learning_rate = 0.001
no_classes = 10
verbosity = 1
batch_size = 1
data1, data2, data3 = samples_to_numpy('/home/final0.wav')
mel1 = compute_spectrogram(audio_samples=data1,audio_sample_rate_hz=192000)
mel2 = compute_spectrogram(audio_samples=data2,audio_sample_rate_hz=192000)
mel3 = compute_spectrogram(audio_samples=data3,audio_sample_rate_hz=192000)
# Create a neural network model
# Create the model)
y = np.zeros(len(mel1))
x = np.array(mel1,np.uint8)
x = x.reshape(len(mel1), 256, -1 )
for i in range(len(mel1)):
y[i] = i
model = Sequential(layers=[
keras.layers.InputLayer(input_shape=x.shape),
keras.layers.Conv2D(16,3, padding='same', activation=keras.activations.relu),
keras.layers.MaxPooling2D(),
keras.layers.Conv2D(32,3, padding='same', activation=keras.activations.relu),
keras.layers.MaxPooling2D(),
keras.layers.Flatten(),
keras.layers.Dropout(0.3),
keras.layers.Dense(64, activation=keras.activations.relu),
keras.layers.Dense(10, activation=keras.activations.softmax)
])
print(mel1)
model.compile(optimizer=keras.optimizers.Adam(), loss=keras.losses.categorical_crossentropy,experimental_run_tf_function = False, metrics=['accuracy'])
history = model.fit(x,y, batch_size=batch_size, epochs=no_epochs, verbose=verbosity)
model.save('/home/model_full.h5')
model = tf.keras.models.load_model('/home/model_full.h5', compile=False)
converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()
with tf.io.gfile.GFile('/home/model.tflite', 'wb') as f:
f.write(tflite_model)
def test_audio(file_path, model):
# Load and preprocess the audio file
audio_data, sample_rate = librosa.load(file_path, sr=None)
mel_spectrogram = librosa.feature.melspectrogram(y=audio_data, sr=sample_rate)
mel_spectrogram = resize(np.expand_dims(mel_spectrogram, axis=-1), target_shape)
mel_spectrogram = tf.reshape(mel_spectrogram, (1,) + target_shape + (1,))
# Make predictions
predictions = model.predict(mel_spectrogram)
# Get the class probabilities
class_probabilities = predictions[0]
# Get the predicted class index
predicted_class_index = np.argmax(class_probabilities)
return class_probabilities, predicted_class_index
# Test an audio file
#test_audio_file = 'dog_barking_4.wav'
#class_probabilities, predicted_class_index = test_audio(test_audio_file, model)
# Display results for all classes
def display():
for i, class_label in enumerate(classes):
probability = class_probabilities[i]
print(f'Class: {class_label}, Probability: {probability:.4f}')
# Calculate and display the predicted class and accuracy
predicted_class = classes[predicted_class_index]
accuracy = class_probabilities[predicted_class_index]
print(f'The audio is classified as: {predicted_class}')
print(f'Accuracy: {accuracy:.4f}')
if __name__=="__main__":
train()
subprocess.run(["edgetpu_compiler", "/home/model.tflite"])
[quote="user517, post:1, topic:49620, full:true"]
'''
import h5py
import os
import librosa
import wave
import numpy as np
import tensorflow as tf
import scipy
import numpy
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import Conv3D, MaxPooling3D, Flatten, Dense
import subprocess
import mel_features
import math
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from librosa import display
from tensorflow import keras
from sklearn.model_selection import train_test_split
from tqdm import tqdm
import warnings
warnings.filterwarnings('ignore', category=UserWarning)
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
def samples_to_numpy(file_path):
sr, wav_file = scipy.io.wavfile.read(file_path)#capable of float32
one_array = numpy.full((len(wav_file)), 0, dtype=np.int8)
two_array = numpy.full((len(wav_file)), 0, dtype=np.int8)
three_array = numpy.full((len(wav_file)), 0, dtype=np.int8)
for i in range(len(wav_file)):
float_32 = wav_file[i]
#the trick
one = np.int8(0)
two = np.int8(0)
three = np.int8(0)
if float_32 > 0 or float_32 == 0:
while (float_32 > 1) and (one < 128):
one += np.int8(1) # weight 1
float_32 -= 1
while (float_32 > (1/128) and (two < 128)):
two += np.int8(1) #weight 1/128
float_32 -= (1/128)
while (float_32 > (1/16384) and (three < 128)):
three += np.int8(1) # weight 1/16384
float_32 -= 1/16384
elif float_32 < 0:
while (float_32 < (-1) and (one > (-127))):
one -= np.int8(1)#weight 1
float_32 += 1
while (float_32 < (-(1/128)) and (two > (-127))):
two -= np.int8(1) #weight 1/128
float_32 += (1/128)
while (float_32 < (-(1/16384)) and (three > (-127))):
three -= np.int8(1)#weight 1/16384
float_32 += 1/16384
one_array[i] = one
two_array[i] = two
three_array[i] = three
return one_array, two_array, three_array
def normalize(samples):
#min/max
j = 999
k = -999
for i in samples:
if i < j:
j = i
if i > k:
k = i
if j >= k:
print('ERROR, invalid bounds. Cannot Normalize!!!')
print('what is wrong? +++')
if k == 0 and j == 0:
return
factor1 = 128/k
factor2 = (-127)/j
normal = []
if factor1 > factor2 and factor2 != 0:
for i in samples:
integer1 = round(i*factor2)
normal.append(int(integer1))
elif factor1 != 0:
for i in samples:
integer1 = round(i*factor1)
normal.append(int(integer1))
else:
return
norm = np.asarray(normal, dtype=np.int8)
return norm
def compute_spectrogram(audio_samples, audio_sample_rate_hz):
"""Compute log-mel spectrogram and scale it to uint8."""
samples = audio_samples.flatten() / float(2**15)
#samples = normalize(audio_samples)
spectrogram = 30 * (
mel_features.log_mel_spectrogram(
audio_samples,
audio_sample_rate_hz,
log_offset=0.001,
window_length_secs=0.025,
hop_length_secs=0.010,
num_mel_bins=256,
lower_edge_hertz=10,
upper_edge_hertz=1000) - np.log(1e-3))
return spectrogram
def load_and_preprocess_data(data_dir, classes, target_shape=(128, 128)):
data = []
labels = []
for i, class_name in enumerate(classes):
class_dir = os.path.join(data_dir, class_name)
for filename in os.listdir(class_dir):
if filename.endswith('.wav'):
file_path = os.path.join(class_dir, filename)
audio_data, sample_rate = librosa.load(file_path, sr=None)
# Perform preprocessing (e.g., convert to Mel spectrogram and resize)
mel_spectrogram = librosa.feature.melspectrogram(y=audio_data, sr=sample_rate)
mel_spectrogram = resize(np.expand_dims(mel_spectrogram, axis=-1), target_shape)
data.append(mel_spectrogram)
labels.append(i)
return np.array(data), np.array(labels)
def train2():
feature_list = []
label_list = []
file = 'final0.wav'
audio_path = os.path.join('/home/audio/', file)
audio, sr = librosa.load(path=audio_path, sr=None)
audio = librosa.effects.time_stretch(y=audio, rate=len(audio)/sr)
# Calculate features and get the label from the filename
mels = librosa.feature.melspectrogram(y=audio, sr=sr, n_fft=2048, hop_length=512)
mels_db = librosa.power_to_db(S=mels, ref=1.0)
feature_list.append(mels_db.reshape((256, 256, 1)))
label_list.append(file[0])
features = np.array(feature_list)
labels = np.array(label_list)
model = keras.Sequential(layers=[
keras.layers.InputLayer(input_shape=features[0].shape),
keras.layers.Conv2D(16, 3, padding='same', activation=keras.activations.relu),
keras.layers.MaxPooling2D(),
keras.layers.Conv2D(32, 3, padding='same', activation=keras.activations.relu),
keras.layers.MaxPooling2D(),
keras.layers.Flatten(),
keras.layers.Dropout(0.3),
keras.layers.Dense(64, activation=keras.activations.relu),
keras.layers.Dense(10, activation=keras.activations.softmax)
])
model.compile(optimizer=keras.optimizers.Adam(), loss=keras.losses.categorical_crossentropy, metrics=['accuracy'])
print(model.summary())
# Set parameters for data splitting and training
TEST_SIZE = 0.3
BATCH_SIZE = 64
EPOCHS = 50
# Encode Labels
encoded_labels = tf.one_hot(indices=labels, depth=10)
# Split dataset to train and test data
X_train, X_test, y_train, y_test = train_test_split(features, encoded_labels.numpy(), test_size=TEST_SIZE)
# Train the model
early_stopping = keras.callbacks.EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
history = model.fit(x=X_train, y=y_train, validation_split=TEST_SIZE, epochs=EPOCHS, batch_size=BATCH_SIZE, callbacks=[early_stopping])
# Plot the training history
fig, axs = plt.subplots(2)
fig.set_size_inches(12, 8)
fig.suptitle('Training History', fontsize=16)
axs[0].plot(history.epoch, history.history['loss'], history.history['val_loss'])
axs[0].set(title='Loss', xlabel='Epoch', ylabel='Loss')
axs[0].legend(['loss', 'val_loss'])
axs[1].plot(history.epoch, history.history['accuracy'], history.history['val_accuracy'])
axs[1].set(title='Accuracy', xlabel='Epoch', ylabel='Accuracy')
axs[1].legend(['accuracy', 'val_accuracy'])
plt.show()
def train():
#Split data into training and testing sets
#data, labels = load_and_preprocess_data(data_dir, classes)
no_epochs = 30
learning_rate = 0.001
no_classes = 10
verbosity = 1
batch_size = 1
data1, data2, data3 = samples_to_numpy('/home/final0.wav')
mel1 = compute_spectrogram(audio_samples=data1,audio_sample_rate_hz=192000)
mel2 = compute_spectrogram(audio_samples=data2,audio_sample_rate_hz=192000)
mel3 = compute_spectrogram(audio_samples=data3,audio_sample_rate_hz=192000)
# Create a neural network model
# Create the model)
y = np.zeros(len(mel1))
x = np.array(mel1,np.uint8)
x = x.reshape(len(mel1), 256, -1 )
for i in range(len(mel1)):
y[i] = i
model = Sequential(layers=[
keras.layers.InputLayer(input_shape=x.shape),
keras.layers.Conv2D(16,3, padding='same', activation=keras.activations.relu),
keras.layers.MaxPooling2D(),
keras.layers.Conv2D(32,3, padding='same', activation=keras.activations.relu),
keras.layers.MaxPooling2D(),
keras.layers.Flatten(),
keras.layers.Dropout(0.3),
keras.layers.Dense(64, activation=keras.activations.relu),
keras.layers.Dense(10, activation=keras.activations.softmax)
])
print(mel1)
model.compile(optimizer=keras.optimizers.Adam(), loss=keras.losses.categorical_crossentropy,experimental_run_tf_function = False, metrics=['accuracy'])
history = model.fit(x,y, batch_size=batch_size, epochs=no_epochs, verbose=verbosity)
model.save('/home/model_full.h5')
model = tf.keras.models.load_model('/home/model_full.h5', compile=False)
converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()
with tf.io.gfile.GFile('/home/model.tflite', 'wb') as f:
f.write(tflite_model)
def test_audio(file_path, model):
# Load and preprocess the audio file
audio_data, sample_rate = librosa.load(file_path, sr=None)
mel_spectrogram = librosa.feature.melspectrogram(y=audio_data, sr=sample_rate)
mel_spectrogram = resize(np.expand_dims(mel_spectrogram, axis=-1), target_shape)
mel_spectrogram = tf.reshape(mel_spectrogram, (1,) + target_shape + (1,))
# Make predictions
predictions = model.predict(mel_spectrogram)
# Get the class probabilities
class_probabilities = predictions[0]
# Get the predicted class index
predicted_class_index = np.argmax(class_probabilities)
return class_probabilities, predicted_class_index
# Test an audio file
#test_audio_file = 'dog_barking_4.wav'
#class_probabilities, predicted_class_index = test_audio(test_audio_file, model)
# Display results for all classes
def display():
for i, class_label in enumerate(classes):
probability = class_probabilities[i]
print(f'Class: {class_label}, Probability: {probability:.4f}')
# Calculate and display the predicted class and accuracy
predicted_class = classes[predicted_class_index]
accuracy = class_probabilities[predicted_class_index]
print(f'The audio is classified as: {predicted_class}')
print(f'Accuracy: {accuracy:.4f}')
if __name__=="__main__":
train()
subprocess.run(["edgetpu_compiler", "/home/model.tflite"])
Same Error:
import h5py
import keras
import os
import librosa
import wave
import numpy as np
import tensorflow as tf
import scipy
import numpy
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import Conv3D, MaxPooling3D, Flatten, Dense
import subprocess
import mel_features
def samples_to_numpy(file_path):
sr, wav_file = scipy.io.wavfile.read(file_path)#capable of float32
one_array = numpy.full((len(wav_file)), 0, dtype=np.int8)
two_array = numpy.full((len(wav_file)), 0, dtype=np.int8)
three_array = numpy.full((len(wav_file)), 0, dtype=np.int8)
for i in range(len(wav_file)):
float_32 = wav_file[i]
#the trick
one = np.int8(0)
two = np.int8(0)
three = np.int8(0)
if float_32 > 0 or float_32 == 0:
while (float_32 > 1) and (one < 128):
one += np.int8(1) # weight 1
float_32 -= 1
while (float_32 > (1/128) and (two < 128)):
two += np.int8(1) #weight 1/128
float_32 -= (1/128)
while (float_32 > (1/16384) and (three < 128)):
three += np.int8(1) # weight 1/16384
float_32 -= 1/16384
elif float_32 < 0:
while (float_32 < (-1) and (one > (-127))):
one -= np.int8(1)#weight 1
float_32 += 1
while (float_32 < (-(1/128)) and (two > (-127))):
two -= np.int8(1) #weight 1/128
float_32 += (1/128)
while (float_32 < (-(1/16384)) and (three > (-127))):
three -= np.int8(1)#weight 1/16384
float_32 += 1/16384
one_array[i] = one
two_array[i] = two
three_array[i] = three
return one_array, two_array, three_array
def compute_spectrogram(audio_samples, audio_sample_rate_hz):
"""Compute log-mel spectrogram and scale it to uint8."""
#samples = audio_samples.flatten() / float(2**15)
spectrogram = 30 * (
mel_features.log_mel_spectrogram(
audio_samples,
audio_sample_rate_hz,
log_offset=0.001,
window_length_secs=0.025,
hop_length_secs=0.010,
num_mel_bins=32,
lower_edge_hertz=10,
upper_edge_hertz=1000) - np.log(1e-3))
return spectrogram
def load_and_preprocess_data(data_dir, classes, target_shape=(128, 128)):
data = []
labels = []
for i, class_name in enumerate(classes):
class_dir = os.path.join(data_dir, class_name)
for filename in os.listdir(class_dir):
if filename.endswith('.wav'):
file_path = os.path.join(class_dir, filename)
audio_data, sample_rate = librosa.load(file_path, sr=None)
# Perform preprocessing (e.g., convert to Mel spectrogram and resize)
mel_spectrogram = librosa.feature.melspectrogram(y=audio_data, sr=sample_rate)
mel_spectrogram = resize(np.expand_dims(mel_spectrogram, axis=-1), target_shape)
data.append(mel_spectrogram)
labels.append(i)
return np.array(data), np.array(labels)
def train():
#Split data into training and testing sets
#data, labels = load_and_preprocess_data(data_dir, classes)
no_epochs = 30
learning_rate = 0.001
no_classes = 10
verbosity = 1
batch_size=100
data1, data2, data3 = samples_to_numpy('/home/sound1.wav')
mel1 = compute_spectrogram(audio_samples=data1,audio_sample_rate_hz=192000)
mel2 = compute_spectrogram(audio_samples=data2,audio_sample_rate_hz=192000)
mel3 = compute_spectrogram(audio_samples=data3,audio_sample_rate_hz=192000)
sample_shape = (len(data1),len(data2),len(data3), 3)
# Create a neural network model
# Create the model
model = Sequential()
model.add(Conv3D(12, kernel_size=(3, 3, 3), activation='relu', kernel_initializer='he_uniform', input_shape=sample_shape))
model.add(MaxPooling3D(pool_size=(2, 2, 2)))
model.compile(loss=tf.losses.categorical_crossentropy,
optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
metrics=['accuracy'])
#dataset = tf.data.Dataset.from_tensor_slices([mel1,mel2,mel3])
line = np.zeros((len(data1)))
for i in range(len(data1)):
line[i] = i
xarrays = np.zeros((len(line,))*3)
xarrays.reshape((3,len(line))).transpose()
xarrays = numpy.insert(xarrays, 0, data1, axis=0)
xarrays = numpy.insert(xarrays, 1, data2, axis=0)
xarrays = numpy.insert(xarrays, 2, data3, axis=0)
yarrays = np.zeros((len(line,))*3)
yarrays.reshape((3,len(line))).transpose()
yarrays = numpy.insert(yarrays, 0, line, axis=0)
yarrays = numpy.insert(yarrays, 1, line, axis=0)
yarrays = numpy.insert(yarrays, 2, line, axis=0)
x_3d = xarrays
y_3d = yarrays
model.fit(x_3d, y_3d, batch_size=batch_size, epochs=no_epochs, verbose=verbosity)
model.save('/home/model_full.h5')
model = tf.keras.models.load_model('/home/model_full.h5', compile=False)
converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()
with tf.io.gfile.GFile('/home/model.tflite', 'wb') as f:
f.write(tflite_model)
def test_audio(file_path, model):
# Load and preprocess the audio file
audio_data, sample_rate = librosa.load(file_path, sr=None)
mel_spectrogram = librosa.feature.melspectrogram(y=audio_data, sr=sample_rate)
mel_spectrogram = resize(np.expand_dims(mel_spectrogram, axis=-1), target_shape)
mel_spectrogram = tf.reshape(mel_spectrogram, (1,) + target_shape + (1,))
# Make predictions
predictions = model.predict(mel_spectrogram)
# Get the class probabilities
class_probabilities = predictions[0]
# Get the predicted class index
predicted_class_index = np.argmax(class_probabilities)
return class_probabilities, predicted_class_index
# Test an audio file
#test_audio_file = 'dog_barking_4.wav'
#class_probabilities, predicted_class_index = test_audio(test_audio_file, model)
# Display results for all classes
def display():
for i, class_label in enumerate(classes):
probability = class_probabilities[i]
print(f'Class: {class_label}, Probability: {probability:.4f}')
# Calculate and display the predicted class and accuracy
predicted_class = classes[predicted_class_index]
accuracy = class_probabilities[predicted_class_index]
print(f'The audio is classified as: {predicted_class}')
print(f'Accuracy: {accuracy:.4f}')
if __name__=="__main__":
train()
subprocess.run(["edgetpu_compiler", "/home/model.tflite"])