I have been struggling with this for a week now.
If anyone can fix this so that it works, you are a wizard in my eyes.
LilyPond raises an error right now, but the script doesn't really need it. The music-sheet generation can be removed if necessary.
I believe the incremental learning code is correct.
My main goals right now are:
- Input MIDI file (this works and it loads)
- Output MIDI files / project file
- Save the model so I can load it for incremental learning
import tensorflow as tf
import numpy as np
import pandas as pd
from collections import Counter
import random
import os
from IPython.display import Image
from IPython.display import display
import IPython
from IPython.display import Audio
from music21 import *
from music21.note import Note
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
import tensorflow.keras.backend as K
from tensorflow.keras.optimizers import Adamax
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import sys
import warnings
# NOTE: every warning is silenced globally, which also hides deprecation
# notices from TensorFlow and music21 while debugging.
warnings.filterwarnings("ignore")
warnings.simplefilter("ignore")
np.random.seed(42)
# Loading the list of Chopin's MIDI files as a stream
filepath = r"C:/Users/adria/OneDrive/Desktop/test/"
# The export step at the end of this script writes this file into the same
# folder; skip it so a later run does not train on the model's own output.
generated_midi_name = "generated_music.mid"
# Getting MIDI files
all_midis = []
# os.listdir returns names in arbitrary order; sorting keeps the corpus (and
# therefore the seeded train/seed split) reproducible between runs.
for i in sorted(os.listdir(filepath)):
    lowered_name = i.lower()
    # Accept both common MIDI extensions regardless of letter case.
    if not lowered_name.endswith((".mid", ".midi")):
        continue
    if lowered_name == generated_midi_name:
        continue
    # os.path.join works whether or not filepath ends with a separator.
    tr = os.path.join(filepath, i)
    midi = converter.parse(tr)
    all_midis.append(midi)
# Helping function
def extract_notes(file):
    """Collect every note and chord from the parsed MIDI streams as strings.

    Args:
        file: Iterable of parsed music21 streams, one per MIDI file.

    Returns:
        A flat list of tokens in encounter order. A single note is stored as
        ``str(element.pitch)``; a chord is stored as the entries of its
        ``normalOrder`` joined with ".".
    """
    notes = []
    for j in file:
        songs = instrument.partitionByInstrument(j)
        # partitionByInstrument can hand back None when the stream carries no
        # instrument information (reported for older music21 releases --
        # confirm for the installed version). Walk the whole stream instead
        # of failing on ``None.parts``.
        parts = [j] if songs is None else songs.parts
        for part in parts:
            for element in part.recurse():
                if isinstance(element, note.Note):
                    notes.append(str(element.pitch))
                elif isinstance(element, chord.Chord):
                    notes.append(".".join(str(n) for n in element.normalOrder))
    return notes
# Build the Corpus: one token per note/chord across all parsed files
Corpus = extract_notes(all_midis)
corpus_size = len(Corpus)
corpus_preview = Corpus[:50]
print("Total notes in all the Chopin MIDI files in the dataset:", corpus_size)
print("First fifty values in the Corpus:", corpus_preview)
# First, let's write some functions to look into the data
def show(music):
    """Render a music21 stream as a sheet-music image and display it inline.

    The image is produced with ``music.write("lily.png")``. That step fails
    when the rendering backend is unavailable (presumably a missing LilyPond
    installation -- confirm on the target machine). Because the preview is
    optional, a rendering failure is reported and skipped rather than
    aborting the rest of the script.

    Args:
        music: Object exposing ``write(fmt)`` that returns the path of the
            rendered file (a music21 stream in this script).

    Returns:
        None.
    """
    try:
        rendered_path = str(music.write("lily.png"))
    except Exception as err:  # optional preview: report and carry on
        print("Skipping sheet-music preview (could not render lily.png):", err)
        return
    display(Image(rendered_path))
def chords_n_notes(Snippet):
    """Convert a sequence of corpus tokens into a music21 Stream.

    A token that contains "." or consists solely of digits is treated as a
    chord: each "."-separated part is converted to int and wrapped in a
    note.Note, and the notes are combined into a chord.Chord. Any other
    token is passed to note.Note directly.

    Args:
        Snippet: Iterable of token strings.

    Returns:
        A stream.Stream holding one element per token, with offsets
        0, 1, 2, ... in token order.
    """
    Melody = []
    for position, token in enumerate(Snippet):
        is_chord = "." in token or token.isdigit()
        if is_chord:
            members = [note.Note(int(part)) for part in token.split(".")]
            element = chord.Chord(members)
        else:
            element = note.Note(token)
        # One offset unit per token so that elements do not stack
        element.offset = position
        Melody.append(element)
    return stream.Stream(Melody)
# Preview the first hundred corpus tokens as sheet music
Melody_Snippet = chords_n_notes(Corpus[:100])
show(Melody_Snippet)
# To play audio or corpus
print("Sample Audio From Data")
# The clip sits at a relative path that looks like a Kaggle dataset location
# and may not exist locally. Audio() treats a non-existent path string as raw
# sample data and raises ValueError, so only play the clip when it is present.
sample_audio_path = "../input/music-generated-lstm/Corpus_Snippet.wav"
if os.path.exists(sample_audio_path):
    # display() is required for the player to appear when this statement is
    # not the last expression of a notebook cell.
    display(Audio(filename=sample_audio_path))
else:
    print("Sample audio not found, skipping playback:", sample_audio_path)
# Tally how often each token occurs in the Corpus
count_num = Counter(Corpus)
print("Total unique notes in the Corpus:", len(count_num))
# Split the tally into two parallel lists: the tokens and their counts
Notes = [*count_num]
Recurrence = [*count_num.values()]
# Mean recurrence of a token in the Corpus
def Average(lst):
    """Return the arithmetic mean of the values in ``lst``.

    Args:
        lst: Sized collection of numbers.

    Returns:
        ``sum(lst) / len(lst)``.

    Raises:
        ZeroDivisionError: If ``lst`` is empty.
    """
    total = sum(lst)
    count = len(lst)
    return total / count
# Summary statistics of the per-token occurrence counts
mean_recurrence = Average(Recurrence)
highest_recurrence = max(Recurrence)
lowest_recurrence = min(Recurrence)
print("Average recurrence for a note in Corpus:", mean_recurrence)
print("Most frequent note in Corpus appeared:", highest_recurrence, "times")
print("Least frequent note in Corpus appeared:", lowest_recurrence, "time")
# Histogram of the per-token occurrence counts
dist_fig, dist_ax = plt.subplots(figsize=(18, 3), facecolor="#97BACB")
# Bin edges every 50 occurrences, from 0 up to (not including) the maximum count
bins = np.arange(0, max(Recurrence), 50)
dist_ax.hist(Recurrence, bins=bins, color="#97BACB")
# Vertical marker at 100 occurrences
dist_ax.axvline(x=100, color="#DBACC1")
dist_ax.set_title("Frequency Distribution Of Notes In The Corpus")
dist_ax.set_xlabel("Frequency Of Chords in Corpus")
dist_ax.set_ylabel("Number Of Chords")
plt.show()
# Getting a list of rare chords: every token that occurs fewer than 100 times
rare_note = [key for key, value in count_num.items() if value < 100]
print("Total number of notes that occur less than 100 times:", len(rare_note))
# Eliminating the rare notes
# A set gives constant-time membership tests; scanning the list once per
# corpus element would make this filter quadratic.
rare_lookup = set(rare_note)
Corpus = [element for element in Corpus if element not in rare_lookup]
print("Length of Corpus after eliminating the rare notes:", len(Corpus))
# Vocabulary: every distinct token left in the Corpus, in sorted order
symb = sorted(set(Corpus))
L_corpus = len(Corpus)  # Number of tokens in the corpus
L_symb = len(symb)  # Number of distinct tokens
# Lookup tables: token -> index and index -> token
mapping = {token: idx for idx, token in enumerate(symb)}
reverse_mapping = dict(enumerate(symb))
print("Total number of characters:", L_corpus)
print("Number of unique characters:", L_symb)
# Splitting the Corpus into equal length strings and output targets
length = 40
# Each sample needs `length` input tokens plus one target token; fail early
# with a clear message instead of an obscure shape error further down.
if L_corpus <= length:
    raise ValueError(
        "Corpus has {} notes after filtering; more than {} are needed to "
        "build training sequences.".format(L_corpus, length)
    )
features = []
targets = []
for i in range(L_corpus - length):
    feature = Corpus[i:i + length]
    target = Corpus[i + length]
    features.append([mapping[j] for j in feature])
    targets.append(mapping[target])
L_datapoints = len(targets)
print("Total number of sequences in the Corpus:", L_datapoints)
# Reshape X to (samples, length, 1) and scale the indices by the vocabulary size
X = (np.reshape(features, (L_datapoints, length, 1))) / float(L_symb)
# One hot encode the output variable. num_classes is pinned to the vocabulary
# size: left to infer it, to_categorical uses max(targets) + 1, which is too
# narrow whenever the highest index never occurs as a target.
y = tf.keras.utils.to_categorical(targets, num_classes=L_symb)
# Taking out a subset of data to be used as seed
X_train, X_seed, y_train, y_seed = train_test_split(X, y, test_size=0.2, random_state=42)
# Where the trained network is kept between runs (same folder as the MIDI data)
model_path = os.path.join(filepath, "music_lstm_model.h5")

# Incremental learning: continue from a previously saved model when one exists
# and its input/output shapes fit the current data.
# NOTE(review): the shape check cannot tell whether the note-to-index mapping
# is the same as in the run that produced the saved model; if the corpus
# changes, the vocabulary should be persisted and reused as well.
model = None
if os.path.exists(model_path):
    saved_model = tf.keras.models.load_model(model_path)
    same_input = tuple(saved_model.input_shape[1:]) == tuple(X.shape[1:])
    same_output = saved_model.output_shape[-1] == y.shape[1]
    if same_input and same_output:
        model = saved_model
        opt = model.optimizer
        print("Loaded existing model for incremental training:", model_path)
    else:
        print("Saved model does not match the current data shape; training a new model.")

if model is None:
    # Initializing the Model
    model = Sequential()
    # Adding layers
    model.add(LSTM(512, input_shape=(X.shape[1], X.shape[2]), return_sequences=True))
    model.add(Dropout(0.1))
    model.add(LSTM(256))
    model.add(Dense(256))
    model.add(Dropout(0.1))
    model.add(Dense(y.shape[1], activation='softmax'))
    # Compiling the model for training
    opt = Adamax(learning_rate=0.01)
    model.compile(loss='categorical_crossentropy', optimizer=opt)

# Model's Summary
model.summary()
# Training the Model
history = model.fit(X_train, y_train, batch_size=256, epochs=200)
# Persist the trained network so the next run can pick up from here
model.save(model_path)
print("Model saved to:", model_path)
# Training-loss curve, one point per epoch
history_df = pd.DataFrame(history.history)
fig = plt.figure(figsize=(15, 4), facecolor="#97BACB")
fig.suptitle("Learning Plot of Model for Loss")
loss_ax = fig.gca()
loss_ax.plot(history_df['loss'], label='Training Loss')
loss_ax.set_xlabel("Epochs")
loss_ax.set_ylabel("Loss")
loss_ax.legend()
plt.show()
# Generating music using the seed
# randint's upper bound is exclusive, so len(X_seed) lets every seed be chosen
start = np.random.randint(0, len(X_seed))
# X_seed rows hold vocabulary indices divided by L_symb, shaped (length, 1).
# Undo the scaling and flatten to a plain list of ints so the values can be
# used as reverse_mapping keys and the window can grow with list.append.
pattern = np.rint(X_seed[start].flatten() * L_symb).astype(int).tolist()
print("Seed:")
print("\"", ''.join([reverse_mapping[value] for value in pattern]), "\"")
# Generate 500 notes
generated_notes = []
for i in range(500):
    # The window holds raw indices, so it is scaled exactly once here,
    # matching how the training inputs were prepared
    x = np.reshape(pattern, (1, len(pattern), 1))
    x = x / float(L_symb)
    prediction = model.predict(x, verbose=0)
    index = int(np.argmax(prediction))
    result = reverse_mapping[index]
    generated_notes.append(result)
    # Slide the window: add the predicted index, drop the oldest one
    pattern.append(index)
    pattern = pattern[1:]
print("\nGenerated Notes:")
print(''.join(generated_notes))
# Turn the generated tokens into music21 elements and write them out as MIDI
output_notes = []
offset = 0
for token in generated_notes:
    if '.' in token or token.isdigit():
        # Chord token: "."-separated integers, one note per part
        notes = []
        for current_note in token.split('.'):
            new_note = note.Note(int(current_note))
            new_note.storedInstrument = instrument.Piano()
            notes.append(new_note)
        element = chord.Chord(notes)
    else:
        # Single-note token
        element = note.Note(token)
        element.storedInstrument = instrument.Piano()
    element.offset = offset
    output_notes.append(element)
    # Advance by half an offset unit per token so that elements do not stack
    offset += 0.5
midi_stream = stream.Stream(output_notes)
midi_stream.write('midi', fp=r'C:\Users\adria\OneDrive\Desktop\test\generated_music.mid')