Hello, I am new to machine learning and have a question about why the model's fit function is producing a NaN value for the loss when using the sparse categorical cross-entropy loss function. The data is scaled so that there are no negative numbers in the dataset, and it contains no NaN values. The target values are binary (1 or 0) and are stored as floats in a NumPy array. Thank you!
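Here is a quick sanity check on the arrays going into fit (a minimal sketch only; train_x and train_y are assumed to be the arrays returned by the preprocess function below, and check_inputs is just a hypothetical helper written for this post):

import numpy as np

def check_inputs(train_x, train_y):
    train_y = np.asarray(train_y)
    # Features must be finite: np.isfinite is False for both NaN and +/-inf
    print("non-finite feature values:", np.size(train_x) - np.count_nonzero(np.isfinite(train_x)))
    # Sparse categorical cross-entropy expects integer class indices, here 0 or 1
    print("unique target values:", np.unique(train_y))
    print("shapes:", train_x.shape, train_y.shape)

check_inputs(train_x, train_y)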
def add_indicators(kline_df):
    BollingerBands(kline_df)
    calculate_macd(kline_df)
    calculate_rsi(kline_df)
    calculate_ema(kline_df, 150)
    calculate_ema(kline_df, 300)
    calculate_ema(kline_df, 450)
    calculate_ema(kline_df, 600)
    calculate_stochastic_oscillator(kline_df)
    kline_df = kline_df.dropna()
    return kline_df
def target(df, future_window=10, greater=1):
    # Shift the 'Close' column by the future_window value
    df['Future'] = df['Close'].shift(-future_window)

    # Define the classify function
    def classify(current, future):
        if current < future * greater:
            return 1
        else:
            return 0

    # Create a new column 'Target' using the classify function and the 'Close' and 'Future' columns
    df['Target'] = df.apply(lambda x: classify(x['Close'], x['Future']), axis=1)

    # Drop the 'Future' column
    df = df.drop(columns=['Future'])
    return df
def scale_dataset(data):
    # Shift the data so its minimum value is zero (no negatives)
    minimum = np.min(data)
    return data - minimum
def preprocess(df, seq_len=60):
    skip_cols = ['Target']  # , 'RSI', 'MACD', 'MACD Signal', 'MACD Histogram', 'stochastic_oscillator_k', 'stochastic_oscillator_d'
    for col in df.columns:  # go through all of the columns
        if col not in skip_cols:  # normalize every column except for the target
            df[col] = df[col].pct_change()  # pct change "normalizes"
            df.dropna(inplace=True)  # remove the NaNs created by pct_change
            df[col] = scale_dataset(df[col].values)
    df.fillna(0, inplace=True)

    # Build sliding windows of seq_len rows, each paired with that row's target
    sequential_data = []
    prev_periods = deque(maxlen=seq_len)
    for i in df.values:
        prev_periods.append([n for n in i[:-1]])
        if len(prev_periods) == seq_len:
            sequential_data.append([np.array(prev_periods), i[-1]])
    random.shuffle(sequential_data)

    # Balance the two classes by trimming the larger one
    buys = []
    sells = []
    for seq, target in sequential_data:
        if target == 0:
            sells.append([seq, target])
        else:
            buys.append([seq, target])
    random.shuffle(buys)
    random.shuffle(sells)

    lower = min(len(buys), len(sells))
    buys = buys[:lower]
    sells = sells[:lower]

    sequential_data = buys + sells
    random.shuffle(sequential_data)

    X = []
    y = []
    for seq, target in sequential_data:
        X.append(seq)
        y.append(target)
    return np.array(X), np.array(y)
def create_model(train_x):
    # Define the network architecture
    model = tf.keras.Sequential()
    model.add(tf.keras.layers.LSTM(128, input_shape=train_x.shape[1:], return_sequences=True))
    model.add(tf.keras.layers.Dropout(0.2))
    model.add(tf.keras.layers.BatchNormalization())
    model.add(tf.keras.layers.LSTM(128, return_sequences=True))
    model.add(tf.keras.layers.Dropout(0.2))
    model.add(tf.keras.layers.BatchNormalization())
    model.add(tf.keras.layers.LSTM(128))
    model.add(tf.keras.layers.Dropout(0.2))
    model.add(tf.keras.layers.BatchNormalization())
    model.add(tf.keras.layers.Dense(32, activation='relu'))
    model.add(tf.keras.layers.Dropout(0.2))
    model.add(tf.keras.layers.Dense(2, activation='softmax'))

    # Compile the model
    model.compile(loss='sparse_categorical_crossentropy',
                  optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
                  metrics=['accuracy'])
    return model
def train_model(train_x, train_y, test_x, test_y, batch_size=128, epochs=10, seq_length=60):
    train_x = np.asarray(train_x)
    train_y = np.asarray(train_y)
    test_x = np.asarray(test_x)
    test_y = np.asarray(test_y)

    name = f'{seq_length}-{batch_size}-BTC-Indicator-{datetime.now()}'
    tensorboard = TensorBoard(log_dir=f'/content/drive/MyDrive/ML Models/Logs/{name}')
    filepath = "RNN_Final-{epoch:02d}-{val_acc:.3f}"
    # checkpoint = ModelCheckpoint('/content/drive/MyDrive/ML Models/Models/{}.model'.format(filepath), monitor='val_acc', verbose=1, save_best_only=True, mode='max')

    model = create_model(train_x)
    history = model.fit(train_x, train_y,
                        batch_size=batch_size,
                        epochs=epochs,
                        validation_data=(test_x, test_y),
                        callbacks=[tensorboard])
    return model
def run_all(data, seq_len=60, future_window=10, batch_size=128, epochs=10, greater_v=1):
    data_indic = add_indicators(data)
    data_indic = target(data_indic, future_window=future_window, greater=greater_v)

    train_size = int(0.9 * len(data_indic))
    train_df = data_indic.iloc[:train_size]
    test_df = data_indic.iloc[train_size:]

    train_x, train_y = preprocess(train_df, seq_len=seq_len)
    test_x, test_y = preprocess(test_df, seq_len=seq_len)

    # train_model builds the model itself via create_model, so no separate call is needed here
    model = train_model(train_x, train_y, test_x, test_y, batch_size=batch_size, epochs=epochs, seq_length=seq_len)
    return model

kline_indic = kline_indic.iloc[int(0.6 * len(kline_indic)):]
run_all(kline_indic)