ValueError: `validation_split` is only supported for Tensors or NumPy arrays

Hi all. I'm learning machine learning with TensorFlow, but I ran into a problem with `validation_split` when I used the `model.fit()` API.
It gave me the following error:

Traceback (most recent call last):
  File "c:\Users\dell\OneDrive - khang06\My-Workspace\Programming\CLB-Stem-TamPhu\KhoaHocKyThuat-THPT-TamPhu\MachineLearning\Multi-class-Classification\Multi-class-Classification.py", line 84, in <module>
    epochs , hist = train_model(my_model , x_train_normalized , y_train,
  File "c:\Users\dell\OneDrive - khang06\My-Workspace\Programming\CLB-Stem-TamPhu\KhoaHocKyThuat-THPT-TamPhu\MachineLearning\Multi-class-Classification\Multi-class-Classification.py", line 48, in train_model
    history = model.fit(x=train_features , y=train_model, batch_size=batch_size , epochs=epochs , shuffle=True ,
  File "C:\Users\dell\OneDrive - khang06\My-Workspace\TSC\lib\site-packages\keras\utils\traceback_utils.py", line 70, in error_handler
    raise e.with_traceback(filtered_tb) from None
  File "C:\Users\dell\OneDrive - khang06\My-Workspace\TSC\lib\site-packages\keras\engine\data_adapter.py", line 1664, in train_validation_split
    raise ValueError(
ValueError: `validation_split` is only supported for Tensors or NumPy arrays, found following types in the input: [<class 'function'>]

Could you please help me resolve it? Thanks a lot.

The key thing in that error message is the last line:

ValueError: `validation_split` is only supported for Tensors or NumPy arrays, found following types in the input: [<class 'function'>]

So somewhere you are passing in a function rather than the actual data that function provides. It's hard to say exactly what's wrong without seeing your code, but you could be doing something like this:

data = load_data
model.fit(data, validation_split=0.1)

instead of

data = load_data()
model.fit(data, validation_split=0.1)

If it's still not obvious, share your code, specifically the part that loads the data.
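For reference, `validation_split` only works when x and y are in-memory Tensors or NumPy arrays; Keras then carves the validation set off the end of the data before any shuffling. A minimal sketch of a working call (the shapes, layer sizes, and random data here are just placeholders, not taken from any real project):

import numpy as np
import tensorflow as tf

# Toy in-memory data standing in for real features/labels.
x = np.random.rand(100, 28, 28).astype("float32")
y = np.random.randint(0, 10, size=(100,))

model = tf.keras.Sequential([
    tf.keras.layers.Flatten(input_shape=(28, 28)),
    tf.keras.layers.Dense(10, activation="softmax"),
])
model.compile(optimizer="adam", loss="sparse_categorical_crossentropy")

# This works because x and y are NumPy arrays: Keras slices off the
# last 10% of them as the validation set.
model.fit(x, y, epochs=1, validation_split=0.1)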


Oh, my bad :frowning: I'm really sorry for the lack of information.

Here is my code:

from venv import create

import numpy as np
import pandas as pd
import tensorflow as tf
from matplotlib import pyplot as plt

pd.set_option("display.max_rows", 10)
pd.set_option("display.float_format", "{:.1f}".format)
np.set_printoptions(linewidth=200)

(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

def create_model(my_learning_rate):
    """Create and compile a deep neural net."""
    model = tf.keras.models.Sequential()

    # The features are stored in a two-dimensional 28x28 array.
    # Flatten that two-dimensional array into a one-dimensional
    # 784-element array.
    model.add(tf.keras.layers.Flatten(input_shape=(28, 28)))

    # Define the first hidden layer.
    model.add(tf.keras.layers.Dense(units=32, activation='relu'))

    # Define a dropout regularization layer.
    model.add(tf.keras.layers.Dropout(rate=0.2))

    # Define the output layer. The units parameter is set to 10 because
    # the model must choose among 10 possible output values (representing
    # the digits from 0 to 9, inclusive).
    #
    # Don't change this layer.
    model.add(tf.keras.layers.Dense(units=10, activation='softmax'))

    # Construct the layers into a model that TensorFlow can execute.
    # Notice that the loss function for multi-class classification
    # is different than the loss function for binary classification.
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=my_learning_rate),
                  loss="sparse_categorical_crossentropy",
                  metrics=['accuracy'])

    return model

def train_model(model, train_features, train_label, epochs,
                batch_size=None, validation_split=0.1):
    """Train the model by feeding it data."""
    history = model.fit(x=train_features, y=train_model, batch_size=batch_size,
                        epochs=epochs, shuffle=True,
                        validation_split=validation_split)

    # To track the progression of training, gather a snapshot
    # of the model's metrics at each epoch.
    epochs = history.epoch
    hist = pd.DataFrame(history.history)

    return epochs, history

def plot_curve(epochs, hist, list_of_metrics):
    """Plot a curve of one or more classification metrics vs. epoch."""
    plt.figure()
    plt.xlabel("Epoch")
    plt.ylabel("Value")

    for m in list_of_metrics:
        x = hist[m]
        plt.plot(epochs[1:], x[1:], label=m)

    plt.legend()

if __name__ == '__main__':
    learning_rate = 0.003
    epochs = 50
    batch_size = 4000
    validation_split = 0.2

    x_train_normalized = x_train / 255.0
    x_test_normalized = x_test / 255.0

    # Establish the model's topography.
    my_model = create_model(learning_rate)

    # Train the model on the normalized training set.
    epochs, hist = train_model(my_model, x_train_normalized, y_train,
                               epochs, batch_size, validation_split)

    # Plot a graph of the metric vs. epochs.
    list_of_metrics_to_plot = ['accuracy']
    plot_curve(epochs, hist, list_of_metrics_to_plot)

    # Evaluate against the test set.
    print("\n Evaluate the new model against the test set:")
    my_model.evaluate(x=x_test_normalized, y=y_test, batch_size=batch_size)

Can you help me fix it? :frowning: Thanks a lot.

OK, here's your problem:

def train_model(model, train_features, train_label, epochs,
                batch_size=None, validation_split=0.1):
    """Train the model by feeding it data"""

    history = model.fit(x=train_features, y=train_model, batch_size=batch_size,
                        epochs=epochs, shuffle=True,
                        validation_split=validation_split)

You can see this is a function called train_model(), and you are passing that function into model.fit() as y=train_model. It should be train_label (I guess?)
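For completeness, here is roughly how that function would look with the one change (an untested sketch based on your posted code; I also return the hist DataFrame instead of the raw History object, since plot_curve() later indexes it with hist[m]):

import pandas as pd

def train_model(model, train_features, train_label, epochs,
                batch_size=None, validation_split=0.1):
    """Train the model by feeding it data."""
    # Pass the label array itself, not the train_model function.
    history = model.fit(x=train_features, y=train_label, batch_size=batch_size,
                        epochs=epochs, shuffle=True,
                        validation_split=validation_split)

    # Gather a snapshot of the model's metrics at each epoch.
    epochs = history.epoch
    hist = pd.DataFrame(history.history)

    return epochs, hist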


Oh my gosh, thanks a lot :frowning: Why didn't I see this mistake? It's so easy to see :frowning: My bad, thank you so much.