Hello everyone. I am trying to train a model for testing purposes, but I am running into the following error:
python mnist_distributed.py
Traceback (most recent call last):
  File "C:\Users\pc\Desktop\New folder\mnist_distributed.py", line 3, in <module>
    import tensorflow as tf
  File "C:\Users\pc\AppData\Local\Programs\Python\Python39\lib\site-packages\tensorflow\__init__.py", line 45, in <module>
    from tensorflow.python import tf2 as _tf2
  File "C:\Users\pc\AppData\Local\Programs\Python\Python39\lib\site-packages\tensorflow\python\tf2.py", line 21, in <module>
    from tensorflow.python.platform import _pywrap_tf2
ImportError: DLL load failed while importing _pywrap_tf2: A dynamic link library (DLL) initialization routine failed.
I have tried everything I could find on the internet, including installing the C++ redistributable, but without success. I am using Python 3.9.11 and TensorFlow 2.15.0. The strange thing is that it was working fine before.
This is the training script:
import os
import datetime
import tensorflow as tf
from tensorflow.keras.callbacks import TensorBoard, Callback
# Atualizado para usar a nova API de precisão mista
from tensorflow.keras.mixed_precision import set_global_policy
# Set the global Keras mixed-precision policy to 'mixed_float16'.
set_global_policy('mixed_float16')

# Raise TensorFlow's logger to INFO so the distribution process is visible.
tf_logger = tf.get_logger()
tf_logger.setLevel('INFO')

# Create the distributed training strategy and report its replica count.
strategy = tf.distribute.MultiWorkerMirroredStrategy()
replica_count = strategy.num_replicas_in_sync
print("Number of devices: {}".format(replica_count))
class CustomCallback(Callback):
    """Callback that prints a message when each epoch begins and ends."""

    def on_epoch_begin(self, epoch, logs=None):
        """Print a start message showing ``epoch + 1``; ``logs`` is unused."""
        print(f"Iniciating epoch number {epoch+1}")

    def on_epoch_end(self, epoch, logs=None):
        """Print an end message showing ``epoch + 1``; ``logs`` is unused."""
        print(f"End of epoch {epoch+1}")
with strategy.scope():
    # Build the model layer by layer inside the strategy scope.
    model = tf.keras.models.Sequential()
    model.add(tf.keras.layers.Flatten(input_shape=(28, 28)))
    model.add(tf.keras.layers.Dense(128, activation='relu'))
    model.add(tf.keras.layers.Dropout(0.2))
    # The output layer is explicitly given dtype float32.
    model.add(tf.keras.layers.Dense(10, dtype='float32'))

    # The final Dense layer has no activation, so the loss is configured
    # to consume raw logits (from_logits=True).
    loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
    model.compile(
        optimizer='adam',
        loss=loss_fn,
        metrics=['accuracy'],
    )
# Data preprocessing function.
def preprocess(image, label):
    """Cast ``image`` to float32 and divide it by 255.0; pass ``label`` through.

    Returns:
        A ``(image, label)`` tuple with the rescaled image and the
        unchanged label.
    """
    return tf.cast(image, tf.float32) / 255.0, label
# Load the MNIST dataset and unpack the train/test splits.
train_split, test_split = tf.keras.datasets.mnist.load_data()
train_images, train_labels = train_split
test_images, test_labels = test_split

# Batch size is 64 multiplied by the number of replicas in sync.
BATCH_SIZE = 64 * strategy.num_replicas_in_sync

# Both pipelines apply preprocessing, then batching, then prefetching.
train_dataset = (
    tf.data.Dataset.from_tensor_slices((train_images, train_labels))
    .map(preprocess)
    .batch(BATCH_SIZE)
    .prefetch(tf.data.AUTOTUNE)
)
test_dataset = (
    tf.data.Dataset.from_tensor_slices((test_images, test_labels))
    .map(preprocess)
    .batch(BATCH_SIZE)
    .prefetch(tf.data.AUTOTUNE)
)
# TensorBoard monitoring: write logs under logs/fit/<timestamp>.
run_timestamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
log_dir = os.path.join("logs", "fit", run_timestamp)
tensorboard_callback = TensorBoard(log_dir=log_dir, histogram_freq=1)

# Train on the prepared dataset, validating on the test dataset, with the
# TensorBoard callback and CustomCallback attached.
training_callbacks = [tensorboard_callback, CustomCallback()]
model.fit(
    train_dataset,
    epochs=10,
    validation_data=test_dataset,
    callbacks=training_callbacks,
)
I would appreciate any help!