I have constructed a pipeline using TFX for audio processing. It starts with a custom ExampleGen for audio processing and goes on the common way - statistics_gen, Transform, and Trainer. Everything goes fine but increasing the number of audio files has caused an OOM at the Transform step - what gets me very surprised once it is supposed to be projected for processing large amounts of data. Then I decided to run component by component and reinitialize the kernel, since running one after the another is filling up my memory. For that reason I have saved the ExampleGen artifact using the following code:
# Run the custom ExampleGen once and persist its output so that later
# components can be executed in a fresh kernel.

# Persistent root for component outputs. Per the TFX docs, when no metadata
# connection config is given the context keeps its SQLite metadata store in
# "metadata.sqlite" inside this directory.
artifact_store_path = "/home/marlon/Área de Trabalho/telnyx/audio_classification"

# Update the input_config to include train and eval patterns
input_config = example_gen_pb2.Input(splits=[
    example_gen_pb2.Input.Split(name='train', pattern='train/*.pcap'),
    example_gen_pb2.Input.Split(name='eval', pattern='eval/*.pcap'),
])

# TODO: add an output config
example_gen = FileBasedExampleGen(
    input_base="/home/marlon/Área de Trabalho/telnyx/audio_classification/data/",
    input_config=input_config,
    custom_executor_spec=executor_spec.ExecutorClassSpec(BaseExampleGenExecutor),
)

# Initialize the InteractiveContext with the artifact store path.
# Only one context is created: an argument-less InteractiveContext() would
# use an ephemeral temporary pipeline root and be discarded immediately.
context = InteractiveContext(pipeline_root=artifact_store_path)
context.run(example_gen)
And then I try to load it using:
from tfx.orchestration.experimental.interactive.interactive_context import InteractiveContext
from tfx.types.standard_artifacts import Examples
from tfx.types import Channel
from tfx.components import StatisticsGen
from ml_metadata.metadata_store import metadata_store
from ml_metadata.proto import metadata_store_pb2
from tfx.orchestration.metadata import Metadata
from tfx.orchestration.metadata import sqlite_metadata_connection_config
# Reload the Examples artifact produced by an earlier ExampleGen run and feed
# it to StatisticsGen in a fresh kernel.

# Set up the InteractiveContext with your metadata store and artifact store paths
metadata_store_path = "/home/marlon/Área de Trabalho/telnyx/audio_classification/metadata.sqlite"
artifact_store_path = "/home/marlon/Área de Trabalho/telnyx/audio_classification"
connection_config = sqlite_metadata_connection_config(metadata_store_path)
context = InteractiveContext(pipeline_root=artifact_store_path, metadata_connection_config=connection_config)

# Retrieve the latest artifact of type 'Examples' from the metadata store
with Metadata(connection_config) as metadata_handler:
    artifacts = metadata_handler.store.get_artifacts_by_type(Examples.TYPE_NAME)

# Fail with a clear message instead of the opaque "max() arg is an empty
# sequence" when the store holds no Examples artifact yet.
if not artifacts:
    raise RuntimeError(
        "No 'Examples' artifacts found in the metadata store at "
        + metadata_store_path
    )

# The highest id is treated as the most recently registered artifact.
latest_artifact = max(artifacts, key=lambda artifact: artifact.id)
artifact_uri = latest_artifact.uri

# Print information about the latest artifact
print("Artifact URI:", artifact_uri)
import os
print("Listing files in the artifact directory:")
print(os.listdir(artifact_uri))
print("Artifact Name:", latest_artifact.name)
print("Artifact Properties:", latest_artifact.properties)
print("Artifact Custom Properties:", latest_artifact.custom_properties)

# Create an Examples channel based on the latest artifact.
# Wrap the stored MLMD record rather than copying only the URI, so the
# artifact keeps its id, split_names and custom properties.
example_artifact = Examples()
example_artifact.set_mlmd_artifact(latest_artifact)

# Register the artifact through Channel.set_artifacts(). Assigning to an
# ad-hoc `artifacts` attribute is apparently never read by the channel, so
# the executor received zero inputs ("expected list length of one but got 0").
# NOTE(review): confirm set_artifacts() is available in the installed TFX
# version (1.13.0 in the reported run).
example_channel = Channel(type=Examples).set_artifacts([example_artifact])

# Set up and run the StatisticsGen component
statistics_gen = StatisticsGen(examples=example_channel)
context.run(statistics_gen)
But then I get this error:
Artifact URI: /home/marlon/Área de Trabalho/telnyx/audio_classification/FileBasedExampleGen/examples/1
Listing files in the artifact directory:
['Split-eval', 'Split-train']
Artifact Name:
Artifact Properties: {'split_names': string_value: "[\"train\", \"eval\"]"
}
Artifact Custom Properties: {'span': int_value: 0
, 'tfx_version': string_value: "1.13.0"
, 'input_fingerprint': string_value: "split:train,num_files:4091,total_bytes:354502712,xor_checksum:1693317426,sum_checksum:6927260006922\nsplit:eval,num_files:1063,total_bytes:97049364,xor_checksum:1691675502,sum_checksum:1799906200016"
, 'payload_format': string_value: "FORMAT_TF_EXAMPLE"
, 'file_format': string_value: "tfrecords_gzip"
}
Traceback (most recent call last):
File ~/anaconda3/envs/tfx/lib/python3.9/site-packages/spyder_kernels/py3compat.py:356 in compat_exec
exec(code, globals, locals)
File ~/Área de Trabalho/telnyx/audio_classification/untitled0.py:47
context.run(statistics_gen)
File ~/anaconda3/envs/tfx/lib/python3.9/site-packages/tfx/orchestration/experimental/interactive/notebook_utils.py:31 in run_if_ipython
return fn(*args, **kwargs)
File ~/anaconda3/envs/tfx/lib/python3.9/site-packages/tfx/orchestration/experimental/interactive/interactive_context.py:164 in run
execution_id = launcher.launch().execution_id
File ~/anaconda3/envs/tfx/lib/python3.9/site-packages/tfx/orchestration/launcher/base_component_launcher.py:206 in launch
self._run_executor(execution_decision.execution_id,
File ~/anaconda3/envs/tfx/lib/python3.9/site-packages/tfx/orchestration/launcher/in_process_component_launcher.py:73 in _run_executor
executor.Do(
File ~/anaconda3/envs/tfx/lib/python3.9/site-packages/tfx/components/statistics_gen/executor.py:90 in Do
examples = artifact_utils.get_single_instance(
File ~/anaconda3/envs/tfx/lib/python3.9/site-packages/tfx/types/artifact_utils.py:77 in get_single_instance
raise ValueError(
ValueError: expected list length of one but got 0
How can I load this guy?