Skip to content

Module: tts_inference_workflow

Module containing a TTS (text-to-speech) Inference Workflow object.

AudioInferenceResult

Bases: BaseModel

Result object for text-to-speech inference; it holds the generated audio as a numpy array.

Source code in src/infernet_ml/workflows/inference/tts_inference_workflow.py
class AudioInferenceResult(BaseModel):
    """
    Result object for text-to-speech inference.

    Wraps the audio produced by a TTS model in a pydantic model so that it can
    be returned from a workflow's post-processing step.
    """

    # since audio_array is a numpy array, we need to allow arbitrary types.
    model_config = ConfigDict(arbitrary_types_allowed=True)

    audio_array: np.ndarray[Any, Any]
    """
    audio_array: numpy array containing the audio data. Most TTS models' output will be
    a numpy array.
    """

    @model_validator(mode="after")
    def check_is_array(self) -> "AudioInferenceResult":
        """
        Ensure that `audio_array` really is a numpy ndarray.

        Runs after the model's fields have been populated.

        Returns:
            The validated instance, unchanged.

        Raises:
            ValueError: if `audio_array` is not an instance of `np.ndarray`.
        """
        if not isinstance(self.audio_array, np.ndarray):
            raise ValueError("audio_array must be a numpy ndarray")
        return self

model_config = ConfigDict(arbitrary_types_allowed=True) class-attribute instance-attribute

Pydantic model configuration. Arbitrary types are allowed so that `audio_array` can be declared as a numpy array.

TTSInferenceWorkflow

Bases: BaseInferenceWorkflow

Base workflow object for text-to-speech inference.

Source code in src/infernet_ml/workflows/inference/tts_inference_workflow.py
class TTSInferenceWorkflow(BaseInferenceWorkflow):
    """
    Base workflow object for text-to-speech inference.

    This class only declares the TTS-specific pieces: the `SAMPLE_RATE`
    attribute and the abstract `do_postprocessing` hook that child classes
    must implement.
    """

    # Declared without a value here; presumably each child class assigns it.
    SAMPLE_RATE: int
    """
    Sample Rate: The sample rate of the audio data. This is required for writing the
    audio data to a wav file.
    """

    def __init__(self, *args: Any, **kwargs: Any) -> None:
        """
        Forward all positional and keyword arguments, unchanged, to the base
        workflow's constructor.
        """
        super().__init__(*args, **kwargs)

    @abstractmethod
    def do_postprocessing(
        self, input_data: Any, audio_array: torch.Tensor
    ) -> AudioInferenceResult:
        """
        Post-processing of the audio array. This method should be implemented by the
        child class. In most cases it will include generation of a wav file.

        Args:
            input_data: the input associated with this inference (presumably the
                same input the workflow was run with; confirm against the base
                class).
            audio_array: the audio produced by the model, as a torch tensor.

        Returns:
            An AudioInferenceResult holding the processed audio.
        """
        pass

do_postprocessing(input_data, audio_array) abstractmethod

Post-processing of the audio array. This method should be implemented by the child class. In most cases it will include generation of a wav file.

Source code in src/infernet_ml/workflows/inference/tts_inference_workflow.py
@abstractmethod
def do_postprocessing(
    self,
    input_data: Any,
    audio_array: torch.Tensor,
) -> AudioInferenceResult:
    """
    Hook for post-processing the audio array.

    The base declaration does nothing; child classes are expected to provide
    the implementation, which in most cases will include generation of a wav
    file.
    """
    ...