viernes, 7 de junio de 2024

Google Python Speech Recognition

import argparse
import os

from google.cloud import speech_v1p1beta1
from google.oauth2 import service_account


def transcribe_file(
    speech_file: str,
    credentials_file: str,
    *,
    sample_rate_hertz: int = 8000,
    language_code: str = "en-US",
) -> speech_v1p1beta1.types.RecognizeResponse:
    """Transcribe a local audio file with Google Cloud Speech-to-Text.

    The whole file is read into memory and sent in a single ``recognize``
    request configured for LINEAR16 audio. The first alternative of every
    result is printed to stdout.

    Args:
        speech_file: Path to the audio file to transcribe.
        credentials_file: Path to the service-account JSON file used to
            build the client credentials.
        sample_rate_hertz: Sample rate passed to the recognition config.
            Defaults to 8000, the value that used to be hard-coded.
        language_code: Language code passed to the recognition config.
            Defaults to "en-US", the value that used to be hard-coded.

    Returns:
        The response object returned by ``client.recognize``.
    """
    # Authentication
    credentials = service_account.Credentials.from_service_account_file(
        credentials_file
    )
    client = speech_v1p1beta1.SpeechClient(credentials=credentials)

    with open(speech_file, "rb") as audio_file:
        content = audio_file.read()

    audio = speech_v1p1beta1.RecognitionAudio(content=content)
    config = speech_v1p1beta1.RecognitionConfig(
        encoding=speech_v1p1beta1.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=sample_rate_hertz,
        language_code=language_code,
    )

    response = client.recognize(config=config, audio=audio)

    # Print the transcription
    for result in response.results:
        # Skip a result with no alternatives rather than raising IndexError.
        if not result.alternatives:
            continue
        print(f"Transcript: {result.alternatives[0].transcript}")

    return response


# Command-line entry point: parse the arguments, then run the transcription
if __name__ == "__main__":
    cli = argparse.ArgumentParser(
        description="Transcribe audio file using Google Cloud Speech-to-Text API"
    )
    cli.add_argument(
        "speech_file",
        help="Path to the audio file to transcribe",
    )
    cli.add_argument(
        "--credentials_file",
        default="/home/ambiorixg12/mycodes/google_speech/voice.json",
        help="Path to the JSON file containing Google Cloud credentials",
    )
    options = cli.parse_args()

    # Hand both paths to the transcription routine; it prints the transcript.
    transcribe_file(
        speech_file=options.speech_file,
        credentials_file=options.credentials_file,
    )


No hay comentarios:

Publicar un comentario