viernes, 7 de junio de 2024

Google Speech recognition from a URI, for audio longer than 1 minute

 from google.cloud import speech

from google.oauth2 import service_account  # Import for service account credentials



def run_quickstart(
    audio_uri: str,
    credential_path: str = "/home/ambiorixg12/mycodes/google_speech/voice.json",
    *,
    long_running: bool = False,
    timeout: float = 600.0,
) -> "speech.RecognizeResponse | speech.LongRunningRecognizeResponse":
    """Transcribe audio stored at a Google Cloud Storage URI and print the transcripts.

    The request is sent with a fixed configuration: LINEAR16 encoding,
    a 16000 Hz sample rate and the ``en-US`` language code.

    Args:
        audio_uri: The ``gs://`` URI of the audio file in Google Cloud Storage.
        credential_path: Path to the service-account JSON key file used to
            authenticate. Defaults to the path that was previously hard-coded
            in this function; replace it with your actual key location.
        long_running: When ``False`` (the default) the synchronous
            ``recognize`` call is used, exactly as before. When ``True`` the
            request goes through ``long_running_recognize`` and this function
            blocks until the operation finishes; use this for recordings that
            are too long for the synchronous endpoint (roughly one minute,
            per the Speech-to-Text documentation).
        timeout: Maximum number of seconds to wait for the long-running
            operation to complete. Ignored when ``long_running`` is ``False``.

    Returns:
        The response object returned by the API. It is a
        ``speech.RecognizeResponse`` for the synchronous call, or the
        long-running operation's result otherwise; both expose ``results``.

    Raises:
        RuntimeError: If the credentials cannot be loaded or if anything
            fails while building or running the transcription request. The
            original exception is attached as ``__cause__``.
    """
    # Explicit credential handling: load the service-account key up front so
    # a bad path or malformed key is reported separately from API errors.
    try:
        credentials = service_account.Credentials.from_service_account_file(credential_path)
        print(f"Using Explicit Credentials from {credential_path}")
    except Exception as e:
        print(f"Error loading explicit credentials: {e}")
        # Chain the cause so the traceback shows why loading failed.
        raise RuntimeError("Failed to load explicit credentials") from e

    # Instantiate a client bound to the explicit credentials.
    client = speech.SpeechClient(credentials=credentials)

    try:
        # Reference the audio by URI instead of uploading its content.
        audio = speech.RecognitionAudio(uri=audio_uri)

        config = speech.RecognitionConfig(
            encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
            sample_rate_hertz=16000,
            language_code="en-US",
        )

        if long_running:
            # Asynchronous path: start the operation, then block until the
            # service reports completion or the timeout expires.
            operation = client.long_running_recognize(config=config, audio=audio)
            response = operation.result(timeout=timeout)
        else:
            # Synchronous path: a single blocking request.
            response = client.recognize(config=config, audio=audio)

        # Print the first alternative of every result.
        for result in response.results:
            print(f"Transcript: {result.alternatives[0].transcript}")

        return response

    except Exception as e:
        print(f"Transcription error: {e}")
        raise RuntimeError("Error occurred during transcription") from e



if __name__ == "__main__":
    # Script entry point: transcribe the sample recording stored in the
    # cloud-samples-data bucket.
    sample_uri = "gs://cloud-samples-data/speech/brooklyn_bridge.raw"
    run_quickstart(sample_uri)



https://cloud.google.com/speech-to-text/docs/async-recognize#:~:text=Attempting%20to%20transcribe%20local%20audio,the%20operation%20using%20the%20google.

https://codelabs.developers.google.com/codelabs/cloud-speech-text-python3#3

No hay comentarios:

Publicar un comentario