How to process audio into text?

Problem 😱

You want to transcribe an audio file into text.

Solution 🤓

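We will use gapi, which offers the ability to transform audio files into text. The input file is first converted to single-channel FLAC with ffmpeg, and the result is sent for recognition.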

import json
import subprocess

# NOTE(review): the module paths were missing from these two imports and have
# been reconstructed. `speech.SpeechClient` is provided by `google.cloud`; the
# location of `cloud_speech_pb2` depends on the installed google-cloud-speech
# release, so confirm this path against the version you use.
from google.cloud import speech
from google.cloud.speech_v1.proto import cloud_speech_pb2

speech_client = speech.SpeechClient()

path_to_file = './input.mp4'
output_path_to_file = './out.flac'

# Extract the audio track as single-channel FLAC, i.e. run:
#   ffmpeg -i ./input.mp4 -ac 1 -c:a flac ./out.flac
# The arguments are passed as a list (no shell involved). check_call raises
# CalledProcessError when ffmpeg exits with a non-zero status, so a failed
# conversion stops the script instead of surfacing later as a missing file.
subprocess.check_call([
    'ffmpeg',
    '-i', path_to_file,
    '-ac', '1',
    '-c:a', 'flac',
    output_path_to_file,
])

def parse_speech_recognition_result(speech_recognition_result):
    """Flatten a recognition response into a JSON-serializable dict.

    Args:
        speech_recognition_result: Object exposing ``results``, an iterable
            whose items each expose ``alternatives``; every alternative must
            provide ``transcript`` and ``confidence`` attributes.

    Returns:
        ``{'data': [...]}`` where the list holds one
        ``{'transcript': ..., 'confidence': ...}`` entry per alternative, in
        the order the alternatives appear in the response. The list is empty
        when the response contains no results.
    """
    data = {'data': []}
    for result in speech_recognition_result.results:
        for alternative in result.alternatives:
            data['data'].append({
                'transcript': alternative.transcript,
                'confidence': alternative.confidence,
            })
    return data

# Load the converted FLAC file into memory; the file handle is only needed
# for this read, so it is closed before the API call is made.
with open(output_path_to_file, 'rb') as recording_file:
    recording_bytes = recording_file.read()

audio = cloud_speech_pb2.RecognitionAudio(content=recording_bytes)
# NOTE(review): 44100 Hz is hard-coded, while the ffmpeg step sets no explicit
# sample rate and therefore keeps the rate of the source audio. Confirm the
# input really is 44.1 kHz, or adjust this value to match.
config = cloud_speech_pb2.RecognitionConfig(encoding="FLAC", sample_rate_hertz=44100, language_code="en-US")
speech_recognition_result = speech_client.recognize(config=config, audio=audio)
result = parse_speech_recognition_result(speech_recognition_result)

# JSON string with every transcript/confidence pair found in the response.
dump_result = json.dumps(result)

By @PythonicNinja in
Tags: #python, #speech-to-text, #gapi, #google, #speech, #text