@@ -229,6 +229,55 @@ def analyze_shots(path):
229229 # [END video_analyze_shots]
230230
231231
def speech_transcription(path):
    # [START video_speech_transcription]
    """Transcribe speech from a video stored on GCS."""
    # Imported locally so the sample region is self-contained.
    from google.cloud import videointelligence

    client = videointelligence.VideoIntelligenceServiceClient()
    wanted_features = [videointelligence.enums.Feature.SPEECH_TRANSCRIPTION]

    # Request US-English transcription with punctuation inserted
    # automatically, and wrap that configuration in a video context.
    speech_config = videointelligence.types.SpeechTranscriptionConfig(
        language_code='en-US',
        enable_automatic_punctuation=True,
    )
    context = videointelligence.types.VideoContext(
        speech_transcription_config=speech_config,
    )

    # Start the annotation request; the returned operation is waited on below.
    pending = client.annotate_video(
        path,
        features=wanted_features,
        video_context=context,
    )

    print('\n Processing video for speech transcription.')

    # Wait for the operation to finish, giving up after 600 seconds.
    response = pending.result(timeout=600)

    # Only a single video was submitted, so only the first
    # annotation result is read.
    first_result = response.annotation_results[0]

    for transcription in first_result.speech_transcriptions:
        # SpeechTranscriptionConfig.max_alternatives caps how many
        # alternatives come back per transcription. Every alternative is
        # one candidate transcript carrying its own confidence score.
        for candidate in transcription.alternatives:
            print('Alternative level information:')

            print('Transcript: {}'.format(candidate.transcript))
            print('Confidence: {}\n '.format(candidate.confidence))

            print('Word level information:')
            for entry in candidate.words:
                spoken = entry.word
                begins = entry.start_time
                finishes = entry.end_time

                # Offsets are read as a seconds field plus a nanos field;
                # combine them into fractional seconds for display.
                begin_seconds = begins.seconds + begins.nanos * 1e-9
                finish_seconds = finishes.seconds + finishes.nanos * 1e-9

                print('\t {}s - {}s: {}'.format(
                    begin_seconds, finish_seconds, spoken))
    # [END video_speech_transcription]
279+
280+
232281if __name__ == '__main__' :
233282 parser = argparse .ArgumentParser (
234283 description = __doc__ ,
@@ -246,6 +295,9 @@ def analyze_shots(path):
246295 analyze_shots_parser = subparsers .add_parser (
247296 'shots' , help = analyze_shots .__doc__ )
248297 analyze_shots_parser .add_argument ('path' )
298+ transcribe_speech_parser = subparsers .add_parser (
299+ 'transcribe' , help = speech_transcription .__doc__ )
300+ transcribe_speech_parser .add_argument ('path' )
249301
250302 args = parser .parse_args ()
251303
@@ -257,3 +309,5 @@ def analyze_shots(path):
257309 analyze_shots (args .path )
258310 if args .command == 'explicit_content' :
259311 analyze_explicit_content (args .path )
312+ if args .command == 'transcribe' :
313+ speech_transcription (args .path )
0 commit comments