Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 54 additions & 0 deletions video/cloud-client/analyze/analyze.py
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,55 @@ def analyze_shots(path):
# [END video_analyze_shots]


def speech_transcription(path):
    # [START video_speech_transcription]
    """Transcribe speech from a video stored on GCS."""
    from google.cloud import videointelligence

    client = videointelligence.VideoIntelligenceServiceClient()
    requested_features = [
        videointelligence.enums.Feature.SPEECH_TRANSCRIPTION]

    # Ask for en-US transcription with automatic punctuation enabled.
    speech_config = videointelligence.types.SpeechTranscriptionConfig(
        language_code='en-US',
        enable_automatic_punctuation=True)
    context = videointelligence.types.VideoContext(
        speech_transcription_config=speech_config)

    operation = client.annotate_video(
        path, features=requested_features, video_context=context)

    print('\nProcessing video for speech transcription.')

    # Wait for the annotation operation to complete (timeout=600).
    response = operation.result(timeout=600)

    # Only one video is submitted, so only the first annotation result
    # is read.
    first_result = response.annotation_results[0]
    for transcription in first_result.speech_transcriptions:
        # SpeechTranscriptionConfig.max_alternatives caps how many
        # alternatives come back; every alternative is one candidate
        # transcription with its own confidence score.
        for candidate in transcription.alternatives:
            print('Alternative level information:')

            print('Transcript: {}'.format(candidate.transcript))
            print('Confidence: {}\n'.format(candidate.confidence))

            print('Word level information:')
            for entry in candidate.words:
                begin, finish = entry.start_time, entry.end_time
                # Fold the seconds/nanos pair into one fractional value.
                begin_secs = begin.seconds + begin.nanos * 1e-9
                finish_secs = finish.seconds + finish.nanos * 1e-9
                print('\t{}s - {}s: {}'.format(
                    begin_secs, finish_secs, entry.word))
    # [END video_speech_transcription]


if __name__ == '__main__':
parser = argparse.ArgumentParser(
description=__doc__,
Expand All @@ -246,6 +295,9 @@ def analyze_shots(path):
analyze_shots_parser = subparsers.add_parser(
'shots', help=analyze_shots.__doc__)
analyze_shots_parser.add_argument('path')
transcribe_speech_parser = subparsers.add_parser(
'transcribe', help=speech_transcription.__doc__)
transcribe_speech_parser.add_argument('path')

args = parser.parse_args()

Expand All @@ -257,3 +309,5 @@ def analyze_shots(path):
analyze_shots(args.path)
if args.command == 'explicit_content':
analyze_explicit_content(args.path)
if args.command == 'transcribe':
speech_transcription(args.path)
8 changes: 8 additions & 0 deletions video/cloud-client/analyze/analyze_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,3 +38,11 @@ def test_analyze_explicit_content(capsys):
analyze.analyze_explicit_content('gs://demomaker/cat.mp4')
out, _ = capsys.readouterr()
assert 'pornography' in out


@pytest.mark.slow
def test_speech_transcription(capsys):
    """Run the transcription sample on a short clip and check its output."""
    video_uri = 'gs://python-docs-samples-tests/video/googlework_short.mp4'
    analyze.speech_transcription(video_uri)

    # The sample reports through print(), so inspect captured stdout.
    printed, _ = capsys.readouterr()
    assert 'cultural' in printed