diff --git a/video/cloud-client/analyze/README.rst b/video/cloud-client/analyze/README.rst
index a2dad1c3292..82b8cee3642 100644
--- a/video/cloud-client/analyze/README.rst
+++ b/video/cloud-client/analyze/README.rst
@@ -100,6 +100,46 @@ To run this sample:
 
 
 
+beta samples
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+
+.. image:: https://gstatic.com/cloudssh/images/open-btn.png
+   :target: https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/GoogleCloudPlatform/python-docs-samples&page=editor&open_in_editor=video/cloud-client/analyze/beta_snippets.py,video/cloud-client/analyze/README.rst
+
+
+
+
+To run this sample:
+
+.. code-block:: bash
+
+    $ python beta_snippets.py
+
+    usage: beta_snippets.py [-h]
+                            {transcription,video-text-gcs,video-text,track-objects-gcs,track-objects}
+                            ...
+
+    This application demonstrates speech transcription using the
+    Google Cloud API.
+
+    Usage Examples:
+        python beta_snippets.py transcription gs://python-docs-samples-tests/video/googlework_short.mp4
+        python beta_snippets.py video-text-gcs gs://python-docs-samples-tests/video/googlework_short.mp4
+        python beta_snippets.py track-objects resources/cat.mp4
+
+    positional arguments:
+      {transcription,video-text-gcs,video-text,track-objects-gcs,track-objects}
+        transcription       Transcribe speech from a video stored on GCS.
+        video-text-gcs      Detect text in a video stored on GCS.
+        video-text          Detect text in a local video.
+        track-objects-gcs   Track objects in a video stored on GCS.
+        track-objects       Track objects in a local video.
+
+    optional arguments:
+      -h, --help            show this help message and exit
+
+
 
 The client library
diff --git a/video/cloud-client/analyze/README.rst.in b/video/cloud-client/analyze/README.rst.in
index 5dc673c1fe1..a01d163f930 100644
--- a/video/cloud-client/analyze/README.rst.in
+++ b/video/cloud-client/analyze/README.rst.in
@@ -16,6 +16,9 @@ samples:
 - name: analyze
   file: analyze.py
   show_help: True
+- name: beta samples
+  file: beta_snippets.py
+  show_help: True
 
 cloud_client_library: true
 
diff --git a/video/cloud-client/analyze/beta_snippets.py b/video/cloud-client/analyze/beta_snippets.py
index 1e9ab2d7329..d50f41d5549 100644
--- a/video/cloud-client/analyze/beta_snippets.py
+++ b/video/cloud-client/analyze/beta_snippets.py
@@ -18,18 +18,22 @@ Google Cloud API.
 Usage Examples:
-    python beta_snippets.py \
-    transcription gs://python-docs-samples-tests/video/googlework_short.mp4
+    python beta_snippets.py transcription \
+    gs://python-docs-samples-tests/video/googlework_short.mp4
+    python beta_snippets.py video-text-gcs \
+    gs://python-docs-samples-tests/video/googlework_short.mp4
+    python beta_snippets.py track-objects resources/cat.mp4
 """
 
 import argparse
+import io
 
-from google.cloud import videointelligence_v1p1beta1 as videointelligence
-
-# [START video_speech_transcription_gcs_beta]
 def speech_transcription(input_uri):
+    # [START video_speech_transcription_gcs_beta]
     """Transcribe speech from a video stored on GCS."""
+    from google.cloud import videointelligence_v1p1beta1 as videointelligence
+
     video_client = videointelligence.VideoIntelligenceServiceClient()
 
     features = [videointelligence.enums.Feature.SPEECH_TRANSCRIPTION]
@@ -66,7 +70,202 @@ def speech_transcription(input_uri):
             start_time.seconds + start_time.nanos * 1e-9,
             end_time.seconds + end_time.nanos * 1e-9,
             word))
-# [END video_speech_transcription_gcs_beta]
+    # [END video_speech_transcription_gcs_beta]
+
+
+def video_detect_text_gcs(input_uri):
+    # [START video_detect_text_gcs_beta]
+    """Detect text in a video stored on GCS."""
+    from google.cloud import videointelligence_v1p2beta1 as videointelligence
+
+    video_client = videointelligence.VideoIntelligenceServiceClient()
+    features = [videointelligence.enums.Feature.TEXT_DETECTION]
+
+    operation = video_client.annotate_video(
+        input_uri=input_uri,
+        features=features)
+
+    print('\nProcessing video for text detection.')
+    result = operation.result(timeout=300)
+
+    # The first result is retrieved because a single video was processed.
+    annotation_result = result.annotation_results[0]
+
+    # Get only the first result
+    text_annotation = annotation_result.text_annotations[0]
+    print('\nText: {}'.format(text_annotation.text))
+
+    # Get the first text segment
+    text_segment = text_annotation.segments[0]
+    start_time = text_segment.segment.start_time_offset
+    end_time = text_segment.segment.end_time_offset
+    print('start_time: {}, end_time: {}'.format(
+        start_time.seconds + start_time.nanos * 1e-9,
+        end_time.seconds + end_time.nanos * 1e-9))
+
+    print('Confidence: {}'.format(text_segment.confidence))
+
+    # Show the result for the first frame in this segment.
+    frame = text_segment.frames[0]
+    time_offset = frame.time_offset
+    print('Time offset for the first frame: {}'.format(
+        time_offset.seconds + time_offset.nanos * 1e-9))
+    print('Rotated Bounding Box Vertices:')
+    for vertex in frame.rotated_bounding_box.vertices:
+        print('\tVertex.x: {}, Vertex.y: {}'.format(vertex.x, vertex.y))
+    # [END video_detect_text_gcs_beta]
+    return annotation_result.text_annotations
+
+
+def video_detect_text(path):
+    # [START video_detect_text_beta]
+    """Detect text in a local video."""
+    from google.cloud import videointelligence_v1p2beta1 as videointelligence
+
+    video_client = videointelligence.VideoIntelligenceServiceClient()
+    features = [videointelligence.enums.Feature.TEXT_DETECTION]
+    video_context = videointelligence.types.VideoContext()
+
+    with io.open(path, 'rb') as f:
+        input_content = f.read()
+
+    operation = video_client.annotate_video(
+        input_content=input_content,  # the bytes of the video file
+        features=features,
+        video_context=video_context)
+
+    print('\nProcessing video for text detection.')
+    result = operation.result(timeout=300)
+
+    # The first result is retrieved because a single video was processed.
+    annotation_result = result.annotation_results[0]
+
+    # Get only the first result
+    text_annotation = annotation_result.text_annotations[0]
+    print('\nText: {}'.format(text_annotation.text))
+
+    # Get the first text segment
+    text_segment = text_annotation.segments[0]
+    start_time = text_segment.segment.start_time_offset
+    end_time = text_segment.segment.end_time_offset
+    print('start_time: {}, end_time: {}'.format(
+        start_time.seconds + start_time.nanos * 1e-9,
+        end_time.seconds + end_time.nanos * 1e-9))
+
+    print('Confidence: {}'.format(text_segment.confidence))
+
+    # Show the result for the first frame in this segment.
+    frame = text_segment.frames[0]
+    time_offset = frame.time_offset
+    print('Time offset for the first frame: {}'.format(
+        time_offset.seconds + time_offset.nanos * 1e-9))
+    print('Rotated Bounding Box Vertices:')
+    for vertex in frame.rotated_bounding_box.vertices:
+        print('\tVertex.x: {}, Vertex.y: {}'.format(vertex.x, vertex.y))
+    # [END video_detect_text_beta]
+    return annotation_result.text_annotations
+
+
+def track_objects_gcs(gcs_uri):
+    # [START video_object_tracking_gcs_beta]
+    """Track objects in a video stored on GCS."""
+    from google.cloud import videointelligence_v1p2beta1 as videointelligence
+
+    video_client = videointelligence.VideoIntelligenceServiceClient()
+    features = [videointelligence.enums.Feature.OBJECT_TRACKING]
+    # Setting location_id to 'us-east1' is recommended for the best latency,
+    # because this region uses a different type of processor than the others.
+    operation = video_client.annotate_video(
+        input_uri=gcs_uri, features=features, location_id='us-east1')
+    print('\nProcessing video for object annotations.')
+
+    result = operation.result(timeout=300)
+    print('\nFinished processing.\n')
+
+    # The first result is retrieved because a single video was processed.
+    object_annotations = result.annotation_results[0].object_annotations
+
+    # Get only the first annotation for demo purposes.
+    object_annotation = object_annotations[0]
+    print('Entity description: {}'.format(
+        object_annotation.entity.description))
+    if object_annotation.entity.entity_id:
+        print('Entity id: {}'.format(object_annotation.entity.entity_id))
+
+    print('Segment: {}s to {}s'.format(
+        object_annotation.segment.start_time_offset.seconds +
+        object_annotation.segment.start_time_offset.nanos / 1e9,
+        object_annotation.segment.end_time_offset.seconds +
+        object_annotation.segment.end_time_offset.nanos / 1e9))
+
+    print('Confidence: {}'.format(object_annotation.confidence))
+
+    # Here we print only the bounding box of the first frame in this segment
+    frame = object_annotation.frames[0]
+    box = frame.normalized_bounding_box
+    print('Time offset of the first frame: {}s'.format(
+        frame.time_offset.seconds + frame.time_offset.nanos / 1e9))
+    print('Bounding box position:')
+    print('\tleft  : {}'.format(box.left))
+    print('\ttop   : {}'.format(box.top))
+    print('\tright : {}'.format(box.right))
+    print('\tbottom: {}'.format(box.bottom))
+    print('\n')
+    # [END video_object_tracking_gcs_beta]
+    return object_annotations
+
+
+def track_objects(path):
+    # [START video_object_tracking_beta]
+    """Track objects in a local video."""
+    from google.cloud import videointelligence_v1p2beta1 as videointelligence
+
+    video_client = videointelligence.VideoIntelligenceServiceClient()
+    features = [videointelligence.enums.Feature.OBJECT_TRACKING]
+
+    with io.open(path, 'rb') as f:
+        input_content = f.read()
+
+    # Setting location_id to 'us-east1' is recommended for the best latency,
+    # because this region uses a different type of processor than the others.
+    operation = video_client.annotate_video(
+        input_content=input_content, features=features, location_id='us-east1')
+    print('\nProcessing video for object annotations.')
+
+    result = operation.result(timeout=300)
+    print('\nFinished processing.\n')
+
+    # The first result is retrieved because a single video was processed.
+    object_annotations = result.annotation_results[0].object_annotations
+
+    # Get only the first annotation for demo purposes.
+    object_annotation = object_annotations[0]
+    print('Entity description: {}'.format(
+        object_annotation.entity.description))
+    if object_annotation.entity.entity_id:
+        print('Entity id: {}'.format(object_annotation.entity.entity_id))
+
+    print('Segment: {}s to {}s'.format(
+        object_annotation.segment.start_time_offset.seconds +
+        object_annotation.segment.start_time_offset.nanos / 1e9,
+        object_annotation.segment.end_time_offset.seconds +
+        object_annotation.segment.end_time_offset.nanos / 1e9))
+
+    print('Confidence: {}'.format(object_annotation.confidence))
+
+    # Here we print only the bounding box of the first frame in this segment
+    frame = object_annotation.frames[0]
+    box = frame.normalized_bounding_box
+    print('Time offset of the first frame: {}s'.format(
+        frame.time_offset.seconds + frame.time_offset.nanos / 1e9))
+    print('Bounding box position:')
+    print('\tleft  : {}'.format(box.left))
+    print('\ttop   : {}'.format(box.top))
+    print('\tright : {}'.format(box.right))
+    print('\tbottom: {}'.format(box.bottom))
+    print('\n')
+    # [END video_object_tracking_beta]
+    return object_annotations
 
 
 if __name__ == '__main__':
@@ -79,7 +278,31 @@ def speech_transcription(input_uri):
         'transcription', help=speech_transcription.__doc__)
     speech_transcription_parser.add_argument('gcs_uri')
 
+    video_text_gcs_parser = subparsers.add_parser(
+        'video-text-gcs', help=video_detect_text_gcs.__doc__)
+    video_text_gcs_parser.add_argument('gcs_uri')
+
+    video_text_parser = subparsers.add_parser(
+        'video-text', help=video_detect_text.__doc__)
+    video_text_parser.add_argument('path')
+
+    video_object_tracking_gcs_parser = subparsers.add_parser(
+        'track-objects-gcs', help=track_objects_gcs.__doc__)
+    video_object_tracking_gcs_parser.add_argument('gcs_uri')
+
+    video_object_tracking_parser = subparsers.add_parser(
+        'track-objects', help=track_objects.__doc__)
+    video_object_tracking_parser.add_argument('path')
+
     args = parser.parse_args()
 
     if args.command == 'transcription':
         speech_transcription(args.gcs_uri)
+    elif args.command == 'video-text-gcs':
+        video_detect_text_gcs(args.gcs_uri)
+    elif args.command == 'video-text':
+        video_detect_text(args.path)
+    elif args.command == 'track-objects-gcs':
+        track_objects_gcs(args.gcs_uri)
+    elif args.command == 'track-objects':
+        track_objects(args.path)
diff --git a/video/cloud-client/analyze/beta_snippets_test.py b/video/cloud-client/analyze/beta_snippets_test.py
index 3583edcd6fd..7dcec8e6481 100644
--- a/video/cloud-client/analyze/beta_snippets_test.py
+++ b/video/cloud-client/analyze/beta_snippets_test.py
@@ -18,6 +18,9 @@
 
 import beta_snippets
 
+POSSIBLE_TEXTS = ['Google', 'SUR', 'ROTO', 'Vice President', '58oo9',
+                  'LONDRES', 'OMAR', 'PARIS', 'METRO', 'RUE', 'CARLO']
+
 
 @pytest.mark.slow
 def test_speech_transcription(capsys):
@@ -25,3 +28,57 @@ def test_speech_transcription(capsys):
         'gs://python-docs-samples-tests/video/googlework_short.mp4')
     out, _ = capsys.readouterr()
     assert 'cultural' in out
+
+
+@pytest.mark.slow
+def test_detect_text():
+    in_file = './resources/googlework_short.mp4'
+    text_annotations = beta_snippets.video_detect_text(in_file)
+
+    text_exists = False
+    for text_annotation in text_annotations:
+        for possible_text in POSSIBLE_TEXTS:
+            if possible_text.upper() in text_annotation.text.upper():
+                text_exists = True
+    assert text_exists
+
+
+@pytest.mark.slow
+def test_detect_text_gcs():
+    in_file = 'gs://python-docs-samples-tests/video/googlework_short.mp4'
+    text_annotations = beta_snippets.video_detect_text_gcs(in_file)
+
+    text_exists = False
+    for text_annotation in text_annotations:
+        for possible_text in POSSIBLE_TEXTS:
+            if possible_text.upper() in text_annotation.text.upper():
+                text_exists = True
+    assert text_exists
+
+
+@pytest.mark.slow
+def test_track_objects():
+    in_file = './resources/cat.mp4'
+    object_annotations = beta_snippets.track_objects(in_file)
+
+    object_exists = False
+    for object_annotation in object_annotations:
+        if 'CAT' in object_annotation.entity.description.upper():
+            object_exists = True
+    assert object_exists
+    assert object_annotations[0].frames[0].normalized_bounding_box.left >= 0.0
+    assert object_annotations[0].frames[0].normalized_bounding_box.left <= 1.0
+
+
+@pytest.mark.slow
+def test_track_objects_gcs():
+    in_file = 'gs://demomaker/cat.mp4'
+    object_annotations = beta_snippets.track_objects_gcs(in_file)
+
+    object_exists = False
+    for object_annotation in object_annotations:
+        if 'CAT' in object_annotation.entity.description.upper():
+            object_exists = True
+    assert object_exists
+    assert object_annotations[0].frames[0].normalized_bounding_box.left >= 0.0
+    assert object_annotations[0].frames[0].normalized_bounding_box.left <= 1.0
diff --git a/video/cloud-client/analyze/requirements.txt b/video/cloud-client/analyze/requirements.txt
index 3e462bd856a..d3cf0047ed8 100644
--- a/video/cloud-client/analyze/requirements.txt
+++ b/video/cloud-client/analyze/requirements.txt
@@ -1 +1 @@
-google-cloud-videointelligence==1.3.0
+google-cloud-videointelligence==1.5.0
diff --git a/video/cloud-client/analyze/resources/cat.mp4 b/video/cloud-client/analyze/resources/cat.mp4
new file mode 100644
index 00000000000..0e071b9ec67
Binary files /dev/null and b/video/cloud-client/analyze/resources/cat.mp4 differ
diff --git a/video/cloud-client/analyze/resources/googlework_short.mp4 b/video/cloud-client/analyze/resources/googlework_short.mp4
new file mode 100644
index 00000000000..be0f40f8ad6
Binary files /dev/null and b/video/cloud-client/analyze/resources/googlework_short.mp4 differ
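
A side effect of this change is that each snippet now returns its annotation
list instead of only printing it, which is what lets the tests above assert on
the results. A minimal sketch of consuming those return values from another
script; the `resources/cat.mp4` path matches the bundled test clip, and the
0.7 confidence threshold is an arbitrary illustrative cutoff, not part of this
patch:

.. code-block:: python

    import beta_snippets

    # track_objects returns the same list of ObjectTrackingAnnotation
    # messages that the snippet prints, so callers can inspect it directly.
    annotations = beta_snippets.track_objects('resources/cat.mp4')

    for annotation in annotations:
        # Keep only reasonably confident tracks.
        if annotation.confidence >= 0.7:
            print(annotation.entity.description, annotation.confidence)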