 
 Usage Examples:
 
-    python analyze.py labels gs://cloud-ml-sandbox/video/chicago.mp4
+    python analyze.py labels gs://cloud-samples-data/video/chicago.mp4
     python analyze.py labels_file resources/cat.mp4
-    python analyze.py shots gs://demomaker/gbikes_dinosaur.mp4
-    python analyze.py explicit_content gs://demomaker/gbikes_dinosaur.mp4
-
+    python analyze.py shots gs://cloud-samples-data/video/gbikes_dinosaur.mp4
+    python analyze.py explicit_content \
+        gs://cloud-samples-data/video/gbikes_dinosaur.mp4
+    python analyze.py text_gcs \
+        gs://cloud-samples-data/video/googlework_short.mp4
+    python analyze.py text_file resources/googlework_short.mp4
+    python analyze.py objects_gcs gs://cloud-samples-data/video/cat.mp4
+    python analyze.py objects_file resources/cat.mp4
 """
 
 import argparse
@@ -278,27 +283,232 @@ def speech_transcription(path):
     # [END video_speech_transcription_gcs]
 
 
+def video_detect_text_gcs(input_uri):
+    # [START video_detect_text_gcs]
+    """Detect text in a video stored on GCS."""
+    from google.cloud import videointelligence
+
+    video_client = videointelligence.VideoIntelligenceServiceClient()
+    features = [videointelligence.enums.Feature.TEXT_DETECTION]
+
+    operation = video_client.annotate_video(
+        input_uri=input_uri,
+        features=features)
+
+    print('\nProcessing video for text detection.')
+    result = operation.result(timeout=300)
+
+    # The first result is retrieved because a single video was processed.
+    annotation_result = result.annotation_results[0]
+
+    for text_annotation in annotation_result.text_annotations:
+        print('\nText: {}'.format(text_annotation.text))
+
+        # Get the first text segment.
+        text_segment = text_annotation.segments[0]
+        start_time = text_segment.segment.start_time_offset
+        end_time = text_segment.segment.end_time_offset
+        print('start_time: {}, end_time: {}'.format(
+            start_time.seconds + start_time.nanos * 1e-9,
+            end_time.seconds + end_time.nanos * 1e-9))
+
+        print('Confidence: {}'.format(text_segment.confidence))
+
+        # Show the result for the first frame in this segment.
+        frame = text_segment.frames[0]
+        time_offset = frame.time_offset
+        print('Time offset for the first frame: {}'.format(
+            time_offset.seconds + time_offset.nanos * 1e-9))
+        print('Rotated Bounding Box Vertices:')
+        for vertex in frame.rotated_bounding_box.vertices:
+            print('\tVertex.x: {}, Vertex.y: {}'.format(vertex.x, vertex.y))
+    # [END video_detect_text_gcs]
+
+
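Note: the `seconds + nanos * 1e-9` arithmetic above converts a protobuf Duration into fractional seconds, and the same pattern repeats in every function below. A minimal helper that captures it (a sketch only; `duration_to_seconds` is not part of this commit):

    def duration_to_seconds(duration):
        # A protobuf Duration carries whole seconds plus a nanosecond
        # remainder; combine the two into a single float.
        return duration.seconds + duration.nanos * 1e-9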
+def video_detect_text(path):
+    # [START video_detect_text]
+    """Detect text in a local video."""
+    from google.cloud import videointelligence
+
+    video_client = videointelligence.VideoIntelligenceServiceClient()
+    features = [videointelligence.enums.Feature.TEXT_DETECTION]
+    video_context = videointelligence.types.VideoContext()
+
+    with io.open(path, 'rb') as file:
+        input_content = file.read()
+
+    operation = video_client.annotate_video(
+        input_content=input_content,  # the bytes of the video file
+        features=features,
+        video_context=video_context)
+
+    print('\nProcessing video for text detection.')
+    result = operation.result(timeout=300)
+
+    # The first result is retrieved because a single video was processed.
+    annotation_result = result.annotation_results[0]
+
+    for text_annotation in annotation_result.text_annotations:
+        print('\nText: {}'.format(text_annotation.text))
+
+        # Get the first text segment.
+        text_segment = text_annotation.segments[0]
+        start_time = text_segment.segment.start_time_offset
+        end_time = text_segment.segment.end_time_offset
+        print('start_time: {}, end_time: {}'.format(
+            start_time.seconds + start_time.nanos * 1e-9,
+            end_time.seconds + end_time.nanos * 1e-9))
+
+        print('Confidence: {}'.format(text_segment.confidence))
+
+        # Show the result for the first frame in this segment.
+        frame = text_segment.frames[0]
+        time_offset = frame.time_offset
+        print('Time offset for the first frame: {}'.format(
+            time_offset.seconds + time_offset.nanos * 1e-9))
+        print('Rotated Bounding Box Vertices:')
+        for vertex in frame.rotated_bounding_box.vertices:
+            print('\tVertex.x: {}, Vertex.y: {}'.format(vertex.x, vertex.y))
+    # [END video_detect_text]
+
+
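Note: both text-detection variants block on `operation.result(timeout=300)`, which raises `concurrent.futures.TimeoutError` if the long-running operation does not finish in time. A hedged sketch of wrapping that call (the helper name is an assumption, not part of this commit):

    import concurrent.futures

    def wait_for_annotation(operation, timeout=300):
        # result() polls the long-running operation until it completes
        # or the timeout elapses.
        try:
            return operation.result(timeout=timeout)
        except concurrent.futures.TimeoutError:
            raise RuntimeError(
                'Video annotation did not finish within '
                '{} seconds.'.format(timeout))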
+def track_objects_gcs(gcs_uri):
+    # [START video_object_tracking_gcs]
+    """Object tracking in a video stored on GCS."""
+    from google.cloud import videointelligence
+
+    video_client = videointelligence.VideoIntelligenceServiceClient()
+    features = [videointelligence.enums.Feature.OBJECT_TRACKING]
+    operation = video_client.annotate_video(
+        input_uri=gcs_uri, features=features)
+    print('\nProcessing video for object annotations.')
+
+    result = operation.result(timeout=300)
+    print('\nFinished processing.\n')
+
+    # The first result is retrieved because a single video was processed.
+    object_annotations = result.annotation_results[0].object_annotations
+
+    for object_annotation in object_annotations:
+        print('Entity description: {}'.format(
+            object_annotation.entity.description))
+        if object_annotation.entity.entity_id:
+            print('Entity id: {}'.format(object_annotation.entity.entity_id))
+
+        print('Segment: {}s to {}s'.format(
+            object_annotation.segment.start_time_offset.seconds +
+            object_annotation.segment.start_time_offset.nanos / 1e9,
+            object_annotation.segment.end_time_offset.seconds +
+            object_annotation.segment.end_time_offset.nanos / 1e9))
+
+        print('Confidence: {}'.format(object_annotation.confidence))
+
+        # Here we print only the bounding box of the first frame in the segment.
+        frame = object_annotation.frames[0]
+        box = frame.normalized_bounding_box
+        print('Time offset of the first frame: {}s'.format(
+            frame.time_offset.seconds + frame.time_offset.nanos / 1e9))
+        print('Bounding box position:')
+        print('\tleft  : {}'.format(box.left))
+        print('\ttop   : {}'.format(box.top))
+        print('\tright : {}'.format(box.right))
+        print('\tbottom: {}'.format(box.bottom))
+        print('\n')
+    # [END video_object_tracking_gcs]
+
+
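Note: the `normalized_bounding_box` fields are fractions of the frame size in [0, 1]. Mapping a box back to pixels requires the frame dimensions, which the API response does not include; a sketch assuming they are known:

    def box_to_pixels(box, frame_width, frame_height):
        # Scale normalized [0, 1] coordinates to absolute pixel values.
        return {
            'left': int(box.left * frame_width),
            'top': int(box.top * frame_height),
            'right': int(box.right * frame_width),
            'bottom': int(box.bottom * frame_height),
        }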
+def track_objects(path):
+    # [START video_object_tracking]
+    """Object tracking in a local video."""
+    from google.cloud import videointelligence
+
+    video_client = videointelligence.VideoIntelligenceServiceClient()
+    features = [videointelligence.enums.Feature.OBJECT_TRACKING]
+
+    with io.open(path, 'rb') as file:
+        input_content = file.read()
+
+    operation = video_client.annotate_video(
+        input_content=input_content, features=features)
+    print('\nProcessing video for object annotations.')
+
+    result = operation.result(timeout=300)
+    print('\nFinished processing.\n')
+
+    # The first result is retrieved because a single video was processed.
+    object_annotations = result.annotation_results[0].object_annotations
+
+    # Get only the first annotation for demo purposes.
+    object_annotation = object_annotations[0]
+    print('Entity description: {}'.format(
+        object_annotation.entity.description))
+    if object_annotation.entity.entity_id:
+        print('Entity id: {}'.format(object_annotation.entity.entity_id))
+
+    print('Segment: {}s to {}s'.format(
+        object_annotation.segment.start_time_offset.seconds +
+        object_annotation.segment.start_time_offset.nanos / 1e9,
+        object_annotation.segment.end_time_offset.seconds +
+        object_annotation.segment.end_time_offset.nanos / 1e9))
+
+    print('Confidence: {}'.format(object_annotation.confidence))
+
+    # Here we print only the bounding box of the first frame in this segment.
+    frame = object_annotation.frames[0]
+    box = frame.normalized_bounding_box
+    print('Time offset of the first frame: {}s'.format(
+        frame.time_offset.seconds + frame.time_offset.nanos / 1e9))
+    print('Bounding box position:')
+    print('\tleft  : {}'.format(box.left))
+    print('\ttop   : {}'.format(box.top))
+    print('\tright : {}'.format(box.right))
+    print('\tbottom: {}'.format(box.bottom))
+    print('\n')
+    # [END video_object_tracking]
+
+
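Note: the local variant prints only the first annotation for demo purposes. Listing every tracked object above a confidence threshold is a small extension (the 0.7 cutoff is an arbitrary illustration):

    for annotation in object_annotations:
        if annotation.confidence >= 0.7:
            print('{} (confidence {:.0%})'.format(
                annotation.entity.description, annotation.confidence))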
 if __name__ == '__main__':
     parser = argparse.ArgumentParser(
         description=__doc__,
         formatter_class=argparse.RawDescriptionHelpFormatter)
     subparsers = parser.add_subparsers(dest='command')
+
     analyze_labels_parser = subparsers.add_parser(
         'labels', help=analyze_labels.__doc__)
     analyze_labels_parser.add_argument('path')
+
     analyze_labels_file_parser = subparsers.add_parser(
         'labels_file', help=analyze_labels_file.__doc__)
     analyze_labels_file_parser.add_argument('path')
+
     analyze_explicit_content_parser = subparsers.add_parser(
         'explicit_content', help=analyze_explicit_content.__doc__)
     analyze_explicit_content_parser.add_argument('path')
+
     analyze_shots_parser = subparsers.add_parser(
         'shots', help=analyze_shots.__doc__)
     analyze_shots_parser.add_argument('path')
+
     transcribe_speech_parser = subparsers.add_parser(
         'transcribe', help=speech_transcription.__doc__)
     transcribe_speech_parser.add_argument('path')
 
+    detect_text_parser = subparsers.add_parser(
+        'text_gcs', help=video_detect_text_gcs.__doc__)
+    detect_text_parser.add_argument('path')
+
+    detect_text_file_parser = subparsers.add_parser(
+        'text_file', help=video_detect_text.__doc__)
+    detect_text_file_parser.add_argument('path')
+
+    track_objects_parser = subparsers.add_parser(
+        'objects_gcs', help=track_objects_gcs.__doc__)
+    track_objects_parser.add_argument('path')
+
+    track_objects_file_parser = subparsers.add_parser(
+        'objects_file', help=track_objects.__doc__)
+    track_objects_file_parser.add_argument('path')
+
     args = parser.parse_args()
 
     if args.command == 'labels':
@@ -311,3 +521,11 @@ def speech_transcription(path):
         analyze_explicit_content(args.path)
     if args.command == 'transcribe':
         speech_transcription(args.path)
+    if args.command == 'text_gcs':
+        video_detect_text_gcs(args.path)
+    if args.command == 'text_file':
+        video_detect_text(args.path)
+    if args.command == 'objects_gcs':
+        track_objects_gcs(args.path)
+    if args.command == 'objects_file':
+        track_objects(args.path)
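Note: each new subcommand adds one parser entry and one dispatch branch, matching the existing pattern. A table-driven dispatch would keep the chain from growing; a sketch of the equivalent (not what this commit does):

    commands = {
        'text_gcs': video_detect_text_gcs,
        'text_file': video_detect_text,
        'objects_gcs': track_objects_gcs,
        'objects_file': track_objects,
    }
    if args.command in commands:
        commands[args.command](args.path)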