1616
1717package com .example .vision ;
1818
19+ import com .google .api .gax .longrunning .OperationFuture ;
20+ import com .google .cloud .storage .Blob ;
21+ import com .google .cloud .storage .Bucket ;
22+ import com .google .cloud .storage .Storage ;
23+ import com .google .cloud .storage .Storage .BlobListOption ;
24+ import com .google .cloud .storage .StorageOptions ;
25+ import com .google .cloud .vision .v1 .AnnotateFileResponse ;
26+ import com .google .cloud .vision .v1 .AnnotateFileResponse .Builder ;
1927import com .google .cloud .vision .v1 .AnnotateImageRequest ;
2028import com .google .cloud .vision .v1 .AnnotateImageResponse ;
29+ import com .google .cloud .vision .v1 .AsyncAnnotateFileRequest ;
30+ import com .google .cloud .vision .v1 .AsyncAnnotateFileResponse ;
31+ import com .google .cloud .vision .v1 .AsyncBatchAnnotateFilesResponse ;
2132import com .google .cloud .vision .v1 .BatchAnnotateImagesResponse ;
2233import com .google .cloud .vision .v1 .Block ;
2334import com .google .cloud .vision .v1 .ColorInfo ;
2839import com .google .cloud .vision .v1 .FaceAnnotation ;
2940import com .google .cloud .vision .v1 .Feature ;
3041import com .google .cloud .vision .v1 .Feature .Type ;
42+ import com .google .cloud .vision .v1 .GcsDestination ;
43+ import com .google .cloud .vision .v1 .GcsSource ;
3144import com .google .cloud .vision .v1 .Image ;
3245import com .google .cloud .vision .v1 .ImageAnnotatorClient ;
3346import com .google .cloud .vision .v1 .ImageContext ;
3447import com .google .cloud .vision .v1 .ImageSource ;
48+ import com .google .cloud .vision .v1 .InputConfig ;
3549import com .google .cloud .vision .v1 .LocationInfo ;
50+ import com .google .cloud .vision .v1 .OperationMetadata ;
51+ import com .google .cloud .vision .v1 .OutputConfig ;
3652import com .google .cloud .vision .v1 .Page ;
3753import com .google .cloud .vision .v1 .Paragraph ;
3854import com .google .cloud .vision .v1 .SafeSearchAnnotation ;
4864
4965import com .google .protobuf .ByteString ;
5066
67+ import com .google .protobuf .util .JsonFormat ;
5168import java .io .FileInputStream ;
5269import java .io .IOException ;
5370import java .io .PrintStream ;
5471import java .util .ArrayList ;
5572import java .util .Arrays ;
5673import java .util .List ;
74+ import java .util .concurrent .TimeUnit ;
75+ import java .util .regex .Matcher ;
76+ import java .util .regex .Pattern ;
5777
5878public class Detect {
5979
@@ -78,11 +98,16 @@ public static void argsHelper(String[] args, PrintStream out) throws Exception,
7898 out .println ("Usage:" );
7999 out .printf (
80100 "\t mvn exec:java -DDetect -Dexec.args=\" <command> <path-to-image>\" \n "
101+ + "\t mvn exec:java -DDetect -Dexec.args=\" ocr <path-to-file> <path-to-destination>\" "
102+ + "\n "
81103 + "Commands:\n "
82104 + "\t faces | labels | landmarks | logos | text | safe-search | properties"
83- + "| web | web-entities | web-entities-include-geo | crop \n "
105+ + "| web | web-entities | web-entities-include-geo | crop | ocr \n "
84106 + "Path:\n \t A file path (ex: ./resources/wakeupcat.jpg) or a URI for a Cloud Storage "
85- + "resource (gs://...)\n " );
107+ + "resource (gs://...)\n "
108+ + "Path to File:\n \t A path to the remote file on Cloud Storage (gs://...)\n "
109+ + "Path to Destination\n \t A path to the remote destination on Cloud Storage for the"
110+ + " file to be saved. (gs://BUCKET_NAME/PREFIX/)\n " );
86111 return ;
87112 }
88113 String command = args [0 ];
@@ -162,6 +187,9 @@ public static void argsHelper(String[] args, PrintStream out) throws Exception,
162187 } else {
163188 detectDocumentText (path , out );
164189 }
190+ } else if (command .equals ("ocr" )) {
191+ String destPath = args .length > 2 ? args [2 ] : "" ;
192+ detectDocumentsGcs (path , destPath );
165193 }
166194 }
167195
@@ -1277,4 +1305,123 @@ public static void detectDocumentTextGcs(String gcsPath, PrintStream out) throws
12771305 }
12781306 }
12791307 // [END vision_detect_document_uri]
1308+
1309+ // [START vision_async_detect_document_ocr]
1310+ /**
1311+ * Performs document text OCR with PDF/TIFF as source files on Google Cloud Storage.
1312+ *
1313+ * @param gcsSourcePath The path to the remote file on Google Cloud Storage to detect document
1314+ * text on.
1315+ * @param gcsDestinationPath The path to the remote file on Google Cloud Storage to store the
1316+ * results on.
1317+ * @throws Exception on errors while closing the client.
1318+ */
1319+ public static void detectDocumentsGcs (String gcsSourcePath , String gcsDestinationPath ) throws
1320+ Exception {
1321+ try (ImageAnnotatorClient client = ImageAnnotatorClient .create ()) {
1322+ List <AsyncAnnotateFileRequest > requests = new ArrayList <>();
1323+
1324+ // Set the GCS source path for the remote file.
1325+ GcsSource gcsSource = GcsSource .newBuilder ()
1326+ .setUri (gcsSourcePath )
1327+ .build ();
1328+
1329+ // Create the configuration with the specified MIME (Multipurpose Internet Mail Extensions)
1330+ // types
1331+ InputConfig inputConfig = InputConfig .newBuilder ()
1332+ .setMimeType ("application/pdf" ) // Supported MimeTypes: "application/pdf", "image/tiff"
1333+ .setGcsSource (gcsSource )
1334+ .build ();
1335+
1336+ // Set the GCS destination path for where to save the results.
1337+ GcsDestination gcsDestination = GcsDestination .newBuilder ()
1338+ .setUri (gcsDestinationPath )
1339+ .build ();
1340+
1341+ // Create the configuration for the output with the batch size.
1342+ // The batch size sets how many pages should be grouped into each json output file.
1343+ OutputConfig outputConfig = OutputConfig .newBuilder ()
1344+ .setBatchSize (2 )
1345+ .setGcsDestination (gcsDestination )
1346+ .build ();
1347+
1348+ // Select the Feature required by the vision API
1349+ Feature feature = Feature .newBuilder ().setType (Feature .Type .DOCUMENT_TEXT_DETECTION ).build ();
1350+
1351+ // Build the OCR request
1352+ AsyncAnnotateFileRequest request = AsyncAnnotateFileRequest .newBuilder ()
1353+ .addFeatures (feature )
1354+ .setInputConfig (inputConfig )
1355+ .setOutputConfig (outputConfig )
1356+ .build ();
1357+
1358+ requests .add (request );
1359+
1360+ // Perform the OCR request
1361+ OperationFuture <AsyncBatchAnnotateFilesResponse , OperationMetadata > response =
1362+ client .asyncBatchAnnotateFilesAsync (requests );
1363+
1364+ System .out .println ("Waiting for the operation to finish." );
1365+
1366+ // Wait for the request to finish. (The result is not used, since the API saves the result to
1367+ // the specified location on GCS.)
1368+ List <AsyncAnnotateFileResponse > result = response .get (180 , TimeUnit .SECONDS )
1369+ .getResponsesList ();
1370+
1371+ // Once the request has completed and the output has been
1372+ // written to GCS, we can list all the output files.
1373+ Storage storage = StorageOptions .getDefaultInstance ().getService ();
1374+
1375+ // Get the destination location from the gcsDestinationPath
1376+ Pattern pattern = Pattern .compile ("gs://([^/]+)/(.+)" );
1377+ Matcher matcher = pattern .matcher (gcsDestinationPath );
1378+
1379+ if (matcher .find ()) {
1380+ String bucketName = matcher .group (1 );
1381+ String prefix = matcher .group (2 );
1382+
1383+ // Get the list of objects with the given prefix from the GCS bucket
1384+ Bucket bucket = storage .get (bucketName );
1385+ com .google .api .gax .paging .Page <Blob > pageList = bucket .list (BlobListOption .prefix (prefix ));
1386+
1387+ Blob firstOutputFile = null ;
1388+
1389+ // List objects with the given prefix.
1390+ System .out .println ("Output files:" );
1391+ for (Blob blob : pageList .iterateAll ()) {
1392+ System .out .println (blob .getName ());
1393+
1394+ // Process the first output file from GCS.
1395+ // Since we specified batch size = 2, the first response contains
1396+ // the first two pages of the input file.
1397+ if (firstOutputFile == null ) {
1398+ firstOutputFile = blob ;
1399+ }
1400+ }
1401+
1402+ // Get the contents of the file and convert the JSON contents to an AnnotateFileResponse
1403+ // object. If the Blob is small read all its content in one request
1404+ // (Note: the file is a .json file)
1405+ // Storage guide: https://cloud.google.com/storage/docs/downloading-objects
1406+ String jsonContents = new String (firstOutputFile .getContent ());
1407+ Builder builder = AnnotateFileResponse .newBuilder ();
1408+ JsonFormat .parser ().merge (jsonContents , builder );
1409+
1410+ // Build the AnnotateFileResponse object
1411+ AnnotateFileResponse annotateFileResponse = builder .build ();
1412+
1413+ // Parse through the object to get the actual response for the first page of the input file.
1414+ AnnotateImageResponse annotateImageResponse = annotateFileResponse .getResponses (0 );
1415+
1416+ // Here we print the full text from the first page.
1417+ // The response contains more information:
1418+ // annotation/pages/blocks/paragraphs/words/symbols
1419+ // including confidence score and bounding boxes
1420+ System .out .format ("\n Text: %s\n " , annotateImageResponse .getFullTextAnnotation ().getText ());
1421+ } else {
1422+ System .out .println ("No MATCH" );
1423+ }
1424+ }
1425+ }
1426+ // [END vision_async_detect_document_ocr]
12801427}
0 commit comments