@@ -50,7 +50,7 @@ public static void main(String... args) throws Exception {
5050 System .out .printf (
5151 "\t java %s \" <command>\" \" <path-to-image>\" \n "
5252 + "Commands:\n "
53- + "\t syncrecognize | asyncrecognize | streamrecognize\n "
53+ + "\t syncrecognize | asyncrecognize | streamrecognize | wordoffsets \n "
5454 + "Path:\n \t A file path (ex: ./resources/audio.raw) or a URI "
5555 + "for a Cloud Storage resource (gs://...)\n " ,
5656 Recognize .class .getCanonicalName ());
@@ -66,6 +66,12 @@ public static void main(String... args) throws Exception {
6666 } else {
6767 syncRecognizeFile (path );
6868 }
69+ } else if (command .equals ("wordoffsets" )) {
70+ if (path .startsWith ("gs://" )) {
71+ asyncRecognizeWords (path );
72+ } else {
73+ syncRecognizeWords (path );
74+ }
6975 } else if (command .equals ("asyncrecognize" )) {
7076 if (path .startsWith ("gs://" )) {
7177 asyncRecognizeGcs (path );
@@ -113,6 +119,51 @@ public static void syncRecognizeFile(String fileName) throws Exception, IOExcept
113119 speech .close ();
114120 }
115121
122+ /**
123+ * Performs sync recognize and prints word time offsets.
124+ *
125+ * @param fileName the path to a PCM audio file to transcribe get offsets on.
126+ */
127+ public static void syncRecognizeWords (String fileName ) throws Exception , IOException {
128+ SpeechClient speech = SpeechClient .create ();
129+
130+ Path path = Paths .get (fileName );
131+ byte [] data = Files .readAllBytes (path );
132+ ByteString audioBytes = ByteString .copyFrom (data );
133+
134+ // Configure request with local raw PCM audio
135+ RecognitionConfig config = RecognitionConfig .newBuilder ()
136+ .setEncoding (AudioEncoding .LINEAR16 )
137+ .setLanguageCode ("en-US" )
138+ .setSampleRateHertz (16000 )
139+ .setEnableWordTimeOffsets (true )
140+ .build ();
141+ RecognitionAudio audio = RecognitionAudio .newBuilder ()
142+ .setContent (audioBytes )
143+ .build ();
144+
145+ // Use blocking call to get audio transcript
146+ RecognizeResponse response = speech .recognize (config , audio );
147+ List <SpeechRecognitionResult > results = response .getResultsList ();
148+
149+ for (SpeechRecognitionResult result : results ) {
150+ List <SpeechRecognitionAlternative > alternatives = result .getAlternativesList ();
151+ for (SpeechRecognitionAlternative alternative : alternatives ) {
152+ System .out .printf ("Transcription: %s%n" , alternative .getTranscript ());
153+ for (WordInfo wordInfo : alternative .getWordsList ()) {
154+ System .out .println (wordInfo .getWord ());
155+ System .out .printf ("\t %s.%s sec - %s.%s sec\n " ,
156+ wordInfo .getStartTime ().getSeconds (),
157+ wordInfo .getStartTime ().getNanos () / 100000000 ,
158+ wordInfo .getEndTime ().getSeconds (),
159+ wordInfo .getEndTime ().getNanos () / 100000000 );
160+ }
161+ }
162+ }
163+ speech .close ();
164+ }
165+
166+
116167 /**
117168 * Performs speech recognition on remote FLAC file and prints the transcription.
118169 *
@@ -193,11 +244,11 @@ public static void asyncRecognizeFile(String fileName) throws Exception, IOExcep
193244
194245 /**
195246 * Performs non-blocking speech recognition on remote FLAC file and prints
196- * the transcription.
247+ * the transcription as well as word time offsets .
197248 *
198249 * @param gcsUri the path to the remote LINEAR16 audio file to transcribe.
199250 */
200- public static void asyncRecognizeGcs (String gcsUri ) throws Exception , IOException {
251+ public static void asyncRecognizeWords (String gcsUri ) throws Exception , IOException {
201252 // Instantiates a client with GOOGLE_APPLICATION_CREDENTIALS
202253 SpeechClient speech = SpeechClient .create ();
203254
@@ -240,6 +291,47 @@ public static void asyncRecognizeGcs(String gcsUri) throws Exception, IOExceptio
240291 speech .close ();
241292 }
242293
294+ /**
295+ * Performs non-blocking speech recognition on remote FLAC file and prints
296+ * the transcription.
297+ *
298+ * @param gcsUri the path to the remote LINEAR16 audio file to transcribe.
299+ */
300+ public static void asyncRecognizeGcs (String gcsUri ) throws Exception , IOException {
301+ // Instantiates a client with GOOGLE_APPLICATION_CREDENTIALS
302+ SpeechClient speech = SpeechClient .create ();
303+
304+ // Configure remote file request for Linear16
305+ RecognitionConfig config = RecognitionConfig .newBuilder ()
306+ .setEncoding (AudioEncoding .FLAC )
307+ .setLanguageCode ("en-US" )
308+ .setSampleRateHertz (16000 )
309+ .build ();
310+ RecognitionAudio audio = RecognitionAudio .newBuilder ()
311+ .setUri (gcsUri )
312+ .build ();
313+
314+ // Use non-blocking call for getting file transcription
315+ OperationFuture <LongRunningRecognizeResponse , LongRunningRecognizeMetadata ,
316+ Operation > response =
317+ speech .longRunningRecognizeAsync (config , audio );
318+ while (!response .isDone ()) {
319+ System .out .println ("Waiting for response..." );
320+ Thread .sleep (10000 );
321+ }
322+
323+ List <SpeechRecognitionResult > results = response .get ().getResultsList ();
324+
325+ for (SpeechRecognitionResult result : results ) {
326+ List <SpeechRecognitionAlternative > alternatives = result .getAlternativesList ();
327+ for (SpeechRecognitionAlternative alternative : alternatives ) {
328+ System .out .printf ("Transcription: %s\n " ,alternative .getTranscript ());
329+ }
330+ }
331+ speech .close ();
332+ }
333+
334+
243335 /**
244336 * Performs streaming speech recognition on raw PCM audio data.
245337 *
0 commit comments