Skip to content

Commit a0431a4

Browse files
committed
Additional samples in GA - model-selection
1 parent ad4ef42 commit a0431a4

File tree

2 files changed

+90
-89
lines changed

2 files changed

+90
-89
lines changed

speech/beta/src/main/java/com/example/speech/Recognize.java

Lines changed: 2 additions & 87 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ public static void main(String... args) throws Exception {
4747
System.out.printf(
4848
"\tjava %s \"<command>\" \"<path-to-image>\"\n"
4949
+ "Commands:\n"
50-
+ "\t model-selection | metadata | diarization | multi-channel |\n"
50+
+ "\t metadata | diarization | multi-channel |\n"
5151
+ "\t multi-language | word-level-conf\n"
5252
+ "Path:\n\tA file path (ex: ./resources/audio.raw) or a URI "
5353
+ "for a Cloud Storage resource (gs://...)\n",
@@ -58,13 +58,7 @@ public static void main(String... args) throws Exception {
5858
String path = args.length > 1 ? args[1] : "";
5959

6060
// Use command and GCS path pattern to invoke transcription.
61-
if (command.equals("model-selection")) {
62-
if (path.startsWith("gs://")) {
63-
transcribeModelSelectionGcs(path);
64-
} else {
65-
transcribeModelSelection(path);
66-
}
67-
} else if (command.equals("metadata")) {
61+
if (command.equals("metadata")) {
6862
transcribeFileWithMetadata(path);
6963
} else if (command.equals("diarization")) {
7064
if (path.startsWith("gs://")) {
@@ -93,86 +87,7 @@ public static void main(String... args) throws Exception {
9387
}
9488
}
9589

96-
// [START speech_transcribe_model_selection_beta]
97-
/**
98-
* Transcribes a local audio file with a single synchronous recognize call using the "video" model.
99-
*
100-
* @param fileName the path to an audio file to transcribe
101-
*/
102-
public static void transcribeModelSelection(String fileName) throws Exception {
103-
Path path = Paths.get(fileName);
104-
byte[] content = Files.readAllBytes(path);
105-
106-
try (SpeechClient speech = SpeechClient.create()) {
107-
// Configure the request to use the "video" model
108-
RecognitionConfig recConfig =
109-
RecognitionConfig.newBuilder()
110-
// encoding may either be omitted or must match the value in the file header
111-
.setEncoding(AudioEncoding.LINEAR16)
112-
.setLanguageCode("en-US")
113-
// sample rate hertz may either be omitted or must match the value in the file
114-
// header
115-
.setSampleRateHertz(16000)
116-
.setModel("video")
117-
.build();
118-
119-
RecognitionAudio recognitionAudio =
120-
RecognitionAudio.newBuilder().setContent(ByteString.copyFrom(content)).build();
121-
122-
RecognizeResponse recognizeResponse = speech.recognize(recConfig, recognitionAudio);
123-
// Just print the first result here. NOTE(review): get(0) assumes at least one result.
124-
SpeechRecognitionResult result = recognizeResponse.getResultsList().get(0);
125-
// There can be several alternative transcripts for a given chunk of speech. Just use the
126-
// first (most likely) one here.
127-
SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0);
128-
System.out.printf("Transcript : %s\n", alternative.getTranscript());
129-
}
130-
// [END speech_transcribe_model_selection_beta]
131-
}
132-
133-
// [START speech_transcribe_model_selection_gcs_beta]
134-
/**
135-
* Transcribes the remote audio file with a long-running recognize operation using the "video" model.
136-
*
137-
* @param gcsUri the URI of the remote audio file to transcribe.
138-
*/
139-
public static void transcribeModelSelectionGcs(String gcsUri) throws Exception {
140-
try (SpeechClient speech = SpeechClient.create()) {
141-
142-
// Configure the request to use the "video" model
143-
RecognitionConfig config =
144-
RecognitionConfig.newBuilder()
145-
// encoding may either be omitted or must match the value in the file header
146-
.setEncoding(AudioEncoding.LINEAR16)
147-
.setLanguageCode("en-US")
148-
// sample rate hertz may either be omitted or must match the value in the file
149-
// header
150-
.setSampleRateHertz(16000)
151-
.setModel("video")
152-
.build();
153-
154-
RecognitionAudio audio = RecognitionAudio.newBuilder().setUri(gcsUri).build();
155-
156-
// Use a non-blocking call to start the file transcription
157-
OperationFuture<LongRunningRecognizeResponse, LongRunningRecognizeMetadata> response =
158-
speech.longRunningRecognizeAsync(config, audio);
15990

160-
while (!response.isDone()) {
161-
System.out.println("Waiting for response...");
162-
Thread.sleep(10000); // check again in 10 seconds
163-
}
164-
165-
List<SpeechRecognitionResult> results = response.get().getResultsList();
166-
167-
// Just print the first result here. NOTE(review): get(0) assumes at least one result.
168-
SpeechRecognitionResult result = results.get(0);
169-
// There can be several alternative transcripts for a given chunk of speech. Just use the
170-
// first (most likely) one here.
171-
SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0);
172-
System.out.printf("Transcript : %s\n", alternative.getTranscript());
173-
}
174-
}
175-
// [END speech_transcribe_model_selection_gcs_beta]
17691

17792
// [START speech_transcribe_recognition_metadata_beta]
17893
/**

speech/cloud-client/src/main/java/com/example/speech/Recognize.java

Lines changed: 88 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ public static void main(String... args) throws Exception {
6464
+ "Commands:\n"
6565
+ "\tsyncrecognize | asyncrecognize | streamrecognize | micstreamrecognize \n"
6666
+ "\t| wordoffsets | auto-punctuation | stream-punctuation \n"
67-
+ "\t| enhanced-model \n"
67+
+ "\t| enhanced-model | model-selection\n"
6868
+ "Path:\n\tA file path (ex: ./resources/audio.raw) or a URI "
6969
+ "for a Cloud Storage resource (gs://...)\n",
7070
Recognize.class.getCanonicalName());
@@ -106,8 +106,14 @@ public static void main(String... args) throws Exception {
106106
streamingTranscribeWithAutomaticPunctuation(path);
107107
} else if (command.equals("enhanced-model")) {
108108
transcribeFileWithEnhancedModel(path);
109+
} else if (command.equals("model-selection")) {
110+
if (path.startsWith("gs://")) {
111+
transcribeModelSelectionGcs(path);
112+
} else {
113+
transcribeModelSelection(path);
114+
}
115+
}
109116
}
110-
}
111117

112118
// [START speech_transcribe_sync]
113119
/**
@@ -744,4 +750,84 @@ public static void transcribeFileWithEnhancedModel(String fileName) throws Excep
744750
}
745751
// [END speech_transcribe_enhanced_model]
746752

753+
// [START speech_transcribe_model_selection]
754+
/**
755+
* Transcribes a local audio file with a single synchronous recognize call using the "video" model.
756+
*
757+
* @param fileName the path to an audio file to transcribe
758+
*/
759+
public static void transcribeModelSelection(String fileName) throws Exception {
760+
Path path = Paths.get(fileName);
761+
byte[] content = Files.readAllBytes(path);
762+
763+
try (SpeechClient speech = SpeechClient.create()) {
764+
// Configure the request to use the "video" model
765+
RecognitionConfig recConfig =
766+
RecognitionConfig.newBuilder()
767+
// encoding may either be omitted or must match the value in the file header
768+
.setEncoding(AudioEncoding.LINEAR16)
769+
.setLanguageCode("en-US")
770+
// sample rate hertz may either be omitted or must match the value in the file
771+
// header
772+
.setSampleRateHertz(16000)
773+
.setModel("video")
774+
.build();
775+
776+
RecognitionAudio recognitionAudio =
777+
RecognitionAudio.newBuilder().setContent(ByteString.copyFrom(content)).build();
778+
779+
RecognizeResponse recognizeResponse = speech.recognize(recConfig, recognitionAudio);
780+
// Just print the first result here. NOTE(review): get(0) assumes at least one result.
781+
SpeechRecognitionResult result = recognizeResponse.getResultsList().get(0);
782+
// There can be several alternative transcripts for a given chunk of speech. Just use the
783+
// first (most likely) one here.
784+
SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0);
785+
System.out.printf("Transcript : %s\n", alternative.getTranscript());
786+
}
787+
}
788+
// [END speech_transcribe_model_selection]
789+
790+
// [START speech_transcribe_model_selection_gcs]
791+
/**
792+
* Transcribes the remote audio file with a long-running recognize operation using the "video" model.
793+
*
794+
* @param gcsUri the URI of the remote audio file to transcribe.
795+
*/
796+
public static void transcribeModelSelectionGcs(String gcsUri) throws Exception {
797+
try (SpeechClient speech = SpeechClient.create()) {
798+
799+
// Configure the request to use the "video" model
800+
RecognitionConfig config =
801+
RecognitionConfig.newBuilder()
802+
// encoding may either be omitted or must match the value in the file header
803+
.setEncoding(AudioEncoding.LINEAR16)
804+
.setLanguageCode("en-US")
805+
// sample rate hertz may either be omitted or must match the value in the file
806+
// header
807+
.setSampleRateHertz(16000)
808+
.setModel("video")
809+
.build();
810+
811+
RecognitionAudio audio = RecognitionAudio.newBuilder().setUri(gcsUri).build();
812+
813+
// Use a non-blocking call to start the file transcription
814+
OperationFuture<LongRunningRecognizeResponse, LongRunningRecognizeMetadata> response =
815+
speech.longRunningRecognizeAsync(config, audio);
816+
817+
while (!response.isDone()) {
818+
System.out.println("Waiting for response...");
819+
Thread.sleep(10000); // check again in 10 seconds
820+
}
821+
822+
List<SpeechRecognitionResult> results = response.get().getResultsList();
823+
824+
// Just print the first result here. NOTE(review): get(0) assumes at least one result.
825+
SpeechRecognitionResult result = results.get(0);
826+
// There can be several alternative transcripts for a given chunk of speech. Just use the
827+
// first (most likely) one here.
828+
SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0);
829+
System.out.printf("Transcript : %s\n", alternative.getTranscript());
830+
}
831+
}
832+
// [END speech_transcribe_model_selection_gcs]
747833
}

0 commit comments

Comments
 (0)