Skip to content

Commit 0fa495c

Browse files
feat(stt): new params smart_formatting_version, force, mapping_only
1 parent 134fa6d commit 0fa495c

File tree

2 files changed

+133
-16
lines changed

2 files changed

+133
-16
lines changed

ibm_watson/speech_to_text_v1.py

Lines changed: 110 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# coding: utf-8
22

3-
# (C) Copyright IBM Corp. 2015, 2023.
3+
# (C) Copyright IBM Corp. 2015, 2024.
44
#
55
# Licensed under the Apache License, Version 2.0 (the "License");
66
# you may not use this file except in compliance with the License.
@@ -209,6 +209,7 @@ def recognize(
209209
timestamps: Optional[bool] = None,
210210
profanity_filter: Optional[bool] = None,
211211
smart_formatting: Optional[bool] = None,
212+
smart_formatting_version: Optional[bool] = None,
212213
speaker_labels: Optional[bool] = None,
213214
grammar_name: Optional[str] = None,
214215
redaction: Optional[bool] = None,
@@ -446,6 +447,9 @@ def recognize(
446447
(all dialects) transcription only.
447448
See [Smart
448449
formatting](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-formatting#smart-formatting).
450+
:param bool smart_formatting_version: (optional) Smart formatting version
451+
is for next-generation models and is supported in US English,
452+
Brazilian Portuguese, French and German languages.
449453
:param bool speaker_labels: (optional) If `true`, the response includes
450454
labels that identify which words were spoken by which participants in a
451455
multi-person exchange. By default, the service returns no speaker labels.
@@ -618,6 +622,7 @@ def recognize(
618622
'timestamps': timestamps,
619623
'profanity_filter': profanity_filter,
620624
'smart_formatting': smart_formatting,
625+
'smart_formatting_version': smart_formatting_version,
621626
'speaker_labels': speaker_labels,
622627
'grammar_name': grammar_name,
623628
'redaction': redaction,
@@ -813,6 +818,7 @@ def create_job(
813818
timestamps: Optional[bool] = None,
814819
profanity_filter: Optional[bool] = None,
815820
smart_formatting: Optional[bool] = None,
821+
smart_formatting_version: Optional[bool] = None,
816822
speaker_labels: Optional[bool] = None,
817823
grammar_name: Optional[str] = None,
818824
redaction: Optional[bool] = None,
@@ -1100,6 +1106,9 @@ def create_job(
11001106
(all dialects) transcription only.
11011107
See [Smart
11021108
formatting](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-formatting#smart-formatting).
1109+
:param bool smart_formatting_version: (optional) Smart formatting version
1110+
is for next-generation models and is supported in US English,
1111+
Brazilian Portuguese, French and German languages.
11031112
:param bool speaker_labels: (optional) If `true`, the response includes
11041113
labels that identify which words were spoken by which participants in a
11051114
multi-person exchange. By default, the service returns no speaker labels.
@@ -1298,6 +1307,7 @@ def create_job(
12981307
'timestamps': timestamps,
12991308
'profanity_filter': profanity_filter,
13001309
'smart_formatting': smart_formatting,
1310+
'smart_formatting_version': smart_formatting_version,
13011311
'speaker_labels': speaker_labels,
13021312
'grammar_name': grammar_name,
13031313
'redaction': redaction,
@@ -1776,6 +1786,7 @@ def train_language_model(
17761786
word_type_to_add: Optional[str] = None,
17771787
customization_weight: Optional[float] = None,
17781788
strict: Optional[bool] = None,
1789+
force: Optional[bool] = None,
17791790
**kwargs,
17801791
) -> DetailedResponse:
17811792
"""
@@ -1863,6 +1874,15 @@ def train_language_model(
18631874
lists any invalid resources. By default (`true`), training of a custom
18641875
language model fails (status code 400) if the model contains one or more
18651876
invalid resources (corpus files, grammar files, or custom words).
1877+
:param bool force: (optional) If `true`, forces the training of the custom
1878+
language model regardless of whether it contains any changes (is in the
1879+
`ready` or `available` state). By default (`false`), the model must be in
1880+
the `ready` state to be trained. You can use the parameter to train and
1881+
thus upgrade a custom model that is based on an improved next-generation
1882+
model. *The parameter is available only for IBM Cloud, not for IBM Cloud
1883+
Pak for Data.*
1884+
See [Upgrading a custom language model based on an improved next-generation
1885+
model](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-custom-upgrade#custom-upgrade-language-ng).
18661886
:param dict headers: A `dict` containing the request headers
18671887
:return: A `DetailedResponse` containing the result, headers and HTTP status code.
18681888
:rtype: DetailedResponse with `dict` result representing a `TrainingResponse` object
@@ -1882,6 +1902,7 @@ def train_language_model(
18821902
'word_type_to_add': word_type_to_add,
18831903
'customization_weight': customization_weight,
18841904
'strict': strict,
1905+
'force': force,
18851906
}
18861907

18871908
if 'headers' in kwargs:
@@ -2491,6 +2512,13 @@ def add_words(
24912512
omit the `sounds_like` field, the service attempts to set the field to its
24922513
pronunciation of the word. It cannot generate a pronunciation for all words, so
24932514
you must review the word's definition to ensure that it is complete and valid.
2515+
* The `mapping_only` field is a parameter for custom words. You can use the
2516+
'mapping_only' key in custom words as a form of post processing. This key
2517+
parameter has a boolean value to determine whether 'sounds_like' (for non-Japanese
2518+
models) or word (for Japanese) is not used for the model fine-tuning, but for the
2519+
replacement for 'display_as'. This feature helps you when you use custom words
2520+
exclusively to map 'sounds_like' (or word) to a 'display_as' value, that is, when you use
2520+
custom words solely for post-processing purposes that do not need fine-tuning.
24942522
If you add a custom word that already exists in the words resource for the custom
24952523
model, the new definition overwrites the existing data for the word. If the
24962524
service encounters an error with the input data, it returns a failure code and
@@ -2580,6 +2608,7 @@ def add_word(
25802608
word_name: str,
25812609
*,
25822610
word: Optional[str] = None,
2611+
mapping_only: Optional[List[str]] = None,
25832612
sounds_like: Optional[List[str]] = None,
25842613
display_as: Optional[str] = None,
25852614
**kwargs,
@@ -2638,16 +2667,30 @@ def add_word(
26382667
request with credentials for the instance of the service that owns the
26392668
custom model.
26402669
:param str word_name: The custom word that is to be added to or updated in
2641-
the custom language model. Do not include spaces in the word. Use a `-`
2670+
the custom language model. Do not use characters that need to be
2671+
URL-encoded, for example, spaces, slashes, backslashes, colons, ampersands,
2672+
double quotes, plus signs, equals signs, or question marks. Use a `-`
26422673
(dash) or `_` (underscore) to connect the tokens of compound words.
26432674
URL-encode the word if it includes non-ASCII characters. For more
26442675
information, see [Character
26452676
encoding](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-corporaWords#charEncoding).
26462677
:param str word: (optional) For the [Add custom words](#addwords) method,
26472678
you must specify the custom word that is to be added to or updated in the
2648-
custom model. Do not include spaces in the word. Use a `-` (dash) or `_`
2649-
(underscore) to connect the tokens of compound words.
2679+
custom model. Do not use characters that need to be URL-encoded, for
2680+
example, spaces, slashes, backslashes, colons, ampersands, double quotes,
2681+
plus signs, equals signs, or question marks. Use a `-` (dash) or `_`
2682+
(underscore) to connect the tokens of compound words. A Japanese custom
2683+
word can include at most 25 characters, not including leading or trailing
2684+
spaces.
26502685
Omit this parameter for the [Add a custom word](#addword) method.
2686+
:param List[str] mapping_only: (optional) Parameter for custom words. You
2687+
can use the 'mapping_only' key in custom words as a form of post
2688+
processing. This key parameter has a boolean value to determine whether
2689+
'sounds_like' (for non-Japanese models) or word (for Japanese) is not used
2690+
for the model fine-tuning, but for the replacement for 'display_as'. This
2691+
feature helps you when you use custom words exclusively to map
2692+
'sounds_like' (or word) to a 'display_as' value, that is, when you use custom words
2693+
solely for post-processing purposes that do not need fine-tuning.
26512694
:param List[str] sounds_like: (optional) An array of sounds-like
26522695
pronunciations for the custom word. Specify how words that are difficult to
26532696
pronounce, foreign words, acronyms, and so on can be pronounced by users.
@@ -2660,7 +2703,9 @@ def add_word(
26602703
default pronunciation of a word; pronunciations you add augment the
26612704
pronunciation from the base vocabulary.
26622705
A word can have at most five sounds-like pronunciations. A pronunciation
2663-
can include at most 40 characters not including spaces.
2706+
can include at most 40 characters, not including leading or trailing
2707+
spaces. A Japanese pronunciation can include at most 25 characters, not
2708+
including leading or trailing spaces.
26642709
:param str display_as: (optional) An alternative spelling for the custom
26652710
word when it appears in a transcript. Use the parameter when you want the
26662711
word to have a spelling that is different from its usual representation or
@@ -2687,6 +2732,7 @@ def add_word(
26872732

26882733
data = {
26892734
'word': word,
2735+
'mapping_only': mapping_only,
26902736
'sounds_like': sounds_like,
26912737
'display_as': display_as,
26922738
}
@@ -6266,9 +6312,20 @@ class CustomWord:
62666312
62676313
:param str word: (optional) For the [Add custom words](#addwords) method, you
62686314
must specify the custom word that is to be added to or updated in the custom
6269-
model. Do not include spaces in the word. Use a `-` (dash) or `_` (underscore)
6270-
to connect the tokens of compound words.
6315+
model. Do not use characters that need to be URL-encoded, for example, spaces,
6316+
slashes, backslashes, colons, ampersands, double quotes, plus signs, equals
6317+
signs, or question marks. Use a `-` (dash) or `_` (underscore) to connect the
6318+
tokens of compound words. A Japanese custom word can include at most 25
6319+
characters, not including leading or trailing spaces.
62716320
Omit this parameter for the [Add a custom word](#addword) method.
6321+
:param List[str] mapping_only: (optional) Parameter for custom words. You can
6322+
use the 'mapping_only' key in custom words as a form of post processing. This
6323+
key parameter has a boolean value to determine whether 'sounds_like' (for
6324+
non-Japanese models) or word (for Japanese) is not used for the model
6325+
fine-tuning, but for the replacement for 'display_as'. This feature helps you
6326+
when you use custom words exclusively to map 'sounds_like' (or word) to
6327+
a 'display_as' value, that is, when you use custom words solely for post-processing
6328+
purposes that do not need fine-tuning.
62726329
:param List[str] sounds_like: (optional) An array of sounds-like pronunciations
62736330
for the custom word. Specify how words that are difficult to pronounce, foreign
62746331
words, acronyms, and so on can be pronounced by users.
@@ -6280,7 +6337,9 @@ class CustomWord:
62806337
pronunciation of a word; pronunciations you add augment the pronunciation from
62816338
the base vocabulary.
62826339
A word can have at most five sounds-like pronunciations. A pronunciation can
6283-
include at most 40 characters not including spaces.
6340+
include at most 40 characters, not including leading or trailing spaces. A
6341+
Japanese pronunciation can include at most 25 characters, not including leading
6342+
or trailing spaces.
62846343
:param str display_as: (optional) An alternative spelling for the custom word
62856344
when it appears in a transcript. Use the parameter when you want the word to
62866345
have a spelling that is different from its usual representation or from its
@@ -6293,6 +6352,7 @@ def __init__(
62936352
self,
62946353
*,
62956354
word: Optional[str] = None,
6355+
mapping_only: Optional[List[str]] = None,
62966356
sounds_like: Optional[List[str]] = None,
62976357
display_as: Optional[str] = None,
62986358
) -> None:
@@ -6301,9 +6361,21 @@ def __init__(
63016361
63026362
:param str word: (optional) For the [Add custom words](#addwords) method,
63036363
you must specify the custom word that is to be added to or updated in the
6304-
custom model. Do not include spaces in the word. Use a `-` (dash) or `_`
6305-
(underscore) to connect the tokens of compound words.
6364+
custom model. Do not use characters that need to be URL-encoded, for
6365+
example, spaces, slashes, backslashes, colons, ampersands, double quotes,
6366+
plus signs, equals signs, or question marks. Use a `-` (dash) or `_`
6367+
(underscore) to connect the tokens of compound words. A Japanese custom
6368+
word can include at most 25 characters, not including leading or trailing
6369+
spaces.
63066370
Omit this parameter for the [Add a custom word](#addword) method.
6371+
:param List[str] mapping_only: (optional) Parameter for custom words. You
6372+
can use the 'mapping_only' key in custom words as a form of post
6373+
processing. This key parameter has a boolean value to determine whether
6374+
'sounds_like' (for non-Japanese models) or word (for Japanese) is not used
6375+
for the model fine-tuning, but for the replacement for 'display_as'. This
6376+
feature helps you when you use custom words exclusively to map
6377+
'sounds_like' (or word) to a 'display_as' value, that is, when you use custom words
6378+
solely for post-processing purposes that do not need fine-tuning.
63076379
:param List[str] sounds_like: (optional) An array of sounds-like
63086380
pronunciations for the custom word. Specify how words that are difficult to
63096381
pronounce, foreign words, acronyms, and so on can be pronounced by users.
@@ -6316,7 +6388,9 @@ def __init__(
63166388
default pronunciation of a word; pronunciations you add augment the
63176389
pronunciation from the base vocabulary.
63186390
A word can have at most five sounds-like pronunciations. A pronunciation
6319-
can include at most 40 characters not including spaces.
6391+
can include at most 40 characters, not including leading or trailing
6392+
spaces. A Japanese pronunciation can include at most 25 characters, not
6393+
including leading or trailing spaces.
63206394
:param str display_as: (optional) An alternative spelling for the custom
63216395
word when it appears in a transcript. Use the parameter when you want the
63226396
word to have a spelling that is different from its usual representation or
@@ -6326,6 +6400,7 @@ def __init__(
63266400
field.
63276401
"""
63286402
self.word = word
6403+
self.mapping_only = mapping_only
63296404
self.sounds_like = sounds_like
63306405
self.display_as = display_as
63316406

@@ -6335,6 +6410,8 @@ def from_dict(cls, _dict: Dict) -> 'CustomWord':
63356410
args = {}
63366411
if (word := _dict.get('word')) is not None:
63376412
args['word'] = word
6413+
if (mapping_only := _dict.get('mapping_only')) is not None:
6414+
args['mapping_only'] = mapping_only
63386415
if (sounds_like := _dict.get('sounds_like')) is not None:
63396416
args['sounds_like'] = sounds_like
63406417
if (display_as := _dict.get('display_as')) is not None:
@@ -6351,6 +6428,8 @@ def to_dict(self) -> Dict:
63516428
_dict = {}
63526429
if hasattr(self, 'word') and self.word is not None:
63536430
_dict['word'] = self.word
6431+
if hasattr(self, 'mapping_only') and self.mapping_only is not None:
6432+
_dict['mapping_only'] = self.mapping_only
63546433
if hasattr(self, 'sounds_like') and self.sounds_like is not None:
63556434
_dict['sounds_like'] = self.sounds_like
63566435
if hasattr(self, 'display_as') and self.display_as is not None:
@@ -8817,6 +8896,13 @@ class Word:
88178896
88188897
:param str word: A word from the custom model's words resource. The spelling of
88198898
the word is used to train the model.
8899+
:param List[str] mapping_only: (optional) Parameter for custom words.
8900+
You can use the 'mapping_only' key in custom words as a form of post processing.
8901+
A boolean value that indicates whether the added word should be used to
8902+
fine-tune the model for selected next-gen models. This field appears in the
8903+
response body only when it's 'For a custom model that is based on a
8904+
previous-generation model', the mapping_only field is populated with the value
8905+
set by the user, but would not be used.
88208906
:param List[str] sounds_like: An array of as many as five pronunciations for the
88218907
word.
88228908
* _For a custom model that is based on a previous-generation model_, in addition
@@ -8867,6 +8953,7 @@ def __init__(
88678953
count: int,
88688954
source: List[str],
88698955
*,
8956+
mapping_only: Optional[List[str]] = None,
88708957
error: Optional[List['WordError']] = None,
88718958
) -> None:
88728959
"""
@@ -8912,11 +8999,19 @@ def __init__(
89128999
shows only `user` for custom words that were added directly to the custom
89139000
model. Words from corpora and grammars are not added to the words resource
89149001
for custom models that are based on next-generation models.
9002+
:param List[str] mapping_only: (optional) Parameter for custom
9003+
words. You can use the 'mapping_only' key in custom words as a form of post
9004+
processing. A boolean value that indicates whether the added word should be
9005+
used to fine-tune the model for selected next-gen models. This field appears
9006+
in the response body only when it's 'For a custom model that is based on a
9007+
previous-generation model', the mapping_only field is populated with the
9008+
value set by the user, but would not be used.
89159009
:param List[WordError] error: (optional) If the service discovered one or
89169010
more problems that you need to correct for the word's definition, an array
89179011
that describes each of the errors.
89189012
"""
89199013
self.word = word
9014+
self.mapping_only = mapping_only
89209015
self.sounds_like = sounds_like
89219016
self.display_as = display_as
89229017
self.count = count
@@ -8932,6 +9027,8 @@ def from_dict(cls, _dict: Dict) -> 'Word':
89329027
else:
89339028
raise ValueError(
89349029
'Required property \'word\' not present in Word JSON')
9030+
if (mapping_only := _dict.get('mapping_only')) is not None:
9031+
args['mapping_only'] = mapping_only
89359032
if (sounds_like := _dict.get('sounds_like')) is not None:
89369033
args['sounds_like'] = sounds_like
89379034
else:
@@ -8966,6 +9063,8 @@ def to_dict(self) -> Dict:
89669063
_dict = {}
89679064
if hasattr(self, 'word') and self.word is not None:
89689065
_dict['word'] = self.word
9066+
if hasattr(self, 'mapping_only') and self.mapping_only is not None:
9067+
_dict['mapping_only'] = self.mapping_only
89699068
if hasattr(self, 'sounds_like') and self.sounds_like is not None:
89709069
_dict['sounds_like'] = self.sounds_like
89719070
if hasattr(self, 'display_as') and self.display_as is not None:

0 commit comments

Comments
 (0)