2424# project_id = 'YOUR_PROJECT_ID'
2525# location = 'YOUR_PROCESSOR_LOCATION' # Format is 'us' or 'eu'
2626# processor_id = 'YOUR_PROCESSOR_ID' # Create processor before running sample
27+ # processor_version = 'rc' # Refer to https://cloud.google.com/document-ai/docs/manage-processor-versions for more information
2728# file_path = '/path/to/local/pdf'
2829# mime_type = 'application/pdf' # Refer to https://cloud.google.com/document-ai/docs/file-types for supported file types
2930
3031
3132def process_document_ocr_sample (
32- project_id : str , location : str , processor_id : str , file_path : str , mime_type : str
33+ project_id : str ,
34+ location : str ,
35+ processor_id : str ,
36+ processor_version : str ,
37+ file_path : str ,
38+ mime_type : str ,
3339) -> None :
3440 # Online processing request to Document AI
3541 document = process_document (
36- project_id , location , processor_id , file_path , mime_type
42+ project_id , location , processor_id , processor_version , file_path , mime_type
3743 )
3844
3945 # For a full list of Document object attributes, please reference this page:
@@ -52,19 +58,30 @@ def process_document_ocr_sample(
5258 print_lines (page .lines , text )
5359 print_tokens (page .tokens , text )
5460
61+ # Currently supported in version pretrained-ocr-v1.1-2022-09-12
62+ if page .image_quality_scores :
63+ print_image_quality_scores (page .image_quality_scores )
64+
5565
5666def process_document (
57- project_id : str , location : str , processor_id : str , file_path : str , mime_type : str
67+ project_id : str ,
68+ location : str ,
69+ processor_id : str ,
70+ processor_version : str ,
71+ file_path : str ,
72+ mime_type : str ,
5873) -> documentai .Document :
5974 # You must set the api_endpoint if you use a location other than 'us', e.g.:
6075 opts = ClientOptions (api_endpoint = f"{ location } -documentai.googleapis.com" )
6176
6277 client = documentai .DocumentProcessorServiceClient (client_options = opts )
6378
64- # The full resource name of the processor, e.g.:
65- # projects/project_id/locations/location/processor/ processor_id
79+ # The full resource name of the processor version
80+ # e.g. projects/{project_id}/locations/{location}/processors/{processor_id}/processorVersions/{processor_version_id}
6681 # You must create processors before running sample code.
67- name = client .processor_path (project_id , location , processor_id )
82+ name = client .processor_version_path (
83+ project_id , location , processor_id , processor_version
84+ )
6885
6986 # Read the file into memory
7087 with open (file_path , "rb" ) as image :
@@ -133,6 +150,16 @@ def print_tokens(tokens: Sequence[documentai.Document.Page.Token], text: str) ->
133150 print (f" Last token break type: { repr (last_token_break_type )} " )
134151
135152
def print_image_quality_scores(
    image_quality_scores: documentai.Document.Page.ImageQualityScores,
) -> None:
    """Print a page's overall image quality score and its detected defects.

    Args:
        image_quality_scores: Object exposing a numeric ``quality_score`` and
            an iterable ``detected_defects``; each defect provides ``type_``
            and a numeric ``confidence``.
    """
    # ``.1%`` multiplies the value by 100 and renders one decimal place.
    print(f"    Quality score: {image_quality_scores.quality_score:.1%}")
    print("    Detected defects:")

    # Each defect is indented one level deeper than the heading above so the
    # output reads as a nested list; nothing is printed when there are none.
    for detected_defect in image_quality_scores.detected_defects:
        print(f"        {detected_defect.type_}: {detected_defect.confidence:.1%}")
161+
162+
136163def layout_to_text (layout : documentai .Document .Page .Layout , text : str ) -> str :
137164 """
138165 Document AI identifies text in different parts of the document by their
0 commit comments