@@ -393,31 +393,22 @@ def detect_document(path):
393393
394394 document = image .detect_full_text ()
395395
396- for b , page in enumerate (document .pages ):
397- page_text = ''
396+ for page in document .pages :
397+ for block in page .blocks :
398+ block_words = []
399+ for paragraph in block .paragraphs :
400+ block_words .extend (paragraph .words )
398401
399- for bb , block in enumerate (page .blocks ):
400- block_text = ''
401-
402- for p , paragraph in enumerate (block .paragraphs ):
403- para_text = ''
404-
405- for w , word in enumerate (paragraph .words ):
406- word_text = ''
407-
408- for s , symbol in enumerate (word .symbols ):
409- word_text = word_text + symbol .text
402+ block_symbols = []
403+ for word in block_words :
404+ block_symbols .extend (word .symbols )
410405
411- para_text = para_text + word_text
412-
413- block_text = block_text + para_text
414- print ('\n --\n Content Block: {}' .format (block_text ))
415- print ('Block Bounding Box:\n {}' .format (block .bounding_box ))
416-
417- page_text = page_text + block_text
406+ block_text = ''
407+ for symbol in block_symbols :
408+ block_text = block_text + symbol .text
418409
419- print ('Page Content:\n {}' .format (page_text ))
420- print ('Page Dimensions: w: {} h: {} ' .format (page . width , page . height ))
410+ print ('Block Content: {}' .format (block_text ))
411+ print ('Block Bounds: \n {} ' .format (block . bounding_box ))
421412
422413
423414def detect_document_uri (uri ):
@@ -428,31 +419,22 @@ def detect_document_uri(uri):
428419
429420 document = image .detect_full_text ()
430421
431- for b , page in enumerate (document .pages ):
432- page_text = ''
422+ for page in document .pages :
423+ for block in page .blocks :
424+ block_words = []
425+ for paragraph in block .paragraphs :
426+ block_words .extend (paragraph .words )
433427
434- for bb , block in enumerate (page .blocks ):
435- block_text = ''
436-
437- for p , paragraph in enumerate (block .paragraphs ):
438- para_text = ''
439-
440- for w , word in enumerate (paragraph .words ):
441- word_text = ''
442-
443- for s , symbol in enumerate (word .symbols ):
444- word_text = word_text + symbol .text
428+ block_symbols = []
429+ for word in block_words :
430+ block_symbols .extend (word .symbols )
445431
446- para_text = para_text + word_text
447-
448- block_text = block_text + para_text
449- print ('\n --\n Content Block: {}' .format (block_text ))
450- print ('Block Bounding Box:\n {}' .format (block .bounding_box ))
451-
452- page_text = page_text + block_text
432+ block_text = ''
433+ for symbol in block_symbols :
434+ block_text = block_text + symbol .text
453435
454- print ('Page Content:\n {}' .format (page_text ))
455- print ('Page Dimensions: w: {} h: {} ' .format (page . width , page . height ))
436+ print ('Block Content: {}' .format (block_text ))
437+ print ('Block Bounds: \n {} ' .format (block . bounding_box ))
456438
457439
458440def run_local (args ):
0 commit comments