fix afc e-contract
Showing
2 changed files
with
16 additions
and
1 deletion
| ... | @@ -9,8 +9,23 @@ from .get_char import Finder | ... | @@ -9,8 +9,23 @@ from .get_char import Finder |
| 9 | 9 | ||
| 10 | 10 | ||
| 11 | def predict(pdf_info): | 11 | def predict(pdf_info): |
| 12 | ocr_results = {} | ||
| 13 | for pno in pdf_info: | ||
| 14 | ocr_results[pno] = {} | ||
| 15 | for key, block in enumerate(pdf_info[pno]['blocks']): | ||
| 16 | if block['type'] != 0: | ||
| 17 | continue | ||
| 18 | for line in block['lines']: | ||
| 19 | for span in line['spans']: | ||
| 20 | bbox, text = span['bbox'], span['text'] | ||
| 21 | # print(text) | ||
| 22 | xmin, ymin, xmax, ymax = bbox | ||
| 23 | polygon = [xmin, ymin, xmax, ymin, xmax, ymax, xmin, ymax] | ||
| 24 | text = text.replace(":", ":").replace(" ", "") | ||
| 25 | ocr_results[pno][key] = [polygon, text] | ||
| 26 | |||
| 12 | # 输入是整个 PDF 中的信息 | 27 | # 输入是整个 PDF 中的信息 |
| 13 | f = Finder(pdf_info) | 28 | f = Finder(pdf_info, ocr_results=ocr_results) |
| 14 | results = f.get_info() | 29 | results = f.get_info() |
| 15 | 30 | ||
| 16 | return results | 31 | return results | ... | ... |
This diff is collapsed.
Click to expand it.
-
Please register or sign in to post a comment