Merge branch 'feature/uat-tmp' into 'master'

Feature/uat tmp See merge request !18

Merge branch 'feature/uat-tmp' into 'master'
Feature/uat tmp See merge request !18
周伟奇
Showing 19 changed files with 3341 additions and 48 deletions
src/apps/doc/consts.py
src/apps/doc/management/commands/ocr_process.py
src/apps/doc/models.py
src/apps/doc/named_enum.py
src/apps/doc/ocr/ecm.py
src/apps/doc/ocr/wb.py
src/apps/doc/views.py
src/celery_compare/tasks.py
src/common/electronic_afc_contract/afc_contract_ocr.py
src/common/electronic_afc_contract/get_char_fsm.py
src/common/electronic_hil_contract/get_char_fsm.py
src/common/electronic_hil_contract/hil_contract_ocr.py
src/common/fsm_econtract/const.py
src/common/fsm_econtract/fsm_contract_ocr.py
src/common/fsm_econtract/hmh_ocr.py
src/common/fsm_econtract/retriever.py
src/common/fsm_econtract/tools.py
src/common/tools/mssql_script24.py
src/pos/views.py
--- a/src/apps/doc/consts.py
View file @9f4b364
+++ b/src/apps/doc/consts.py
View file @9f4b364
@@ -10,8 +10,8 @@ PAGE_SIZE_DEFAULT = 10
 FIXED_APPLICATION_ID_PREFIX = 'CH-S'
-DOC_SCHEME_LIST = ['ACCEPTANCE', 'SETTLEMENT', 'CONTRACTMANAGEMENT']
+DOC_SCHEME_LIST = ['ACCEPTANCE', 'SETTLEMENT', 'CONTRACTMANAGEMENT', 'INSURANCE']
-DATA_SOURCE_LIST = ['POS', 'EAPP', 'ECONTRACT']
+DATA_SOURCE_LIST = ['POS', 'EAPP', 'ECONTRACT', 'OVP']
 COMPARE_DOC_SCHEME_LIST = ['CA', 'SE']
 HIL_PREFIX = 'HIL'
@@ -1057,7 +1057,25 @@ HIL_CONTRACT_2_CLASSIFY = 44
 HIL_CONTRACT_3_CN_NAME = '车辆处置协议'
 HIL_CONTRACT_3_CLASSIFY = 45
-CONTRACT_SET = {CONTRACT_QRS_CLASSIFY, CONTRACT_CLASSIFY, HIL_CONTRACT_1_CLASSIFY, HIL_CONTRACT_2_CLASSIFY, HIL_CONTRACT_3_CLASSIFY}
+FSM_CONTRACT_WEP_CN_NAME = '延长保修合同'
+FSM_CONTRACT_WEP_CLASSIFY = 51
+FSM_CONTRACT_MSI_CN_NAME = '长悦保养合同'
+FSM_CONTRACT_MSI_CLASSIFY = 52
+FSM_CONTRACT_SC_CN_NAME = '汽车销售合同'
+FSM_CONTRACT_SC_CLASSIFY = 53
+CONTRACT_SET = {
+    CONTRACT_QRS_CLASSIFY, 
+    CONTRACT_CLASSIFY, 
+    HIL_CONTRACT_1_CLASSIFY, 
+    HIL_CONTRACT_2_CLASSIFY, 
+    HIL_CONTRACT_3_CLASSIFY,
+    FSM_CONTRACT_WEP_CLASSIFY,
+    FSM_CONTRACT_MSI_CLASSIFY,
+    FSM_CONTRACT_SC_CLASSIFY, 
+}
 CONTRACT_MAP = {
    HIL_CONTRACT_1_CLASSIFY: HIL_CONTRACT_1_CN_NAME,
@@ -1065,8 +1083,13 @@ CONTRACT_MAP = {
    HIL_CONTRACT_3_CLASSIFY: HIL_CONTRACT_3_CN_NAME,
    CONTRACT_CLASSIFY: CONTRACT_CN_NAME,
    CONTRACT_QRS_CLASSIFY: CONTRACT_QRS_CN_NAME,
+    FSM_CONTRACT_WEP_CLASSIFY: FSM_CONTRACT_WEP_CN_NAME,
+    FSM_CONTRACT_MSI_CLASSIFY: FSM_CONTRACT_MSI_CN_NAME,
+    FSM_CONTRACT_SC_CLASSIFY: FSM_CONTRACT_SC_CN_NAME, 
 }
+FSM_CONTRACT_CLASSIFY_SET = {FSM_CONTRACT_WEP_CLASSIFY, FSM_CONTRACT_MSI_CLASSIFY, FSM_CONTRACT_SC_CLASSIFY}
 # 保单
 INSURANCE_CN_NAME = '保单'
 INSURANCE_CLASSIFY = 42
@@ -1215,6 +1238,11 @@ BS_FIELD = 'bss_ocr'
 HIL_CONTRACT_1_FIELD = 'hil_contract_1_ocr'
 HIL_CONTRACT_2_FIELD = 'hil_contract_2_ocr'
 HIL_CONTRACT_3_FIELD = 'hil_contract_3_ocr'
+FSM_CONTRACT_WEP_FIELD = 'fsm_wep_ocr'
+FSM_CONTRACT_MSI_FIELD = 'fsm_msi_ocr'
+FSM_CONTRACT_SC_FIELD = 'fsm_sc_ocr' 
 BS_CLASSIFY = 10089
 RESULT_MAPPING = {
@@ -1239,6 +1267,9 @@ RESULT_MAPPING = {
    HIL_CONTRACT_1_CLASSIFY: HIL_CONTRACT_1_FIELD,
    HIL_CONTRACT_2_CLASSIFY: HIL_CONTRACT_2_FIELD,
    HIL_CONTRACT_3_CLASSIFY: HIL_CONTRACT_3_FIELD,
+    FSM_CONTRACT_WEP_CLASSIFY: FSM_CONTRACT_WEP_FIELD,
+    FSM_CONTRACT_MSI_CLASSIFY: FSM_CONTRACT_MSI_FIELD,
+    FSM_CONTRACT_SC_CLASSIFY: FSM_CONTRACT_SC_FIELD, 
 }
 CA_ADD_COMPARE_FIELDS = (IC_OCR_FIELD, BL_OCR_FIELD, BS_FIELD)
@@ -1511,6 +1542,9 @@ SE_AFC_CON_MAP = {
    '还款账号': (2, 2, '还款账户', '账号'),
    '户名': (2, 2, '还款账户', '户名'),
    '开户行': (2, 2, '还款账户', '开户行'),
+    '收款账号': (2, 2, '借款人收款账户', '账号'),
+    '收款户名': (2, 2, '借款人收款账户', '户名'),
+    '收款开户行': (2, 2, '借款人收款账户', '开户行'),
    '借款人签字及时间': (1, 1, '借款人签字及时间', None),
@@ -1550,9 +1584,12 @@ SE_HIL_CON_1_MAP = {
    '融资成本总额': (5, 4, 7, '融资成本总额', None),
    '租期': (5, 4, 7, '租期', None),
    '还款计划表': (5, 5, 7, '付款计划表', None),
-    '还款账号': (5, 5, 7, '银行账户-银行账号', None),
+    '还款账号': (5, 6, 7, '银行账户-银行账号', None),
-    '户名': (5, 5, 7, '银行账户-户名', None),
+    '户名': (5, 6, 7, '银行账户-户名', None),
-    '开户行': (5, 5, 7, '银行账户-开户行', None),
+    '开户行': (5, 6, 7, '银行账户-开户行', None),
+    '收款账号': (5, 5, 7, '收款银行账户-银行账号', None),
+    '收款户名': (5, 5, 7, '收款银行账户-户名', None),
+    '收款开户行': (5, 5, 7, '收款银行账户-开户行', None),
    'ASP项目详情': (5, 4, 7, '车辆附加产品明细表', None),
    '承租人法定代表人或授权代表': (1, 1, 7, '承租人-法定代表人或授权代表', None),
    '共同承租人法定代表人或授权代表': (1, 1, 7, '共同承租人-法定代表人或授权代表', None),
@@ -1608,6 +1645,39 @@ SE_HIL_CON_MAP = {
    HIL_CONTRACT_3_CLASSIFY: SE_HIL_CON_3_MAP,
 }
+# Field maps for the FSM contracts (WEP / MSI / SC).
+# Each entry is  output field name -> (page number, OCR result key).
+# The consumer in ocr_process.py unpacks every value as (pno1, key1) and reads
+# page_info_dict.get(str(pno1), {}).get(key1), so the first element is used as
+# a page key and the second as the key inside that page's OCR result.
+SE_FSM_WEP_MAP = {
+    '客户姓名': (1, '客户姓名'),
+    '证件类型': (1, '证件类型'),
+    '证件号码': (1, '证件号码'),
+    '合同价格（小写）': (1, '合同价格（小写）'),
+    '客户签名': (1, '客户签名'),
+    '签单日期': (1, '签单日期'),
+}
+# Same layout as SE_FSM_WEP_MAP, except signature and signing date are read from page 2.
+SE_FSM_MSI_MAP = {
+    '客户姓名': (1, '客户姓名'),
+    '证件类型': (1, '证件类型'),
+    '证件号码': (1, '证件号码'),
+    '合同价格（小写）': (1, '合同价格（小写）'),
+    '客户签名': (2, '客户签名'),
+    '签单日期': (2, '签单日期'),
+}
+# Sales contract: uses '姓名' / '总价' instead of '客户姓名' / '合同价格（小写）';
+# signature and signing date are read from page 12.
+SE_FSM_SC_MAP = {
+    '姓名': (1, '姓名'),
+    '证件类型': (1, '证件类型'),
+    '证件号码': (1, '证件号码'),
+    '总价': (1, '总价'),
+    '客户签名': (12, '客户签名'),
+    '签单日期': (12, '签单日期'),
+}
+# FSM contract classify -> field map for that contract type.
+SE_FSM_CON_MAP = {
+    FSM_CONTRACT_WEP_CLASSIFY: SE_FSM_WEP_MAP,
+    FSM_CONTRACT_MSI_CLASSIFY: SE_FSM_MSI_MAP,
+    FSM_CONTRACT_SC_CLASSIFY: SE_FSM_SC_MAP,
+}
 SE_AFC_CON_QRS_FIELD = ['合同编号']
 SE_AFC_CON_FIELD = ['合同编号-每页', '所购车辆价格-小写-重要条款', '车架号-重要条款', '贷款本金金额-重要条款', '贷款期限-重要条款',
                    '车辆贷款本金金额-重要条款', '附加产品融资贷款本金总额-重要条款', '所购车辆价格', '车架号', '经销商',
@@ -2314,29 +2384,42 @@ APPLICANT_TYPE_MAP = {
 APPLICANT_TYPE_ORDER = ['Borrower', 'Co-Borrower', 'Guarantor', 'Mortgager']
-FILE_NAME_PREFIX_MAP = {
+# FILE_NAME_PREFIX_MAP = {
-    AFC_PREFIX: [
+#     AFC_PREFIX: [
-        ((CONTRACT_CLASSIFY, 0), '{0}_电子签署-汽车抵押贷款合同'),
+#         ((CONTRACT_CLASSIFY, 0), '{0}_电子签署-汽车抵押贷款合同'),
-        ((HMH_CLASSIFY, 0), '{0}_电子签署-抵押登记豁免函'),
+#         ((HMH_CLASSIFY, 0), '{0}_电子签署-抵押登记豁免函'),
-    ],
+#     ],
-    HIL_PREFIX: [
+#     HIL_PREFIX: [
-        ((HIL_CONTRACT_1_CLASSIFY, HIL_CONTRACT_3_CLASSIFY), '{0}_电子签署-售后回租合同'),
+#         ((HIL_CONTRACT_1_CLASSIFY, HIL_CONTRACT_3_CLASSIFY), '{0}_电子签署-售后回租合同'),
-        ((HIL_CONTRACT_2_CLASSIFY, 0), '{0}_电子签署-汽车租赁抵押合同'),
+#         ((HIL_CONTRACT_2_CLASSIFY, 0), '{0}_电子签署-汽车租赁抵押合同'),
-        ((HMH_CLASSIFY, 0), '{0}_电子签署-抵押登记豁免函'),
+#         ((HMH_CLASSIFY, 0), '{0}_电子签署-抵押登记豁免函'),
-    ]
+#     ]
-}
+# }
 ECONTRACT_KEYWORDS_MAP = {
    AFC_PREFIX: [
        ('抵押贷款合同', CONTRACT_CLASSIFY),
        ('送达地址确认书', CONTRACT_QRS_CLASSIFY),
-        # ('电子签署-抵押登记豁免函', HMH_CLASSIFY, 0),
+        ('抵押登记豁免函', HMH_CLASSIFY),
    ],
    HIL_PREFIX: [
        ('售后回租合同', HIL_CONTRACT_1_CLASSIFY),
        ('租赁抵押合同', HIL_CONTRACT_2_CLASSIFY),
        ('车辆处置协议', HIL_CONTRACT_3_CLASSIFY),
-        # ('电子签署-抵押登记豁免函', HMH_CLASSIFY, 0),
+        ('抵押登记豁免函', HMH_CLASSIFY),
+    ]
+}
+# File-name keywords for FSM contracts, keyed by business prefix.
+# The upload view walks the list for the current prefix and takes the classify
+# of the first keyword that is a substring of the document name.
+# NOTE(review): the MSI keyword ends in '合约' under AFC_PREFIX but in '合同'
+# under HIL_PREFIX - confirm both spellings match the real file names.
+FSM_ECONTRACT_KEYWORDS_MAP = {
+    AFC_PREFIX: [
+        ('延长保修条款与条件', FSM_CONTRACT_WEP_CLASSIFY),
+        ('长悦保养套餐服务合约', FSM_CONTRACT_MSI_CLASSIFY),
+        ('汽车销售合同', FSM_CONTRACT_SC_CLASSIFY),
+    ],
+    HIL_PREFIX: [
+        ('延长保修条款与条件', FSM_CONTRACT_WEP_CLASSIFY),
+        ('长悦保养套餐服务合同', FSM_CONTRACT_MSI_CLASSIFY),
+        ('汽车销售合同', FSM_CONTRACT_SC_CLASSIFY),
    ]
 }
@@ -2346,6 +2429,12 @@ HIL_CONTRACT_TYPE_MAP = {
    str(HIL_CONTRACT_3_CLASSIFY): 1,
 }
+# str(classify) -> file_type index for FSM contracts.
+# ocr_process.py uses membership in this map to pick the FSM branch and passes
+# the looked-up value to fsm_predict as its file_type argument.
+FSM_CONTRACT_TYPE_MAP = {
+    str(FSM_CONTRACT_WEP_CLASSIFY): 0,
+    str(FSM_CONTRACT_MSI_CLASSIFY): 1,
+    str(FSM_CONTRACT_SC_CLASSIFY): 2,
+}
 RESULT_MAP = {
    0: None,
    1: True,
@@ -2379,3 +2468,26 @@ MPOS_MAP = {
 }
 FOLDER_WSC_CLASSIFY = 199
+# Application status code -> description, for statuses that are not listed as
+# activated below.
+# NOTE(review): no reader of this dict is visible in this change set - confirm it is used.
+FSM_BEFORE_ACTIVITED_STATUS = {
+    "APSVD": "Saved",
+    "APEAE": "E-app Editing",
+    "APADA": "Awaiting Dealer Action",
+    "APAPR": "Acceptance Processing",
+    "APPSB": "Pre-submit Processed",
+    "APSBT": "Submitted",
+    "APAPP": "Approved",
+    "APHOC": "Held Offer-Docs",
+    "APHOD": "Held Offer-Data",
+    "APINI": "Initiated",
+    "APSEP": "Settlement Processing"
+}
+# Application status code -> description for the "Activated-*" statuses.
+# The upload view treats an application as FSM-activated when
+# FSM_ACTIVITED_STATUS.get(applicationStatus) is truthy.
+# ("ACTIVITED" is a misspelling of "ACTIVATED"; the name is kept because other
+# code refers to it.)
+FSM_ACTIVITED_STATUS = {
+    "APADF": "Activated-Document Follow up",
+    "APASC": "Activated-Awaiting Settlement Check",
+    "APIPN": "Activated-Invoice Passed-Non PT",
+    "APIPP": "Activated-Invoice Passed-PT Doc Required",
+    "APARD": "Activated-Review done",
+}
\ No newline at end of file
--- a/src/apps/doc/management/commands/ocr_process.py
View file @9f4b364
+++ b/src/apps/doc/management/commands/ocr_process.py
View file @9f4b364
@@ -20,6 +20,8 @@ from common.tools.file_tools import get_pwd_list_from_str, extract_zip_or_rar, g
 from common.tools.pdf_to_img import PDFHandler
 from common.electronic_afc_contract.afc_contract_ocr import predict as afc_predict
 from common.electronic_hil_contract.hil_contract_ocr import predict as hil_predict
+from common.fsm_econtract.fsm_contract_ocr import predict as fsm_predict
+from common.fsm_econtract.hmh_ocr import predict as hmh_predict
 from apps.doc import consts
 # from apps.doc.ocr.edms import EDMS, rh
 from apps.doc.ocr.ecm import ECM, rh
@@ -40,8 +42,10 @@ from apps.doc.models import (
    DDARecords,
    IDBCRecords,
    Configs,
+    AFCCmsStatusInfo,
+    HILCmsStatusInfo,
 )
-from celery_compare.tasks import compare
+from celery_compare.tasks import compare, fsm_compare
 class Command(BaseCommand, LoggerMixin):
@@ -996,7 +1000,7 @@ class Command(BaseCommand, LoggerMixin):
                    res.setdefault(consts.ALL_POSITION_KEY, dict())[key] = page_info_dict.get(str(pno), {}).get(
                        consts.ALL_POSITION_KEY, {}).get(key1, [])
                license_summary[classify] = [res]
-            else:
+            elif classify in consts.SE_HIL_CON_MAP:
                res = {}
                for key, (pno1, pno2, end_idx, key1, key2) in consts.SE_HIL_CON_MAP[classify].items():
                    if pno1 is None:
@@ -1020,7 +1024,14 @@ class Command(BaseCommand, LoggerMixin):
                        res[key] = tmp_res
                        res.setdefault(consts.IMG_PATH_KEY, dict())[key] = page_info_dict.get(str(img_pno), {}).get(
                            consts.IMG_PATH_KEY, '')
+                license_summary[classify] = [res]
+            elif classify in consts.SE_FSM_CON_MAP:
+                res = {}
+                for key, (pno1, key1) in consts.SE_FSM_CON_MAP[classify].items():
+                    res[key] = page_info_dict.get(str(pno1), {}).get(key1)
+                    res.setdefault(consts.IMG_PATH_KEY, dict())[key] = page_info_dict.get(str(pno1), {}).get(
+                        consts.IMG_PATH_KEY, '')
                license_summary[classify] = [res]
    def rebuild_bs_summary(self, bs_summary, unknown_summary):
@@ -1442,7 +1453,7 @@ class Command(BaseCommand, LoggerMixin):
                                self.log_base, traceback.format_exc()))
                            error_list.append(1)
                            return
-                else:  # e-contract
+                else:  # e-contract or e-fsm-contract or e-hmh
                    try:
                        # pdf下载 处理 图片存储 识别
                        for times in range(consts.RETRY_TIMES):
@@ -1472,8 +1483,10 @@ class Command(BaseCommand, LoggerMixin):
                            self.online_log.error('{0} [process error (db save)] [error={1}]'.format(
                                self.log_base, traceback.format_exc()))
+                        # AFC合同
                        if classify_1_str == str(consts.CONTRACT_CLASSIFY):
-                            ocr_result = afc_predict(pdf_handler.pdf_info)
+                            is_fsm = doc.data_source == consts.DATA_SOURCE_LIST[3]
+                            ocr_result = afc_predict(pdf_handler.pdf_info, is_fsm=is_fsm)
                            page_res = {}
                            for page_num, page_info in ocr_result.get('page_info', {}).items():
                                if isinstance(page_num, str) and page_num.startswith('page_'):
@@ -1483,6 +1496,7 @@ class Command(BaseCommand, LoggerMixin):
                                        'page_num': page_num,
                                        'page_info': page_info
                                    }
+                        # 送达地址确认书
                        elif classify_1_str == str(consts.CONTRACT_QRS_CLASSIFY):
                            ocr_result = afc_predict(pdf_handler.pdf_info, is_qrs=True)
                            page_num = 'page_1'
@@ -1493,9 +1507,11 @@ class Command(BaseCommand, LoggerMixin):
                                    'page_info': ocr_result.pop(page_num, {}) 
                                }
                            }
-                        else:
+                        # HIL合同
+                        elif classify_1_str in consts.HIL_CONTRACT_TYPE_MAP:
+                            is_fsm = doc.data_source == consts.DATA_SOURCE_LIST[3]
                            file_type_1 = consts.HIL_CONTRACT_TYPE_MAP.get(classify_1_str)
-                            ocr_result_1 = hil_predict(pdf_handler.pdf_info, file_type_1)
+                            ocr_result_1 = hil_predict(pdf_handler.pdf_info, file_type_1, is_fsm=is_fsm)
                            rebuild_res_1 = {}
                            page_res = {}
                            for field_name, field_info in ocr_result_1.items():
@@ -1508,9 +1524,36 @@ class Command(BaseCommand, LoggerMixin):
                                        'page_num': page_num,
                                        'page_info': page_info
                                    }
+                        # FSM合同 WEP MSI SC
+                        elif classify_1_str in consts.FSM_CONTRACT_TYPE_MAP:
+                            file_type = consts.FSM_CONTRACT_TYPE_MAP.get(classify_1_str)
+                            ocr_result = fsm_predict(pdf_handler.pdf_info, file_type) 
+                            page_res = {}
+                            for page_num, page_info in ocr_result.items():
+                                if isinstance(page_num, str) and page_num.startswith('page_'):
+                                    page_res[page_num] = {
+                                        'classify': int(classify_1_str),
+                                        'page_num': page_num,
+                                        'page_info': page_info
+                                    }
+                        # hmh
+                        # else:
+                        #     pass
                        contract_res = {}
                        for img_path_tmp, page_key in pdf_handler.img_path_pno_list:
+                            if classify_1_str == str(consts.HMH_CLASSIFY):
+                                img_contract_res = {
+                                        'code': 1,
+                                        'data': [
+                                            {
+                                                'classify': consts.HMH_CLASSIFY,
+                                                'data': hmh_predict(pdf_handler.pdf_info)
+                                            }
+                                        ]
+                                    }
+                            else:
                                if page_key in page_res:
                                    img_contract_res = {
                                        'code': 1,
@@ -1966,6 +2009,9 @@ class Command(BaseCommand, LoggerMixin):
                                    report_list[5] = BSCheckResult.CHECK_FAILED.value
                        finally:
+                            self.online_log.info('{0} [task={1}] [license_summary={2}] '
+                                             '[contract_result_compare={3}]'.format(self.log_base, task_str,
+                                                                     license_summary, contract_result_compare))
                            self.rebuild_contract(license_summary, contract_result_compare)
                            bs_rebuild = self.rebuild_bs(merged_bs_summary)
@@ -2015,6 +2061,16 @@ class Command(BaseCommand, LoggerMixin):
                                        self.log_base, task_str, res_obj.id))
                                    # 触发比对
                                    try:
+                                        # 是否fsm
+                                        cms_status_class = HILCmsStatusInfo if business_type in consts.HIL_SET else AFCCmsStatusInfo
+                                        cms_status_info = cms_status_class.objects.filter(application_id=doc.application_id).first()
+                                        is_fsm = cms_status_info is not None and cms_status_info.is_fsm == 1
+                                        self.online_log.info('{0} [isfsm] [task={1}] [true or false={2}]'.format(
+                                        self.log_base, task_str, is_fsm))
+                                        if is_fsm:
+                                            fsm_compare.apply_async((doc.application_id, business_type, None, res_obj.id, is_ca, True),
+                                                queue='queue_compare')
+                                        else:
                                            # pass
                                            compare.apply_async((doc.application_id, business_type, None, res_obj.id,
                                                             is_ca, True), queue='queue_compare')
--- a/src/apps/doc/models.py
View file @9f4b364
+++ b/src/apps/doc/models.py
View file @9f4b364
@@ -329,6 +329,11 @@ class AFCOCRResult(models.Model):
    hil_contract_2_ocr = models.TextField(null=True, verbose_name="HIL合同2")
    hil_contract_3_ocr = models.TextField(null=True, verbose_name="HIL合同3")
    qrs_ocr = models.TextField(null=True, verbose_name="AFC合同确认书")
+    fsm_wep_ocr = models.TextField(null=True, verbose_name="延长保修合同")
+    fsm_msi_ocr = models.TextField(null=True, verbose_name="长悦保养合同")
+    fsm_sc_ocr = models.TextField(null=True, verbose_name="汽车销售合同")
+    fsm_activited = models.IntegerField(null=False, default=0, verbose_name="fsm激活状态 1：激活")
    update_time = models.DateTimeField(auto_now=True, verbose_name='修改时间')
    create_time = models.DateTimeField(auto_now_add=True, verbose_name='创建时间')
@@ -366,6 +371,11 @@ class HILOCRResult(models.Model):
    hil_contract_3_ocr = models.TextField(null=True, verbose_name="HIL合同3")
    qrs_ocr = models.TextField(null=True, verbose_name="AFC合同确认书")
+    fsm_wep_ocr = models.TextField(null=True, verbose_name="延长保修合同")
+    fsm_msi_ocr = models.TextField(null=True, verbose_name="长悦保养合同")
+    fsm_sc_ocr = models.TextField(null=True, verbose_name="汽车销售合同")
+    fsm_activited = models.IntegerField(null=False, default=0, verbose_name="fsm激活状态 1：激活")
    update_time = models.DateTimeField(auto_now=True, verbose_name='修改时间')
    create_time = models.DateTimeField(auto_now_add=True, verbose_name='创建时间')
@@ -401,6 +411,11 @@ class AFCSEOCRResult(models.Model):
    hil_contract_3_ocr = models.TextField(null=True, verbose_name="HIL合同3")
    qrs_ocr = models.TextField(null=True, verbose_name="AFC合同确认书")
+    fsm_wep_ocr = models.TextField(null=True, verbose_name="延长保修合同")
+    fsm_msi_ocr = models.TextField(null=True, verbose_name="长悦保养合同")
+    fsm_sc_ocr = models.TextField(null=True, verbose_name="汽车销售合同")
+    fsm_activited = models.IntegerField(null=False, default=0, verbose_name="fsm激活状态 1：激活")
    update_time = models.DateTimeField(auto_now=True, verbose_name='修改时间')
    create_time = models.DateTimeField(auto_now_add=True, verbose_name='创建时间')
@@ -436,6 +451,10 @@ class HILSEOCRResult(models.Model):
    hil_contract_2_ocr = models.TextField(null=True, verbose_name="HIL合同2")
    hil_contract_3_ocr = models.TextField(null=True, verbose_name="HIL合同3")
    qrs_ocr = models.TextField(null=True, verbose_name="AFC合同确认书")
+    fsm_wep_ocr = models.TextField(null=True, verbose_name="延长保修合同")
+    fsm_msi_ocr = models.TextField(null=True, verbose_name="长悦保养合同")
+    fsm_sc_ocr = models.TextField(null=True, verbose_name="汽车销售合同")
+    fsm_activited = models.IntegerField(null=False, default=0, verbose_name="fsm激活状态 1：激活")
    update_time = models.DateTimeField(auto_now=True, verbose_name='修改时间')
    create_time = models.DateTimeField(auto_now_add=True, verbose_name='创建时间')
@@ -1042,3 +1061,41 @@ class AFCCompareReportNew(models.Model):
        managed = False
        db_table = 'afc_compare_report_new'
        situ_db_label = 'afc'
+# Read-mapping onto the existing `nsc_invoice` table: one row of NSC invoice
+# content per VIN. managed = False, so Django migrations do not create or
+# alter the table.
+# NOTE(review): unlike AFCCmsStatusInfo this Meta sets no situ_db_label -
+# confirm which database this model is expected to resolve to.
+class NscInvoice(models.Model):
+    id = models.AutoField(primary_key=True, verbose_name="id")  # primary key
+    vin = models.CharField(max_length=64, verbose_name="车架号")  # index (per original comment)
+    content = models.TextField(null=True, verbose_name="nsc发票信息")
+    # No auto_now_add here: the value is not filled in automatically by Django.
+    create_time = models.DateTimeField(verbose_name='创建时间')
+    class Meta:
+        managed = False
+        db_table = 'nsc_invoice'
+# Mapping onto the existing `afc_cms_status_info` table (managed = False).
+# ocr_process.py looks a row up by application_id and treats the application
+# as FSM when is_fsm == 1.
+# situ_db_label = 'afc' is a project-specific Meta option - presumably the
+# database routing label; verify against the project's router.
+class AFCCmsStatusInfo(models.Model):
+    id = models.AutoField(primary_key=True, verbose_name="id")  # primary key
+    application_id = models.CharField(max_length=64, verbose_name="订单id")  # index (per original comment)
+    business_type = models.CharField(max_length=64, verbose_name="业务类型")
+    is_fsm = models.SmallIntegerField(null=False, default=0, verbose_name="是否fsm流程 1:是")
+    # Neither timestamp uses auto_now / auto_now_add, so Django does not set them.
+    update_time = models.DateTimeField(verbose_name='更新时间')
+    create_time = models.DateTimeField(verbose_name='创建时间')
+    class Meta:
+        managed = False
+        db_table = 'afc_cms_status_info'
+        situ_db_label = 'afc'
+# Mapping onto the existing `hil_cms_status_info` table (managed = False);
+# same columns as AFCCmsStatusInfo. ocr_process.py selects this model instead
+# of the AFC one when business_type is in consts.HIL_SET.
+# NOTE(review): no situ_db_label is set here, unlike the AFC counterpart -
+# confirm this is the intended database for HIL.
+class HILCmsStatusInfo(models.Model):
+    id = models.AutoField(primary_key=True, verbose_name="id")  # primary key
+    application_id = models.CharField(max_length=64, verbose_name="订单id")  # index (per original comment)
+    business_type = models.CharField(max_length=64, verbose_name="业务类型")
+    is_fsm = models.SmallIntegerField(null=False, default=0, verbose_name="是否fsm流程 1:是")
+    # Neither timestamp uses auto_now / auto_now_add, so Django does not set them.
+    update_time = models.DateTimeField(verbose_name='更新时间')
+    create_time = models.DateTimeField(verbose_name='创建时间')
+    class Meta:
+        managed = False
+        db_table = 'hil_cms_status_info'
--- a/src/apps/doc/named_enum.py
View file @9f4b364
+++ b/src/apps/doc/named_enum.py
View file @9f4b364
@@ -27,6 +27,7 @@ class RequestTeam(NamedEnum):
    SETTLEMENT = (1, 'SETTLEMENT')
    CONTRACTMANAGEMENT = (2, 'CONTRACTMANAGEMENT')
    CONTROLLING = (3, 'CONTROLLING')
+    INSURANCE = (4, 'INSURANCE')
 class RequestTrigger(NamedEnum):
@@ -36,6 +37,7 @@ class RequestTrigger(NamedEnum):
    DOCUPLOAD = (3, 'Document Upload')
    SUBMITING = (4, 'Submiting')
    UPLOADING = (5, 'Uploading')
+    OVP = (6, 'OVP')
 class FailureReason(NamedEnum):
--- a/src/apps/doc/ocr/ecm.py
View file @9f4b364
+++ b/src/apps/doc/ocr/ecm.py
View file @9f4b364
@@ -34,6 +34,7 @@ class ECM:
            'ACCEPTANCE': ('acceptance', conf.ECM_FOLDER_CA, conf.ECM_FOLDER_CA_HIL),
            'SETTLEMENT': (self.settlement_type, conf.ECM_FOLDER_SE, conf.ECM_FOLDER_SE_HIL),
            'CONTRACTMANAGEMENT': ('contract_management', conf.ECM_FOLDER_CA, conf.ECM_FOLDER_CA_HIL),
+            'INSURANCE': ('insurance', conf.ECM_FOLDER_SE, conf.ECM_FOLDER_SE_HIL),
        }
        self.doc_base_map = {
            'AFC': 'SF5_CN',
--- a/src/apps/doc/ocr/wb.py
View file @9f4b364
+++ b/src/apps/doc/ocr/wb.py
View file @9f4b364
@@ -808,10 +808,12 @@ class BSWorkbook(Workbook):
            if field_str is not None:
                count_list.append((field_str, count))
-    def contract_rebuild(self, contract_result_dict):
+    def contract_rebuild(self, contract_result_dict, is_ca=False):
        for classify, contract_result in contract_result_dict.items():
            if len(contract_result) == 0:
                continue
+            if is_ca and classify not in consts.FSM_CONTRACT_CLASSIFY_SET:
+                continue
            ws = self.create_sheet(consts.CONTRACT_MAP.get(classify))
            for i in range(30):
                if str(i) in contract_result:
@@ -934,6 +936,7 @@ class BSWorkbook(Workbook):
        else:
            self.bs_rebuild(bs_summary, res_count_tuple, metadata)
            self.license_rebuild(license_summary, document_scheme, count_list)
+            self.contract_rebuild(contract_result, True)
        self.move_res_sheet()
        self.remove_base_sheet()
        return count_list, self.need_follow
--- a/src/apps/doc/views.py
View file @9f4b364
+++ b/src/apps/doc/views.py
View file @9f4b364
@@ -48,14 +48,23 @@ from .models import (
    MposReport,
    GenericOCRReport,
    InterfaceReport,
+    HILOCRResult,
+    HILSEOCRResult,
+    AFCOCRResult,
+    AFCSEOCRResult,
+    HILCmsStatusInfo,
+    AFCCmsStatusInfo
 )
-from .named_enum import ErrorType, AutoResult, WholeResult, RPAResult, SystemName
+from .named_enum import ErrorType, AutoResult, WholeResult, RPAResult, SystemName, RequestTeam
 from .mixins import DocHandler, MPOSHandler, PreSEHandler
 from . import consts
 from apps.account.authentication import OAuth2AuthenticationWithUser
-from celery_compare.tasks import compare
+from celery_compare.tasks import compare, fsm_compare
+from prese.compare import get_empty_result
 import time
 class CustomDate(fields.Date):
    def _deserialize(self, value, attr, data, **kwargs):
@@ -248,6 +257,7 @@ se_compare_content = {
    'fsmSpecialCar': fields.Boolean(required=False),
    'fsmBestPrice': fields.Boolean(required=False),
    'isAutoSettlement': fields.Boolean(required=False),
+    'fsmLandingDealer': fields.Str(required=False, validate=validate.Length(max=1024)),
    'individualCusInfo': fields.List(fields.Nested(se_individual_args),
                                     required=True, validate=validate.Length(min=1, max=4)),
@@ -551,6 +561,7 @@ class UploadDocView(GenericView, DocHandler):
    # authentication_classes = []
    permission_classes = [IsAuthenticated]
    authentication_classes = [OAuth2AuthenticationWithUser]
    # required_scopes = ['write']
    # 上传（接收）文件接口
@@ -563,6 +574,8 @@ class UploadDocView(GenericView, DocHandler):
        document = args.get('document')
        business_type = document.get('businessType')
        application_id = application_data.get('applicationId')
+        # 包含FSM 激活状态
+        application_status = application_data.get('applicationStatus', '')
        document_scheme = document.get('documentScheme')
        data_source = document.get('dataSource')
        document_name = document.get('documentName', '')
@@ -571,6 +584,34 @@ class UploadDocView(GenericView, DocHandler):
        data_source = self.fix_data_source(data_source)
        document_scheme = self.fix_scheme(document_scheme)
+        # fsm激活状态, 更新ocr_result 表fsm状态
+        self.running_log.info('[doc upload applicationId-{0}] [applicationStatus-{1}, activated-{2}]'
+                              .format(application_id, application_status,
+                                      True if consts.FSM_ACTIVITED_STATUS.get(application_status) else False))
+        # When the incoming application status is one of the "Activated-*"
+        # statuses, flag the matching OCR result row (creating it if absent).
+        if consts.FSM_ACTIVITED_STATUS.get(application_status):
+            # Pick the result table from business type + document scheme.
+            # SETTLEMENT and INSURANCE share the SE result tables.
+            se_schemes = (RequestTeam.SETTLEMENT.name, RequestTeam.INSURANCE.name)
+            result_class = None
+            if business_type == consts.HIL_PREFIX:
+                if document_scheme == RequestTeam.ACCEPTANCE.name:
+                    result_class = HILOCRResult
+                elif document_scheme in se_schemes:
+                    result_class = HILSEOCRResult
+            elif business_type == consts.AFC_PREFIX:
+                if document_scheme == RequestTeam.ACCEPTANCE.name:
+                    result_class = AFCOCRResult
+                elif document_scheme in se_schemes:
+                    result_class = AFCSEOCRResult
+            if result_class is None:
+                # No result table for this combination (e.g. CONTRACTMANAGEMENT);
+                # previously this fell through to result_class.objects and raised
+                # AttributeError on None.
+                self.running_log.info('[doc upload applicationId-{0}] [fsm activated skip] '
+                                      '[business_type={1}] [document_scheme={2}]'
+                                      .format(application_id, business_type, document_scheme))
+            else:
+                ocr_result_obj = result_class.objects.filter(application_id=application_id).first()
+                if ocr_result_obj is None:
+                    ocr_result_obj = result_class()
+                    ocr_result_obj.application_id = application_id
+                ocr_result_obj.fsm_activited = 1
+                ocr_result_obj.save()
+        self.running_log.info('[doc upload applicationId-{0}] [ocr result saved]'.format(application_id))
        if data_source == consts.DATA_SOURCE_LIST[1]:
            if document_name.endswith('-证书.pdf') or document_name.endswith('-证书'):
                self.running_log.info('[doc upload success] [eapp license skip] [args={0}]'.format(args))
@@ -602,13 +643,22 @@ class UploadDocView(GenericView, DocHandler):
        is_zip = False
        classify_1 = 0
-        # 电子合同
+        # 电子合同 Econtract or OVP(FSM)
-        if data_source == consts.DATA_SOURCE_LIST[-1] and document_scheme == consts.DOC_SCHEME_LIST[1]:
+        if data_source == consts.DATA_SOURCE_LIST[2] or data_source == consts.DATA_SOURCE_LIST[3]:  
+            if document_scheme == consts.DOC_SCHEME_LIST[1]:
                for keyword, classify_1_tmp in consts.ECONTRACT_KEYWORDS_MAP.get(prefix):
                    if keyword in document_name:
                        classify_1 = classify_1_tmp
                        break
-        elif document_name.endswith('.zip') or document_name.endswith('.rar') or document_name.endswith('.ZIP') \
+        # FSM合同：WEP/MSI/SC
+        elif data_source == consts.DATA_SOURCE_LIST[0] and document_scheme == consts.DOC_SCHEME_LIST[0]:
+            for keyword, classify_1_tmp in consts.FSM_ECONTRACT_KEYWORDS_MAP.get(prefix):
+                if keyword in document_name:
+                    classify_1 = classify_1_tmp
+                    break 
+        if document_name.endswith('.zip') or document_name.endswith('.rar') or document_name.endswith('.ZIP') \
                or document_name.endswith('.RAR'):
            is_zip = True
@@ -809,6 +859,9 @@ class CompareView(GenericView):
        '''
+pre_fsm_url = conf.PRE_FSM_URL
 class SECompareView(GenericView, PreSEHandler):
    permission_classes = [IsAuthenticated]
    authentication_classes = [OAuth2AuthenticationWithUser]
@@ -829,7 +882,52 @@ class SECompareView(GenericView, PreSEHandler):
        fsm_flag = content.get('fsmFlag', False)
        fsm_special_car = content.get('fsmSpecialCar', False)
        fsm_best_price = content.get('fsmBestPrice', False)
+        fsm_landing_dealer = content.get('fsmLandingDealer')
+        if fsm_special_car or fsm_best_price:
+            # FSM special-car and best-price applications both get the same
+            # fixed "not supported" result; no comparison is run.
+            compare_result = {
+                "is_pass": False,
+                "particulars": [{
+                    "object_name": "",
+                    "fields": [{
+                        "input": "",
+                        "ocr": "",
+                        "field_is_pass": False,
+                        "comments": "此申请为FSM 特殊申请，暂不支持预放款流程"
+                    }]
+                }]
+            }
+        elif fsm_flag:
+            # Plain FSM application (both special flags are falsy on this
+            # branch): delegate the comparison to the Java pre-FSM API.
+            try:
+                self.running_log.info("{0} request java pre fsm api, url:{1}, body:{2}".format(log_base, pre_fsm_url, json.dumps(content)))
+                headers = {
+                    'Content-Type': 'application/json'
+                }
+                # Bound the outbound call so a stalled Java service cannot hang
+                # this request forever; a timeout lands in the except below and
+                # yields the empty result like any other failure.
+                # PRE_FSM_TIMEOUT is optional in conf; 60s is the fallback.
+                resp = requests.post(pre_fsm_url, headers=headers, json=content,
+                                     timeout=getattr(conf, 'PRE_FSM_TIMEOUT', 60))
+                self.running_log.info("{0} response from java pre fsm api, resp:{1}".format(log_base, resp.text))
+                result = json.loads(resp.text)
+                compare_result = result.get("result")
+                if not compare_result:
+                    compare_result = get_empty_result()
+            except Exception:
+                # Best-effort: any failure (network, JSON, unexpected shape) is
+                # logged with its traceback and replaced by the empty result.
+                self.running_log.error("{0} pre fsm request to java error, url:{1}, param:{2}, errorMsg:{3}".format(
+                    log_base, pre_fsm_url, json.dumps(content), traceback.format_exc()))
+                compare_result = get_empty_result()
+        elif not fsm_flag:
            # 存库, 用于银行卡比对
            try:
                bank_class = HILbankVerification if business_type in consts.HIL_SET else AFCbankVerification
@@ -853,7 +951,8 @@ class SECompareView(GenericView, PreSEHandler):
            # preSettlement比对
            compare_result = self.pre_compare_entrance(content)
            self.running_log.info('{0} [prese completed] [applicationEntity={1}] [application_id={2}] [uniq_seq={3}] '
-                              '[result={4}]'.format(log_base, business_type, application_id, uniq_seq, compare_result))
+                                  '[result={4}]'.format(log_base, business_type, application_id, uniq_seq,
+                                                        compare_result))
        try:
            end_time = time.time()
@@ -956,10 +1055,10 @@ class DocView(GenericView, DocHandler):
        application_id_query = Q(application_id__contains=application_id) if application_id is not None else Q()
        data_source_query = Q(data_source=data_source) if data_source is not None else Q()
        upload_finish_time_query = Q(upload_finish_time__gte=upload_time_start,
-                                     upload_finish_time__lt=upload_time_end + datetime.timedelta(days=1))\
+                                     upload_finish_time__lt=upload_time_end + datetime.timedelta(days=1)) \
            if upload_time_start is not None and upload_time_end is not None else Q()
        create_time_query = Q(create_time__gte=create_time_start,
-                              create_time__lt=create_time_end + datetime.timedelta(days=1))\
+                              create_time__lt=create_time_end + datetime.timedelta(days=1)) \
            if create_time_start is not None and create_time_end is not None else Q()
        query = application_id_query & status_query & data_source_query & upload_finish_time_query & create_time_query
        val_tuple = ('id', 'application_id', 'upload_finish_time', 'create_time', 'document_scheme', 'data_source',
@@ -971,10 +1070,11 @@ class DocView(GenericView, DocHandler):
        if start_index >= total > 0:
            raise self.invalid_params('页数不存在')
-        doc_queryset = doc_class.objects.filter(query).values(*val_tuple).order_by('-create_time')[start_index: end_index]
+        doc_queryset = doc_class.objects.filter(query).values(*val_tuple).order_by('-create_time')[
+                       start_index: end_index]
        # doc_list = self.get_doc_list(doc_queryset, prefix)
        for doc_dict in doc_queryset:
-            tmp_scheme = consts.COMPARE_DOC_SCHEME_LIST[0] if doc_dict['document_scheme'] == consts.DOC_SCHEME_LIST[0]\
+            tmp_scheme = consts.COMPARE_DOC_SCHEME_LIST[0] if doc_dict['document_scheme'] == consts.DOC_SCHEME_LIST[0] \
                else consts.COMPARE_DOC_SCHEME_LIST[1]
            application_link = '{0}/showList/showList?entity={1}&scheme={2}&case_id={3}'.format(
                conf.BASE_URL, prefix, tmp_scheme, doc_dict['application_id'])
@@ -1021,7 +1121,6 @@ class DocView(GenericView, DocHandler):
            #     os.remove(tmp_save_path)
            #     raise self.invalid_params(msg='invalid params: PDF file XSS')
        file.close()
        # 1. 上传信息记录
        application_id = '{0}{1}'.format(consts.FIXED_APPLICATION_ID_PREFIX, metadata_version_id)
@@ -1104,7 +1203,8 @@ class CompareResultView(GenericView):
                latest_compared_time = ''
            else:
                whole_result = consts.RESULT_Y if result_obj.ocr_auto_result_pass else consts.RESULT_N
-                latest_compared_time = '' if result_obj.ocr_latest_comparison_time is None else result_obj.ocr_latest_comparison_time.strftime('%Y-%m-%d %H:%M')
+                latest_compared_time = '' if result_obj.ocr_latest_comparison_time is None else result_obj.ocr_latest_comparison_time.strftime(
+                    '%Y-%m-%d %H:%M')
            source = consts.INFO_SOURCE[1]
            version = comments = ''
@@ -1120,7 +1220,8 @@ class CompareResultView(GenericView):
                'source': source,
                'version': version,
                'comments': comments,
-                'result': [] if result_obj is None or not result_obj.ocr_auto_result else json.loads(result_obj.ocr_auto_result)
+                'result': [] if result_obj is None or not result_obj.ocr_auto_result else json.loads(
+                    result_obj.ocr_auto_result)
            }
            return response.ok(data=compare_result)
@@ -1155,7 +1256,8 @@ class CompareResultView(GenericView):
            'id': 0 if result_obj is None else result_obj.id,
            'application_id': case_id,
            'entity': entity,
-            'scheme': consts.DOC_SCHEME_LIST[0] if scheme == consts.COMPARE_DOC_SCHEME_LIST[0] else consts.DOC_SCHEME_LIST[1],
+            'scheme': consts.DOC_SCHEME_LIST[0] if scheme == consts.COMPARE_DOC_SCHEME_LIST[0] else
+            consts.DOC_SCHEME_LIST[1],
            'whole_result': whole_result,
            'latest_compared_time': '' if result_obj is None else result_obj.update_time.strftime('%Y-%m-%d %H:%M'),
            'source': source,
@@ -1328,7 +1430,8 @@ class SECMSView(GenericView):
        args = request.data
        cms_info = args.get('content', {})
-        business_type = consts.AFC_PREFIX if cms_info.get('financeCompany', '').startswith('宝马') else consts.HIL_PREFIX
+        business_type = consts.AFC_PREFIX if cms_info.get('financeCompany', '').startswith(
+            '宝马') else consts.HIL_PREFIX
        src_application_id = cms_info.get('settlemnetVerification', {}).get('applicationNo', '')
        application_id = src_application_id[:src_application_id.rfind('-')]
@@ -1363,6 +1466,32 @@ class SECMSView(GenericView):
            content=content_str,
        )
+        # 检查是否fsm流程(SE)
+        fsm_contract = cms_info.get('FSMContract', False)
+        fsm_best_price = cms_info.get('FSMBestPrice', False)
+        if fsm_contract:
+            # 记录fsm 流程的cms 提交
+            try:
+                cms_status_class = HILCmsStatusInfo if business_type in consts.HIL_SET else AFCCmsStatusInfo
+                cms_status_info = cms_status_class.objects.filter(application_id=application_id).first()
+                if cms_status_info:
+                    cms_status_info.is_fsm = 1
+                    cms_status_info.update_time = datetime.datetime.now()
+                    cms_status_info.save()
+                else:
+                    cms_status_info = cms_status_class()
+                    cms_status_info.application_id = application_id
+                    cms_status_info.business_type = business_type
+                    cms_status_info.is_fsm = 1
+                    cms_status_info.update_time = datetime.datetime.now()
+                    cms_status_info.create_time = datetime.datetime.now()
+                    cms_status_info.save()
+            except Exception as e:
+                self.exception_log.exception(
+                    '[cms view] [cms_status_info db save failed] [error={0}]'.format(traceback.format_exc()))
+            fsm_compare.apply_async((application_id, business_type, None, None, False, True),
+                                    queue='queue_compare')
+        else:
            # 触发比对
            compare.apply_async((application_id, business_type, None, None, False, True),
                                queue='queue_compare')
@@ -1458,7 +1587,7 @@ class AutoSettlementView(GenericView):
        whole_result_query = Q(ocr_whole_result_pass=whole_result) if not isinstance(whole_result, str) else Q()
        rpa_result_query = Q(rpa_result=rpa_result) if not isinstance(rpa_result, str) else Q()
        time1_query = Q(rpa_get_case_from_ocr_time__gte=get_case_from_ocr_time_start,
-                        rpa_get_case_from_ocr_time__lt=get_case_from_ocr_time_end + datetime.timedelta(days=1))\
+                        rpa_get_case_from_ocr_time__lt=get_case_from_ocr_time_end + datetime.timedelta(days=1)) \
            if get_case_from_ocr_time_start is not None and get_case_from_ocr_time_end is not None else Q()
        time2_query = Q(rpa_activated_time__gte=activated_time_start,
                        rpa_activated_time__lt=activated_time_end + datetime.timedelta(days=1)) \
--- a/src/celery_compare/tasks.py
View file @9f4b364
+++ b/src/celery_compare/tasks.py
View file @9f4b364
@@ -7,6 +7,9 @@ import traceback
 import numpy as np
 from datetime import datetime, timedelta
 from collections import OrderedDict
+import requests
 from . import app
 from settings import conf
 from apps.doc.models import (
@@ -3266,6 +3269,33 @@ def se_compare(application_id, application_entity, ocr_res_id, last_obj, ocr_res
 @app.task
+def fsm_compare(application_id, application_entity, uniq_seq, ocr_res_id, is_ca=True, is_cms=False):
+    """Forward an FSM comparison request to the Java FSM service over HTTP.
+    The task only logs the outcome: the response text goes to ``compare_log`` and any exception
+    raised while calling the service is logged and swallowed.
+    Args:
+        application_id: sent as ``applicationId``.
+        application_entity: sent as ``businessType``.
+        uniq_seq: only logged; it is not part of the request body.
+        ocr_res_id: sent as ``ocrResId``.
+        is_ca: sent as ``isCa``.
+        is_cms: sent as ``isCms``.
+    """
+    compare_log.info('{0} [receive fsm task] [entity={1}] [id={2}] [uniq_seq={3}] [ocr_res_id={4}] [is_ca={5}] '
+                     '[is_cms={6}]'.format(log_base, application_entity, application_id, uniq_seq, ocr_res_id,
+                                           is_ca, is_cms))
+    # Call the Java FSM comparison API over HTTP.
+    # FSM is an SE flow; CA can be ignored for now.
+    url = conf.FSM_URL
+    body = {
+            'applicationId': application_id,
+            'businessType': application_entity,
+            'ocrResId': ocr_res_id,
+            'isCa': is_ca,
+            'isCms': is_cms
+    }
+    # requests waits forever when no timeout is given, which would pin this worker if the Java
+    # service stalls. NOTE(review): 60 seconds is a conservative guess - tune to the service SLA.
+    request_timeout_seconds = 60
+    try:
+        compare_log.info("request java fsm api, url:{0}, body:{1}".format(url, json.dumps(body)))
+        headers = {
+            'Content-Type': 'application/json'
+        }
+        resp = requests.post(url, headers=headers, json=body, timeout=request_timeout_seconds)
+        compare_log.info("response from fsm api, resp:{0}".format(resp.text))
+    except Exception:
+        # Best effort: a failed call is logged with its traceback and not re-raised.
+        compare_log.error("fsm full request to java error, url:{0}, param:{1}, errorMsg:{2}".format(
+            url, json.dumps(body), traceback.format_exc()))
+@app.task
 def compare(application_id, application_entity, uniq_seq, ocr_res_id, is_ca=True, is_cms=False):
    # POS: application_id, application_entity, uniq_seq, None
    # OCR: application_id, business_type(application_entity), None, ocr_res_id
--- a/src/common/electronic_afc_contract/afc_contract_ocr.py
View file @9f4b364
+++ b/src/common/electronic_afc_contract/afc_contract_ocr.py
View file @9f4b364
@@ -6,6 +6,7 @@
 # @Description   :
 from .get_char import Finder
+from .get_char_fsm import Finder as FSMFinder
 import numpy as np
@@ -23,7 +24,7 @@ def extract_info(ocr_results):
    return {'page_1': {'合同编号': contract_no}}
-def predict(pdf_info, is_qrs=False):
+def predict(pdf_info, is_qrs=False, is_fsm=False):
    ocr_results = {}
    for pno in pdf_info:
        ocr_results[pno] = {}
@@ -50,6 +51,9 @@ def predict(pdf_info, is_qrs=False):
        results = extract_info(ocr_results)
    else:
        # 输入是整个 PDF 中的信息
+        if is_fsm:
+            f = FSMFinder(pdf_info, ocr_results=ocr_results) 
+        else:
            f = Finder(pdf_info, ocr_results=ocr_results)
        results = f.get_info()
    return results
--- a/src/common/electronic_afc_contract/get_char_fsm.py 0 → 100644
View file @9f4b364
+++ b/src/common/electronic_afc_contract/get_char_fsm.py 0 → 100644
View file @9f4b364
+import re
+import numpy as np
+from fuzzywuzzy import fuzz
+from shapely.geometry import Polygon
+class Finder:
+    def __init__(self, pdf_info, ocr_results):
+        """Store the parsed PDF data and OCR results used by the extraction methods.
+        Args:
+            pdf_info: per-page PDF text information, kept as ``self.pdf_info``.
+            ocr_results: per-page OCR results, kept as ``self.ocr_results``.
+        """
+        # Template for a single extracted field; methods copy it before filling it in.
+        self.item = dict(words=None, position=None)
+        self.is_asp = False
+        self.ocr_results = ocr_results
+        self.pdf_info = pdf_info
+    def gen_init_result(self, is_asp):
+        """Build ``self.init_result``, the empty per-page output skeleton.
+        Every leaf is a fresh copy of ``self.item``. Previously all leaves were the very same dict
+        object, so an in-place write to one field would have shown up in every other field.
+        Args:
+            is_asp: accepted for interface compatibility. NOTE(review): the page layout is chosen
+                from ``self.is_asp``, not from this argument - confirm which one callers intend.
+        """
+        def blank():
+            return self.item.copy()
+        def group(*names):
+            return {name: blank() for name in names}
+        def loan_principal():
+            return group("大写", "小写", "车辆贷款本金金额", "附加产品融资贷款本金总金额")
+        def party():
+            return group("name", "id")
+        def bank_account():
+            return group("账号", "户名", "开户行")
+        def signature_page():
+            page = {"合同编号": blank()}
+            for signer in ("主借人签字", "共借人签字", "保证人1签字", "保证人2签字", "见证人签字"):
+                page[signer] = group("签字", "日期")
+            return page
+        # Output format of the extraction algorithm.
+        self.init_result = {
+            "page_1": {
+                "合同编号": blank(),
+                "所购车辆价格": blank(),
+                "车架号": blank(),
+                "贷款本金金额": loan_principal(),
+                "贷款期限": blank(),
+                "附加产品融资贷款本金总金额明细": blank(),
+                "借款人签字及时间": blank(),
+            },
+            "page_2": {
+                "合同编号": blank(),
+                "借款人及抵押人": party(),
+                "共同借款人及共同抵押人": party(),
+                "保证人1": party(),
+                "保证人2": party(),
+                "所购车辆价格": blank(),
+                "车架号": blank(),
+                "经销商": blank(),
+                "贷款本金金额": loan_principal(),
+                "贷款期限": blank(),
+                "标准利率": blank(),
+                "借款人收款账户": bank_account(),
+                "还款账户": bank_account(),
+            },
+            "page_3": {
+                "合同编号": blank(),
+                "还款计划表": blank(),
+                "车辆代理商": blank(),
+            },
+            "page_4": {
+                "合同编号": blank(),
+                "附加产品融资贷款本金总金额明细": blank(),
+            },
+            "page_5": {"合同编号": blank()},
+            "page_6": {"合同编号": blank()},
+        }
+        if self.is_asp:
+            # With ASP the signature block moves to page 8 and page 7 only carries the contract number.
+            self.init_result["page_7"] = {"合同编号": blank()}
+            self.init_result["page_8"] = signature_page()
+        else:
+            self.init_result["page_7"] = signature_page()
+    def get_top_iou(self, poly, ocr_result):
+        """Find the OCR entry whose polygon overlaps ``poly`` the most.
+        Args:
+            poly: flat coordinate sequence (or array) reshaped to ``(-1, 2)`` points.
+            ocr_result: mapping of key -> ``(bbox, text)``; each ``bbox`` is reshaped the same way.
+        Returns:
+            ``[iou, key]`` for the entry with the highest IoU (the later entry wins a tie), or
+            ``(-1, -1)`` when no entry could be scored.
+        """
+        if not ocr_result:
+            return -1, -1
+        # The query polygon does not change inside the loop: build and validate it once.
+        query = Polygon(np.array(poly).reshape((-1, 2)))
+        if not query.is_valid:
+            return -1, -1
+        best = None
+        for key, (bbox, _text) in ocr_result.items():
+            candidate = Polygon(np.array(bbox).reshape((-1, 2)))
+            if not candidate.is_valid:
+                continue
+            inter = candidate.intersection(query).area
+            union = candidate.area + query.area - inter
+            # Guard the degenerate case where both shapes have no area.
+            iou = inter / union if union > 0 else 0.0
+            # ">=" keeps the previous tie-break: the last entry with the top score is returned.
+            if best is None or iou >= best[0]:
+                best = [iou, key]
+        if best is None:
+            return -1, -1
+        return best
+    def poly_to_rectangle(self, poly):
+        """Convert an 8-value polygon into a 4-value ``[xmin, ymin, xmax, ymax]`` list.
+        The values are picked by position: ``[poly[6], poly[3], poly[4], poly[7]]``. ``poly`` must
+        unpack into exactly eight values.
+        """
+        _x0, _y0, _x1, top, right, _y2, left, bottom = poly
+        return [left, top, right, bottom]
+    def get_contract_no(self, page_num):
+        """Read the contract number from the OCR results of one page.
+        Args:
+            page_num (string): key of the page in ``self.ocr_results``.
+        Returns:
+            dict: copy of ``self.item``. When an entry contains '合同编号:', ``words`` is the text
+                after its last ':' and ``position`` its converted box; the last such entry wins.
+        """
+        result = self.item.copy()
+        for polygon, content in self.ocr_results[page_num].values():
+            if '合同编号:' not in content:
+                continue
+            result['words'] = content.split(':')[-1]
+            result['position'] = self.poly_to_rectangle(polygon)
+        return result
+    def get_vehicle_price(self, page_num='0'):
+        """Read the purchased-vehicle price from the OCR results of one page.
+        Args:
+            page_num: key of the page in ``self.ocr_results``.
+        Returns:
+            dict: copy of ``self.item``. For the last entry containing '所购车辆价格为人民币',
+                ``words`` is the text after its last '币' and ``position`` its converted box.
+        """
+        price = self.item.copy()
+        for polygon, content in self.ocr_results[page_num].values():
+            if '所购车辆价格为人民币' not in content:
+                continue
+            price['words'] = content.split('币')[-1]
+            price['position'] = self.poly_to_rectangle(polygon)
+        return price
+    def get_vin(self, page_num='0'):
+        """Read the VIN from the OCR results of one page.
+        Args:
+            page_num: key of the page in ``self.ocr_results``.
+        Returns:
+            dict: copy of ``self.item``. For the last entry containing '车架号:', ``words`` is the
+                text after its last ':' and ``position`` its converted box.
+        """
+        vin = self.item.copy()
+        for polygon, content in self.ocr_results[page_num].values():
+            if '车架号:' not in content:
+                continue
+            vin['words'] = content.split(':')[-1]
+            vin['position'] = self.poly_to_rectangle(polygon)
+        return vin
+    def get_loan_principal(self, page_num='0'):
+        """Extract the loan principal figures from the text spans of one page.
+        Args:
+            page_num: key of the page in ``self.pdf_info``.
+        Returns:
+            tuple: ``(upper, lower, asp_1, asp_2)``, each a copy of ``self.item``. ``upper`` is the
+                last span whose fuzzy ratio against the Chinese numeral characters exceeds 15,
+                ``lower`` the last span containing '小写：¥', and ``asp_1`` / ``asp_2`` the bracketed
+                '人民币：小写：[...]' amounts whose mean y is smaller / larger than that of the
+                '附加产品融资贷款本金总金额' anchor span.
+        """
+        chinese_keywords = ['壹', '贰', '叁', '肆', '伍', '陆', '柒', '捌', '玖', '拾',
+                            '佰', '仟', '万', '亿', '元', '角', '分', '零', '整']
+        # Loop-invariant values are prepared once instead of once per span.
+        keyword_text = ''.join(chinese_keywords)
+        amount_pattern = re.compile(r'人民币：小写：\[(.*)\]')
+        def text_spans():
+            for block in self.pdf_info[page_num]['blocks']:
+                if block['type'] != 0:
+                    continue
+                for line in block['lines']:
+                    for span in line['spans']:
+                        yield span['bbox'], span['text']
+        upper = self.item.copy()
+        lower = self.item.copy()
+        asp_1 = self.item.copy()
+        asp_2 = self.item.copy()
+        anchor_bbox = None
+        for bbox, text in text_spans():
+            if fuzz.ratio(keyword_text, text) > 15:
+                # text is rebound on purpose: the checks below see the trimmed value, as before.
+                text = text.split('：')[-1].strip()
+                upper['position'] = bbox
+                upper['words'] = text
+            if '小写：¥' in text:
+                lower['position'] = bbox
+                lower['words'] = text.split('¥')[-1].strip()
+            if '附加产品融资贷款本金总金额' == text:
+                anchor_bbox = bbox
+        if anchor_bbox:
+            anchor_y = np.mean(anchor_bbox[1::2])
+            for bbox, text in text_spans():
+                if '人民币：小写：' not in text:
+                    continue
+                found = amount_pattern.search(text)
+                if found is None:
+                    # The marker is present but the bracketed amount is not; indexing the empty
+                    # findall() result used to raise IndexError here.
+                    continue
+                span_y = np.mean(bbox[1::2])
+                if span_y < anchor_y:
+                    asp_1['position'] = bbox
+                    asp_1['words'] = found.group(1)
+                elif span_y > anchor_y:
+                    asp_2['position'] = bbox
+                    asp_2['words'] = found.group(1)
+        return upper, lower, asp_1, asp_2
+    def get_loan_term(self, page_num='0'):
+        """Extract the loan term (in months) from the text spans of one page.
+        The text of all spans is concatenated and searched for '贷款期限<digits>个月'; the position is
+        taken from the last span containing '<digits>个月'.
+        Args:
+            page_num: key of the page in ``self.pdf_info``.
+        Returns:
+            dict: copy of ``self.item`` with ``words`` set to the digit string when located.
+        """
+        loan_term = self.item.copy()
+        spans = [(span['bbox'], span['text'])
+                 for block in self.pdf_info[page_num]['blocks'] if block['type'] == 0
+                 for line in block['lines']
+                 for span in line['spans']]
+        found = re.search(r'贷款期限(\d+)个月', ''.join(content for _box, content in spans))
+        if not found:
+            return loan_term
+        months = found.group(1)
+        needle = f'{months}个月'
+        for box, content in spans:
+            if needle in content:
+                loan_term['position'] = box
+                loan_term['words'] = months
+        return loan_term
+    def get_standard_rate(self, page_num='0'):
+        """Extract the standard interest rate from the text spans of one page.
+        Args:
+            page_num: key of the page in ``self.pdf_info``.
+        Returns:
+            dict: copy of ``self.item``; for the last span matching '本合同当期的标准利率为<rate>%/年',
+                ``words`` is ``<rate>`` and ``position`` the span's bbox.
+        """
+        rate_pattern = re.compile(r'本合同当期的标准利率为(\S+)%/年')
+        standard_rate = self.item.copy()
+        text_blocks = (block for block in self.pdf_info[page_num]['blocks'] if block['type'] == 0)
+        for block in text_blocks:
+            for line in block['lines']:
+                for span in line['spans']:
+                    box, content = span['bbox'], span['text']
+                    hit = rate_pattern.search(content)
+                    if hit is None:
+                        continue
+                    standard_rate['position'] = box
+                    standard_rate['words'] = hit.group(1)
+        return standard_rate
+    def mergelist(self, text_list):
+        """Merge each '所购...' entry with its successor while that successor contains Chinese text.
+        On every pass the last qualifying entry is joined with the entry after it; passes repeat
+        until nothing qualifies.
+        Args:
+            text_list: list of strings.
+        Returns:
+            list: ``text_list`` itself when nothing was merged, otherwise a new merged list.
+        """
+        non_chinese = re.compile("[^\u4e00-\u9fa5]")        # matches every non-Chinese character
+        merged = text_list
+        while True:
+            merge_at = -1
+            # Stop one short of the end: the final entry has no successor to merge with (looking
+            # it up used to raise IndexError when the last entry contained '所购').
+            for index in range(len(merged) - 1):
+                if '所购' in merged[index] and non_chinese.sub('', merged[index + 1]):
+                    merge_at = index
+            if merge_at == -1:
+                return merged
+            merged = merged[:merge_at] + [merged[merge_at] + merged[merge_at + 1]] + merged[merge_at + 2:]
+    def get_asp_details(self, page_num):
+        """Rebuild the add-on product (ASP) financing detail table from the OCR results of one page.
+        Args:
+            page_num: key of the page in ``self.ocr_results``.
+        Returns:
+            dict: copy of ``self.item`` whose ``words`` is a list of rows; the first two rows are the
+                fixed title and column headers, followed by the rows found on the page.
+        """
+        asp_details_table_term = self.item.copy()
+        asp_details_table = [['附加产品融资贷款本金总金额及贷款利率明细'], ['项目1', '用途总金额2', '贷款本金3']]
+        # Boxes of the three column headers and of the "total" row label, when present.
+        bbox_xm = None
+        bbox_ytzje = None
+        bbox_dkbj = None
+        bbox_total = None
+        for key in self.ocr_results[page_num]:
+            bbox, text = self.ocr_results[page_num][key]
+            if text == '项目1':
+                bbox_xm = bbox
+            if text == '用途总金额2':
+                bbox_ytzje = bbox
+            if text == '贷款本金3':
+                bbox_dkbj = bbox
+            if text in ['附加产品融资贷款本', '附加产品融资贷款本金', '附加产品融资贷']:
+                bbox_total = bbox
+        # NOTE(review): only bbox_xm is checked here; bbox_ytzje / bbox_dkbj are indexed below and
+        # would raise TypeError if their headers were not found - confirm they always co-occur.
+        if bbox_xm:
+            # Follow the '项目1' column for at most 10 steps, moving the probe box by 1.4x its height.
+            for i in range(10):
+                rh = abs(bbox_xm[1]-bbox_xm[-1])
+                anchor = np.array(bbox_xm).reshape((-1 ,2))
+                # Shifts the second coordinate of every point (presumably y, i.e. the next row).
+                anchor[:, 1] += int(rh*1.4)
+                _iou, _key = self.get_top_iou(poly=anchor, ocr_result=self.ocr_results[page_num])
+                if _iou > 0:
+                    bbox, xm_text = self.ocr_results[page_num][_key]
+                    bbox_xm = bbox
+                    # Handle item text that wraps onto a second line: append it to the previous row.
+                    if not '所购' in xm_text:
+                        line = asp_details_table[-1]
+                        line[0] += xm_text
+                        asp_details_table[-1] = line
+                        continue
+                    # print(xm_text)
+                    # Probe box: x values from the '用途总金额2' header, y values from the current row.
+                    anchor_1 = [bbox_ytzje[0], bbox[1], bbox_ytzje[2], bbox[3],
+                                bbox_ytzje[4], bbox[5], bbox_ytzje[6], bbox[7]]
+                    # NOTE(review): the returned IoU is not checked for this lookup or the next one.
+                    _iou, _key = self.get_top_iou(poly=anchor_1, ocr_result=self.ocr_results[page_num])
+                    bbox, ytzje_text = self.ocr_results[page_num][_key]
+                    # print(ytzje_text)
+                    # Probe box: x values from the '贷款本金3' header, y values from the box just matched.
+                    anchor_2 = [bbox_dkbj[0], bbox[1], bbox_dkbj[2], bbox[3],
+                                bbox_dkbj[4], bbox[5], bbox_dkbj[6], bbox[7]]
+                    _iou, _key = self.get_top_iou(poly=anchor_2, ocr_result=self.ocr_results[page_num])
+                    bbox, dkbj_text = self.ocr_results[page_num][_key]
+                    # print(dkbj_text)
+                    # Same text in both columns: one OCR entry covered both cells, so split it.
+                    # NOTE(review): the unpacking needs exactly one space in the text - confirm.
+                    if xm_text == ytzje_text:
+                        xm_text, ytzje_text = xm_text.split(' ')
+                    line = [xm_text, ytzje_text, dkbj_text]
+                    asp_details_table.append(line)
+                else:
+                    break
+        if bbox_total:
+            # Total row: x values from the '贷款本金3' header, y values from the total label.
+            anchor = [bbox_dkbj[0], bbox_total[1], bbox_dkbj[2], bbox_total[3],
+                    bbox_dkbj[4], bbox_total[5], bbox_dkbj[6], bbox_total[7]]
+            _iou, _key = self.get_top_iou(poly=anchor, ocr_result=self.ocr_results[page_num])
+            bbox, total_text = self.ocr_results[page_num][_key]
+            asp_details_table.append(['附加产品融资贷款本金总金额:', '', total_text])
+        asp_details_table_term['words'] = asp_details_table
+        return asp_details_table_term
+    def get_signature(self):
+        """Return the signing-date span of page '0'.
+        Returns:
+            dict: copy of ``self.item``; for the last text span containing '签署日期', ``words`` is
+                the full span text and ``position`` its bbox.
+        """
+        signature = self.item.copy()
+        text_blocks = [block for block in self.pdf_info['0']['blocks'] if block['type'] == 0]
+        for block in text_blocks:
+            for line in block['lines']:
+                for span in line['spans']:
+                    box, content = span['bbox'], span['text']
+                    if '签署日期' not in content:
+                        continue
+                    signature['words'] = content
+                    signature['position'] = box
+        return signature
+    def get_somebody(self, top, bottom):
+        """Return the name and ID entries found between two marker texts on page '1'.
+        Args:
+            top: text marking the upper bound; ``bbox[3]`` of the last span containing it is used.
+            bottom: text marking the lower bound; ``bbox[3]`` of the last span containing it is used.
+        Returns:
+            tuple: ``(name, id)``, each a copy of ``self.item`` filled from the last span strictly
+                between the two bounds that contains the respective label.
+        """
+        spans = [(span['bbox'], span['text'])
+                 for block in self.pdf_info['1']['blocks'] if block['type'] == 0
+                 for line in block['lines']
+                 for span in line['spans']]
+        # First pass: locate the bounds (both default to 0 when a marker is missing).
+        upper_y = lower_y = 0
+        for box, content in spans:
+            if top in content:
+                upper_y = box[3]
+            if bottom in content:
+                lower_y = box[3]
+        # Second pass: pick the labelled values that sit between the bounds.
+        person_name, person_id = self.item.copy(), self.item.copy()
+        for box, content in spans:
+            if not upper_y < box[3] < lower_y:
+                continue
+            if '姓名/名称' in content:
+                person_name['position'] = box
+                person_name['words'] = content.split('：')[-1]
+            if '自然人身份证件号码/法人执照号码' in content:
+                person_id['position'] = box
+                person_id['words'] = content.split('：')[-1]
+        return person_name, person_id
+    def get_seller(self):
+        """Find the value printed next to the dealer label on page '1'.
+        The label is the last span whose text is exactly '经销商' or '车辆销售方'. The value is the
+        last span whose mean x lies between the label's ``bbox[2]`` and half the page width and
+        whose mean y lies between the label's ``bbox[1]`` and ``bbox[3]``.
+        Returns:
+            dict: copy of ``self.item`` filled with that span's text and bbox when found.
+        """
+        seller = self.item.copy()
+        spans = [(span['bbox'], span['text'])
+                 for block in self.pdf_info['1']['blocks'] if block['type'] == 0
+                 for line in block['lines']
+                 for span in line['spans']]
+        # Locate the label first.
+        label_box = None
+        for box, content in spans:
+            if content in ('经销商', '车辆销售方'):
+                label_box = box
+        if not label_box:
+            return seller
+        # With the label known, match the value relative to it.
+        page_middle = self.pdf_info['1']['width'] * 0.5
+        for box, content in spans:
+            in_columns = label_box[2] < np.mean(box[::2]) < page_middle
+            if in_columns and label_box[1] < np.mean(box[1::2]) < label_box[3]:
+                seller['position'] = box
+                seller['words'] = content
+        return seller
+    def get_cldls(self):
+        """Find the value printed next to the '车辆代理商' label on page '2'.
+        The label comes from the first text line holding a span whose stripped text is '车辆代理商'
+        (the last such span of that line). The value is the first span whose mean x lies between
+        the label's ``bbox[2]`` and half the page width and whose mean y lies between the label's
+        ``bbox[1]`` and ``bbox[3]``.
+        Returns:
+            dict: copy of ``self.item`` filled with that span's text and bbox when found.
+        """
+        def locate_label():
+            for block in self.pdf_info['2']['blocks']:
+                if block['type'] != 0:
+                    continue
+                for line in block['lines']:
+                    found = None
+                    for span in line['spans']:
+                        box, content = span['bbox'], span['text']
+                        if content.strip() == '车辆代理商':
+                            found = box
+                    if found is not None:
+                        return found
+            return None
+        seller = self.item.copy()
+        # Locate the label first.
+        label_box = locate_label()
+        if not label_box:
+            return seller
+        # With the label known, return the first value matched relative to it.
+        page_middle = self.pdf_info['2']['width'] * 0.5
+        for block in self.pdf_info['2']['blocks']:
+            if block['type'] != 0:
+                continue
+            for line in block['lines']:
+                for span in line['spans']:
+                    box, content = span['bbox'], span['text']
+                    in_columns = label_box[2] < np.mean(box[::2]) < page_middle
+                    if in_columns and label_box[1] < np.mean(box[1::2]) < label_box[3]:
+                        seller['position'] = box
+                        seller['words'] = content
+                        return seller
+        return seller
+    def get_borrower_collection_account(self):
+        account = self.item.copy()
+        account_name = self.item.copy()
+        account_bank = self.item.copy()
+        all_text = ''
+        for block in self.pdf_info['1']['blocks']:
+            if block['type'] != 0:
+                continue
+            for line in block['lines']:
+                for span in line['spans']:
+                    bbox, text = span['bbox'], span['text']
+                    all_text += text
+        # 首先确定账户信息是哪种,我们只输出非另行通知的格式
+        if '借款人收款账户' in all_text:
+            all_text = all_text.replace('　', '').replace(' ', '')
+            matchs_1 = re.findall(r'账号：(.*?)户名', all_text)
+            if matchs_1:
+                words = matchs_1[0]
+                for block in self.pdf_info['1']['blocks']:
+                    if block['type'] != 0:
+                        continue
+                    for line in block['lines']:
+                        for span in line['spans']:
+                            bbox, text = span['bbox'], span['text']
+                            if f'{words}' in text:
+                                account['position'] = bbox
+                                account['words'] = words
+            matchs_2 = re.findall(r'户名：(.*?)开户行', all_text)
+            if matchs_2:
+                words = matchs_2[0]
+                for block in self.pdf_info['1']['blocks']:
+                    if block['type'] != 0:
+                        continue
+                    for line in block['lines']:
+                        for span in line['spans']:
+                            bbox, text = span['bbox'], span['text']
+                            if f'{words}' in text:
+                                account_name['position'] = bbox
+                                account_name['words'] = words
+            matchs_3 = re.findall(r'开户行：(.*?)借款人', all_text)
+            if matchs_3:
+                words = matchs_3[0]
+                for block in self.pdf_info['1']['blocks']:
+                    if block['type'] != 0:
+                        continue
+                    for line in block['lines']:
+                        for span in line['spans']:
+                            bbox, text = span['bbox'], span['text']
+                            if f'{words}' in text:
+                                account_bank['position'] = bbox
+                                account_bank['words'] = words
+        return account, account_name, account_bank
+    def get_payback_account(self):
+        account = self.item.copy()
+        account_name = self.item.copy()
+        account_bank = self.item.copy()
+        all_text = ''
+        for block in self.pdf_info['1']['blocks']:
+            if block['type'] != 0:
+                continue
+            for line in block['lines']:
+                for span in line['spans']:
+                    bbox, text = span['bbox'], span['text']
+                    all_text += text
+        # 首先确定账户信息是哪种,我们只输出非另行通知的格式
+        if '(13) 还款账户' in all_text:
+            all_text = all_text.split('(13) 还款账户')[-1]
+            all_text = all_text.replace('　', '').replace(' ', '')
+            matchs_1 = re.findall(r'账号：(.*?)户名', all_text)
+            if matchs_1:
+                words = matchs_1[0]
+                for block in self.pdf_info['1']['blocks']:
+                    if block['type'] != 0:
+                        continue
+                    for line in block['lines']:
+                        for span in line['spans']:
+                            bbox, text = span['bbox'], span['text']
+                            if f'{words}' in text:
+                                account['position'] = bbox
+                                account['words'] = words
+            matchs_2 = re.findall(r'户名：(.*?)开户行', all_text)
+            if matchs_2:
+                words = matchs_2[0]
+                for block in self.pdf_info['1']['blocks']:
+                    if block['type'] != 0:
+                        continue
+                    for line in block['lines']:
+                        for span in line['spans']:
+                            bbox, text = span['bbox'], span['text']
+                            if f'{words}' in text:
+                                account_name['position'] = bbox
+                                account_name['words'] = words
+            matchs_3 = re.findall(r'开户行：(.*?)；', all_text)
+            if matchs_3:
+                words = matchs_3[0]
+                for block in self.pdf_info['1']['blocks']:
+                    if block['type'] != 0:
+                        continue
+                    for line in block['lines']:
+                        for span in line['spans']:
+                            bbox, text = span['bbox'], span['text']
+                            if f'开户行：{words}；' in text.replace('　', ''):
+                                account_bank['position'] = bbox
+                                account_bank['words'] = words
+        return account, account_name, account_bank
+    def get_repayment_schedule(self):
+        repayment_schedule = self.item.copy()
+        # 只看第二页
+        repayment_schedule_table = []
+        repayment_schedule_text_list = []
+        table = False
+        for block in self.pdf_info['2']['blocks']:
+            if block['type'] != 0:
+                continue
+            for line in block['lines']:
+                for span in line['spans']:
+                    bbox, text = span['bbox'], span['text']
+                    if '序号' == text:
+                        table = True
+                    if '以上表格中所列的序号并非还款期数' in text:
+                        table = False
+                    if table == True:
+                        repayment_schedule_text_list.append(text)
+        for i in range(len(repayment_schedule_text_list)//5):
+            line = []
+            # 5表示5列的意思
+            for j in range(5):
+                line.append(repayment_schedule_text_list[i*5+j])
+            if str(i+1) == line[1]:
+                break
+            repayment_schedule_table.append(line)
+        if len(repayment_schedule_table) > 0:
+            repayment_schedule['words'] = repayment_schedule_table
+        return repayment_schedule
+    def get_signature_role_1(self):
+        signature_role_1 = self.init_item.copy()
+        # 先定位签字区域
+        texts = []
+        boxes = []
+        page_num = None
+        position = None
+        words = None
+        region = False
+        for i in list(self.pdf_info.keys()):
+            for block in self.pdf_info[i]['blocks']:
+                if block['type'] != 0:
+                    continue
+                for line in block['lines']:
+                    for span in line['spans']:
+                        bbox, text = span['bbox'], span['text']
+                        if '借款人(抵押人)' in text:
+                            region = True
+                        if '日期' in text:
+                            region = False
+                        if region == True:
+                            page_num = i
+                            texts.append(text)
+                            boxes.append(bbox)
+        if len(texts) > 4:
+            words = '有'
+        else:
+            words = '无'
+        boxes = np.array(boxes).reshape((-1, 2))
+        position = [min(boxes[:,0]), min(boxes[:,1]), max(boxes[:,0]), max(boxes[:,1])]
+        signature_role_1['page_num'] = page_num
+        signature_role_1['position'] = position
+        signature_role_1['words'] = words
+        return signature_role_1
+    def get_signature_role_2(self):
+        signature_role_2 = self.init_item.copy()
+        # 先定位签字区域
+        texts = []
+        boxes = []
+        page_num = None
+        position = None
+        words = None
+        region = False
+        for i in list(self.pdf_info.keys()):
+            for block in self.pdf_info[i]['blocks']:
+                if block['type'] != 0:
+                    continue
+                for line in block['lines']:
+                    for span in line['spans']:
+                        bbox, text = span['bbox'], span['text']
+                        if '共同借款人(共同抵押人)' in text:
+                            region = True
+                        if '日期' in text:
+                            region = False
+                        if region == True:
+                            page_num = i
+                            texts.append(text)
+                            boxes.append(bbox)
+        if len(texts) > 4:
+            words = '有'
+        else:
+            words = '无'
+        boxes = np.array(boxes).reshape((-1, 2))
+        position = [min(boxes[:,0]), min(boxes[:,1]), max(boxes[:,0]), max(boxes[:,1])]
+        signature_role_2['page_num'] = page_num
+        signature_role_2['position'] = position
+        signature_role_2['words'] = words
+        return signature_role_2
+    def get_signature_role_3(self):
+        signature_role_3 = self.init_item.copy()
+        # 先定位签字区域
+        texts = []
+        boxes = []
+        page_num = None
+        position = None
+        words = None
+        region = False
+        for i in list(self.pdf_info.keys()):
+            for block in self.pdf_info[i]['blocks']:
+                if block['type'] != 0:
+                    continue
+                for line in block['lines']:
+                    for span in line['spans']:
+                        bbox, text = span['bbox'], span['text']
+                        if '保证人1' in text and int(i) != 0:
+                            region = True
+                        if '日期' in text:
+                            region = False
+                        if region == True:
+                            page_num = i
+                            texts.append(text)
+                            boxes.append(bbox)
+        if len(texts) > 4:
+            words = '有'
+        else:
+            words = '无'
+        boxes = np.array(boxes).reshape((-1, 2))
+        position = [min(boxes[:,0]), min(boxes[:,1]), max(boxes[:,0]), max(boxes[:,1])]
+        signature_role_3['page_num'] = page_num
+        signature_role_3['position'] = position
+        signature_role_3['words'] = words
+        return signature_role_3
+    def get_signature_role_4(self):
+        signature_role_4 = self.init_item.copy()
+        # 先定位签字区域
+        texts = []
+        boxes = []
+        page_num = None
+        position = None
+        words = None
+        region = False
+        for i in list(self.pdf_info.keys()):
+            for block in self.pdf_info[i]['blocks']:
+                if block['type'] != 0:
+                    continue
+                for line in block['lines']:
+                    for span in line['spans']:
+                        bbox, text = span['bbox'], span['text']
+                        if '保证人2' in text and int(i) != 0:
+                            region = True
+                        if '日期' in text:
+                            region = False
+                        if region == True:
+                            page_num = i
+                            texts.append(text)
+                            boxes.append(bbox)
+        if len(texts) > 4:
+            words = '有'
+        else:
+            words = '无'
+        boxes = np.array(boxes).reshape((-1, 2))
+        position = [min(boxes[:,0]), min(boxes[:,1]), max(boxes[:,0]), max(boxes[:,1])]
+        signature_role_4['page_num'] = page_num
+        signature_role_4['position'] = position
+        signature_role_4['words'] = words
+        return signature_role_4
+    def get_signature_role_5(self):
+        signature_role_5 = self.init_item.copy()
+        # 先定位签字区域
+        texts = []
+        boxes = []
+        page_num = None
+        position = None
+        words = None
+        region = False
+        for i in list(self.pdf_info.keys()):
+            for block in self.pdf_info[i]['blocks']:
+                if block['type'] != 0:
+                    continue
+                for line in block['lines']:
+                    for span in line['spans']:
+                        bbox, text = span['bbox'], span['text']
+                        if '见证人签字' in text and int(i) != 0:
+                            region = True
+                        if '年' in text:
+                            region = False
+                        if region == True:
+                            page_num = i
+                            texts.append(text)
+                            boxes.append(bbox)
+        print(texts)
+        if len(texts) > 4:
+            words = '有'
+        else:
+            words = '无'
+        boxes = np.array(boxes).reshape((-1, 2))
+        position = [min(boxes[:,0]), min(boxes[:,1]), max(boxes[:,0]), max(boxes[:,1])]
+        signature_role_5['page_num'] = page_num
+        signature_role_5['position'] = position
+        signature_role_5['words'] = words
+        return signature_role_5
+    def get_last_page_signature(self, page_num, top, bottom):
+        signature_name = self.item.copy()
+        signature_date = self.item.copy()
+        anchor_top = None
+        anchor_bottom = None
+        for block in self.pdf_info[page_num]['blocks']:
+            if block['type'] != 0:
+                continue
+            for line in block['lines']:
+                for span in line['spans']:
+                    bbox, text = span['bbox'], span['text']
+                    if top in text:
+                        anchor_top = bbox[1]
+                    if bottom in text:
+                        anchor_bottom = bbox[1]
+        # print(top, anchor_top, anchor_bottom)
+        if anchor_top is not None and anchor_bottom is not None:
+            for block in self.pdf_info[page_num]['blocks']:
+                if block['type'] != 0:
+                    continue
+                for line in block['lines']:
+                    for span in line['spans']:
+                        bbox, text = span['bbox'], span['text']
+                        if '签署日期' in text and int(anchor_top)<np.mean(bbox[1::2])<int(anchor_bottom):
+                            name = text.split(' ')[0]
+                            date = text.split(':')[-1]
+                            signature_name['words'] = name
+                            signature_name['position'] = bbox
+                            signature_date['words'] = date
+                            signature_date['position'] = bbox
+        return signature_name, signature_date
+    def get_info(self):
+        """Run every field extractor and assemble the per-page result.
+
+        The contract is treated as an ASP product when any OCR text on page
+        '0' contains '附加产品融资贷款本金总金额'; ``self.is_asp`` is set
+        accordingly and passed to ``self.gen_init_result``.  When the OCR
+        result has at most eight pages, the individual ``get_*`` extractors
+        are called and their results stored under ``self.init_result``.
+
+        NOTE(review): the sibling extractors skip blocks whose
+        ``block['type'] != 0`` and read 'lines'/'spans' from the rest, so
+        type 0 is presumably the text block type (not an image block, as the
+        previous docstring stated) - confirm against the PDF parser in use.
+
+        Returns:
+            dict: ``{"is_asp": bool, "page_info": self.init_result}``.
+        """
+        # Decide whether this is an ASP product: only page '0' is inspected
+        # for the sentence '附加产品融资贷款本金总金额'.
+        for key in self.ocr_results['0']:
+            bbox, text = self.ocr_results['0'][key]
+            if '附加产品融资贷款本金总金额' in text:
+                self.is_asp = True
+        self.gen_init_result(self.is_asp)
+        # Version 8.5 samples supplied by the customer have content running
+        # across pages and cannot be recognised yet, so longer documents are
+        # left with the initial (empty) result.
+        if len(list(self.ocr_results.keys())) <= 8:
+            # Page 1
+            # Contract number
+            contract_no = self.get_contract_no(page_num='0')
+            self.init_result['page_1']['合同编号'] = contract_no
+            # Price of the purchased vehicle
+            vehicle_price = self.get_vehicle_price()
+            self.init_result['page_1']['所购车辆价格'] = vehicle_price
+            # VIN
+            vin = self.get_vin()
+            self.init_result['page_1']['车架号'] = vin
+            # Loan principal; for ASP products this item also carries the
+            # vehicle loan principal and the add-on product financing total.
+            upper, lower, asp_1, asp_2 = self.get_loan_principal()
+            self.init_result['page_1']['贷款本金金额']['大写'] = upper
+            self.init_result['page_1']['贷款本金金额']['小写'] = lower
+            self.init_result['page_1']['贷款本金金额']['车辆贷款本金金额'] = asp_1
+            self.init_result['page_1']['贷款本金金额']['附加产品融资贷款本金总金额'] = asp_2
+            # Loan term
+            loan_term = self.get_loan_term()
+            self.init_result['page_1']['贷款期限'] = loan_term
+            # Add-on product financing breakdown (ASP table)
+            asp_details_table = self.get_asp_details(page_num='0')
+            self.init_result['page_1']['附加产品融资贷款本金总金额明细'] = asp_details_table
+            # Borrower signature and date
+            signature = self.get_signature()
+            self.init_result['page_1']['借款人签字及时间'] = signature
+            #######################################
+            # Page 2
+            # Contract number
+            # NOTE(review): this reads page '0' although the result is stored
+            # under page_2 and the other page_2 fields read page '1' -
+            # confirm whether '1' was intended.
+            contract_no = self.get_contract_no(page_num='0')
+            self.init_result['page_2']['合同编号'] = contract_no
+            # Borrower and mortgagor (the address field originally contains spaces)
+            borrower_name, borrower_id = self.get_somebody(top='借款人及抵押人：', bottom='共同借款人：')
+            # Fallback anchors to stay compatible with version 8.1
+            if borrower_name['words'] == None:
+                borrower_name, borrower_id = self.get_somebody(top='借款人及抵押人：', bottom='共同借款人及共同抵押人：')
+            # Fallback anchors for the vehicle/loan-separated (车贷分离) version
+            if borrower_name['words'] == None:
+                borrower_name, borrower_id = self.get_somebody(top='借款人：', bottom='共同借款人及抵押人：')
+            self.init_result['page_2']['借款人及抵押人']['name'] = borrower_name
+            self.init_result['page_2']['借款人及抵押人']['id'] = borrower_id
+            # Co-borrower and co-mortgagor
+            co_borrower_name, co_borrower_id = self.get_somebody(top='共同借款人：', bottom='保证人1：')
+            self.init_result['page_2']['共同借款人及共同抵押人']['name'] = co_borrower_name
+            self.init_result['page_2']['共同借款人及共同抵押人']['id'] = co_borrower_id
+            # Guarantor 1
+            first_guarantor_name, first_guarantor_id = self.get_somebody(top='保证人1：', bottom='保证人2：')
+            self.init_result['page_2']['保证人1']['name'] = first_guarantor_name
+            self.init_result['page_2']['保证人1']['id'] = first_guarantor_id
+            # Guarantor 2
+            second_guarantor_name, second_guarantor_id = self.get_somebody(top='保证人2：', bottom='第一章')
+            self.init_result['page_2']['保证人2']['name'] = second_guarantor_name
+            self.init_result['page_2']['保证人2']['id'] = second_guarantor_id
+            # Price of the purchased vehicle
+            vehicle_price = self.get_vehicle_price(page_num='1')
+            self.init_result['page_2']['所购车辆价格'] = vehicle_price
+            # VIN
+            vin = self.get_vin(page_num='1')
+            self.init_result['page_2']['车架号'] = vin
+            # Dealer
+            seller = self.get_seller()
+            self.init_result['page_2']['经销商'] = seller
+            # Loan principal; for ASP products this item also carries the
+            # vehicle loan principal and the add-on product financing total.
+            upper, lower, asp_1, asp_2 = self.get_loan_principal(page_num='1')
+            self.init_result['page_2']['贷款本金金额']['大写'] = upper
+            self.init_result['page_2']['贷款本金金额']['小写'] = lower
+            self.init_result['page_2']['贷款本金金额']['车辆贷款本金金额'] = asp_1
+            self.init_result['page_2']['贷款本金金额']['附加产品融资贷款本金总金额'] = asp_2
+            # Loan term
+            loan_term = self.get_loan_term(page_num='1')
+            self.init_result['page_2']['贷款期限'] = loan_term
+            # Standard interest rate of the contract for the current period
+            standard_rate = self.get_standard_rate(page_num='1')
+            self.init_result['page_2']['标准利率'] = standard_rate
+            # Added in the 202212 release: borrower collection account
+            account, account_name, account_bank = self.get_borrower_collection_account()
+            self.init_result['page_2']['借款人收款账户']['账号'] = account
+            self.init_result['page_2']['借款人收款账户']['户名'] = account_name
+            self.init_result['page_2']['借款人收款账户']['开户行'] = account_bank
+            # Repayment account
+            account, account_name, account_bank = self.get_payback_account()
+            self.init_result['page_2']['还款账户']['账号'] = account
+            self.init_result['page_2']['还款账户']['户名'] = account_name
+            self.init_result['page_2']['还款账户']['开户行'] = account_bank
+            #######################################
+            # Page 3
+            # Contract number
+            contract_no = self.get_contract_no(page_num='2')
+            self.init_result['page_3']['合同编号'] = contract_no
+            # Repayment schedule (table)
+            repayment_schedule_table = self.get_repayment_schedule()
+            self.init_result['page_3']['还款计划表'] = repayment_schedule_table
+            # Vehicle agent
+            cldls = self.get_cldls() 
+            self.init_result['page_3']['车辆代理商'] = cldls
+            #######################################
+            # Page 4
+            # Contract number
+            contract_no = self.get_contract_no(page_num='3')
+            self.init_result['page_4']['合同编号'] = contract_no
+            # Add-on product financing breakdown (ASP table)
+            asp_details_table = self.get_asp_details(page_num='3')
+            self.init_result['page_4']['附加产品融资贷款本金总金额明细'] = asp_details_table
+            #######################################
+            # Page 5
+            # Contract number
+            contract_no = self.get_contract_no(page_num='4')
+            self.init_result['page_5']['合同编号'] = contract_no
+            #######################################
+            # Page 6
+            # Contract number
+            contract_no = self.get_contract_no(page_num='5')
+            self.init_result['page_6']['合同编号'] = contract_no
+            if self.is_asp:
+                # ASP layout: the signature page is page '7' and is stored as page_8.
+                # Page 7
+                # Contract number
+                contract_no = self.get_contract_no(page_num='6')
+                self.init_result['page_7']['合同编号'] = contract_no
+                # Page 8
+                # Contract number
+                contract_no = self.get_contract_no(page_num='7')
+                self.init_result['page_8']['合同编号'] = contract_no
+                # Main borrower signature; retried with the alternative lower anchor label.
+                signature_name, signature_date = self.get_last_page_signature(page_num='7',
+                                                    top='合同编号', bottom='共同借款人')
+                if signature_name['words'] == None:
+                    signature_name, signature_date = self.get_last_page_signature(page_num='7',
+                                                    top='合同编号', bottom='共同借款人（抵押人）')
+                self.init_result['page_8']['主借人签字']['签字'] = signature_name
+                self.init_result['page_8']['主借人签字']['日期'] = signature_date
+                # Co-borrower signature; retried with the alternative upper anchor label.
+                signature_name, signature_date = self.get_last_page_signature(page_num='7',
+                                                    top='共同借款人', bottom='保证人1')
+                if signature_name['words'] == None:
+                    signature_name, signature_date = self.get_last_page_signature(page_num='7',
+                                                    top='共同借款人（抵押人）', bottom='保证人1')
+                self.init_result['page_8']['共借人签字']['签字'] = signature_name
+                self.init_result['page_8']['共借人签字']['日期'] = signature_date
+                # Guarantor 1 signature
+                signature_name, signature_date = self.get_last_page_signature(page_num='7',
+                                                    top='保证人1', bottom='保证人2')
+                self.init_result['page_8']['保证人1签字']['签字'] = signature_name
+                self.init_result['page_8']['保证人1签字']['日期'] = signature_date
+                # Guarantor 2 signature
+                signature_name, signature_date = self.get_last_page_signature(page_num='7',
+                                                    top='保证人2', bottom='在本人面前亲笔签署本合同')
+                self.init_result['page_8']['保证人2签字']['签字'] = signature_name
+                self.init_result['page_8']['保证人2签字']['日期'] = signature_date
+                # Witness signature
+                signature_name, signature_date = self.get_last_page_signature(page_num='7',
+                                                    top='在本人面前亲笔签署本合同', bottom='以下无正文')
+                self.init_result['page_8']['见证人签字']['签字'] = signature_name
+                self.init_result['page_8']['见证人签字']['日期'] = signature_date
+            else:
+                # Non-ASP layout: the signature page is page '6' and is stored as page_7.
+                # Page 7
+                # Contract number
+                contract_no = self.get_contract_no(page_num='6')
+                self.init_result['page_7']['合同编号'] = contract_no
+                # Main borrower signature; retried with the alternative lower anchor label.
+                signature_name, signature_date = self.get_last_page_signature(page_num='6',
+                                                    top='合同编号', bottom='共同借款人')
+                if signature_name['words'] == None:
+                    signature_name, signature_date = self.get_last_page_signature(page_num='6',
+                                                    top='合同编号', bottom='共同借款人（抵押人）')
+                self.init_result['page_7']['主借人签字']['签字'] = signature_name
+                self.init_result['page_7']['主借人签字']['日期'] = signature_date
+                # Co-borrower signature; retried with the alternative upper anchor label.
+                signature_name, signature_date = self.get_last_page_signature(page_num='6',
+                                                    top='共同借款人', bottom='保证人1')
+                if signature_name['words'] == None:
+                    signature_name, signature_date = self.get_last_page_signature(page_num='6',
+                                                    top='共同借款人（抵押人）', bottom='保证人1')
+                self.init_result['page_7']['共借人签字']['签字'] = signature_name
+                self.init_result['page_7']['共借人签字']['日期'] = signature_date
+                # Guarantor 1 signature
+                signature_name, signature_date = self.get_last_page_signature(page_num='6',
+                                                    top='保证人1', bottom='保证人2')
+                self.init_result['page_7']['保证人1签字']['签字'] = signature_name
+                self.init_result['page_7']['保证人1签字']['日期'] = signature_date
+                # Guarantor 2 signature
+                signature_name, signature_date = self.get_last_page_signature(page_num='6',
+                                                    top='保证人2', bottom='在本人面前亲笔签署本合同')
+                self.init_result['page_7']['保证人2签字']['签字'] = signature_name
+                self.init_result['page_7']['保证人2签字']['日期'] = signature_date
+                # Witness signature
+                signature_name, signature_date = self.get_last_page_signature(page_num='6',
+                                                    top='在本人面前亲笔签署本合同', bottom='以下无正文')
+                self.init_result['page_7']['见证人签字']['签字'] = signature_name
+                self.init_result['page_7']['见证人签字']['日期'] = signature_date
+        # Repackage the output
+        new_results = {"is_asp": self.is_asp,
+                       "page_info": self.init_result
+        }
+        return new_results
\ No newline at end of file
--- a/src/common/electronic_hil_contract/get_char_fsm.py 0 → 100644
View file @9f4b364
+++ b/src/common/electronic_hil_contract/get_char_fsm.py 0 → 100644
View file @9f4b364
+import re
+import numpy as np
+from fuzzywuzzy import fuzz
+from shapely.geometry import Polygon
+def caculate_iou(g, p):
+    g = Polygon(np.array(g).reshape((-1, 2)))
+    p = Polygon(np.array(p).reshape((-1, 2)))
+    inter = Polygon(g).intersection(Polygon(p)).area
+    union = g.area + p.area - inter
+    return inter/union
+def get_table_info(bbox_1, bbox_2, ocr_result):
+    anchor = [bbox_2[0], bbox_1[1], bbox_2[2], bbox_1[3],
+            bbox_2[4], bbox_1[5], bbox_2[6], bbox_1[7]]
+    table_info = ''
+    for span in ocr_result:
+        iou = caculate_iou(anchor, span[0])
+        if iou > 0:
+            table_info = span[1]
+    return table_info
+class Finder:
+    def __init__(self, pdf_info):
+        self.pdf_info = pdf_info
+        self.item = {"words": None,
+                     "page": None,
+                     "position": None,
+                    }
+        # 格式化算法输出
+        self.init_result = {"合同编号": self.item,
+                            "承租人-姓名": self.item,
+                            "承租人-证件号码": self.item,
+                            "承租人-法定代表人或授权代表": self.item,
+                            "共同承租人-姓名": self.item,
+                            "共同承租人-证件号码": self.item,
+                            "共同承租人-法定代表人或授权代表": self.item,
+                            "保证人1-姓名": self.item,
+                            "保证人1-证件号码": self.item,
+                            "保证人1-法定代表人或授权代表": self.item,
+                            "保证人2-姓名": self.item,
+                            "保证人2-证件号码": self.item,
+                            "保证人2-法定代表人或授权代表": self.item,
+                            "保证人3-姓名": self.item,
+                            "保证人3-证件号码": self.item,
+                            "保证人3-法定代表人或授权代表": self.item,
+                            "合同编号（正文）": self.item,
+                            "车辆识别代码": self.item,
+                            "车辆卖方（经销商）": self.item,
+                            "车辆代理商": self.item,
+                            "车辆原始销售价格（《机动车销售统一发票》所列金额）": self.item,
+                            "车辆附加产品明细表": self.item,
+                            "融资成本总额": self.item,
+                            "租期": self.item,
+                            "付款计划表": self.item,
+                            "收款银行账户-户名": self.item,
+                            "收款银行账户-银行账号": self.item,
+                            "收款银行账户-开户行": self.item,
+                            "银行账户-户名": self.item,
+                            "银行账户-银行账号": self.item,
+                            "银行账户-开户行": self.item,
+                            "签字页-承租人姓名": self.item,
+                            "签字页-承租人签章": self.item,
+                            "签字页-共同承租人姓名": self.item,
+                            "签字页-共同承租人签章": self.item,
+                            "签字页-保证人1姓名": self.item,
+                            "签字页-保证人1签章": self.item,
+                            "签字页-保证人2姓名": self.item,
+                            "签字页-保证人2签章": self.item,
+                            "签字页-保证人3姓名": self.item,
+                            "签字页-保证人3签章": self.item,
+        }
+        # 格式化输出 车辆处置协议 要是别的字段
+        self.init_result_1 = {"合同编号": self.item,
+                              "承租人-姓名": self.item,
+                              "承租人-证件号码": self.item,
+                              "销售经销商": self.item,
+                              "合同编号（正文）": self.item,
+                              "签字页-承租人姓名": self.item,
+                              "签字页-承租人证件号码": self.item,
+                              "签字页-承租人签章": self.item,
+                              "签字页-销售经销商": self.item,
+                              "签字页-销售经销商签章": self.item,
+        }
+        # 格式化输出 车辆租赁抵押合同
+        self.init_result_2 = {"合同编号": self.item,
+                              "合同编号（正文）": self.item,
+                              "抵押人姓名/名称": self.item,
+                              "抵押人证件号码": self.item,
+                              "抵押人配偶姓名/名称": self.item,
+                              "抵押人配偶证件号码": self.item,
+                              "车辆识别代码": self.item,
+                              "租金总额": self.item,
+                              "融资租赁期限": self.item,
+                              "签字页-抵押人姓名": self.item,
+                              "签字页-抵押人签章": self.item,
+                              "签字页-抵押人配偶姓名": self.item,
+                              "签字页-抵押人配偶签章": self.item,
+        }
+    def get_contract_no(self, page_num):
+        """Read the contract number printed on the given page.
+
+        A span containing '合同编号：' supplies the number as the text after
+        its last full-width colon. When that text is empty, a span containing
+        'CH' whose top edge lies above the bottom edge of the position
+        recorded so far is used instead.
+
+        Args:
+            page_num: key of the page inside ``self.pdf_info``.
+
+        Returns:
+            dict: a copy of ``self.item`` with 'position', 'page' and 'words'
+            filled in when a number was found, otherwise left untouched.
+        """
+        contract_no = self.item.copy()
+        # Only text blocks (type 0) carry lines/spans.
+        text_spans = [
+            span
+            for block in self.pdf_info[page_num]['blocks'] if block['type'] == 0
+            for line in block['lines']
+            for span in line['spans']
+        ]
+        for span in text_spans:
+            bbox, text = span['bbox'], span['text']
+            if '合同编号：' in text:
+                contract_no['position'] = bbox
+                contract_no['page'] = page_num
+                contract_no['words'] = text.split('：')[-1]
+        # The fallback compares against the recorded position, so it can only
+        # run once a label has been located. Without this guard a page with no
+        # label crashed while subscripting the unset position (presumably
+        # None -- confirm against the definition of self.item).
+        if contract_no['words'] == '' and contract_no['position']:
+            for span in text_spans:
+                bbox, text = span['bbox'], span['text']
+                if bbox[1] < contract_no['position'][3] and 'CH' in text:
+                    contract_no['position'] = bbox
+                    contract_no['page'] = page_num
+                    contract_no['words'] = text
+        return contract_no
+    def get_vehicle_price(self, page_num='0'):
+        """Extract the vehicle price stated on a page.
+
+        Args:
+            page_num: key of the page inside ``self.pdf_info``.
+
+        Returns:
+            dict: a copy of ``self.item``; when a span contains
+            '所购车辆价格为人民币', 'words' holds the text after the last '币'
+            and 'position' holds that span's bbox (the last such span wins).
+        """
+        result = self.item.copy()
+        text_blocks = (b for b in self.pdf_info[page_num]['blocks'] if b['type'] == 0)
+        for text_block in text_blocks:
+            for row in text_block['lines']:
+                for piece in row['spans']:
+                    box = piece['bbox']
+                    content = piece['text']
+                    if '所购车辆价格为人民币' not in content:
+                        continue
+                    result['position'] = box
+                    result['words'] = content.split('币')[-1]
+        return result
+    def get_contract_no_one(self):
+        """Find the contract number quoted inside the body text.
+
+        The number may be broken across spans, so the text of every span on a
+        page is concatenated (with ASCII spaces removed) before matching.
+        Pages are visited in ``self.pdf_info`` order and the first page with
+        a match wins.
+
+        Returns:
+            dict: a copy of ``self.item``; on a match 'words' is the captured
+            number with whitespace and '）' removed, 'page' is the page key
+            and 'position' is None (the match spans several text spans).
+        """
+        contract_no = self.item.copy()
+        # Tried in order. The former third pattern '编号为(.*?)）的' could
+        # never fire: any text it matches is already matched by the second
+        # pattern, which returns first.
+        patterns = (r'（合同编号：\[(.*?)\]）', r'编号为(.*?)的')
+        for pno in self.pdf_info:
+            page_text = ''.join(
+                span['text']
+                for block in self.pdf_info[pno]['blocks'] if block['type'] == 0
+                for line in block['lines']
+                for span in line['spans']
+            ).replace(' ', '')
+            for pattern in patterns:
+                found = re.search(pattern, page_text)
+                if found:
+                    contract_no['position'] = None
+                    contract_no['page'] = pno
+                    # Raw string: "\s" in a plain literal is an invalid escape.
+                    contract_no['words'] = re.sub(r'\s', '', found.group(1)).replace('）', '')
+                    return contract_no
+        return contract_no
+    def get_key_value(self, key, page_num=None):
+        """Return the value that follows ``key`` inside a text span.
+
+        Args:
+            key: substring identifying the span of interest.
+            page_num: page key to restrict the search to; when None every
+                page of ``self.pdf_info`` is scanned.
+
+        Returns:
+            dict: a copy of ``self.item``. For the last span containing
+            ``key``, 'words' is the text after the final full-width colon
+            with '。' and all whitespace removed, and 'position'/'page'
+            locate that span.
+        """
+        value = self.item.copy()
+        # One scan loop serves both the single-page and the all-pages case.
+        pages = self.pdf_info if page_num is None else [page_num]
+        for pno in pages:
+            for block in self.pdf_info[pno]['blocks']:
+                if block['type'] != 0:
+                    continue
+                for line in block['lines']:
+                    for span in line['spans']:
+                        bbox, text = span['bbox'], span['text']
+                        if key in text:
+                            words = text.split('：')[-1].replace("。", "")
+                            value['position'] = bbox
+                            value['page'] = pno
+                            # Raw string: "\s" in a plain literal is an invalid escape.
+                            value['words'] = re.sub(r"\s", "", words)
+        return value
+    def get_loan_principal(self, page_num='0'):
+        """Collect the loan-principal amounts printed on a page.
+
+        Args:
+            page_num: key of the page inside ``self.pdf_info``.
+
+        Returns:
+            tuple: ``(upper, lower, asp_1, asp_2)``, each a copy of
+            ``self.item``. ``upper`` is taken from a span whose fuzzy ratio
+            against the Chinese numeral characters exceeds 15, ``lower`` from
+            a span containing '小写：¥'. ``asp_1``/``asp_2`` are the bracketed
+            '人民币：小写：[...]' amounts whose vertical centre lies above /
+            below the '附加产品融资贷款本金总金额' anchor span.
+        """
+        chinese_keywords = ['壹', '贰', '叁', '肆', '伍', '陆', '柒', '捌', '玖', '拾',
+                            '佰', '仟', '万', '亿', '元', '角', '分', '零', '整']
+        keyword_text = ''.join(chinese_keywords)
+        upper = self.item.copy()
+        lower = self.item.copy()
+        asp_1 = self.item.copy()
+        asp_2 = self.item.copy()
+        anchor_bbox = None
+        # Only text blocks (type 0) carry lines/spans.
+        text_spans = [
+            span
+            for block in self.pdf_info[page_num]['blocks'] if block['type'] == 0
+            for line in block['lines']
+            for span in line['spans']
+        ]
+        for span in text_spans:
+            bbox, text = span['bbox'], span['text']
+            if fuzz.ratio(keyword_text, text) > 15:
+                # text is rebound on purpose: the two checks below then see
+                # the trimmed value, exactly as before.
+                text = text.split('：')[-1].strip()
+                upper['position'] = bbox
+                upper['words'] = text
+            if '小写：¥' in text:
+                words = text.split('¥')[-1].strip()
+                lower['position'] = bbox
+                lower['words'] = words
+            if '附加产品融资贷款本金总金额' == text:
+                anchor_bbox = bbox
+        if anchor_bbox:
+            anchor_y = np.mean(anchor_bbox[1::2])
+            for span in text_spans:
+                bbox, text = span['bbox'], span['text']
+                if '人民币：小写：' not in text:
+                    continue
+                amount = re.search(r'人民币：小写：\[(.*)\]', text)
+                if amount is None:
+                    # Label present but no bracketed amount: skip the span
+                    # instead of failing with IndexError on findall(...)[0].
+                    continue
+                center_y = np.mean(bbox[1::2])
+                if center_y < anchor_y:
+                    asp_1['position'] = bbox
+                    asp_1['words'] = amount.group(1)
+                elif center_y > anchor_y:
+                    asp_2['position'] = bbox
+                    asp_2['words'] = amount.group(1)
+        return upper, lower, asp_1, asp_2
+    def get_loan_term(self, page_num='0'):
+        """Extract the loan term in months from a page.
+
+        The text of all spans on the page is concatenated and searched for
+        '贷款期限<digits>个月'; the span that contains '<digits>个月' then
+        supplies the position (the last such span wins).
+
+        Args:
+            page_num: key of the page inside ``self.pdf_info``.
+
+        Returns:
+            dict: a copy of ``self.item`` with 'words' set to the digit
+            string and 'position' to the span bbox when found.
+        """
+        result = self.item.copy()
+        pieces = [
+            (piece['bbox'], piece['text'])
+            for blk in self.pdf_info[page_num]['blocks'] if blk['type'] == 0
+            for row in blk['lines']
+            for piece in row['spans']
+        ]
+        joined = ''.join(content for _, content in pieces)
+        found = re.search(r'贷款期限(\d+)个月', joined)
+        if not found:
+            return result
+        months = found.group(1)
+        needle = f'{months}个月'
+        for box, content in pieces:
+            if needle in content:
+                result['position'] = box
+                result['words'] = months
+        return result
+    def get_asp_details(self, page_num):
+        """Collect the additional-product detail table from a page.
+
+        Span texts are gathered from the span equal to
+        '附加产品融资贷款本金总金额明细' (inclusive) until a span containing
+        '第二条' or '征信管理'. The first text becomes a one-cell title row
+        and the following texts are grouped three per row; an incomplete
+        trailing group is dropped.
+
+        Args:
+            page_num: key of the page inside ``self.pdf_info``.
+
+        Returns:
+            dict: a copy of ``self.item`` whose 'words' is the list of rows
+            when at least one row was built.
+        """
+        result = self.item.copy()
+        collected = []
+        inside_table = False
+        for blk in self.pdf_info[page_num]['blocks']:
+            if blk['type'] != 0:
+                continue
+            for row in blk['lines']:
+                for piece in row['spans']:
+                    box, content = piece['bbox'], piece['text']
+                    if content == '附加产品融资贷款本金总金额明细':
+                        inside_table = True
+                    if '第二条' in content or '征信管理' in content:
+                        inside_table = False
+                    if inside_table:
+                        collected.append(content)
+        row_count = (len(collected) + 2) // 3
+        # Row 0 is the title; row k (k >= 1) holds cells 3k-2 .. 3k.
+        rows = [
+            [collected[0]] if idx == 0 else collected[3 * idx - 2:3 * idx + 1]
+            for idx in range(row_count)
+        ]
+        if rows:
+            result['words'] = rows
+        return result
+    def get_signature(self):
+        """Return the signing-date span found on page '0'.
+
+        Returns:
+            dict: a copy of ``self.item``; 'words' and 'position' come from
+            the last span on page '0' whose text contains '签署日期'.
+        """
+        result = self.item.copy()
+        for blk in self.pdf_info['0']['blocks']:
+            if blk['type'] == 0:
+                for row in blk['lines']:
+                    for piece in row['spans']:
+                        box, content = piece['bbox'], piece['text']
+                        if '签署日期' in content:
+                            result['words'] = content
+                            result['position'] = box
+        return result
+    def get_somebody(self, top, bottom):
+        """Return the party name and ID found between two marker spans.
+
+        Works on page '1' only. The bottom edges of the last spans containing
+        ``top`` and ``bottom`` delimit a vertical band; spans whose bottom
+        edge lies strictly inside the band are inspected.
+
+        Args:
+            top: text identifying the upper boundary span.
+            bottom: text identifying the lower boundary span.
+
+        Returns:
+            tuple: ``(_name, _id)`` copies of ``self.item`` filled from the
+            '姓名/名称' and '自然人身份证件号码/法人执照号码' spans (text
+            after the last full-width colon).
+        """
+        name_item = self.item.copy()
+        id_item = self.item.copy()
+        pieces = [
+            (piece['bbox'], piece['text'])
+            for blk in self.pdf_info['1']['blocks'] if blk['type'] == 0
+            for row in blk['lines']
+            for piece in row['spans']
+        ]
+        # First pass: locate the vertical band.
+        upper_y = 0
+        lower_y = 0
+        for box, content in pieces:
+            if top in content:
+                upper_y = box[3]
+            if bottom in content:
+                lower_y = box[3]
+        # Second pass: read the labelled fields inside the band.
+        fields = (
+            ('姓名/名称', name_item),
+            ('自然人身份证件号码/法人执照号码', id_item),
+        )
+        for box, content in pieces:
+            if not upper_y < box[3] < lower_y:
+                continue
+            for label, target in fields:
+                if label in content:
+                    target['position'] = box
+                    target['words'] = content.split('：')[-1]
+        return name_item, id_item
+    def get_seller(self):
+        """Return the dealer name printed to the right of the '经销商' label.
+
+        Works on page '1'. The value span must have its horizontal centre
+        between the label's right edge and the middle of the page, and its
+        vertical centre inside the label's vertical extent.
+
+        Returns:
+            dict: a copy of ``self.item`` with 'position' and 'words' taken
+            from the last span that satisfies both conditions.
+        """
+        result = self.item.copy()
+        pieces = [
+            (piece['bbox'], piece['text'])
+            for blk in self.pdf_info['1']['blocks'] if blk['type'] == 0
+            for row in blk['lines']
+            for piece in row['spans']
+        ]
+        # Locate the label first.
+        key_box = None
+        for box, content in pieces:
+            if content == '经销商':
+                key_box = box
+        if not key_box:
+            return result
+        # Then match the value relative to the label.
+        x_limit = self.pdf_info['1']['width'] * 0.5
+        for box, content in pieces:
+            right_of_key = key_box[2] < np.mean(box[::2]) < x_limit
+            if not right_of_key:
+                continue
+            same_row = key_box[1] < np.mean(box[1::2]) < key_box[3]
+            if same_row:
+                result['position'] = box
+                result['words'] = content
+        return result
+    def get_payback_account(self):
+        """Extract the repayment account number, holder name and bank.
+
+        Works on page '1'. Values are only extracted when the concatenated
+        page text contains the ticked option '☑账号'; the "notify
+        separately" variant is left empty.
+
+        Returns:
+            tuple: ``(account, account_name, account_bank)`` copies of
+            ``self.item``. Each gets 'words' from a regex over the page text
+            and 'position' from the last span that contains the value.
+        """
+        account = self.item.copy()
+        account_name = self.item.copy()
+        account_bank = self.item.copy()
+        pieces = [
+            (piece['bbox'], piece['text'])
+            for blk in self.pdf_info['1']['blocks'] if blk['type'] == 0
+            for row in blk['lines']
+            for piece in row['spans']
+        ]
+        page_text = ''.join(content for _, content in pieces)
+        # Decide which account format is present; only the ticked one is output.
+        if '☑账号' not in page_text:
+            return account, account_name, account_bank
+        page_text = page_text.replace('　', '')
+
+        def fill(target, value, is_hit):
+            # Record the value on every span accepted by is_hit (last one wins).
+            for box, content in pieces:
+                if is_hit(content):
+                    target['position'] = box
+                    target['words'] = value
+
+        number_hits = re.findall(r'账号：(.*)户名', page_text)
+        if number_hits:
+            number = number_hits[0]
+            fill(account, number, lambda content: f'{number}' in content)
+        holder_hits = re.findall(r'户名：(.*)开户行', page_text)
+        if holder_hits:
+            holder = holder_hits[0]
+            fill(account_name, holder, lambda content: f'{holder}' in content)
+        bank_hits = re.findall(r'开户行：(.*)；', page_text)
+        if bank_hits:
+            bank = bank_hits[0]
+            fill(account_bank, bank,
+                 lambda content: f'开户行：{bank}；' in content.replace('　', ''))
+        return account, account_name, account_bank
+    def get_repayment_schedule(self):
+        """Extract the repayment schedule as a two-column table.
+
+        Collection starts after a span whose stripped text is "61." and stops
+        at a span containing '以上表格中所列序号'. While collecting, spans
+        are dropped when they contain Chinese characters, contain exactly one
+        run of digits, or start outside the horizontal range between the
+        "61." span's left edge and the right edge of the '剩余融资' span. The
+        kept texts are read four per row and the third one is reported.
+
+        Returns:
+            dict: a copy of ``self.item``; 'words' is a list of rows starting
+            with the header ['序号', '租金'] and 'page' is the page key on
+            which "61." was last seen (None when never seen).
+        """
+        repayment_schedule = self.item.copy()
+        cells = []
+        table = False
+        page = None
+        left = 0
+        right = 0
+        # Compiled once, as raw strings: "\d+" in a plain literal is an
+        # invalid escape sequence.
+        has_chinese = re.compile(r'[\u4e00-\u9fff]')
+        digit_run = re.compile(r'\d+')
+        for pno in self.pdf_info:
+            for block in self.pdf_info[pno]['blocks']:
+                if block['type'] != 0:
+                    continue
+                for line in block['lines']:
+                    for span in line['spans']:
+                        bbox, text = span['bbox'], span['text']
+                        if '剩余融资' in text:
+                            right = bbox[2]
+                        if '以上表格中所列序号' in text:
+                            table = False
+                        if table:
+                            # Each `continue` also skips the "61." check
+                            # below, as in the original control flow.
+                            # Drop text containing Chinese characters.
+                            if has_chinese.search(text):
+                                continue
+                            # Drop the "1." - "61." style row labels.
+                            if len(digit_run.findall(text)) == 1:
+                                continue
+                            if not left < bbox[0] < right:
+                                continue
+                            cells.append(text)
+                        if text.strip() == "61.":
+                            page = pno
+                            table = True
+                            left = bbox[0]
+        repayment_schedule_table = [['序号', '租金']]
+        # Four kept cells per row; only the row index and the third cell
+        # (the rent column) are reported.
+        for i in range(len(cells) // 4):
+            row = cells[i * 4:i * 4 + 4]
+            repayment_schedule_table.append([str(i + 1), row[2]])
+        repayment_schedule['words'] = repayment_schedule_table
+        repayment_schedule['page'] = page
+        return repayment_schedule
+    def get_signature_role_1(self):
+        """Return the last span in the document that contains '签署日期'.
+
+        Returns:
+            dict: a copy of ``self.item`` with 'position', 'page' and 'words'
+            taken from that span; every page of ``self.pdf_info`` is scanned.
+        """
+        result = self.item.copy()
+        for pno in self.pdf_info:
+            text_blocks = (b for b in self.pdf_info[pno]['blocks'] if b['type'] == 0)
+            for blk in text_blocks:
+                for row in blk['lines']:
+                    for piece in row['spans']:
+                        box, content = piece['bbox'], piece['text']
+                        if '签署日期' not in content:
+                            continue
+                        result['position'] = box
+                        result['page'] = pno
+                        result['words'] = content
+        return result
+    def get_signature_role_2(self):
+        """Report whether the co-borrower signing region holds a signature.
+
+        Spans are collected from a span containing '共同借款人(共同抵押人)'
+        (inclusive) until a span containing '日期'. More than four collected
+        spans is reported as '有', otherwise '无'.
+
+        Returns:
+            dict: a copy of ``self.init_item`` with 'page_num' (page of the
+            last collected span), 'position' (bounding box enclosing all
+            collected spans, or None when none were collected) and 'words'.
+        """
+        # NOTE(review): sibling extractors copy self.item and store the page
+        # under 'page'; this one uses self.init_item / 'page_num' -- confirm
+        # with the callers before unifying.
+        signature_role_2 = self.init_item.copy()
+        # Locate the signing region first.
+        texts = []
+        boxes = []
+        page_num = None
+        region = False
+        for pno in self.pdf_info:
+            for block in self.pdf_info[pno]['blocks']:
+                if block['type'] != 0:
+                    continue
+                for line in block['lines']:
+                    for span in line['spans']:
+                        bbox, text = span['bbox'], span['text']
+                        if '共同借款人(共同抵押人)' in text:
+                            region = True
+                        if '日期' in text:
+                            region = False
+                        if region:
+                            page_num = pno
+                            texts.append(text)
+                            boxes.append(bbox)
+        words = '有' if len(texts) > 4 else '无'
+        position = None
+        if boxes:
+            # Each bbox contributes its two corners; the extremes give the
+            # enclosing box. Skipped when the region is absent, where min()
+            # over the empty array used to raise ValueError.
+            corners = np.array(boxes).reshape((-1, 2))
+            position = [min(corners[:, 0]), min(corners[:, 1]),
+                        max(corners[:, 0]), max(corners[:, 1])]
+        signature_role_2['page_num'] = page_num
+        signature_role_2['position'] = position
+        signature_role_2['words'] = words
+        return signature_role_2
+    def get_signature_role_3(self):
+        """Report whether the guarantor-1 signing region holds a signature.
+
+        Spans are collected from a span containing '保证人1' on any page whose
+        key is not 0 (inclusive) until a span containing '日期'. More than
+        four collected spans is reported as '有', otherwise '无'.
+
+        Returns:
+            dict: a copy of ``self.init_item`` with 'page_num' (page of the
+            last collected span), 'position' (bounding box enclosing all
+            collected spans, or None when none were collected) and 'words'.
+        """
+        # NOTE(review): sibling extractors copy self.item and store the page
+        # under 'page'; this one uses self.init_item / 'page_num' -- confirm
+        # with the callers before unifying.
+        signature_role_3 = self.init_item.copy()
+        # Locate the signing region first.
+        texts = []
+        boxes = []
+        page_num = None
+        region = False
+        for pno in self.pdf_info:
+            for block in self.pdf_info[pno]['blocks']:
+                if block['type'] != 0:
+                    continue
+                for line in block['lines']:
+                    for span in line['spans']:
+                        bbox, text = span['bbox'], span['text']
+                        if '保证人1' in text and int(pno) != 0:
+                            region = True
+                        if '日期' in text:
+                            region = False
+                        if region:
+                            page_num = pno
+                            texts.append(text)
+                            boxes.append(bbox)
+        words = '有' if len(texts) > 4 else '无'
+        position = None
+        if boxes:
+            # Each bbox contributes its two corners; the extremes give the
+            # enclosing box. Skipped when the region is absent, where min()
+            # over the empty array used to raise ValueError.
+            corners = np.array(boxes).reshape((-1, 2))
+            position = [min(corners[:, 0]), min(corners[:, 1]),
+                        max(corners[:, 0]), max(corners[:, 1])]
+        signature_role_3['page_num'] = page_num
+        signature_role_3['position'] = position
+        signature_role_3['words'] = words
+        return signature_role_3
+    def get_signature_role_4(self):
+        """Report whether the guarantor-2 signing region holds a signature.
+
+        Spans are collected from a span containing '保证人2' on any page whose
+        key is not 0 (inclusive) until a span containing '日期'. More than
+        four collected spans is reported as '有', otherwise '无'.
+
+        Returns:
+            dict: a copy of ``self.init_item`` with 'page_num' (page of the
+            last collected span), 'position' (bounding box enclosing all
+            collected spans, or None when none were collected) and 'words'.
+        """
+        # NOTE(review): sibling extractors copy self.item and store the page
+        # under 'page'; this one uses self.init_item / 'page_num' -- confirm
+        # with the callers before unifying.
+        signature_role_4 = self.init_item.copy()
+        # Locate the signing region first.
+        texts = []
+        boxes = []
+        page_num = None
+        region = False
+        for pno in self.pdf_info:
+            for block in self.pdf_info[pno]['blocks']:
+                if block['type'] != 0:
+                    continue
+                for line in block['lines']:
+                    for span in line['spans']:
+                        bbox, text = span['bbox'], span['text']
+                        if '保证人2' in text and int(pno) != 0:
+                            region = True
+                        if '日期' in text:
+                            region = False
+                        if region:
+                            page_num = pno
+                            texts.append(text)
+                            boxes.append(bbox)
+        words = '有' if len(texts) > 4 else '无'
+        position = None
+        if boxes:
+            # Each bbox contributes its two corners; the extremes give the
+            # enclosing box. Skipped when the region is absent, where min()
+            # over the empty array used to raise ValueError.
+            corners = np.array(boxes).reshape((-1, 2))
+            position = [min(corners[:, 0]), min(corners[:, 1]),
+                        max(corners[:, 0]), max(corners[:, 1])]
+        signature_role_4['page_num'] = page_num
+        signature_role_4['position'] = position
+        signature_role_4['words'] = words
+        return signature_role_4
+    def get_signature_role_5(self):
+        """Report whether the witness signing region holds a signature.
+
+        Spans are collected from a span containing '见证人签字' on any page
+        whose key is not 0 (inclusive) until a span containing '年'. More
+        than four collected spans is reported as '有', otherwise '无'.
+
+        Returns:
+            dict: a copy of ``self.init_item`` with 'page_num' (page of the
+            last collected span), 'position' (bounding box enclosing all
+            collected spans, or None when none were collected) and 'words'.
+        """
+        # NOTE(review): sibling extractors copy self.item and store the page
+        # under 'page'; this one uses self.init_item / 'page_num' -- confirm
+        # with the callers before unifying.
+        signature_role_5 = self.init_item.copy()
+        # Locate the signing region first.
+        texts = []
+        boxes = []
+        page_num = None
+        region = False
+        for pno in self.pdf_info:
+            for block in self.pdf_info[pno]['blocks']:
+                if block['type'] != 0:
+                    continue
+                for line in block['lines']:
+                    for span in line['spans']:
+                        bbox, text = span['bbox'], span['text']
+                        if '见证人签字' in text and int(pno) != 0:
+                            region = True
+                        if '年' in text:
+                            region = False
+                        if region:
+                            page_num = pno
+                            texts.append(text)
+                            boxes.append(bbox)
+        words = '有' if len(texts) > 4 else '无'
+        position = None
+        if boxes:
+            # Each bbox contributes its two corners; the extremes give the
+            # enclosing box. Skipped when the region is absent, where min()
+            # over the empty array used to raise ValueError.
+            corners = np.array(boxes).reshape((-1, 2))
+            position = [min(corners[:, 0]), min(corners[:, 1]),
+                        max(corners[:, 0]), max(corners[:, 1])]
+        signature_role_5['page_num'] = page_num
+        signature_role_5['position'] = position
+        signature_role_5['words'] = words
+        return signature_role_5
+    def get_last_page_signature(self, page_num, top, bottom):
+        """Read the signer name and signing date between two marker spans.
+
+        The top edges of the last spans containing ``top`` and ``bottom``
+        bound a vertical band on the page. A span containing '签署日期' whose
+        vertical centre lies strictly inside the band is split into the name
+        (text before the first ASCII space) and the date (text after the last
+        ASCII colon).
+
+        Args:
+            page_num: key of the page inside ``self.pdf_info``.
+            top: text identifying the upper boundary span.
+            bottom: text identifying the lower boundary span.
+
+        Returns:
+            tuple: ``(signature_name, signature_date)`` copies of
+            ``self.item`` with 'words' and 'position' filled when found.
+        """
+        signature_name = self.item.copy()
+        signature_date = self.item.copy()
+        anchor_top = None
+        anchor_bottom = None
+        # Only text blocks (type 0) carry lines/spans.
+        text_spans = [
+            span
+            for block in self.pdf_info[page_num]['blocks'] if block['type'] == 0
+            for line in block['lines']
+            for span in line['spans']
+        ]
+        for span in text_spans:
+            bbox, text = span['bbox'], span['text']
+            if top in text:
+                anchor_top = bbox[1]
+            if bottom in text:
+                anchor_bottom = bbox[1]
+        if anchor_top is not None and anchor_bottom is not None:
+            for span in text_spans:
+                bbox, text = span['bbox'], span['text']
+                if '签署日期' in text and int(anchor_top) < np.mean(bbox[1::2]) < int(anchor_bottom):
+                    signature_name['words'] = text.split(' ')[0]
+                    signature_name['position'] = bbox
+                    signature_date['words'] = text.split(':')[-1]
+                    # Previously the name's position was assigned twice and
+                    # the date never received one.
+                    signature_date['position'] = bbox
+        return signature_name, signature_date
+    def get_electronic_signature(self, top, bottom, t_pno=None):
+        """Return the '签署日期' span located between two marker spans.
+
+        The upper bound is the top edge of the last span containing ``top``;
+        the lower bound is the bottom edge of the last span that contains
+        ``bottom`` (and not ``top``) and lies below the upper bound known at
+        that moment. A span containing '签署日期' whose vertical centre lies
+        strictly inside the band is reported (the last one wins).
+
+        Args:
+            top: text identifying the upper boundary span.
+            bottom: text identifying the lower boundary span.
+            t_pno: page key to restrict the scan to; None scans every page.
+
+        Returns:
+            dict: a copy of ``self.item`` with 'words', 'page' and 'position'
+            filled when such a span exists.
+        """
+        result = self.item.copy()
+        located = []
+        for pno in self.pdf_info:
+            if t_pno is not None and pno != t_pno:
+                continue
+            for blk in self.pdf_info[pno]['blocks']:
+                if blk['type'] != 0:
+                    continue
+                for row in blk['lines']:
+                    for piece in row['spans']:
+                        located.append((pno, piece['bbox'], piece['text']))
+        upper = None
+        lower = None
+        for _, box, content in located:
+            if top in content:
+                upper = box[1]
+            elif bottom in content and upper is not None and box[3] > upper:
+                lower = box[3]
+        if upper is None or lower is None:
+            return result
+        y_min, y_max = int(upper), int(lower)
+        for pno, box, content in located:
+            if '签署日期' in content and y_min < np.mean(box[1::2]) < y_max:
+                result['words'] = content
+                result['page'] = pno
+                result['position'] = box
+        return result
+    def get_role_info(self, role_key, page_num='0'):
+        """Return name, ID number and representative for one contract role.
+
+        The top-left corner of the span that starts with '保证人3' is the
+        anchor: it splits the page into four quadrants, one per role, and the
+        '证件号码：' / '法定代表人或授权代表：' spans are attributed to a
+        role by the quadrant their centre falls in. Nothing is extracted
+        when the anchor is missing.
+
+        Args:
+            role_key: role label used as a regex prefix, e.g. '承租人：',
+                '保证人1：', '保证人2：' or '保证人3：'. Other values only
+                yield the name.
+            page_num: key of the page inside ``self.pdf_info``.
+
+        Returns:
+            tuple: ``(name, id_num, representative)`` copies of ``self.item``
+            with 'words' (text after the last full-width colon), 'page' and
+            'position' filled when found.
+        """
+        name = self.item.copy()
+        id_num = self.item.copy()
+        representative = self.item.copy()
+        pieces = [
+            (piece['bbox'], piece['text'])
+            for blk in self.pdf_info[page_num]['blocks'] if blk['type'] == 0
+            for row in blk['lines']
+            for piece in row['spans']
+        ]
+        # The top-left corner of the guarantor-3 label is the reference point.
+        anchor = None
+        for box, content in pieces:
+            if re.match('保证人3', content) is not None:
+                anchor = [box[0], box[1]]
+        if anchor is None:
+            return name, id_num, representative
+
+        # role -> (centre is left of the anchor, centre is above the anchor)
+        quadrants = {
+            '承租人：': (True, True),
+            '保证人1：': (True, False),
+            '保证人2：': (False, True),
+            '保证人3：': (False, False),
+        }
+        quadrant = quadrants.get(role_key)
+        labelled = (
+            ('证件号码：', id_num),
+            ('法定代表人或授权代表：', representative),
+        )
+
+        def record(target, box, content):
+            target['words'] = content.split('：')[-1]
+            target['page'] = page_num
+            target['position'] = box
+
+        for box, content in pieces:
+            # Role name.
+            if re.match(role_key, content) is not None:
+                record(name, box, content)
+            if quadrant is None:
+                continue
+            wants_left, wants_above = quadrant
+            for label, target in labelled:
+                if re.match(label, content) is None:
+                    continue
+                center_x = np.mean(box[::2])
+                center_y = np.mean(box[1::2])
+                x_ok = center_x < anchor[0] if wants_left else center_x > anchor[0]
+                y_ok = center_y < anchor[1] if wants_above else center_y > anchor[1]
+                if x_ok and y_ok:
+                    record(target, box, content)
+        return name, id_num, representative
+    def get_table_add_product(self):
+        """Extract the "vehicle add-on products" table.
+        The page is located through the marker text '车辆附加产品（明细见下表）'
+        (the last page containing it wins). All text spans of that page are
+        collected, and rows are rebuilt by walking downwards from the '项目'
+        header cell with an IoU test against a shifted anchor box.
+        Returns:
+            dict: copy of ``self.item``. On success ``'words'`` is a list of
+            rows ``[item, purchase price, financed amount]`` (header row
+            first, '总计' row last), ``'page'`` is the page key and
+            ``'position'`` is ``None``. If the marker page or one of the
+            '项目' / '购买价格' / '实际融资金额' / '总计' cells cannot be found,
+            the unmodified copy is returned instead of raising.
+        """
+        table_add_product = self.item.copy()
+        add_product_page_num = None
+        for pno in self.pdf_info:
+            for block in self.pdf_info[f'{pno}']['blocks']:
+                if block['type'] != 0:
+                    continue
+                for line in block['lines']:
+                    for span in line['spans']:
+                        if '车辆附加产品（明细见下表）' in span['text']:
+                            add_product_page_num = pno
+        # Without the marker page there is no table to read; indexing
+        # pdf_info with 'None' would raise KeyError.
+        if add_product_page_num is None:
+            return table_add_product
+        # Flatten the page into [8-value corner box, text] pairs.
+        ocr_results = []
+        for block in self.pdf_info[f'{add_product_page_num}']['blocks']:
+            if block['type'] != 0:
+                continue
+            for line in block['lines']:
+                for span in line['spans']:
+                    xmin, ymin, xmax, ymax = span['bbox']
+                    bbox = [xmin, ymin, xmax, ymin, xmax, ymax, xmin, ymax]
+                    ocr_results.append([bbox, span['text']])
+        lines = [['项目', '购买价格', '实际融资金额']]
+        key_xm = None
+        key_gmjg = None
+        key_sjrzje = None
+        key_total = None
+        for index, span in enumerate(ocr_results):
+            if span[1] == '项目':
+                key_xm = index
+            if span[1] == '购买价格':
+                key_gmjg = index
+            if span[1] == '实际融资金额':
+                key_sjrzje = index
+            if span[1] == '总计':
+                key_total = index
+        # All four cells are needed below as list indices; a missing one
+        # would otherwise fail with TypeError on ocr_results[None].
+        if any(k is None for k in (key_xm, key_gmjg, key_sjrzje, key_total)):
+            return table_add_product
+        bbox, _ = ocr_results[key_xm]
+        rh = abs(bbox[1]-bbox[-1])
+        # Start the anchor one row below (and 2*rh to the right of) the '项目' cell.
+        anchor = np.array(bbox).reshape((-1, 2))
+        anchor[:, 0] += 2*rh
+        anchor[:, 1] += rh
+        # Five sweeps over the spans; each hit moves the anchor one row down.
+        for _ in range(5):
+            for span in ocr_results:
+                iou = caculate_iou(anchor, span[0])
+                if iou > 0.01 and span[1].strip() != '所购':
+                    x = get_table_info(span[0], ocr_results[key_gmjg][0], ocr_results)
+                    y = get_table_info(span[0], ocr_results[key_sjrzje][0], ocr_results)
+                    lines.append([span[1].replace('\u3000', ' '), x, y])
+                    anchor = np.array(span[0]).reshape((-1, 2))
+                    anchor[:, 1] += rh
+        total = get_table_info(ocr_results[key_total][0], ocr_results[key_sjrzje][0], ocr_results)
+        lines.append(['总计', '', total])
+        # Product names may wrap onto a second line in the PDF, e.g.
+        #   所购　BMW悦然焕 / 新服务
+        #   所购　BMW5年10 / 万公里长悦保养套餐
+        #   所购　事故维修补偿 / 方案
+        #   所购 BMW5年10万公里 / 长悦保养套餐
+        #   所购 MINI4年6万公里长悦 / 保养套餐
+        # so truncated names are mapped back to their full form.
+        full_names = (
+            ('BMW悦然', '所购 BMW悦然焕新服务'),
+            ('BMW5年10', '所购 BMW5年10万公里长悦保养套餐'),
+            ('事故维修补', '所购 事故维修补偿方案'),
+            ('MINI4年6万公里长悦', '所购 MINI4年6万公里长悦保养套餐'),
+        )
+        filtered_lines = []
+        for row in lines:
+            if row[0][:2] not in ['所购', '项目', '总计']:
+                continue
+            for fragment, full_name in full_names:
+                if fragment in row[0]:
+                    row[0] = full_name
+            filtered_lines.append(row)
+        table_add_product['words'] = filtered_lines
+        table_add_product['page'] = add_product_page_num
+        table_add_product['position'] = None
+        return table_add_product
+    def get_contract_no_dy(self):
+        """Find the mortgage contract number.
+        The last span containing '抵押合同编号' is used as the label. Any span
+        (on any page) that contains 'CH-' and whose vertical centre lies
+        strictly between the label's top and bottom edge is taken as the
+        value; the last such span wins.
+        Returns:
+            dict: copy of ``self.item`` with 'position', 'page' and 'words'
+            filled in when a value was found.
+        """
+        def walk_spans():
+            # Yield (page key, span) for each span of every type-0 block.
+            for page_key in self.pdf_info:
+                for blk in self.pdf_info[page_key]['blocks']:
+                    if blk['type'] != 0:
+                        continue
+                    for row in blk['lines']:
+                        for sp in row['spans']:
+                            yield page_key, sp
+        contract_no = self.item.copy()
+        label_box = None
+        for _, sp in walk_spans():
+            if '抵押合同编号' in sp['text']:
+                label_box = sp['bbox']
+        if label_box is None:
+            return contract_no
+        for page_key, sp in walk_spans():
+            box, txt = sp['bbox'], sp['text']
+            centre_y = np.mean(box[1::2])
+            if label_box[1] < centre_y < label_box[3] and 'CH-' in txt:
+                contract_no['position'] = box
+                contract_no['page'] = page_key
+                contract_no['words'] = txt
+        return contract_no
+    def get_dyr_name_id(self):
+        """Find the mortgagor's name and ID number.
+        The last span whose text is exactly '抵押人' is the anchor. Spans whose
+        vertical centre lies between the anchor's top edge and three anchor
+        heights below its bottom edge are inspected: one containing '姓名'
+        yields the name, one containing '证件号码' yields the ID number. The
+        value is the text after the last '：'; the last match wins.
+        Returns:
+            tuple: ``(name, _id)``, both copies of ``self.item``.
+        """
+        def walk_spans():
+            # Yield (page key, span) for each span of every type-0 block.
+            for page_key in self.pdf_info:
+                for blk in self.pdf_info[page_key]['blocks']:
+                    if blk['type'] != 0:
+                        continue
+                    for row in blk['lines']:
+                        for sp in row['spans']:
+                            yield page_key, sp
+        name = self.item.copy()
+        _id = self.item.copy()
+        anchor_box = None
+        for _, sp in walk_spans():
+            if sp['text'] == '抵押人':
+                anchor_box = sp['bbox']
+        if anchor_box is None:
+            return name, _id
+        row_height = abs(anchor_box[1]-anchor_box[3])
+        band_top = anchor_box[1]
+        band_bottom = anchor_box[3]+row_height*3
+        for page_key, sp in walk_spans():
+            box, txt = sp['bbox'], sp['text']
+            if not band_top < np.mean(box[1::2]) < band_bottom:
+                continue
+            if '姓名' in txt:
+                name['position'] = box
+                name['page'] = page_key
+                name['words'] = txt.split('：')[-1]
+            if '证件号码' in txt:
+                _id['position'] = box
+                _id['page'] = page_key
+                _id['words'] = txt.split('：')[-1]
+        return name, _id
+    def get_dyrpo_name_id(self):
+        """Find the name and ID number of the mortgagor's spouse.
+        The last span whose text is exactly '抵押人配偶(如适' is the anchor.
+        Spans whose vertical centre lies between the anchor's top edge and
+        three anchor heights below its bottom edge are inspected: one
+        containing '姓名' yields the name, one containing '证件号码' yields the
+        ID number (whitespace-stripped). The value is the text after the
+        last '：'; the last match wins.
+        Returns:
+            tuple: ``(name, _id)``, both copies of ``self.item``.
+        """
+        def walk_spans():
+            # Yield (page key, span) for each span of every type-0 block.
+            for page_key in self.pdf_info:
+                for blk in self.pdf_info[page_key]['blocks']:
+                    if blk['type'] != 0:
+                        continue
+                    for row in blk['lines']:
+                        for sp in row['spans']:
+                            yield page_key, sp
+        name = self.item.copy()
+        _id = self.item.copy()
+        anchor_box = None
+        for _, sp in walk_spans():
+            if sp['text'] == '抵押人配偶(如适':
+                anchor_box = sp['bbox']
+        if anchor_box is None:
+            return name, _id
+        row_height = abs(anchor_box[1]-anchor_box[3])
+        band_top = anchor_box[1]
+        band_bottom = anchor_box[3]+row_height*3
+        for page_key, sp in walk_spans():
+            box, txt = sp['bbox'], sp['text']
+            if not band_top < np.mean(box[1::2]) < band_bottom:
+                continue
+            if '姓名' in txt:
+                name['position'] = box
+                name['page'] = page_key
+                name['words'] = txt.split('：')[-1]
+            if '证件号码' in txt:
+                _id['position'] = box
+                _id['page'] = page_key
+                _id['words'] = txt.split('：')[-1].strip()
+        return name, _id
+    def get_key_value_position(self, key):
+        """Find the value printed to the right of a label span.
+        Args:
+            key: exact text of the label span (the last matching span is used).
+        Returns:
+            dict: copy of ``self.item``. It is filled from the last span that
+            (a) has its vertical centre strictly inside the label's vertical
+            extent, (b) starts to the right of the label's left edge and
+            (c) starts within ten label heights of the label's right edge.
+        """
+        def walk_spans():
+            # Yield (page key, span) for each span of every type-0 block.
+            for page_key in self.pdf_info:
+                for blk in self.pdf_info[page_key]['blocks']:
+                    if blk['type'] != 0:
+                        continue
+                    for row in blk['lines']:
+                        for sp in row['spans']:
+                            yield page_key, sp
+        value = self.item.copy()
+        label_box = None
+        for _, sp in walk_spans():
+            if sp['text'] == key:
+                label_box = sp['bbox']
+        if label_box is None:
+            return value
+        label_height = abs(label_box[1]-label_box[3])
+        for page_key, sp in walk_spans():
+            box, txt = sp['bbox'], sp['text']
+            if not label_box[1] < np.mean(box[1::2]) < label_box[3]:
+                continue
+            if not label_box[0] < box[0]:
+                continue
+            if abs(label_box[2]-box[0]) < label_height*10:
+                value['position'] = box
+                value['page'] = page_key
+                value['words'] = txt
+        return value
+    def get_role_info_3_3(self, role_key, page_num='0'):
+        """Read name, ID number and representative of one role (3_3 layout).
+        The top-left corner of the last span starting with '保证人2' is the
+        anchor that splits the page into four quadrants, one per role:
+        '承租人一：' left/above, '共同承租人：' left/below, '保证人1：'
+        right/above, '保证人2：' right/below (compared on span centres).
+        Args:
+            role_key: role label; used as a regex for the name span and as
+                the quadrant selector.
+            page_num: key of the page to scan.
+        Returns:
+            tuple: ``(name, id_num, representative)``, copies of
+            ``self.item``; untouched when the anchor is not on the page.
+        """
+        name = self.item.copy()
+        id_num = self.item.copy()
+        representative = self.item.copy()
+        # role label -> (left of anchor?, above anchor?)
+        quadrant_of = {
+            '承租人一：': (True, True),
+            '共同承租人：': (True, False),
+            '保证人1：': (False, True),
+            '保证人2：': (False, False),
+        }
+        def page_spans():
+            # Yield every span of the type-0 blocks on the requested page.
+            for blk in self.pdf_info[page_num]['blocks']:
+                if blk['type'] != 0:
+                    continue
+                for row in blk['lines']:
+                    yield from row['spans']
+        anchor = None
+        for sp in page_spans():
+            if re.match('保证人2', sp['text']) is not None:
+                anchor = [sp['bbox'][0], sp['bbox'][1]]
+        if anchor is None:
+            return name, id_num, representative
+        quadrant = quadrant_of.get(role_key)
+        def in_quadrant(box):
+            # Strict comparison of the box centre against the anchor corner.
+            left, above = quadrant
+            centre_x = np.mean(box[::2])
+            x_ok = centre_x < anchor[0] if left else centre_x > anchor[0]
+            if not x_ok:
+                return False
+            centre_y = np.mean(box[1::2])
+            return centre_y < anchor[1] if above else centre_y > anchor[1]
+        def fill(target, txt, box):
+            # Store the text after the last '：' together with its location.
+            target['words'] = txt.split('：')[-1]
+            target['page'] = page_num
+            target['position'] = box
+        for sp in page_spans():
+            box, txt = sp['bbox'], sp['text']
+            if re.match(role_key, txt) is not None:
+                fill(name, txt, box)
+            if quadrant is None:
+                continue
+            if re.match('证件号码：', txt) is not None and in_quadrant(box):
+                fill(id_num, txt, box)
+            if re.match('法定代表人或授权代表：', txt) is not None and in_quadrant(box):
+                fill(representative, txt, box)
+        return name, id_num, representative
+    def get_value_by_findall(self, prefix, suffix, page_num):
+        """Extract the text between ``prefix`` and ``suffix`` on one page.
+        All span texts of the page are concatenated and searched with the
+        non-greedy regex ``{prefix}(.*?){suffix}``; both arguments are
+        interpolated into the pattern as-is. Only the first hit is used.
+        Args:
+            prefix: pattern text preceding the value.
+            suffix: pattern text following the value.
+            page_num: key of the page to scan.
+        Returns:
+            dict: copy of ``self.item``. It is filled from the last span whose
+            text contains the first hit; it stays untouched when there is no
+            hit or no single span contains it.
+        """
+        value = self.item.copy()
+        def page_spans():
+            # Yield every span of the type-0 blocks on the requested page.
+            for blk in self.pdf_info[page_num]['blocks']:
+                if blk['type'] != 0:
+                    continue
+                for row in blk['lines']:
+                    yield from row['spans']
+        page_text = ''.join(sp['text'] for sp in page_spans())
+        hits = re.findall(f"{prefix}(.*?){suffix}", page_text)
+        if not hits:
+            return value
+        first_hit = hits[0]
+        for sp in page_spans():
+            if first_hit in sp['text']:
+                value['position'] = sp['bbox']
+                value['page'] = page_num
+                value['words'] = first_hit
+        return value
+    def get_info(self):
+        """Fill ``self.init_result`` with the fields of the main lease contract.
+        Nothing is extracted when ``self.pdf_info`` is empty. The page-0 scan
+        below reads ``lines``/``spans``/``text`` only from blocks with
+        ``block['type'] == 0`` and skips every other block type.
+        Two layouts are handled: the regular one (roles 承租人 / 保证人1-3) and
+        the "3_3" / car-loan-separated one (承租人一 / 共同承租人 / 保证人1-2),
+        detected by a '共同承租人：' span on page '0'.
+        Returns:
+            dict: ``self.init_result`` (mutated in place).
+        """
+        if len(self.pdf_info) > 0:
+            # Contract number printed on the first page (page key '0')
+            contract_no = self.get_contract_no(page_num='0')
+            self.init_result['合同编号'] = contract_no
+            # Rough check whether this is the "car/loan separated" (车贷分离) version
+            is_cdfl = False
+            for block in self.pdf_info['0']['blocks']:
+                if block['type'] != 0:
+                    continue
+                for line in block['lines']:
+                    for span in line['spans']:
+                        bbox, text = span['bbox'], span['text']
+                        if '共同承租人：' in text:
+                            is_cdfl = True
+            if is_cdfl == False:
+                # Read name and ID number of the four roles from the first page
+                name, id_num, representative = self.get_role_info(role_key='承租人：', page_num='0')
+                if name["words"] == None:
+                    name, id_num, representative = self.get_role_info_3_3(role_key='承租人一：', page_num='0')
+                self.init_result['承租人-姓名'] = name
+                self.init_result['承租人-证件号码'] = id_num
+                self.init_result['承租人-法定代表人或授权代表'] = representative
+                name, id_num, representative = self.get_role_info(role_key='保证人1：', page_num='0')
+                self.init_result['保证人1-姓名'] = name
+                self.init_result['保证人1-证件号码'] = id_num
+                self.init_result['保证人1-法定代表人或授权代表'] = representative
+                # Fallback for the 3_3 layout
+                if name["words"] == None:
+                    name, id_num, representative = self.get_role_info_3_3(role_key='共同承租人：', page_num='0')
+                    self.init_result['共同承租人-姓名'] = name
+                    self.init_result['共同承租人-证件号码'] = id_num
+                    self.init_result['共同承租人-法定代表人或授权代表'] = representative
+                name, id_num, representative = self.get_role_info(role_key='保证人2：', page_num='0')
+                self.init_result['保证人2-姓名'] = name
+                self.init_result['保证人2-证件号码'] = id_num
+                self.init_result['保证人2-法定代表人或授权代表'] = representative
+                # Fallback for the 3_3 layout
+                # NOTE(review): the 3_3 lookup for '保证人1：' is stored under the
+                # '保证人2-*' keys here, whereas the signature-page fallback further
+                # down stores its '保证人1' lookup under '保证人1' keys - confirm intent.
+                if name["words"] == None:
+                    name, id_num, representative = self.get_role_info_3_3(role_key='保证人1：', page_num='0')
+                    self.init_result['保证人2-姓名'] = name
+                    self.init_result['保证人2-证件号码'] = id_num
+                    self.init_result['保证人2-法定代表人或授权代表'] = representative
+                name, id_num, representative = self.get_role_info(role_key='保证人3：', page_num='0')
+                self.init_result['保证人3-姓名'] = name
+                self.init_result['保证人3-证件号码'] = id_num
+                self.init_result['保证人3-法定代表人或授权代表'] = representative
+                # NOTE(review): same pattern - the 3_3 '保证人2：' lookup lands in the
+                # '保证人3-*' keys; confirm this is intended.
+                if name["words"] == None:
+                    name, id_num, representative = self.get_role_info_3_3(role_key='保证人2：', page_num='0')
+                    self.init_result['保证人3-姓名'] = name
+                    self.init_result['保证人3-证件号码'] = id_num
+                    self.init_result['保证人3-法定代表人或授权代表'] = representative
+            else:
+                name, id_num, representative = self.get_role_info_3_3(role_key='承租人一：', page_num='0')
+                self.init_result['承租人-姓名'] = name
+                self.init_result['承租人-证件号码'] = id_num
+                self.init_result['承租人-法定代表人或授权代表'] = representative
+                name, id_num, representative = self.get_role_info_3_3(role_key='共同承租人：', page_num='0')
+                self.init_result['共同承租人-姓名'] = name
+                self.init_result['共同承租人-证件号码'] = id_num
+                self.init_result['共同承租人-法定代表人或授权代表'] = representative
+                name, id_num, representative = self.get_role_info_3_3(role_key='保证人1：', page_num='0')
+                self.init_result['保证人1-姓名'] = name
+                self.init_result['保证人1-证件号码'] = id_num
+                self.init_result['保证人1-法定代表人或授权代表'] = representative
+                name, id_num, representative = self.get_role_info_3_3(role_key='保证人2：', page_num='0')
+                self.init_result['保证人2-姓名'] = name
+                self.init_result['保证人2-证件号码'] = id_num
+                self.init_result['保证人2-法定代表人或授权代表'] = representative
+            # Contract number inside the body text (Part 2: main lease terms and
+            # payment schedule), looked up across all pages; the number may wrap
+            # onto a new line, so no position is output for now
+            contract_no = self.get_contract_no_one()
+            self.init_result['合同编号（正文）'] = contract_no
+            # Vehicle identification number (VIN)
+            vin = self.get_key_value(key='车辆识别代码：')
+            self.init_result['车辆识别代码'] = vin
+            # Vehicle seller (dealer); fall back to the shorter label
+            seller = self.get_key_value(key='车辆卖方（经销商）：')
+            if seller['words'] == None:
+                seller = self.get_key_value(key='车辆卖方：')
+            self.init_result['车辆卖方（经销商）'] = seller
+            # Vehicle agent, looked up on page key '4'
+            cldls = self.get_key_value(key='车辆代理商', page_num='4')
+            self.init_result['车辆代理商'] = cldls
+            # Original vehicle sales price (amount on the motor vehicle sales invoice)
+            vehicle_price = self.get_key_value(key='车辆原始销售价格（《机动车销售统一发票》所列金额）：')
+            self.init_result['车辆原始销售价格（《机动车销售统一发票》所列金额）'] = vehicle_price
+            # Vehicle add-on products detail table
+            table_add_product = self.get_table_add_product()
+            self.init_result['车辆附加产品明细表'] = table_add_product
+            # Total financing cost
+            financing_cost = self.get_key_value(key='融资成本总额：')
+            self.init_result['融资成本总额'] = financing_cost
+            # Lease term
+            lease_term = self.get_key_value(key='租期：')
+            self.init_result['租期'] = lease_term
+            # Repayment schedule table
+            repayment_schedule = self.get_repayment_schedule()
+            self.init_result['付款计划表'] = repayment_schedule
+            # Lessee's receiving account: holder name, account number, bank (page key '4')
+            name = self.get_key_value(key='户名：', page_num='4')
+            self.init_result['收款银行账户-户名'] = name
+            account = self.get_key_value(key='银行账号：', page_num='4')
+            self.init_result['收款银行账户-银行账号'] = account
+            bank = self.get_key_value(key='开户银行：', page_num='4')
+            self.init_result['收款银行账户-开户行'] = bank
+            # Lessee's debit account: holder name, account number, bank (page key '5')
+            name = self.get_key_value(key='户名：', page_num='5')
+            self.init_result['银行账户-户名'] = name
+            account = self.get_key_value(key='银行账号：', page_num='5')
+            self.init_result['银行账户-银行账号'] = account
+            bank = self.get_key_value(key='开户银行：', page_num='5')
+            self.init_result['银行账户-开户行'] = bank
+            # Information on the signature page
+            # Lessee name and signature stamp
+            if is_cdfl == False:
+                name = self.get_key_value(key='承租人姓名：')
+                electronic_signature = self.get_electronic_signature(top='承租人姓名：', bottom='保证人1姓名：', t_pno='5')
+                if name["words"] == None:
+                    name = self.get_key_value(key='承租人一姓名：')
+                    electronic_signature = self.get_electronic_signature(top='承租人一姓名：', bottom='共同承租人名称：', t_pno='5')
+                self.init_result['签字页-承租人姓名'] = name
+                self.init_result['签字页-承租人签章'] = electronic_signature
+                # Guarantor 1 name and signature stamp
+                name = self.get_key_value(key='保证人1姓名：')
+                electronic_signature = self.get_electronic_signature(top='保证人1姓名：', bottom='保证人2姓名：', t_pno='5')
+                self.init_result['签字页-保证人1姓名'] = name
+                self.init_result['签字页-保证人1签章'] = electronic_signature
+                # Note: this check compares against "" rather than None
+                if name["words"] == "":
+                    name = self.get_key_value(key='共同承租人名称：')
+                    electronic_signature = self.get_electronic_signature(top='共同承租人名称：', bottom='保证人1姓名：', t_pno='5')
+                    self.init_result['签字页-共同承租人姓名'] = name
+                    self.init_result['签字页-共同承租人签章'] = electronic_signature
+                # Guarantor 2 name and signature stamp
+                name = self.get_key_value(key='保证人2姓名：')
+                electronic_signature = self.get_electronic_signature(top='保证人2姓名：', bottom='保证人3姓名：', t_pno='5')
+                self.init_result['签字页-保证人2姓名'] = name
+                self.init_result['签字页-保证人2签章'] = electronic_signature
+                # Fallback for the 3_3 layout (compares against "")
+                if name["words"] == "":
+                    name = self.get_key_value(key='保证人1姓名：')
+                    electronic_signature = self.get_electronic_signature(top='保证人1姓名：', bottom='保证人2姓名：', t_pno='5')
+                    self.init_result['签字页-保证人1姓名'] = name
+                    self.init_result['签字页-保证人1签章'] = electronic_signature
+                # Guarantor 3 name and signature stamp
+                name = self.get_key_value(key='保证人3姓名：')
+                electronic_signature = self.get_electronic_signature(top='保证人3姓名：', bottom='日期：', t_pno='5')
+                self.init_result['签字页-保证人3姓名'] = name
+                self.init_result['签字页-保证人3签章'] = electronic_signature
+                # Fallback for the 3_3 layout (compares against None, unlike the two checks above)
+                if name["words"] == None:
+                    name = self.get_key_value(key='保证人2姓名：')
+                    electronic_signature = self.get_electronic_signature(top='保证人2姓名：', bottom='日期：', t_pno='5')
+                    self.init_result['签字页-保证人2姓名'] = name
+                    self.init_result['签字页-保证人2签章'] = electronic_signature
+            else:
+                name = self.get_key_value(key='承租人一姓名：')
+                electronic_signature = self.get_electronic_signature(top='承租人一姓名：', bottom='共同承租人名称：', t_pno='5')
+                self.init_result['签字页-承租人姓名'] = name
+                self.init_result['签字页-承租人签章'] = electronic_signature
+                name = self.get_key_value(key='共同承租人名称：')
+                electronic_signature = self.get_electronic_signature(top='共同承租人名称：', bottom='保证人1姓名：', t_pno='5')
+                self.init_result['签字页-共同承租人姓名'] = name
+                self.init_result['签字页-共同承租人签章'] = electronic_signature
+                name = self.get_key_value(key='保证人1姓名：')
+                electronic_signature = self.get_electronic_signature(top='保证人1姓名：', bottom='保证人2姓名：', t_pno='5')
+                self.init_result['签字页-保证人1姓名'] = name
+                self.init_result['签字页-保证人1签章'] = electronic_signature
+                name = self.get_key_value(key='保证人2姓名：')
+                electronic_signature = self.get_electronic_signature(top='保证人2姓名：', bottom='保证人3姓名：', t_pno='5')
+                self.init_result['签字页-保证人2姓名'] = name
+                self.init_result['签字页-保证人2签章'] = electronic_signature
+        return self.init_result
+    def get_info_1(self):
+        """Fill ``self.init_result_1`` and return it.
+        Nothing is extracted when ``self.pdf_info`` is empty. For both dealer
+        fields a regex lookup between two marker texts is used as fallback
+        when the key/value lookup yields ``""``.
+        Returns:
+            dict: ``self.init_result_1`` (mutated in place).
+        """
+        if not len(self.pdf_info) > 0:
+            return self.init_result_1
+        self.init_result_1['合同编号'] = self.get_contract_no(page_num='0')
+        # Lessee name and ID number from page '0'
+        self.init_result_1['承租人-姓名'] = self.get_key_value(key='承租人：', page_num='0')
+        self.init_result_1['承租人-证件号码'] = self.get_key_value(key='证件号码：', page_num='0')
+        # Selling dealer on page '0'; fall back to the text before '地址：'
+        dealer = self.get_key_value(key='销售经销商：', page_num='0')
+        if dealer['words'] == "":
+            dealer = self.get_value_by_findall('销售经销商：', '地址：', page_num='0')
+        self.init_result_1['销售经销商'] = dealer
+        # Contract number inside the body text
+        self.init_result_1['合同编号（正文）'] = self.get_contract_no_one()
+        # Signature page: lessee name, ID number and signature stamp
+        self.init_result_1['签字页-承租人姓名'] = self.get_key_value(key='姓名/名称：')
+        self.init_result_1['签字页-承租人证件号码'] = self.get_key_value(key='自然人身份证件号码/法人执照号码：')
+        self.init_result_1['签字页-承租人签章'] = self.get_signature_role_1()
+        # Signature page: selling dealer, e.g.
+        # 销售经销商：深圳市宝创汽车贸易有限公司南山分公司（请授权代表签字并请盖章）
+        signing_dealer = self.get_key_value(key='销售经销商：')
+        if signing_dealer['words'] == "":
+            signing_dealer = self.get_value_by_findall('销售经销商：', '（请授权代表签字并请盖章）', page_num='3')
+        self.init_result_1['签字页-销售经销商'] = signing_dealer
+        # The dealer's signature stamp is not extracted.
+        return self.init_result_1
+    def get_info_2(self):
+        """Fill ``self.init_result_2`` with the mortgage-contract fields.
+        Nothing is extracted when ``self.pdf_info`` is empty.
+        Returns:
+            dict: ``self.init_result_2`` (mutated in place).
+        """
+        if not len(self.pdf_info) > 0:
+            return self.init_result_2
+        self.init_result_2['合同编号'] = self.get_contract_no_dy()
+        # Contract number inside the body text
+        self.init_result_2['合同编号（正文）'] = self.get_contract_no_one()
+        # Mortgagor name and ID number
+        mortgagor_name, mortgagor_id = self.get_dyr_name_id()
+        self.init_result_2['抵押人姓名/名称'] = mortgagor_name
+        self.init_result_2['抵押人证件号码'] = mortgagor_id
+        # Mortgagor's spouse name and ID number
+        spouse_name, spouse_id = self.get_dyrpo_name_id()
+        self.init_result_2['抵押人配偶姓名/名称'] = spouse_name
+        self.init_result_2['抵押人配偶证件号码'] = spouse_id
+        # Vehicle identification number
+        self.init_result_2['车辆识别代码'] = self.get_key_value(key='车辆识别代码：')
+        # Total rent and lease term, read to the right of their labels
+        self.init_result_2['租金总额'] = self.get_key_value_position(key='租金总额')
+        self.init_result_2['融资租赁期限'] = self.get_key_value_position(key='融资租赁期限')
+        # Signature page: mortgagor name and stamp (both looked up before storing)
+        signer, stamp = (
+            self.get_key_value(key='抵押人姓名：'),
+            self.get_electronic_signature(top='抵押权人盖章', bottom='抵押人配偶姓名：', t_pno='1'),
+        )
+        self.init_result_2['签字页-抵押人姓名'] = signer
+        self.init_result_2['签字页-抵押人签章'] = stamp
+        # Signature page: spouse name and stamp
+        signer, stamp = (
+            self.get_key_value(key='抵押人配偶姓名：'),
+            self.get_electronic_signature(top='抵押人配偶姓名：', bottom='日期', t_pno='1'),
+        )
+        self.init_result_2['签字页-抵押人配偶姓名'] = signer
+        self.init_result_2['签字页-抵押人配偶签章'] = stamp
+        return self.init_result_2
\ No newline at end of file
--- a/src/common/electronic_hil_contract/hil_contract_ocr.py
View file @9f4b364
+++ b/src/common/electronic_hil_contract/hil_contract_ocr.py
View file @9f4b364
@@ -6,9 +6,10 @@
 # @Description   :
 from .get_char import Finder
+from .get_char_fsm import Finder as FSMFinder
-def predict(pdf_info, file_cls):
+def predict(pdf_info, file_cls, is_fsm=False):
    """Summary
    Args:
@@ -58,6 +59,10 @@ def predict(pdf_info, file_cls):
        pdf_info = dict()
        for pno, page_info in enumerate(pdf_info_1):
            pdf_info[str(pno)] = page_info
+    if is_fsm:
+        f = FSMFinder(pdf_info) 
+    else:
        f = Finder(pdf_info)
    if file_cls == 0:
        results = f.get_info()
--- a/src/common/fsm_econtract/const.py 0 → 100644
View file @9f4b364
+++ b/src/common/fsm_econtract/const.py 0 → 100644
View file @9f4b364
+# Field layout for the WEP contract (presumably the extended-warranty contract,
+# going by the FSM_CONTRACT_WEP_* constants -- confirm), passed to Retriever.
+# Top-level key: page number as a string.
+# 'keys':  field -> list of (key_text, regex tuple, direction, kwargs); a text
+#          span matching any regex is a keyword candidate, and `direction`
+#          selects the Retriever.key_<direction> method that picks one.
+# 'value': field -> (source, direction, kwargs, default); source 'text'
+#          searches text spans, any other source searches image blocks,
+#          `direction` selects Retriever.value_<direction>, and `default` is
+#          reported when nothing is found.
+# offset_tuple is (offset_xmin, offset_xmax, offset_ymin, offset_ymax) in
+# multiples of the keyword box width/height; the min offsets are subtracted
+# from the box's top-left corner, so a negative offset_xmin moves the left
+# edge of the search window to the right.
+WEP_FIELD = {
+    "0": {
+        'keys': {
+            '客户姓名': [('客户姓名', (r'^姓名.?$', r'^企业名称.?$'), 'top1', {})],
+            '证件类型': [('证件类型', (r'^证件类型.?$', ), 'top1', {})],
+            '证件号码': [('证件号码', (r'^证件号码.?$', r'^统一社会信用代码.?$'), 'top1', {})],
+            '合同价格（小写）': [('人民币', (r'^人民币￥.?$', ), 'top1', {})],
+            '客户签名': [('客户签名／盖章', (r'^客户签名／盖章.*$', ), 'top1', {})],
+            '签单日期': [('签单日期', (r'^签单日期.*签单日期.?$', ), 'top1', {})],
+        },
+        'value': {
+            '客户姓名': ('text', 'right', {'offset_tuple': (-1.1, 1, 0.3, 0)}, ''),
+            '证件类型': ('text', 'right', {'offset_tuple': (-1, 1, 0, 0)}, ''),
+            '证件号码': ('text', 'right', {'offset_tuple': (-1, 2, 0.3, 0)}, ''),
+            '合同价格（小写）': ('text', 'right', {'offset_tuple': (-1, 1, 0.3, 0)}, ''),
+            '客户签名': ('img', 'under', {'offset_tuple': (0, 0, 0, 4), 'rigorous': True}, '无'),
+            '签单日期': ('img', 'right', {'offset_tuple': (0, 0, 1.1, 0), 'rigorous': True}, '无'),
+        },
+    }
+}
+# Field layout for the MSI contract (presumably the maintenance contract, going
+# by the FSM_CONTRACT_MSI_* constants -- confirm), passed to Retriever.
+# Top-level key: page number as a string; page "0" holds the customer and price
+# fields, page "1" the signature and signing-date image checks.
+# 'keys':  field -> list of (key_text, regex tuple, direction, kwargs).
+# 'value': field -> (source, direction, kwargs, default); source 'text'
+#          searches text spans, any other source searches image blocks.
+# offset_tuple is (offset_xmin, offset_xmax, offset_ymin, offset_ymax) in
+# multiples of the keyword box width/height.
+MSI_FIELD = {
+    "0": {
+        'keys': {
+            '客户姓名': [('客户姓名', (r'^客户姓名.?$', r'^企业名称.?$'), 'top1', {})],
+            '证件类型': [('证件类型', (r'^证件类型.?$', ), 'top1', {})],
+            '证件号码': [('证件号码', (r'^证件号码.?$', r'^统一社会信用代码.?$'), 'top1', {})],
+            '合同价格（小写）': [('人民币', (r'^人民币￥.?$', ), 'top1', {})],
+        },
+        'value': {
+            '客户姓名': ('text', 'right', {'offset_tuple': (-1.2, 1, 0.3, 0)}, ''),
+            '证件类型': ('text', 'right', {'offset_tuple': (-1, 1, 0, 0)}, ''),
+            '证件号码': ('text', 'right', {'offset_tuple': (-1, 2, 0.3, 0)}, ''),
+            '合同价格（小写）': ('text', 'right', {'offset_tuple': (-1, 1, 0.3, 0)}, ''),
+        },
+    },
+    "1": {
+        'keys': {
+            '客户签名': [('客户签名／盖章', (r'^客户签名／盖章.*$', ), 'top1', {})],
+            '签单日期': [('签单日期', (r'^签单日期.*签单日期.?$', ), 'top1', {})],
+        },
+        'value': {
+            '客户签名': ('img', 'under', {'offset_tuple': (0, 0, 0, 4), 'rigorous': True}, '无'),
+            '签单日期': ('img', 'right', {'offset_tuple': (0, 0, 1.1, 0), 'rigorous': True}, '无'),
+        },
+    }
+}
+# Field layout for the SC contract (presumably the car sales contract, going by
+# the FSM_CONTRACT_SC_* constants -- confirm), passed to Retriever.
+# Top-level key: page number as a string; "-1" is resolved by Retriever to the
+# highest page number present in the extracted text.
+# 'keys':  field -> list of (key_text, regex tuple, direction, kwargs).
+# 'value': field -> (source, direction, kwargs, default); source 'text'
+#          searches text spans, any other source searches image blocks.
+# offset_tuple is (offset_xmin, offset_xmax, offset_ymin, offset_ymax) in
+# multiples of the keyword box width/height.
+SC_FIELD = {
+    "0": {
+        'keys': {
+            '姓名': [('姓名', (r'^姓名.?$', r'^企业名称.?$'), 'top1', {})],
+            '证件类型': [('证件类型', (r'^证件类型.?$', ), 'top1', {})],
+            '证件号码': [('证件号码', (r'^证件号码.?$', r'^统一社会信用代码.?$'), 'top1', {})],
+            '总价': [('总价', (r'^总价.?$', ), 'top1', {})],
+        },
+        'value': {
+            '姓名': ('text', 'right', {'offset_tuple': (-2, 8, 0.5, 0)}, ''),
+            '证件类型': ('text', 'right', {'offset_tuple': (-2, 6, 0.5, 0)}, ''),
+            '证件号码': ('text', 'right', {'offset_tuple': (-2, 6, 0.5, 0)}, ''),
+            '总价': ('text', 'right', {'offset_tuple': (-2, 12, 0.5, 0)}, ''),
+        },
+    },
+    "-1": {
+        'keys': {
+            # Both the ASCII and the full-width slash variants are accepted.
+            '客户签名': [('客户签名/盖章', (r'^客户签名/盖章.*$', r'^客户签名／盖章.*$'), 'top1', {})],
+            '签单日期': [('签单日期', (r'^签单日期.*签单日期.?$', ), 'top1', {})],
+        },
+        'value': {
+            '客户签名': ('img', 'under', {'offset_tuple': (1.5, 1, 0, 4), 'rigorous': True}, '无'),
+            '签单日期': ('img', 'right', {'offset_tuple': (0, 0, 1.1, 0), 'rigorous': True}, '无'),
+        },
+    }
+}
--- a/src/common/fsm_econtract/fsm_contract_ocr.py 0 → 100644
View file @9f4b364
+++ b/src/common/fsm_econtract/fsm_contract_ocr.py 0 → 100644
View file @9f4b364
+from .retriever import Retriever
+from .const import WEP_FIELD, MSI_FIELD, SC_FIELD
+from .tools import pdf_info_rebuild
+retriever_list = [Retriever(WEP_FIELD), Retriever(MSI_FIELD), Retriever(SC_FIELD)]
+def predict(pdf_info, file_type=0):
+    """Extract the configured fields from ``pdf_info``.
+    ``file_type`` indexes ``retriever_list`` (0: WEP, 1: MSI, 2: SC layouts).
+    Returns whatever the selected retriever's ``get_target_fields`` returns.
+    """
+    # Pick the retriever first so a bad file_type fails before any rebuilding.
+    selected = retriever_list[file_type]
+    text_by_page, img_by_page = pdf_info_rebuild(pdf_info)
+    return selected.get_target_fields(text_by_page, img_by_page)
--- a/src/common/fsm_econtract/hmh_ocr.py 0 → 100644
View file @9f4b364
+++ b/src/common/fsm_econtract/hmh_ocr.py 0 → 100644
View file @9f4b364
+from .retriever import HMHRetriever
+from .tools import pdf_info_rebuild
+hmh_retriever = HMHRetriever() 
+def predict(pdf_info):
+    """Extract the HMH fields from ``pdf_info`` using the module-level retriever.
+    Only the text spans are used; bounding boxes are passed through unfixed.
+    """
+    text_by_page, _unused_images = pdf_info_rebuild(pdf_info, fix_bbox=False)
+    return hmh_retriever.get_target_fields(text_by_page)
--- a/src/common/fsm_econtract/retriever.py 0 → 100644
View file @9f4b364
+++ b/src/common/fsm_econtract/retriever.py 0 → 100644
View file @9f4b364
+import re
+class HMHRetriever:
+    """Regex-based extractor that scans the text spans of page '0'.
+    ``get_target_fields`` looks for the five fields listed in
+    ``search_fields_list`` and reports each field's default when it is not
+    found.
+    """
+    # One span carrying name, ID number and company, in that order.
+    _NAME_ID_COMPANY_RE = re.compile(r'姓名(.*)证件号码(.*)与(.*公司)')
+    _APPLICATION_NO_RE = re.compile(r'合同编号.*(CH-B\d*-\d*).*')
+    _NAME_DATE_RE = re.compile(r'(.*).*签署日期.*(\d{4}-\d{2}-\d{2})')
+    # Characters removed from the captured ID number: ideographic space and
+    # both the ASCII and the full-width closing parenthesis.
+    _ID_STRIP_TABLE = str.maketrans('', '', '\u3000)）')
+    def __init__(self):
+        self.words_str = 'words'
+        self.position_str = 'location'
+        self.fix_hava_str = '有'
+        self.default_position = [0, 0, 0, 0]
+        # (field name, default value reported when the field is not found)
+        self.search_fields_list = [
+            ('借款/承租人姓名', ''),
+            ('证件号码', ''),
+            ('渠道', ''),
+            ('合同编号', ''),
+            ('借款人签字/盖章', '无'),
+        ]
+    def get_target_fields(self, pdf_text_list):
+        """Return ``{"words_result": {field: {'words': ..., 'location': ...}}}``.
+        Args:
+            pdf_text_list: mapping of page-number string to a list of
+                ``(bbox, text)`` pairs; only page '0' is read and the mapping
+                is not modified.
+        Every field of ``search_fields_list`` is present in the result; a field
+        that was not found carries its default value and a copy of
+        ``default_position``.
+        """
+        field_names = [name for name, _ in self.search_fields_list]
+        name_key, id_key, channel_key, contract_key, sign_key = field_names[:5]
+        result = dict()
+        # .get() rather than .pop(): the caller's mapping stays intact.
+        for bbox, text in pdf_text_list.get(str(0), []):
+            if name_key not in result:
+                match = self._NAME_ID_COMPANY_RE.search(text)
+                if match is not None:
+                    name, id_no, company = match.groups()
+                    result[name_key] = {
+                        self.words_str: name.replace('\u3000', '').strip(),
+                        self.position_str: bbox
+                    }
+                    result[id_key] = {
+                        self.words_str: id_no.translate(self._ID_STRIP_TABLE).strip(),
+                        self.position_str: bbox
+                    }
+                    result[channel_key] = {
+                        self.words_str: company,
+                        self.position_str: bbox
+                    }
+            if contract_key not in result:
+                # Accepted only when the span yields exactly one match.
+                application_no_list = self._APPLICATION_NO_RE.findall(text)
+                if len(application_no_list) == 1:
+                    result[contract_key] = {
+                        self.words_str: application_no_list[0],
+                        self.position_str: bbox
+                    }
+            if sign_key not in result:
+                # Only the presence of a signing date is reported, not its text.
+                if self._NAME_DATE_RE.search(text) is not None:
+                    result[sign_key] = {
+                        self.words_str: self.fix_hava_str,
+                        self.position_str: bbox
+                    }
+        for find_key, default_value in self.search_fields_list:
+            if find_key not in result:
+                result[find_key] = {
+                    self.words_str: default_value,
+                    # Copy so results never alias the shared default list.
+                    self.position_str: list(self.default_position),
+                }
+        return {"words_result": result}
+class Retriever:
+    """Extract field values from PDF pages by searching around keyword boxes.
+    ``target_fields`` maps a page-number string ('-1' selects the last page)
+    to a dict with two entries:
+    * 'keys':  field -> list of (key_text, regex_tuple, direction, kwargs);
+      ``direction`` selects the ``key_<direction>`` method.
+    * 'value': field -> (source, direction, kwargs, default_value); source
+      'text' searches text spans, any other source searches image blocks, and
+      ``direction`` selects the ``value_<direction>`` method.
+    Boxes are ``(x0, y0, x1, y1)`` sequences.
+    """
+    def __init__(self, target_fields):
+        self.keys_str = 'keys'
+        self.value_str = 'value'
+        self.text_str = 'text'
+        self.words_str = 'words'
+        self.position_str = 'position'
+        self.default_position = [-1, -1, -1, -1]
+        self.target_fields = target_fields
+        # value_type -> single-character replacements applied to found values.
+        self.replace_map = {
+            'int': {
+                '(': '0'
+            }
+        }
+    @staticmethod
+    def _is_eligible(box, window, rigorous):
+        """Tell whether ``box`` lies in ``window`` (both x0, y0, x1, y1).
+        With ``rigorous`` the whole box must be strictly inside the window;
+        otherwise only the box center must be.
+        """
+        x0, y0, x1, y1 = box
+        x_min, y_min, x_max, y_max = window
+        if rigorous:
+            return x_min < x0 and x1 < x_max and y_min < y0 and y1 < y_max
+        cent_x = x0 + ((x1 - x0) / 2)
+        cent_y = y0 + ((y1 - y0) / 2)
+        return x_min < cent_x < x_max and y_min < cent_y < y_max
+    def _apply_replace_map(self, value, value_type):
+        """Apply the ``replace_map`` entry for ``value_type`` to a str value."""
+        if isinstance(value_type, str) and value_type in self.replace_map and isinstance(value, str):
+            return value.translate(str.maketrans(self.replace_map[value_type]))
+        return value
+    @staticmethod
+    def key_top1(coordinates_list, key_coordinates):
+        """Return the candidate with the smallest y0 (first one on ties).
+        ``key_coordinates`` is accepted for signature parity with the other
+        ``key_*`` methods and is not used. The input list is not reordered.
+        """
+        return sorted(coordinates_list, key=lambda box: box[1])[0]
+    def key_right(self, coordinates_list, key_coordinates, offset_tuple, rigorous=False):
+        """Pick the left-most candidate inside the window around ``key_coordinates``.
+        Falls back to ``key_top1`` when there is no previous keyword box or no
+        candidate lies in the window; a single candidate is returned as is.
+        """
+        if len(coordinates_list) == 1:
+            return coordinates_list[0]
+        # Without a previous keyword box there is no window to search in.
+        if key_coordinates is None:
+            return self.key_top1(coordinates_list, key_coordinates)
+        window = self.get_target_bbox(key_coordinates, offset_tuple)
+        x_min_find, find_key_coordinates = None, None
+        for x0, y0, x1, y1 in coordinates_list:
+            if not self._is_eligible((x0, y0, x1, y1), window, rigorous):
+                continue
+            if x_min_find is None or x0 < x_min_find:
+                x_min_find = x0
+                find_key_coordinates = (x0, y0, x1, y1)
+        if find_key_coordinates is None:
+            return self.key_top1(coordinates_list, key_coordinates)
+        return find_key_coordinates
+    def value_right(self, search_list, key_coordinates, offset_tuple, value_type=None, rigorous=False):
+        """Return ``(value, box)`` of the left-most non-blank item in the window.
+        ``search_list`` holds ``(box, text)`` pairs. Returns ``(None, None)``
+        when nothing qualifies.
+        """
+        window = self.get_target_bbox(key_coordinates, offset_tuple)
+        x_min_find, value, coordinates = None, None, None
+        for (x0, y0, x1, y1), text in search_list:
+            if not self._is_eligible((x0, y0, x1, y1), window, rigorous):
+                continue
+            if x_min_find is not None and x0 >= x_min_find:
+                continue
+            if text.strip():
+                x_min_find, value, coordinates = x0, text, (x0, y0, x1, y1)
+        return self._apply_replace_map(value, value_type), coordinates
+    def value_under(self, search_list, key_coordinates, offset_tuple, value_type=None, append=False, rigorous=False):
+        """Return ``(value, box)`` of the top-most non-blank item in the window.
+        Items are ordered by (y0, x0). With ``append`` the texts of all items
+        are concatenated in that order; the returned box is always that of the
+        first item. Returns ``(None, None)`` when nothing qualifies.
+        """
+        window = self.get_target_bbox(key_coordinates, offset_tuple)
+        find_list = [
+            (x0, y0, x1, y1, text)
+            for (x0, y0, x1, y1), text in search_list
+            if self._is_eligible((x0, y0, x1, y1), window, rigorous) and text.strip()
+        ]
+        if not find_list:
+            return None, None
+        find_list.sort(key=lambda item: (item[1], item[0]))
+        coordinates = find_list[0][:-1]
+        if append:
+            value = ''.join(item[-1] for item in find_list)
+        else:
+            value = find_list[0][-1]
+        return self._apply_replace_map(value, value_type), coordinates
+    @staticmethod
+    def get_target_bbox(key_coordinates, offset_tuple):
+        """Return the search window ``(x_min, y_min, x_max, y_max)``.
+        ``offset_tuple`` is (offset_xmin, offset_xmax, offset_ymin,
+        offset_ymax) in multiples of the keyword box width/height. The min
+        offsets are subtracted from the top-left corner, so a negative min
+        offset moves that edge right/down.
+        """
+        offset_xmin, offset_xmax, offset_ymin, offset_ymax = offset_tuple
+        width = key_coordinates[2] - key_coordinates[0]
+        height = key_coordinates[-1] - key_coordinates[1]
+        x_min = key_coordinates[0] - (width * offset_xmin)
+        x_max = key_coordinates[2] + (width * offset_xmax)
+        y_min = key_coordinates[1] - (height * offset_ymin)
+        y_max = key_coordinates[-1] + (height * offset_ymax)
+        return x_min, y_min, x_max, y_max
+    def _find_key_boxes(self, keys_dict, page_text):
+        """Map each key_text to the boxes of the text spans matching its regexes."""
+        key_text_info = dict()
+        for key_text_list in keys_dict.values():
+            for key_text, key_re_tuple, _, _ in key_text_list:
+                for (x0, y0, x1, y1), text in page_text:
+                    # A span matching several patterns is recorded once.
+                    if any(re.match(key_re, text) for key_re in key_re_tuple):
+                        key_text_info.setdefault(key_text, list()).append((x0, y0, x1, y1))
+        return key_text_info
+    def _resolve_key_coordinates(self, keys_dict, key_text_info):
+        """Walk each field's keyword chain and keep the last keyword box (or None)."""
+        key_coordinates_info = dict()
+        for field, key_text_list in keys_dict.items():
+            last_key_coordinates = None
+            for key_text, _, direction, kwargs in key_text_list:
+                if key_text not in key_text_info:
+                    last_key_coordinates = None
+                    continue
+                last_key_coordinates = getattr(self, 'key_{0}'.format(direction))(
+                    key_text_info[key_text],
+                    last_key_coordinates,
+                    **kwargs)
+            key_coordinates_info[field] = last_key_coordinates
+        return key_coordinates_info
+    def get_target_fields(self, pdf_text_list, pdf_img_list):
+        """Extract every configured field and group the results per page.
+        Args:
+            pdf_text_list: page-number string -> list of (box, text) pairs.
+            pdf_img_list: page-number string -> list of (box, text) pairs for
+                image blocks.
+        Returns:
+            dict mapping 'page_<n+1>' (or 'page_12' for the '-1' layout) to
+            ``{field: {'words': value, 'position': [x0, y0, x1, y1]}}``;
+            fields that are not found carry their default value and a copy of
+            ``default_position``.
+        """
+        pdf_result = dict()
+        for pno_str, fields_dict in self.target_fields.items():
+            is_last_pno = pno_str == '-1'
+            # With no text pages at all the last page cannot be resolved;
+            # pno_str stays '-1', nothing matches and defaults are returned.
+            if is_last_pno and pdf_text_list:
+                pno_str = str(max(int(page_no) for page_no in pdf_text_list))
+            page_text = pdf_text_list.get(pno_str, [])
+            page_img = pdf_img_list.get(pno_str, [])
+            keys_dict = fields_dict[self.keys_str]
+            key_text_info = self._find_key_boxes(keys_dict, page_text)
+            key_coordinates_info = self._resolve_key_coordinates(keys_dict, key_text_info)
+            page_result = dict()
+            for field, (source, direction, kwargs, default_value) in fields_dict[self.value_str].items():
+                value, coordinates = None, None
+                key_coordinates = key_coordinates_info.get(field)
+                if isinstance(key_coordinates, tuple):
+                    search_list = page_text if source == self.text_str else page_img
+                    value, coordinates = getattr(self, 'value_{0}'.format(direction))(
+                        search_list,
+                        key_coordinates,
+                        **kwargs
+                    )
+                if isinstance(value, str):
+                    page_result[field] = {
+                        self.words_str: value,
+                        self.position_str: list(coordinates),
+                    }
+                else:
+                    page_result[field] = {
+                        self.words_str: default_value,
+                        # Copy so results never alias the shared default list.
+                        self.position_str: list(self.default_position),
+                    }
+            # NOTE(review): the last-page layout is always reported as
+            # 'page_12' -- presumably a key fixed by downstream consumers.
+            page_key = 'page_12' if is_last_pno else 'page_{0}'.format(int(pno_str) + 1)
+            pdf_result[page_key] = page_result
+        return pdf_result
--- a/src/common/fsm_econtract/tools.py 0 → 100644
View file @9f4b364
+++ b/src/common/fsm_econtract/tools.py 0 → 100644
View file @9f4b364
+def pdf_info_rebuild(pdf_info, fix_bbox=True):
+    """Split per-page PDF block data into text spans and image blocks.
+    Args:
+        pdf_info: page-number string -> page dict with a 'blocks' list. Blocks
+            of type 0 carry 'lines' -> 'spans' (each with 'bbox', 'text' and
+            'size'); blocks of type 1 carry a 'bbox'.
+        fix_bbox: when true, a span whose bbox is shorter than its font size
+            gets its bottom edge extended by the font size.
+    Returns:
+        (text_info, img_info): text_info maps page -> list of [bbox, text],
+        img_info maps page -> list of (bbox, '有'). Pages without entries are
+        absent. ``pdf_info`` itself is never modified.
+    """
+    pdf_text_info = dict()
+    pdf_img_info = dict()
+    for pno_str, page_info in pdf_info.items():
+        for block in page_info['blocks']:
+            if block['type'] == 0:
+                # The same text can repeat inside one block; keep the first.
+                seen_texts = set()
+                for line in block['lines']:
+                    for span in line['spans']:
+                        bbox, text = span['bbox'], span['text'].strip()
+                        if not text or text in seen_texts:
+                            continue
+                        seen_texts.add(text)
+                        # The reported bbox height is unreliable. Build a new
+                        # list instead of writing into the span's bbox, which
+                        # may be an immutable tuple.
+                        if fix_bbox and bbox[-1] - bbox[1] < span['size']:
+                            bbox = [*bbox[:-1], bbox[-1] + span['size']]
+                        pdf_text_info.setdefault(pno_str, list()).append([bbox, text])
+            elif block['type'] == 1:
+                pdf_img_info.setdefault(pno_str, list()).append((block['bbox'], '有'))
+    return pdf_text_info, pdf_img_info
\ No newline at end of file
--- a/src/common/tools/mssql_script24.py 0 → 100644
View file @9f4b364
+++ b/src/common/tools/mssql_script24.py 0 → 100644
View file @9f4b364
+import pyodbc
+# One-off schema migration: adds the fsm_wep_ocr / fsm_msi_ocr / fsm_sc_ocr
+# columns to the OCR result tables of the HIL and AFC databases.
+hil_sql = """
+    ALTER TABLE hil_ocr_result ADD fsm_wep_ocr nvarchar(max);
+    ALTER TABLE hil_ocr_result ADD fsm_msi_ocr nvarchar(max);
+    ALTER TABLE hil_ocr_result ADD fsm_sc_ocr nvarchar(max);
+    ALTER TABLE hil_se_ocr_result ADD fsm_wep_ocr nvarchar(max);
+    ALTER TABLE hil_se_ocr_result ADD fsm_msi_ocr nvarchar(max);
+    ALTER TABLE hil_se_ocr_result ADD fsm_sc_ocr nvarchar(max);
+"""
+afc_sql = """
+    ALTER TABLE afc_ocr_result ADD fsm_wep_ocr nvarchar(max);
+    ALTER TABLE afc_ocr_result ADD fsm_msi_ocr nvarchar(max);
+    ALTER TABLE afc_ocr_result ADD fsm_sc_ocr nvarchar(max);
+    ALTER TABLE afc_se_ocr_result ADD fsm_wep_ocr nvarchar(max);
+    ALTER TABLE afc_se_ocr_result ADD fsm_msi_ocr nvarchar(max);
+    ALTER TABLE afc_se_ocr_result ADD fsm_sc_ocr nvarchar(max);
+"""
+def _run_migration(connection_string, sql):
+    """Execute ``sql`` on a new autocommit connection and always release it."""
+    cnxn = pyodbc.connect(connection_string, autocommit=True)
+    try:
+        cursor = cnxn.cursor()
+        try:
+            cursor.execute(sql)
+        finally:
+            cursor.close()
+    finally:
+        # Closed even when execute() raises, so no connection is left open.
+        cnxn.close()
+# NOTE(review): both connection strings name only the driver -- presumably
+# server and credentials are filled in before the script is run; confirm.
+_run_migration('DRIVER={ODBC Driver 17 for SQL Server};', hil_sql)
+_run_migration('DRIVER={ODBC Driver 17 for SQL Server};', afc_sql)
--- a/src/pos/views.py
View file @9f4b364
+++ b/src/pos/views.py
View file @9f4b364
@@ -8,13 +8,16 @@ from common.tools.comparison import cp
 from common.mixins import LoggerMixin
 from rest_framework.permissions import IsAuthenticated
 from apps.account.authentication import OAuth2AuthenticationWithUser
+from apps.doc.models import NscInvoice
+import json
+from datetime import datetime
 params = {
    'invoiceCode': fields.Str(required=True, validate=validate.Length(max=128)),
    'invoiceNumber': fields.Str(required=True, validate=validate.Length(max=64)),
    'issueDate': CustomDate(required=True),
    'buyerName': fields.Str(required=True, validate=validate.Length(max=64)),
-    "buyerId": fields.Int(required=True),
+    "buyerId": fields.Str(required=True, validate=validate.Length(max=64)),
    'vin': fields.Str(required=True, validate=validate.Length(max=128)),
    'dealer': fields.Str(required=False, validate=validate.Length(max=64)),
    'priceWithVat': CustomDecimal(required=True),
@@ -29,7 +32,7 @@ input_args = {
 }
-# poss 接口接收NSC 发票信息
+# pos 接口接收NSC 发票信息
 class NSCInvoiceView(GenericView):
    permission_classes = [IsAuthenticated]
    authentication_classes = [OAuth2AuthenticationWithUser]
@@ -50,6 +53,7 @@ class NSCInvoiceView(GenericView):
        vat = content.get('vat', 0.0)
        vat_rate = content.get('vatRate', 0.0)
+        NscInvoice.objects.create(vin=vin, content=json.dumps(content), create_time=datetime.now())
        return response.ok()
@@ -90,11 +94,17 @@ class DeMortgageView(GenericView):
            'applicationName': application_name,
            'deMortgageDate': de_mortgage_date
        }
-        de_mortgage_info = {}
+        de_mortgage_info = {'customer_name':'','applicationName':'','deMortgageDate':''}
        # 绿本必须分开ocr
        for file_obj in files:
            info = PosHandler.de_mortgage_ocr_process1(file_obj)
-            de_mortgage_info.update(info)
+            if info.get('customerName') is not '':
+                de_mortgage_info['customerName'] = info.get('customerName')
+            if info.get('applicationName') is not '':
+                de_mortgage_info['applicationName'] = info.get('applicationName')
+            if info.get('deMortgageDate') is not '':
+                de_mortgage_info['deMortgageDate'] = info.get('deMortgageDate')    
+            #de_mortgage_info.update(info)
        request_pass = True
        fields_result = []