0b550510 by 王聪

Merge branch 'feature/uat-tmp' of http://gitlab.situdata.com/zhouweiqi/bmw-ocr into feature/uat-tmp

2 parents 2d500b7a 76b3bbd5
......@@ -1168,6 +1168,7 @@ MS_ERROR_COL = (5, 6)
WECHART_CLASSIFY = 12
NEW_ZHIFUBAO_CLASSIFY = 48
ALI_WECHART_CLASSIFY = {12, 13, 48}
JSYH_CLASSIFY = {11, 27, 34}
WECHART_ERROR_COL = (1, 2)
SPECIAL_HEADERS_MAPPING = copy.deepcopy(HEADERS_MAPPING)
SPECIAL_HEADERS_MAPPING.update(
......
......@@ -2009,6 +2009,9 @@ class Command(BaseCommand, LoggerMixin):
report_list[5] = BSCheckResult.CHECK_FAILED.value
finally:
self.online_log.info('{0} [task={1}] [license_summary={2}] '
'[contract_result_compare={3}]'.format(self.log_base, task_str,
license_summary, contract_result_compare))
self.rebuild_contract(license_summary, contract_result_compare)
bs_rebuild = self.rebuild_bs(merged_bs_summary)
......
......@@ -27,6 +27,7 @@ class RequestTeam(NamedEnum):
SETTLEMENT = (1, 'SETTLEMENT')
CONTRACTMANAGEMENT = (2, 'CONTRACTMANAGEMENT')
CONTROLLING = (3, 'CONTROLLING')
INSURANCE = (4, 'INSURANCE')
class RequestTrigger(NamedEnum):
......
......@@ -39,6 +39,18 @@ class BSWorkbook(Workbook):
self.MAX_MEAN = 31
self.need_follow = False
# @staticmethod
# def date_calibration(date_str):
# result = True
# try:
# if date_str[-2] not in ['20', '21']:
# result = False
# if date_str[-5:-3] not in ['03', '06', '09', '12']:
# result = False
# except Exception as e:
# result = False
# return result
@staticmethod
def replace_newline(queryset_value):
new_set = set()
......@@ -338,7 +350,7 @@ class BSWorkbook(Workbook):
metadata_rows.append(('Producer', producer))
if len(author) > 0:
metadata_highlight_row.append(6)
if 'iText' not in producer and 'Qt' not in producer and 'Haru Free' not in producer:
if 'iText' not in producer and 'Qt' not in producer and 'Haru Free' not in producer and 'OpenPDF' not in producer:
metadata_highlight_row.append(7)
metadata_rows.append(self.blank_row)
......@@ -496,6 +508,7 @@ class BSWorkbook(Workbook):
tmp_ws = self.create_sheet('tmp_ws')
tmp2_ws = self.create_sheet('tmp2_ws')
tmp3_ws = self.create_sheet('tmp3_ws')
if classify in consts.ALI_WECHART_CLASSIFY:
high_light_keyword = self.wechat_keyword
else:
......@@ -629,7 +642,7 @@ class BSWorkbook(Workbook):
# 关键词1提取
if summary_cell_value in self.interest_keyword:
new_amount_cell_value = None if amount_cell is None else amount_cell.value
ms.append((summary_cell_value, date_cell_value, new_amount_cell_value))
tmp3_ws.append((summary_cell_value, date_cell_value, new_amount_cell_value))
# 关键词2提取至临时表
elif summary_cell_value in self.salary_keyword:
new_amount_cell_value = None if amount_cell is None else amount_cell.value
......@@ -657,6 +670,18 @@ class BSWorkbook(Workbook):
# if summary_cell_idx is not None:
# new_ws[row][summary_cell_idx].fill = self.amount_fill
# 关键词1信息提取:结息
for row in tmp3_ws.iter_rows(values_only=True):
ms.append(row)
# # 建设银行
# if classify in consts.JSYH_CLASSIFY:
# if isinstance(row[1], str) and self.date_calibration(row[1]):
# pass
# else:
# for cell in ms[ms.max_row]:
# cell.fill = self.amount_fill
self.remove(tmp3_ws)
# 关键词2信息提取
ms.append(self.blank_row)
ms.append(self.salary_keyword_header)
......
......@@ -257,6 +257,7 @@ se_compare_content = {
'fsmSpecialCar': fields.Boolean(required=False),
'fsmBestPrice': fields.Boolean(required=False),
'isAutoSettlement': fields.Boolean(required=False),
'fsmLandingDealer': fields.Str(required=False, validate=validate.Length(max=1024)),
'individualCusInfo': fields.List(fields.Nested(se_individual_args),
required=True, validate=validate.Length(min=1, max=4)),
......@@ -592,12 +593,12 @@ class UploadDocView(GenericView, DocHandler):
if business_type == consts.HIL_PREFIX:
if document_scheme == RequestTeam.ACCEPTANCE.name:
result_class = HILOCRResult
elif document_scheme == RequestTeam.SETTLEMENT.name:
elif document_scheme == RequestTeam.SETTLEMENT.name or document_scheme == RequestTeam.INSURANCE.name:
result_class = HILSEOCRResult
elif business_type == consts.AFC_PREFIX:
if document_scheme == RequestTeam.ACCEPTANCE.name:
result_class = AFCOCRResult
elif document_scheme == RequestTeam.SETTLEMENT.name:
elif document_scheme == RequestTeam.SETTLEMENT.name or document_scheme == RequestTeam.INSURANCE.name:
result_class = AFCSEOCRResult
ocr_result_obj = result_class.objects.filter(application_id=application_id).first()
......@@ -610,6 +611,7 @@ class UploadDocView(GenericView, DocHandler):
ocr_result_obj.fsm_activited = 1
ocr_result_obj.save()
self.running_log.info('[doc upload applicationId-{0}] [ocr result saved]'.format(application_id))
if data_source == consts.DATA_SOURCE_LIST[1]:
if document_name.endswith('-证书.pdf') or document_name.endswith('-证书'):
self.running_log.info('[doc upload success] [eapp license skip] [args={0}]'.format(args))
......@@ -880,6 +882,7 @@ class SECompareView(GenericView, PreSEHandler):
fsm_flag = content.get('fsmFlag', False)
fsm_special_car = content.get('fsmSpecialCar', False)
fsm_best_price = content.get('fsmBestPrice', False)
fsm_landing_dealer = content.get('fsmLandingDealer')
if fsm_special_car:
compare_result = {
......
......@@ -62,6 +62,7 @@ class Finder:
},
"page_3": {"合同编号": self.item,
"还款计划表": self.item,
"车辆代理商": self.item,
},
"page_4": {"合同编号": self.item,
"附加产品融资贷款本金总金额明细": self.item,
......@@ -71,6 +72,7 @@ class Finder:
"page_6": {"合同编号": self.item,
},
}
if self.is_asp:
self.init_result["page_7"] = {"合同编号": self.item,
}
self.init_result["page_8"] = {"合同编号": self.item,
......@@ -90,6 +92,25 @@ class Finder:
"日期": self.item,
},
}
else:
self.init_result["page_7"] = {"合同编号": self.item,
"主借人签字": {"签字": self.item,
"日期": self.item,
},
"共借人签字": {"签字": self.item,
"日期": self.item,
},
"保证人1签字": {"签字": self.item,
"日期": self.item,
},
"保证人2签字": {"签字": self.item,
"日期": self.item,
},
"见证人签字": {"签字": self.item,
"日期": self.item,
},
}
def get_top_iou(self, poly, ocr_result):
"""传入一个多边形, 找到与之最匹配的多边形
......@@ -398,6 +419,39 @@ class Finder:
seller['words'] = text
return seller
def get_cldls(self):
    """Find the value printed next to the "车辆代理商" label.

    The page stored under key ``'2'`` of ``self.pdf_info`` is scanned in two
    passes:

    1. Find the first text span whose stripped text is exactly
       ``'车辆代理商'``; its bbox becomes the anchor.
    2. Find the first text span whose centre lies to the right of the
       anchor's right edge, left of the page's horizontal midpoint, and
       vertically between the anchor's top and bottom edges.

    Returns:
        A copy of ``self.item``. When a value span is found, ``'position'``
        is set to that span's bbox and ``'words'`` to its text; otherwise
        the copy is returned untouched.
    """
    seller = self.item.copy()
    page = self.pdf_info['2']

    def iter_text_spans():
        # Only blocks whose type is 0 are inspected; every other block type
        # is skipped before its 'lines' entry is touched.
        for block in page['blocks']:
            if block['type'] != 0:
                continue
            for line in block['lines']:
                yield from line['spans']

    # Step 1: locate the key (label) span and stop at the first match.
    anchor_bbox = next(
        (span['bbox'] for span in iter_text_spans()
         if span['text'].strip() == '车辆代理商'),
        None,
    )
    # Explicit None check: truthiness of a bbox is not a reliable
    # "was it found" signal (and is ambiguous for array-like bboxes).
    if anchor_bbox is None:
        return seller

    # Step 2: match the value span relative to the key span.
    # NOTE(review): assumes bbox is laid out as (x0, y0, x1, y1), so that
    # bbox[::2] are the x coordinates and bbox[1::2] the y coordinates.
    half_width = page['width'] * 0.5
    for span in iter_text_spans():
        bbox = span['bbox']
        center_x = np.mean(bbox[::2])
        center_y = np.mean(bbox[1::2])
        right_of_label = anchor_bbox[2] < center_x < half_width
        on_label_row = anchor_bbox[1] < center_y < anchor_bbox[3]
        if right_of_label and on_label_row:
            seller['position'] = bbox
            seller['words'] = span['text']
            break
    return seller
def get_borrower_collection_account(self):
account = self.item.copy()
account_name = self.item.copy()
......@@ -885,6 +939,9 @@ class Finder:
repayment_schedule_table = self.get_repayment_schedule()
# print(repayment_schedule_table)
self.init_result['page_3']['还款计划表'] = repayment_schedule_table
# 车辆代理商
cldls = self.get_cldls()
self.init_result['page_3']['车辆代理商'] = cldls
#######################################
# Page 4
# 找合同编号
......@@ -907,6 +964,8 @@ class Finder:
contract_no = self.get_contract_no(page_num='5')
# print(contract_no)
self.init_result['page_6']['合同编号'] = contract_no
if self.is_asp:
# Page 7
# 找合同编号
contract_no = self.get_contract_no(page_num='6')
......@@ -944,6 +1003,41 @@ class Finder:
# print(signature_name, signature_date)
self.init_result['page_8']['见证人签字']['签字'] = signature_name
self.init_result['page_8']['见证人签字']['日期'] = signature_date
else:
# Page 7
# 找合同编号
contract_no = self.get_contract_no(page_num='6')
self.init_result['page_7']['合同编号'] = contract_no
signature_name, signature_date = self.get_last_page_signature(page_num='6',
top='合同编号', bottom='共同借款人')
if signature_name['words'] == None:
signature_name, signature_date = self.get_last_page_signature(page_num='6',
top='合同编号', bottom='共同借款人(抵押人)')
# print(signature_name, signature_date)
self.init_result['page_7']['主借人签字']['签字'] = signature_name
self.init_result['page_7']['主借人签字']['日期'] = signature_date
signature_name, signature_date = self.get_last_page_signature(page_num='6',
top='共同借款人', bottom='保证人1')
if signature_name['words'] == None:
signature_name, signature_date = self.get_last_page_signature(page_num='6',
top='共同借款人(抵押人)', bottom='保证人1')
# print(signature_name, signature_date)
self.init_result['page_7']['共借人签字']['签字'] = signature_name
self.init_result['page_7']['共借人签字']['日期'] = signature_date
signature_name, signature_date = self.get_last_page_signature(page_num='6',
top='保证人1', bottom='保证人2')
self.init_result['page_7']['保证人1签字']['签字'] = signature_name
self.init_result['page_7']['保证人1签字']['日期'] = signature_date
signature_name, signature_date = self.get_last_page_signature(page_num='6',
top='保证人2', bottom='在本人面前亲笔签署本合同')
self.init_result['page_7']['保证人2签字']['签字'] = signature_name
self.init_result['page_7']['保证人2签字']['日期'] = signature_date
signature_name, signature_date = self.get_last_page_signature(page_num='6',
top='在本人面前亲笔签署本合同', bottom='以下无正文')
# print(signature_name, signature_date)
self.init_result['page_7']['见证人签字']['签字'] = signature_name
self.init_result['page_7']['见证人签字']['日期'] = signature_date
# 重新定制输出
new_results = {"is_asp": self.is_asp,
......
......@@ -6,6 +6,7 @@ class HMHRetriever:
def __init__(self):
self.words_str = 'words'
self.position_str = 'location'
self.fix_hava_str = '有'
self.default_position = [0, 0, 0, 0]
self.search_fields_list = [
('借款/承租人姓名', ''),
......@@ -51,7 +52,8 @@ class HMHRetriever:
for name_date_tuple in name_date_list:
if len(name_date_tuple) == 2:
result[self.search_fields_list[4][0]] = {
self.words_str: '{0} {1}'.format(name_date_tuple[0].replace('\u3000', '').strip(), name_date_tuple[1]),
# self.words_str: '{0} {1}'.format(name_date_tuple[0].replace('\u3000', '').strip(), name_date_tuple[1]),
self.words_str: self.fix_hava_str,
self.position_str: bbox
}
is_find_name_date = True
......
Styling with Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!