diff --git a/票据理赔自动化/main.py b/票据理赔自动化/main.py index 79beaf4..1c62903 100644 --- a/票据理赔自动化/main.py +++ b/票据理赔自动化/main.py @@ -20,6 +20,7 @@ import cv2 import numpy from dateutil.parser import parse from jinja2 import Environment, FileSystemLoader +from jionlp import parse_location from zen import ZenDecision, ZenEngine from utils.client import Authenticator, HTTPClient @@ -28,666 +29,6 @@ from utils.client import Authenticator, HTTPClient # from utils.ocr import fuzzy_match -def bankcard_extraction(**kwargs) -> dict | None: - """银行卡数据提取""" - - # 影像件全局唯一标识:优先使用关键词变量,其次使用全局变量,再次使用随机唯一标识 - image_guid = kwargs.get( - "image_guid", globals().get("image_guid", uuid.uuid4().hex.upper()) - ) - - # 影像件格式 - image_format = kwargs.get("image_format", globals()["image_format"]) - if image_format is None: - raise RuntimeError("请入参:image_format") - - # 影像件BASE64编码 - image_base64 = kwargs.get("image_base64", globals()["image_base64"]) - if image_base64 is None: - raise RuntimeError("请入参:image_base64") - - # 请求深圳快瞳银行卡识别接口 - response = globals()["http_client"].post( - url=(url := "https://ai.inspirvision.cn/s/api/ocr/bankCard"), - headers={"X-RequestId-Header": image_guid}, - data={ - "token": globals()["authenticator"].get_token(servicer="szkt"), - "imgBase64": f"data:image/{image_format};base64,{image_base64}", - }, - guid=hashlib.md5((url + image_guid).encode("utf-8")).hexdigest().upper(), - ) - - # 若响应非成功,则返回NONE - if not (response.get("status") == 200 and response.get("code") == 0): - return None - - extraction = { - "卡类型": {"1": "借记卡", "2": "贷记卡"}.get( - response["data"]["bankCardType"], "其它" - ), # 0不能识别、3准贷记卡、4预付卡合并为其它 - "银行名称": response["data"]["bankInfo"], - "卡号": response["data"]["cardNo"].replace(" ", ""), - } - - return extraction - - -def invoice_extraction(**kwargs) -> dict | None: - """增值税发票/收费票据数据提取""" - - # 影像件全局唯一标识:优先使用关键词变量,其次使用全局变量,再次使用随机唯一标识 - image_guid = kwargs.get( - "image_guid", globals().get("image_guid", uuid.uuid4().hex.upper()) - ) - - # 影像件格式 - image_format = kwargs.get("image_format", globals()["image_format"]) - if image_format is None: - return None - - # 影像件BASE64编码 - image_base64 = kwargs.get("image_base64", globals()["image_base64"]) - if image_base64 is None: - return None - - try: - # 请求深圳快瞳票据查验接口(兼容增值税发票、医疗门诊/住院收费票据) - response = globals()["http_client"].post( - url=(url := "https://ai.inspirvision.cn/s/api/ocr/invoiceCheckAll"), - headers={"X-RequestId-Header": image_guid}, - data={ - "token": globals()["authenticator"].get_token(servicer="szkt"), - "imgBase64": f"data:image/{image_format};base64,{image_base64}", - }, - guid=hashlib.md5((url + image_guid).encode("utf-8")).hexdigest().upper(), - ) - if not (response.get("status") == 200 and response.get("code") == 10000): - raise RuntimeError("深圳快瞳票据查验发生异常") - - match response["data"]["productCode"]: - case "003082": # 增值税发票 - extraction = { - "票据类型": { - "10108": "数电票", - "10101": "增值税普通发票", - "10100": "增值税专用发票", - "30100": "数电票", - "30101": "数电票", - "30104": "增值税专用发票", - "30105": "数电票", - "10106": "区块链电子发票", - "30109": "数电票", - "30121": "增值税普通发票", - "10102": "增值税普通发票", - "10103": "增值税普通发票", - "10107": "数电票", - }.get(response["data"]["type"], "其它增值税发票"), - "票据号码": response["data"]["details"]["number"], - "票据代码": ( - code if (code := response["data"]["details"]["code"]) else None - ), # 深圳快瞳票据查验中数电票票据代码为空字符,转为NONE - "开票日期": datetime.strptime( - response["data"]["details"]["date"], "%Y年%m月%d日" - ).strftime( - "%Y-%m-%d" - ), # 深圳快瞳票据查验中就增值税发票开票日期格式为%Y年%m月%d日,转为%Y-%m-%d - "校验码": response["data"]["details"]["check_code"], - "收款方": response["data"]["details"]["seller"], - "付款方": response["data"]["details"]["buyer"], - "票据金额": format( - Decimal(response["data"]["details"]["total"]).quantize( - Decimal("0.00"), - rounding=ROUND_HALF_UP, - ), - ".2f", - ), - "查验状态": ( - "真票" - if response["data"]["details"]["invoiceTypeNo"] == "0" - else "红票" - ), - "备注": ( - remark - if (remark := response["data"]["details"]["remark"]) - else None - ), # 深圳快瞳票据查验中增值税发票备注可能为空字符,转为NONE - "项目": [ - { - "名称": item["name"], - "规格": ( - specification - if (specification := item["specification"]) - else None - ), - "单位": unit if (unit := item["unit"]) else None, - "数量": ( - format( - Decimal(quantity).quantize( - Decimal("0.00"), - rounding=ROUND_HALF_UP, - ), - ".2f", - ) - if (quantity := item["quantity"]) - else None - ), - "金额": format( - ( - Decimal(item["total"]) + Decimal(item["tax"]) - ).quantize( - Decimal("0.00"), - rounding=ROUND_HALF_UP, - ), - ".2f", - ), # 价税合计 - } - for item in response["data"]["details"].get("items", []) - ], - } # 深圳快瞳票据查验中就部分增值税发票仅可查,数据标准化抛出异常 - return extraction - - case "003081": # 门诊/住院收费票据 - extraction = { - "票据类型": ( - "门诊收费票据" - if "门诊" in response["data"]["billName"] - else "住院收费票据" - ), - "票据号码": response["data"]["billNumber"], - "票据代码": response["data"]["billCode"], - "开票日期": response["data"][ - "invoiceDate" - ], # 深圳快瞳票据查验中就收费票据开票日期格式为%Y-%m-%d - "校验码": response["data"]["checkCode"], - "收款方": response["data"]["payeeName"], - "付款方": response["data"]["payer"], - "票据金额": format( - Decimal(response["data"]["amount"]).quantize( - Decimal("0.00"), - rounding=ROUND_HALF_UP, - ), - ".2f", - ), - "查验状态": {"true": "真票", "false": "红票"}[ - response["data"]["flushedRed"] - ], - "备注": response["data"].get("remark"), - "医保支付": format( - Decimal(response["data"].get("medicarePay", "0.00")).quantize( - Decimal("0.00"), - rounding=ROUND_HALF_UP, - ), - ".2f", - ), - "其它支付": format( - Decimal(response["data"].get("otherPayment", "0.00")).quantize( - Decimal("0.00"), - rounding=ROUND_HALF_UP, - ), - ".2f", - ), - "个人自付": format( - Decimal(response["data"].get("personalPay", "0.00")).quantize( - Decimal("0.00"), - rounding=ROUND_HALF_UP, - ), - ".2f", - ), - "自付一": format( - Decimal(response["data"].get("self_pay_one", "0.00")).quantize( - Decimal("0.00"), - rounding=ROUND_HALF_UP, - ), - ".2f", - ), # 深圳快瞳票据查验中就部分地区无自付一 - "自付二": format( - Decimal( - response["data"].get("classificationPays", "0.00") - ).quantize( - Decimal("0.00"), - rounding=ROUND_HALF_UP, - ), - ".2f", - ), # 深圳快瞳票据查验中就部分地区无自付二 - "个人自费": format( - Decimal( - response["data"].get("personalExpense", "0.00") - ).quantize( - Decimal("0.00"), - rounding=ROUND_HALF_UP, - ), - ".2f", - ), - "住院日期": ( - parse(date.split("-")[0]).strftime("%Y-%m-%d") - if (date := response["data"].get("hospitalizationDate")) - else None - ), # 深圳快瞳票据查验中就收费票据住院日期格式为%Y%m%d-%Y%m%d,即住院日期-出院日期 - "出院日期": ( - parse(date.split("-")[1]).strftime("%Y-%m-%d") if date else None - ), - "医疗机构类型": response["data"]["institutionsType"], - "项目": [ - { - "名称": item["itemName"], - "规格": item[ - "medical_level" - ], # 甲类无自付、乙类有自付、丙类全自付 - "单位": item["unit"], - "数量": format( - Decimal(item["number"]).quantize( - Decimal("0.00"), - rounding=ROUND_HALF_UP, - ), - ".2f", - ), - "金额": format( - Decimal(item["totalAmount"]).quantize( - Decimal("0.00"), - rounding=ROUND_HALF_UP, - ), - ".2f", - ), - } - for item in response["data"]["feedetails"] - ], - } - return extraction - # 若请求深圳快瞳票据查验接口或解析发生异常,则根据影像件类型请求深圳快瞳增值税发票/收费票据识别接口 - except: - # 影像件类型 - image_type = kwargs.get("image_type", globals()["image_type"]) - if image_type is None: - return None - - match image_type: - case "增值税发票": - try: - # 请求深圳快瞳增值税发票识别接口 - response = globals()["http_client"].post( - url=(url := "https://ai.inspirvision.cn/s/api/ocr/vatInvoice"), - headers={"X-RequestId-Header": image_guid}, - data={ - "token": globals()["authenticator"].get_token( - servicer="szkt" - ), - "imgBase64": f"data:image/{image_format};base64,{image_base64}", - }, - guid=hashlib.md5((url + image_guid).encode("utf-8")) - .hexdigest() - .upper(), - ) - # 若深圳快瞳增值税发票识别响应非成功则返回NONE - if not ( - response.get("status") == 200 and response.get("code") == 0 - ): - return None - - extraction = { - "票据类型": ( - invoice_type := ( - data := { - item["desc"]: item["value"] - for item in response["data"] - } - ).get("发票类型") - ), - "票据号码": (number := data.get("发票号码")), - "票据代码": data.get("发票代码"), - "开票日期": ( - datetime.strptime(date, "%Y年%m月%d日").strftime("%Y-%m-%d") - if re.match( - r"\d{4}年\d{1,2}月\d{1,2}日", - (date := data.get("开票日期")), - ) - else date - ), - "校验码": ( - check_code if (check_code := data.get("校验码")) else number - ), # 若校验码为空则默认为票据号码 - "收款方": data.get("销售方名称"), - "付款方": data.get("购买方名称"), - "票据金额": format( - Decimal( - data.get("小写金额").replace("¥", "") - if invoice_type == "电子发票(普通发票)" - else data.get("合计金额(小写)") - ).quantize( - Decimal("0.00"), - rounding=ROUND_HALF_UP, - ), - ".2f", - ), - "备注": remark if (remark := data.get("备注")) else None, - "项目": ( - [ - { - "名称": name, - "规格": specification if specification else None, - "单位": unit if unit else None, - "数量": ( - format( - Decimal(quantity).quantize( - Decimal("0.00"), - rounding=ROUND_HALF_UP, - ), - ".2f", - ) - if quantity - else None - ), - "金额": format( - (Decimal(amount) + Decimal(tax)).quantize( - Decimal("0.00"), - rounding=ROUND_HALF_UP, - ), - ".2f", # 价税合计,保留两位小数 - ), - } - for name, specification, unit, quantity, amount, tax in zip( - [ - component["value"] - for component in response["data"] - if re.match( - r"^项目名称(\d+)?$", - component["desc"], - ) - ], - [ - component["value"] - for component in response["data"] - if re.match( - r"^规格型号(\d+)?$", - component["desc"], - ) - ], - [ - component["value"] - for component in response["data"] - if re.match( - r"^单位(\d+)?$", - component["desc"], - ) - ], - [ - component["value"] - for component in response["data"] - if re.match( - r"^数量(\d+)?$", - component["desc"], - ) - ], - [ - component["value"] - for component in response["data"] - if re.match( - r"^金额(\d+)?$", - component["desc"], - ) - ], - [ - component["value"] - for component in response["data"] - if re.match( - r"^税额(\d+)?$", - component["desc"], - ) - ], - ) - ] - if invoice_type == "电子发票(普通发票)" - else [ - { - "名称": name, - "数量": format( - Decimal(quantity).quantize( - Decimal("0.00"), - rounding=ROUND_HALF_UP, - ), - "0.2f", - ), - "金额": format( - Decimal(amount).quantize( - Decimal("0.00"), - rounding=ROUND_HALF_UP, - ), - ".2f", - ), - } - for name, quantity, amount in zip( - [ - component["value"] - for component in response["data"] - if re.match( - r"^项目名称明细(\d+)?$", - component["desc"], - ) - ], - [ - component["value"] - for component in response["data"] - if re.match( - r"^项目数量明细(\d+)?$", - component["desc"], - ) - ], - [ - component["value"] - for component in response["data"] - if re.match( - r"^项目金额明细(\d+)?$", - component["desc"], - ) - ], - ) - ] - ), - "查验状态": "无法查验", - } - return extraction - except: - return None - - case _: - try: - # 请求深圳快瞳收费票据识别接口 - response = globals()["http_client"].post( - url=(url := "https://ai.inspirvision.cn/s/api/ocr/medical"), - headers={"X-RequestId-Header": image_guid}, - data={ - "token": globals()["authenticator"].get_token( - servicer="szkt" - ), - "imgBase64": f"data:image/{image_format};base64,{image_base64}", - }, - guid=hashlib.md5((url + image_guid).encode("utf-8")) - .hexdigest() - .upper(), - ) - # 若深圳快瞳收费票据识别响应非成功则返回NONE - if not ( - response.get("status") == 200 and response.get("code") == 0 - ): - return None - - extraction = { - "票据类型": ( - "门诊收费票据" - if response["data"]["insured"]["receipt_outpatient"] - else "住院收费票据" - ), - "票据号码": ( - receipt := ( - response["data"]["insured"]["receipt_outpatient"] - or response["data"]["insured"][ - "receipt_hospitalization" - ] - )["receipts"][0] - )["receipt_no"][ - "value" - ], # 默认提取门诊/住院收费票据的第一张票据 - "票据代码": receipt["global_detail"]["invoice_code"]["value"], - "开票日期": receipt["global_detail"]["invoice_date"][ - "value" - ], # 深圳快瞳收费票据识别中就开票日期格式为%Y-%m-%d - "校验码": fuzzy_match( - target="校验码", - components=receipt["global_detail"]["region_specific"], - specify_key="name", - return_key="word.value", - ), - "收款方": receipt["hospital_name"]["value"], - "付款方": receipt["name"]["value"], - "票据金额": format( - Decimal(receipt["total_amount"]["value"]).quantize( - Decimal("0.00"), - rounding=ROUND_HALF_UP, - ), - ".2f", - ), - "医保支付": ( - format( - Decimal(field["value"]).quantize( - Decimal("0.00"), - rounding=ROUND_HALF_UP, - ), - ".2f", - ) - if isinstance((field := receipt.get("medicare_pay")), dict) - else None - ), - "其它支付": format( - ( - Decimal(value).quantize( - Decimal("0.00"), - rounding=ROUND_HALF_UP, - ) - if ( - value := fuzzy_match( - target="其它支付", - components=receipt.get("global_detail", {}).get( - "pay_list", [] - ), - specify_key="name", - return_key="word.value", - ) - ) - else None - ), - ".2f", - ), - "个人自付": ( - format( - Decimal(field["value"]).quantize( - Decimal("0.00"), - rounding=ROUND_HALF_UP, - ), - ".2f", - ) - if isinstance((field := receipt.get("self_pay")), dict) - else None - ), - "自付一": ( - format( - Decimal(field["value"]).quantize( - Decimal("0.00"), - rounding=ROUND_HALF_UP, - ), - ".2f", - ) - if isinstance(field := (receipt.get("self_pay_one")), dict) - else None - ), - "自付二": ( - format( - Decimal(field["value"]).quantize( - Decimal("0.00"), - rounding=ROUND_HALF_UP, - ), - ".2f", - ) - if isinstance(field := (receipt.get("self_pay_two")), dict) - else None - ), - "个人自费": ( - format( - Decimal(field["value"]).quantize( - Decimal("0.00"), - rounding=ROUND_HALF_UP, - ), - ".2f", - ) - if isinstance(field := (receipt.get("self_cost")), dict) - else None - ), - "住院日期": ( - datetime.strptime(field["value"], "%Y%m%d").strftime( - "%Y-%m-%d" - ) - if isinstance(field := (receipt.get("starttime")), dict) - else None - ), - "出院日期": ( - datetime.strptime(field["value"], "%Y%m%d").strftime( - "%Y-%m-%d" - ) - if isinstance(field := (receipt.get("endtime")), dict) - else None - ), - "医疗机构类型": receipt["others"]["medical_institution_type"][ - "value" - ], - "项目": [ - { - "名称": ( - field["value"] - if isinstance((field := item["item_name"]), dict) - else None - ), - "规格": ( - field["value"] - if isinstance( - (field := item["specifications"]), dict - ) - else None - ), - "单位": ( - field["value"] - if isinstance((field := item["unit"]), dict) - else None - ), - "数量": ( - format( - Decimal(field["value"]).quantize( - Decimal("0.00"), - rounding=ROUND_HALF_UP, - ), - ".2f", - ) - if isinstance((field := item["number"]), dict) - else None - ), - "金额": ( - format( - Decimal(field["value"]).quantize( - Decimal("0.00"), - rounding=ROUND_HALF_UP, - ), - ".2f", - ) - if isinstance((field := item["total_amount"]), dict) - else None - ), - } - for item in receipt["feeitems"] - ], - "查验状态": "无法查验", - } - return extraction - except: - return None - - def common_extraction(**kwargs) -> dict | None: """通用数据提取""" @@ -1135,22 +476,27 @@ if __name__ == "__main__": return image_base64, image_type, image_orientation # noinspection PyShadowingNames - def image_recognize(image_guid, image_format, image_base64, image_type) -> None: + def image_recognize( + image_index, image_guid, image_format, image_base64, insurance_branch, image_type + ) -> None: """ 影像件识别并整合至赔案档案 + :param image_index: 影像件编号 :param image_guid: 影像件唯一标识 :param image_format: 影像件格式 :param image_base64: 影像件BASE64编码 + :param insurance_branch: 保险分公司 :param image_type: 影像件类型 :return: 空 """ + # TODO: 后续添加居民身份证(国徽面)和居民身份证(头像面)合并 # noinspection PyShadowingNames def idcard_recognize( image_guid, image_format, image_base64, image_type ) -> None: """ - 居民身份证别并整合至赔案档案 + 居民身份证识别并整合至赔案档案 :param image_guid: 影像件唯一标识 :param image_format: 影像件格式 :param image_base64: 影像件BASE64编码 @@ -1168,7 +514,7 @@ if __name__ == "__main__": servicer="szkt" ), # 获取深圳快瞳访问令牌 "imgBase64": f"data:image/{image_format.lstrip(".")};base64,{image_base64}", - }, # 支持同时识别居民身份证正反面 + }, # 深圳快瞳支持同时识别居民国徽面和头像面 guid=md5((url + image_guid).encode("utf-8")).hexdigest().upper(), ) # 若响应非成功则抛出异常 @@ -1176,60 +522,773 @@ if __name__ == "__main__": if not (response.get("status") == 200 and response.get("code") == 0): raise RuntimeError("请求深圳快瞳居民身份证识别接口发生异常") - if image_type in ["居民身份证(正背面)", "居民身份证(正面)"]: - dossier["赔案层"]["申请人信息"].update( + if image_type in ["居民身份证(国徽、头像面)", "居民身份证(国徽面)"]: + # noinspection PyTypeChecker + dossier["出险人层"].update( { - "证件有效期起": datetime.strptime( - extraction["有效期起"], "%Y-%m-%d" + "有效期起": parse( + (period := response["data"]["validDate"].split("-"))[0] + ).strftime( + "%Y-%m-%d" + ), # 就有效期限解析为有效期起和有效期止。其中,若有效期止为长期则默认为9999-12-31 + "有效期止": ( + datetime(9999, 12, 31).strftime("%Y-%m-%d") + if period[1] == "长期" + else parse(period[1]).strftime("%Y-%m-%d") ), - "证件有效期止": ( - date - if (date := extraction["有效期止"]) == "长期" - else datetime.strptime(date, "%Y-%m-%d") - ), # 若证件有效期止为NONE默认为“长期”, } - ) # 原则上由影像件数据提取环节负责数据标准化,赔案档案数据填充环节负责数据机构化 + ) - if image_type in ["居民身份证(正背面)", "居民身份证(背面)"]: - dossier["赔案层"]["申请人信息"].update( + if image_type in ["居民身份证(国徽、头像面)", "居民身份证(头像面)"]: + # noinspection PyTypeChecker + dossier["出险人层"].update( { - "姓名": extraction["姓名"], + "姓名": response["data"]["name"], "证件类型": "居民身份证", - "证件号码": extraction["公民身份号码"], - "性别": extraction["性别"], + "证件号码": response["data"]["idNo"], + "性别": response["data"]["sex"], "出生": datetime.strptime( - extraction["出生"], "%Y-%m-%d" - ), # 默认日期格式为%Y-%m-%d - "省": (address := parse_location(extraction["住址"])).get( + response["data"]["birthday"], "%Y-%m-%d" + ), # 深圳快瞳居民身份证识别接口中出生由字符串(%Y.%m.%d)转为日期,日期格式默认为%Y-%m-%d + "省": ( + address := parse_location(response["data"]["address"]) + ).get( "province" - ), + ), # 就住址解析为省、地、县和详细地址 "地": address.get("city"), "县": address.get("county"), "详细地址": address.get("detail"), } ) - extraction = { - "姓名": response["data"]["name"], - "性别": response["data"]["sex"], - "民族": response["data"]["nation"], - "出生": response["data"][ - "birthday" - ], # 深圳快瞳居民身份证出生日期格式为%Y-%m-%d - "住址": response["data"]["address"], - "公民身份号码": response["data"]["idNo"], - "签发机关": response["data"]["issuedBy"], - "有效期起": parse( - (date := response["data"]["validDate"]).split("-")[0] - ).strftime( - "%Y-%m-%d" - ), # 深圳快瞳居民身份证识别中有效期日期格式为%Y.%m.%d,转为%Y-%m-%d - "有效期止": ( - date - if (date := date.split("-")[1]) == "长期" - else parse(date).strftime("%Y-%m-%d") - ), + # noinspection PyShadowingNames + def bankcard_recognize(image_guid, image_format, image_base64) -> None: + """ + 银行卡识别并整合至赔案档案 + :param image_guid: 影像件唯一标识 + :param image_format: 影像件格式 + :param image_base64: 影像件BASE64编码 + :return: 空 + """ + # 请求深圳快瞳居民身份证识别接口 + response = http_client.post( + url=(url := "https://ai.inspirvision.cn/s/api/ocr/bankCard"), + headers={"X-RequestId-Header": image_guid}, + data={ + "token": authenticator.get_token( + servicer="szkt" + ), # 获取深圳快瞳访问令牌 + "imgBase64": f"data:image/{image_format.lstrip(".")};base64,{image_base64}", + }, + guid=md5((url + image_guid).encode("utf-8")).hexdigest().upper(), + ) + # 若响应非成功或银行卡类型非借记卡则抛出异常 + # TODO: 若响应非成功则流转至人工处理 + if not ( + response.get("status") == 200 + and response.get("code") == 0 + and response.get("data", {}).get("bankCardType") == 1 + ): + raise RuntimeError( + "请求深圳快瞳居民身份证识别接口发生异常或已识别非借记卡" + ) + + # noinspection PyTypeChecker + dossier["受益人层"].update( + { + "开户行": response["data"]["bankInfo"], + "户名": None, + "户号": response["data"]["cardNo"].replace(" ", ""), + } + ) + + # noinspection PyShadowingNames + def receipt_recognize( + image_index, image_guid, image_format, image_base64, image_type + ) -> None: + """ + 票据识别并整合至赔案档案 + :param image_index: 影像件编号 + :param image_guid: 影像件唯一标识 + :param image_format: 影像件格式 + :param image_base64: 影像件BASE64编码 + :param image_type: 影像件类型 + :return: 空 + """ + # 初始化票据数据 + receipt = {"影像件编号": image_index} + + # 请求深圳快瞳票据查验接口(兼容增值税发票、医疗门诊/住院收费票据) + response = http_client.post( + url=(url := "https://ai.inspirvision.cn/s/api/ocr/invoiceCheckAll"), + headers={"X-RequestId-Header": image_guid}, + data={ + "token": authenticator.get_token(servicer="szkt"), # 获取深圳快瞳访问令牌 + "imgBase64": f"data:image/{image_format.lstrip(".")};base64,{image_base64}", + }, + guid=md5((url + image_guid).encode("utf-8")) + .hexdigest() + .upper(), + ) + # 若查验为真票或红票则直接整合至赔案档案 + if response.get("status") == 200 and response.get("code") == 10000: + pass + + # 若查验为假票或无法查验 + else: + if response.get("status") == 400 and (response.get("code") == 10100 or response.get("code") == 10001): + receipt["查验结果"] = "假票" + else: + receipt["查验结果"] = "无法查验" + + + + match response["data"]["productCode"]: + case "003082": # 增值税发票 + extraction = { + "票据类型": { + "10108": "数电票", + "10101": "增值税普通发票", + "10100": "增值税专用发票", + "30100": "数电票", + "30101": "数电票", + "30104": "增值税专用发票", + "30105": "数电票", + "10106": "区块链电子发票", + "30109": "数电票", + "30121": "增值税普通发票", + "10102": "增值税普通发票", + "10103": "增值税普通发票", + "10107": "数电票", + }.get(response["data"]["type"], "其它增值税发票"), + "票据号码": response["data"]["details"]["number"], + "票据代码": ( + code + if (code := response["data"]["details"]["code"]) + else None + ), # 深圳快瞳票据查验中数电票票据代码为空字符,转为NONE + "开票日期": datetime.strptime( + response["data"]["details"]["date"], "%Y年%m月%d日" + ).strftime( + "%Y-%m-%d" + ), # 深圳快瞳票据查验中就增值税发票开票日期格式为%Y年%m月%d日,转为%Y-%m-%d + "校验码": response["data"]["details"]["check_code"], + "收款方": response["data"]["details"]["seller"], + "付款方": response["data"]["details"]["buyer"], + "票据金额": format( + Decimal(response["data"]["details"]["total"]).quantize( + Decimal("0.00"), + rounding=ROUND_HALF_UP, + ), + ".2f", + ), + "查验状态": ( + "真票" + if response["data"]["details"]["invoiceTypeNo"] == "0" + else "红票" + ), + "备注": ( + remark + if (remark := response["data"]["details"]["remark"]) + else None + ), # 深圳快瞳票据查验中增值税发票备注可能为空字符,转为NONE + "项目": [ + { + "名称": item["name"], + "规格": ( + specification + if (specification := item["specification"]) + else None + ), + "单位": unit if (unit := item["unit"]) else None, + "数量": ( + format( + Decimal(quantity).quantize( + Decimal("0.00"), + rounding=ROUND_HALF_UP, + ), + ".2f", + ) + if (quantity := item["quantity"]) + else None + ), + "金额": format( + ( + Decimal(item["total"]) + + Decimal(item["tax"]) + ).quantize( + Decimal("0.00"), + rounding=ROUND_HALF_UP, + ), + ".2f", + ), # 价税合计 + } + for item in response["data"]["details"].get("items", []) + ], + } # 深圳快瞳票据查验中就部分增值税发票仅可查,数据标准化抛出异常 + return extraction + + case "003081": # 门诊/住院收费票据 + extraction = { + "票据类型": ( + "门诊收费票据" + if "门诊" in response["data"]["billName"] + else "住院收费票据" + ), + "票据号码": response["data"]["billNumber"], + "票据代码": response["data"]["billCode"], + "开票日期": response["data"][ + "invoiceDate" + ], # 深圳快瞳票据查验中就收费票据开票日期格式为%Y-%m-%d + "校验码": response["data"]["checkCode"], + "收款方": response["data"]["payeeName"], + "付款方": response["data"]["payer"], + "票据金额": format( + Decimal(response["data"]["amount"]).quantize( + Decimal("0.00"), + rounding=ROUND_HALF_UP, + ), + ".2f", + ), + "查验状态": {"true": "真票", "false": "红票"}[ + response["data"]["flushedRed"] + ], + "备注": response["data"].get("remark"), + "医保支付": format( + Decimal( + response["data"].get("medicarePay", "0.00") + ).quantize( + Decimal("0.00"), + rounding=ROUND_HALF_UP, + ), + ".2f", + ), + "其它支付": format( + Decimal( + response["data"].get("otherPayment", "0.00") + ).quantize( + Decimal("0.00"), + rounding=ROUND_HALF_UP, + ), + ".2f", + ), + "个人自付": format( + Decimal( + response["data"].get("personalPay", "0.00") + ).quantize( + Decimal("0.00"), + rounding=ROUND_HALF_UP, + ), + ".2f", + ), + "自付一": format( + Decimal( + response["data"].get("self_pay_one", "0.00") + ).quantize( + Decimal("0.00"), + rounding=ROUND_HALF_UP, + ), + ".2f", + ), # 深圳快瞳票据查验中就部分地区无自付一 + "自付二": format( + Decimal( + response["data"].get("classificationPays", "0.00") + ).quantize( + Decimal("0.00"), + rounding=ROUND_HALF_UP, + ), + ".2f", + ), # 深圳快瞳票据查验中就部分地区无自付二 + "个人自费": format( + Decimal( + response["data"].get("personalExpense", "0.00") + ).quantize( + Decimal("0.00"), + rounding=ROUND_HALF_UP, + ), + ".2f", + ), + "住院日期": ( + parse(date.split("-")[0]).strftime("%Y-%m-%d") + if (date := response["data"].get("hospitalizationDate")) + else None + ), # 深圳快瞳票据查验中就收费票据住院日期格式为%Y%m%d-%Y%m%d,即住院日期-出院日期 + "出院日期": ( + parse(date.split("-")[1]).strftime("%Y-%m-%d") + if date + else None + ), + "医疗机构类型": response["data"]["institutionsType"], + "项目": [ + { + "名称": item["itemName"], + "规格": item[ + "medical_level" + ], # 甲类无自付、乙类有自付、丙类全自付 + "单位": item["unit"], + "数量": format( + Decimal(item["number"]).quantize( + Decimal("0.00"), + rounding=ROUND_HALF_UP, + ), + ".2f", + ), + "金额": format( + Decimal(item["totalAmount"]).quantize( + Decimal("0.00"), + rounding=ROUND_HALF_UP, + ), + ".2f", + ), + } + for item in response["data"]["feedetails"] + ], + } + return extraction + # 若请求深圳快瞳票据查验接口或解析发生异常,则根据影像件类型请求深圳快瞳增值税发票/收费票据识别接口 + except: + # 影像件类型 + image_type = kwargs.get("image_type", globals()["image_type"]) + if image_type is None: + return None + + match image_type: + case "增值税发票": + try: + # 请求深圳快瞳增值税发票识别接口 + response = globals()["http_client"].post( + url=( + url := "https://ai.inspirvision.cn/s/api/ocr/vatInvoice" + ), + headers={"X-RequestId-Header": image_guid}, + data={ + "token": globals()["authenticator"].get_token( + servicer="szkt" + ), + "imgBase64": f"data:image/{image_format};base64,{image_base64}", + }, + guid=hashlib.md5((url + image_guid).encode("utf-8")) + .hexdigest() + .upper(), + ) + # 若深圳快瞳增值税发票识别响应非成功则返回NONE + if not ( + response.get("status") == 200 + and response.get("code") == 0 + ): + return None + + extraction = { + "票据类型": ( + invoice_type := ( + data := { + item["desc"]: item["value"] + for item in response["data"] + } + ).get("发票类型") + ), + "票据号码": (number := data.get("发票号码")), + "票据代码": data.get("发票代码"), + "开票日期": ( + datetime.strptime(date, "%Y年%m月%d日").strftime( + "%Y-%m-%d" + ) + if re.match( + r"\d{4}年\d{1,2}月\d{1,2}日", + (date := data.get("开票日期")), + ) + else date + ), + "校验码": ( + check_code + if (check_code := data.get("校验码")) + else number + ), # 若校验码为空则默认为票据号码 + "收款方": data.get("销售方名称"), + "付款方": data.get("购买方名称"), + "票据金额": format( + Decimal( + data.get("小写金额").replace("¥", "") + if invoice_type == "电子发票(普通发票)" + else data.get("合计金额(小写)") + ).quantize( + Decimal("0.00"), + rounding=ROUND_HALF_UP, + ), + ".2f", + ), + "备注": ( + remark if (remark := data.get("备注")) else None + ), + "项目": ( + [ + { + "名称": name, + "规格": ( + specification if specification else None + ), + "单位": unit if unit else None, + "数量": ( + format( + Decimal(quantity).quantize( + Decimal("0.00"), + rounding=ROUND_HALF_UP, + ), + ".2f", + ) + if quantity + else None + ), + "金额": format( + ( + Decimal(amount) + Decimal(tax) + ).quantize( + Decimal("0.00"), + rounding=ROUND_HALF_UP, + ), + ".2f", # 价税合计,保留两位小数 + ), + } + for name, specification, unit, quantity, amount, tax in zip( + [ + component["value"] + for component in response["data"] + if re.match( + r"^项目名称(\d+)?$", + component["desc"], + ) + ], + [ + component["value"] + for component in response["data"] + if re.match( + r"^规格型号(\d+)?$", + component["desc"], + ) + ], + [ + component["value"] + for component in response["data"] + if re.match( + r"^单位(\d+)?$", + component["desc"], + ) + ], + [ + component["value"] + for component in response["data"] + if re.match( + r"^数量(\d+)?$", + component["desc"], + ) + ], + [ + component["value"] + for component in response["data"] + if re.match( + r"^金额(\d+)?$", + component["desc"], + ) + ], + [ + component["value"] + for component in response["data"] + if re.match( + r"^税额(\d+)?$", + component["desc"], + ) + ], + ) + ] + if invoice_type == "电子发票(普通发票)" + else [ + { + "名称": name, + "数量": format( + Decimal(quantity).quantize( + Decimal("0.00"), + rounding=ROUND_HALF_UP, + ), + "0.2f", + ), + "金额": format( + Decimal(amount).quantize( + Decimal("0.00"), + rounding=ROUND_HALF_UP, + ), + ".2f", + ), + } + for name, quantity, amount in zip( + [ + component["value"] + for component in response["data"] + if re.match( + r"^项目名称明细(\d+)?$", + component["desc"], + ) + ], + [ + component["value"] + for component in response["data"] + if re.match( + r"^项目数量明细(\d+)?$", + component["desc"], + ) + ], + [ + component["value"] + for component in response["data"] + if re.match( + r"^项目金额明细(\d+)?$", + component["desc"], + ) + ], + ) + ] + ), + "查验状态": "无法查验", + } + return extraction + except: + return None + + case _: + try: + # 请求深圳快瞳收费票据识别接口 + response = globals()["http_client"].post( + url=( + url := "https://ai.inspirvision.cn/s/api/ocr/medical" + ), + headers={"X-RequestId-Header": image_guid}, + data={ + "token": globals()["authenticator"].get_token( + servicer="szkt" + ), + "imgBase64": f"data:image/{image_format};base64,{image_base64}", + }, + guid=hashlib.md5((url + image_guid).encode("utf-8")) + .hexdigest() + .upper(), + ) + # 若深圳快瞳收费票据识别响应非成功则返回NONE + if not ( + response.get("status") == 200 + and response.get("code") == 0 + ): + return None + + extraction = { + "票据类型": ( + "门诊收费票据" + if response["data"]["insured"]["receipt_outpatient"] + else "住院收费票据" + ), + "票据号码": ( + receipt := ( + response["data"]["insured"][ + "receipt_outpatient" + ] + or response["data"]["insured"][ + "receipt_hospitalization" + ] + )["receipts"][0] + )["receipt_no"][ + "value" + ], # 默认提取门诊/住院收费票据的第一张票据 + "票据代码": receipt["global_detail"]["invoice_code"][ + "value" + ], + "开票日期": receipt["global_detail"]["invoice_date"][ + "value" + ], # 深圳快瞳收费票据识别中就开票日期格式为%Y-%m-%d + "校验码": fuzzy_match( + target="校验码", + components=receipt["global_detail"][ + "region_specific" + ], + specify_key="name", + return_key="word.value", + ), + "收款方": receipt["hospital_name"]["value"], + "付款方": receipt["name"]["value"], + "票据金额": format( + Decimal(receipt["total_amount"]["value"]).quantize( + Decimal("0.00"), + rounding=ROUND_HALF_UP, + ), + ".2f", + ), + "医保支付": ( + format( + Decimal(field["value"]).quantize( + Decimal("0.00"), + rounding=ROUND_HALF_UP, + ), + ".2f", + ) + if isinstance( + (field := receipt.get("medicare_pay")), dict + ) + else None + ), + "其它支付": format( + ( + Decimal(value).quantize( + Decimal("0.00"), + rounding=ROUND_HALF_UP, + ) + if ( + value := fuzzy_match( + target="其它支付", + components=receipt.get( + "global_detail", {} + ).get("pay_list", []), + specify_key="name", + return_key="word.value", + ) + ) + else None + ), + ".2f", + ), + "个人自付": ( + format( + Decimal(field["value"]).quantize( + Decimal("0.00"), + rounding=ROUND_HALF_UP, + ), + ".2f", + ) + if isinstance( + (field := receipt.get("self_pay")), dict + ) + else None + ), + "自付一": ( + format( + Decimal(field["value"]).quantize( + Decimal("0.00"), + rounding=ROUND_HALF_UP, + ), + ".2f", + ) + if isinstance( + field := (receipt.get("self_pay_one")), dict + ) + else None + ), + "自付二": ( + format( + Decimal(field["value"]).quantize( + Decimal("0.00"), + rounding=ROUND_HALF_UP, + ), + ".2f", + ) + if isinstance( + field := (receipt.get("self_pay_two")), dict + ) + else None + ), + "个人自费": ( + format( + Decimal(field["value"]).quantize( + Decimal("0.00"), + rounding=ROUND_HALF_UP, + ), + ".2f", + ) + if isinstance( + field := (receipt.get("self_cost")), dict + ) + else None + ), + "住院日期": ( + datetime.strptime( + field["value"], "%Y%m%d" + ).strftime("%Y-%m-%d") + if isinstance( + field := (receipt.get("starttime")), dict + ) + else None + ), + "出院日期": ( + datetime.strptime( + field["value"], "%Y%m%d" + ).strftime("%Y-%m-%d") + if isinstance( + field := (receipt.get("endtime")), dict + ) + else None + ), + "医疗机构类型": receipt["others"][ + "medical_institution_type" + ]["value"], + "项目": [ + { + "名称": ( + field["value"] + if isinstance( + (field := item["item_name"]), dict + ) + else None + ), + "规格": ( + field["value"] + if isinstance( + (field := item["specifications"]), dict + ) + else None + ), + "单位": ( + field["value"] + if isinstance((field := item["unit"]), dict) + else None + ), + "数量": ( + format( + Decimal(field["value"]).quantize( + Decimal("0.00"), + rounding=ROUND_HALF_UP, + ), + ".2f", + ) + if isinstance( + (field := item["number"]), dict + ) + else None + ), + "金额": ( + format( + Decimal(field["value"]).quantize( + Decimal("0.00"), + rounding=ROUND_HALF_UP, + ), + ".2f", + ) + if isinstance( + (field := item["total_amount"]), dict + ) + else None + ), + } + for item in receipt["feeitems"] + ], + "查验状态": "无法查验", + } + return extraction + except: + return None + + # 影像件识别使能检查,若影像件不识别则跳过 + if not recognition_enable.evaluate( + { + "insurance_branch": insurance_branch, + "image_type": image_type, } + )["result"]["recognition_enable"]: + return # 根据影像件类型匹配影像件识别方法 match image_type: @@ -1239,7 +1298,18 @@ if __name__ == "__main__": case ( "居民身份证(国徽、头像面)" | "居民身份证(国徽面)" | "居民身份证(头像面)" ): - idcard_recognize(image_guid, image_format, image_base64) + # 居民身份证识别并整合至赔案档案 + idcard_recognize(image_guid, image_format, image_base64, image_type) + # TODO: 后续添加居民户口簿识别和整合方法 + case "中国港澳台地区及境外护照": + raise RuntimeError("暂不支持中国港澳台地区及境外护照") + case "银行卡": + # 银行卡识别并整合至赔案档案 + bankcard_recognize(image_guid, image_format, image_base64) + # TODO: 暂仅支持增值税发票识别和购药及就医类型为药店购药整合至赔案档案,后续逐步添加 + case "增值税发票" | "门诊收费票据" | "住院收费票据": + # 票据识别并整合至赔案档案 + receipt_recognize(image_guid, image_format, image_base64, image_type) # 遍历工作目录中赔案目录并创建赔案档案(模拟自动化域就待自动化任务创建理赔档案) for case_path in [x for x in directory_path.iterdir() if x.is_dir()]: @@ -1253,6 +1323,8 @@ if __name__ == "__main__": "赔案号": (case_number := case_path.stem), # 设定:赔案目录名称为赔案号 }, "影像件层": [], + "出险人层": {}, + "受益人层": {}, } # 遍历赔案目录中影像件路径对象 @@ -1270,6 +1342,7 @@ if __name__ == "__main__": # 初始化影像件数据 image = { "原始影像件": { + "影像件编号": image_index, "影像件地址": image_path.as_posix(), # 将影像件路径对象转为字符串 "影像件名称": (image_name := image_path.stem), "影像件格式": (image_format := image_path.suffix.lower()), @@ -1292,18 +1365,13 @@ if __name__ == "__main__": image["影像件类型"] = image_type image["影像件方向"] = image_orientation # 将影像件数据添加至影像件层 + # TODO: 若影像件类型为居民身份证(国徽面)和居民身份证(头像面)需合并 dossier["影像件层"].append(image) - # 影像件识别使能检查,若影像件不识别则跳过 - if not recognition_enable.evaluate( - { - "insurance_branch": insurance_branch, - "image_type": image_type, - } - )["result"]["recognition_enable"]: - continue # 影像件识别并整合至赔案档案 - image_recognize(image_guid, image_format, image_base64, image_type) + image_recognize( + image_index, image_guid, image_format, image_base64, insurance_branch, image_type + ) """ @@ -1313,19 +1381,6 @@ if __name__ == "__main__": - case "银行卡": - extraction = bankcard_extraction() - # 若发生异常则跳过该影像件 - if extraction is None: - dossier["影像件层"][-1]["已识别"] = "否,无法识别" - continue - - dossier["赔案层"]["受益人信息"].update( - { - "开户银行": extraction["银行名称"], - "银行账号": extraction["卡号"], - } - ) case "增值税发票" | "门诊收费票据" | "住院收费票据": extraction = invoice_extraction()