diff --git a/票据理赔自动化/directory/254728869001/invoice_11.jpg b/票据理赔自动化/directory/254728869001/invoice_11.jpg deleted file mode 100644 index 2f166c8..0000000 Binary files a/票据理赔自动化/directory/254728869001/invoice_11.jpg and /dev/null differ diff --git a/票据理赔自动化/main.py b/票据理赔自动化/main.py index 05cfde6..16e4296 100644 --- a/票据理赔自动化/main.py +++ b/票据理赔自动化/main.py @@ -1,11 +1,10 @@ # -*- coding: utf-8 -*- """ -根据现普康票据理赔自动化最小化实现 +基于普康票据理赔自动化最小化实现 功能清单 https://liubiren.feishu.cn/docx/WFjTdBpzroUjQvxxrNIcKvGnneh?from=from_copylink """ - import json import re from base64 import b64encode @@ -17,16 +16,13 @@ from typing import Any, Dict, List, Optional, Tuple import cv2 import numpy -from dateutil.parser import parse +from fuzzywuzzy import fuzz from jinja2 import Environment, FileSystemLoader from jionlp import parse_location from zen import ZenDecision, ZenEngine from utils.client import Authenticator, HTTPClient, SQLiteClient -# from utils.ocr import fuzzy_match - - # ------------------------- # 主逻辑 # ------------------------- @@ -128,7 +124,7 @@ if __name__ == "__main__": # noinspection PyShadowingNames def query_institution_type(self, institution: str) -> Optional[str]: """ - 查询并获取单条购药及就医机构类型 + 根据购药及就医机构查询购药及就医机构类型 :param institution: 购药及就医机构 :return: 购药及就医机构类型 """ @@ -144,9 +140,10 @@ if __name__ == "__main__": """, parameters=(institution,), ) - return ( - None if result is None else result["institution_type"] - ) # 返回购药及就医机构类型 + # TODO: 若购药及就医机构类型为空值则流转至主数据人工处理 + if result is None: + raise RuntimeError("购药及就医机构类型为空值") + return result["institution_type"] except Exception as exception: raise RuntimeError( "查询并获取单条购药及就医机构类型发生异常" @@ -561,7 +558,7 @@ if __name__ == "__main__": """ # noinspection PyShadowingNames - def mlm_recognize(image, schema) -> Dict[str, Any]: + def mlm_recognize(image, schema) -> Optional[Dict[str, Any]]: """ 使用多模态大模型就理赔申请书进行光学字符识别并结构化识别结果 :param image: 影像件 @@ -619,9 +616,14 @@ if __name__ == "__main__": .hexdigest() .upper(), ) - # 就消息内容JSON反序列化 - # noinspection PyTypeChecker - return json.loads(response["choices"][0]["message"]["content"]) + + # 尝试就响应中消息内容JSON反序列化 + # noinspection PyBroadException + try: + # noinspection PyTypeChecker + return json.loads(response["choices"][0]["message"]["content"]) + except: + return None # noinspection PyShadowingNames def boc_application_recognize(image: str) -> None: @@ -728,25 +730,23 @@ if __name__ == "__main__": "additionalProperties": False, # 禁止就识别结果的JSON结构新增属性 } - # noinspection PyBroadException - try: - # 使用多模态大模型就理赔申请书进行光学字符识别并结构化识别结果 - recognition = mlm_recognize(image, schema) - - dossier["受益人层"].update( - { - "开户行": recognition["开户银行"], - "户名": recognition["户名"], - "户号": recognition["账号"], - "手机号": recognition["手机"], - } - ) - except Exception: - # TODO: 若非成功则流转至人工处理 + # 使用多模态大模型就理赔申请书进行光学字符识别并结构化识别结果 + recognition = mlm_recognize(image, schema) + # TODO: 若非成功则流转至人工处理 + if recognition is None: raise RuntimeError( "就中银保险有限公司的理赔申请书识别并整合至赔案档案发生异常" ) + dossier["受益人层"].update( + { + "开户行": recognition["开户银行"], + "户名": recognition["户名"], + "户号": recognition["账号"], + "手机号": recognition["手机"], + } + ) + # 根据保险分公司匹配结构化识别文本方法 match insurer_company: # 中银保险有限公司 @@ -760,12 +760,142 @@ if __name__ == "__main__": :param image: 影像件 :return: 空 """ + + # noinspection PyShadowingNames + def query_value(contents: list, key: str) -> Optional[str]: + """ + 就识别结果,根据指定键名查询值 + :param contents: 识别结果 + :param key: 指定键名 + :return 值 + 需要匹配的键名的键值 + """ + # 若识别结果为空列表则返回None + if not contents: + return None + + # noinspection PyInconsistentReturns + match contents[0].keys(): + # 对应深圳快瞳增值税发票识别结果 + case _ if "desc" in contents[0].keys(): + # 遍历识别结果,若内容的键名为指定键名则返回值 + for content in contents: + if content["desc"] == key: + return content["value"] if content["value"] else None + + candidates = [] + # 基于加权补偿的莱文斯坦距离算法计算所有内容的键名和指定键名的相似度 + for content in contents: + candidates.append( + ( + content["value"], + fuzz.WRatio( + content["desc"], key, force_ascii=False + ), + ) + ) + + # 返回最大相似度的值 + return ( + (result[0] if result[0] else None) + if (result := max(candidates, key=lambda x: x[1]))[1] >= 80 + else None + ) + # 对应深圳快瞳医疗收费票据识别结果 + case _ if "name" in contents[0].keys(): + # 遍历识别结果,若内容的键名为指定键名则返回值 + for content in contents: + if content["name"] == key: + return ( + content["word"]["value"] + if content["word"]["value"] + else None + ) + + candidates = [] + # 基于加权补偿的莱文斯坦距离算法计算所有内容的键名和指定键名的相似度 + for content in contents: + candidates.append( + ( + content["word"]["value"], + fuzz.WRatio( + content["name"], key, force_ascii=False + ), + ) + ) + + # 返回最大相似度的值 + return ( + (result[0] if result[0] else None) + if (result := max(candidates, key=lambda x: x[1]))[1] >= 80 + else None + ) + + def parse_items(contents): + """ + 就识别结果,解析明细项 + :param contents: 识别结果 + :return 解析后的明细项 + """ + # noinspection PyInconsistentReturns + match contents[0].keys(): + case _ if "desc" in contents[0].keys(): + # noinspection PyTypeChecker + return [ + { + "名称": name, + "数量": Decimal(quantity).quantize( + Decimal("0.00"), + rounding=ROUND_HALF_UP, + ), + "金额": (Decimal(amount) + Decimal(tax)).quantize( + Decimal("0.00"), rounding=ROUND_HALF_UP + ), # 深圳快瞳票据识别接口中明细的金额和税额由字符串转为Decimal,保留两位小数,并求和 + } + for name, quantity, amount, tax in zip( + [ + x["value"] + for x in contents + if re.match( + r"^项目名称(\d+)?$", + x["desc"], + ) + ], + [ + x["value"] + for x in contents + if re.match( + r"^数量(\d+)?$", + x["desc"], + ) + ], + [ + x["value"] + for x in contents + if re.match( + r"^金额(\d+)?$", + x["desc"], + ) + ], + [ + x["value"] + for x in contents + if re.match( + r"^税额(\d+)?$", + x["desc"], + ) + ], + ) + ] + # 初始化票据数据 receipt = {"影像件编号": image["影像件编号"]} - # 先请求深圳快瞳票据查验接口(兼容增值税发票、医疗门诊/住院收费票据) + # 请求深圳快瞳票据查验接口(兼容增值税发票、医疗门诊/住院收费票据) response = http_client.post( url=(url := "https://ai.inspirvision.cn/s/api/ocr/invoiceCheckAll"), - headers={"X-RequestId-Header": image["影像件唯一标识"]}, + headers={ + "X-RequestId-Header": image["影像件唯一标识"] + }, # 以影像件唯一标识作为请求唯一标识,用于双方联查 data={ "token": authenticator.get_token( servicer="szkt" @@ -776,7 +906,7 @@ if __name__ == "__main__": .hexdigest() .upper(), ) - # 若查验成功则直接整合至赔案档案 + # 若查验结果为真票或红票则直接整合至赔案档案 if response.get("status") == 200 and response.get("code") == 10000: # noinspection PyTypeChecker match response["data"]["productCode"]: @@ -796,10 +926,10 @@ if __name__ == "__main__": response["data"]["details"]["code"] if response["data"]["details"]["code"] else None - ), # 全电发票无发票代码,深圳快瞳票据查验接口中票据代码由空字符转为None + ), "开票日期": datetime.strptime( response["data"]["details"]["date"], "%Y年%m月%d日" - ), # 深圳快瞳票据查验接口中开票日期由字符串转为日期 + ), # 深圳快瞳票据查验接口中开票日期由字符串转为datetime对象 "校验码": response["data"]["details"]["check_code"], "开票金额": Decimal( response["data"]["details"]["total"] @@ -807,68 +937,37 @@ if __name__ == "__main__": Decimal("0.00"), rounding=ROUND_HALF_UP, ), # 深圳快瞳票据查验接口中开票金额由字符串转为Decimal,保留两位小数 - "出险人": response["data"]["details"]["buyer"], - "购药及就医机构": ( - institution := response["data"]["details"]["seller"] + "姓名": response["data"]["details"]["buyer"], + "购药及就医机构": response["data"]["details"]["seller"], + "明细项": [ + { + "名称": item["name"], + "数量": ( + Decimal(item["quantity"]).quantize( + Decimal("0.00"), + rounding=ROUND_HALF_UP, + ) + if item["quantity"] + else Decimal("0.00") + ), # 深圳快瞳票据查验接口中明细单位由空字符转为None,若非空字符由字符串转为Decimal,保留两位小数 + "金额": ( + Decimal(item["total"]) + + Decimal(item["tax"]) + ).quantize( + Decimal("0.00"), + rounding=ROUND_HALF_UP, + ), # 深圳快瞳票据查验接口中明细的金额和税额由字符串转为Decimal,保留两位小数,并求和 + } + for item in response["data"]["details"]["items"] + ], + "备注": ( + response["data"]["details"]["remark"] + if response["data"]["details"]["remark"] + else None ), } ) - - "费项层": [ - { - "名称": item["name"], - "规格": ( - item["specification"] - if item["specification"] - else None - ), # 深圳快瞳票据查验接口中明细规则由空字符转为None - "单位": ( - item["unit"] if item["unit"] else None - ), # 深圳快瞳票据查验接口中明细单位由空字符转为None - "数量": ( - Decimal(item["quantity"]).quantize( - Decimal("0.00"), - rounding=ROUND_HALF_UP, - ) - if item["quantity"] - else None - ), # 深圳快瞳票据查验接口中明细单位由空字符转为None,若非空字符由字符串转为Decimal,保留两位小数 - "金额": ( - Decimal(item["total"]) - + Decimal(item["tax"]) - ).quantize( - Decimal("0.00"), - rounding=ROUND_HALF_UP, - ), # 深圳快瞳票据查验接口中明细金额税额由字符串转为Decimal,保留两位小数,求和 - } - for item in response["data"]["details"].get( - "items", [] - ) - ], - - # 查询并获取单条购药及就医机构类型 - institution_type = master_data.query_institution_type( - institution - ) - # TODO: 若查询并获取单条购药及就医机构类型发生异常则流转至主数据人工处理 - if institution_type is None: - raise RuntimeError( - "查询并获取单条购药及就医机构类型发生异常" - ) - receipt["购药及就医机构类型"] = institution_type - - # 根据购药及就医机构类型匹配处理方法 - match institution_type: - # 若购药及就医机构类型为药店,则根据 - case "药店": - pass - case "私立医院": - - pass - case _: - raise RuntimeError("") - - # 门诊/住院收费票据 + # 医疗门诊、住院收费票据 case "003081": # noinspection PyTypeChecker receipt.update( @@ -878,19 +977,33 @@ if __name__ == "__main__": if response["data"]["flushedRed"] == "true" else "红票" ), - "票据号码": response["data"]["billNumber"], - "票据代码": ( - response["data"]["billCode"] - if response["data"]["billCode"] + "票据号": response["data"]["billNumber"], + "票据代码": response["data"]["billCode"], + "开票日期": datetime.strptime( + response["data"]["invoiceDate"], "%Y-%m-%d %H:%M:%S" + ), # 深圳快瞳票据查验接口中开票日期由字符串转为datetime对象 + "入院日期": ( + datetime.strptime( + response["data"]["hospitalizationDate"].split( + "-" + )[0], + "%Y%m%d", + ) + if response["data"]["hospitalizationDate"] else None - ), # 部分地区医疗收费票据无发票代码,深圳快瞳票据查验接口中票据代码由空字符转为None - "开票日期": parse( - response["data"]["invoiceDate"] - ).strftime( - "%Y-%m-%d" - ), # 深圳快瞳票据查验接口中开票日期由字符串(%Y-%m-%d)转为日期 + ), # 深圳快瞳票据查验接口中住院日期解析为入院日期和出院日期 + "出院日期": ( + datetime.strptime( + response["data"]["hospitalizationDate"].split( + "-" + )[1], + "%Y%m%d", + ) + if response["data"]["hospitalizationDate"] + else None + ), "校验码": response["data"]["checkCode"], - "票据金额": Decimal( + "开票金额": Decimal( response["data"]["amount"] ).quantize( Decimal("0.00"), @@ -900,546 +1013,310 @@ if __name__ == "__main__": "购药及就医机构": response["data"][ "receivablesInstitution" ], - "医保支付": format( - Decimal( - response["data"].get("medicarePay", "0.00") - ).quantize( - Decimal("0.00"), - rounding=ROUND_HALF_UP, - ), - ".2f", - ), - "其它支付": format( - Decimal( - response["data"].get("otherPayment", "0.00") - ).quantize( - Decimal("0.00"), - rounding=ROUND_HALF_UP, - ), - ".2f", - ), - "个人自付": format( - Decimal( - response["data"].get("personalPay", "0.00") - ).quantize( - Decimal("0.00"), - rounding=ROUND_HALF_UP, - ), - ".2f", - ), - "自付一": format( - Decimal( - response["data"].get("self_pay_one", "0.00") - ).quantize( - Decimal("0.00"), - rounding=ROUND_HALF_UP, - ), - ".2f", - ), # 深圳快瞳票据查验中就部分地区无自付一 - "自付二": format( - Decimal( - response["data"].get( - "classificationPays", "0.00" - ) - ).quantize( - Decimal("0.00"), - rounding=ROUND_HALF_UP, - ), - ".2f", - ), # 深圳快瞳票据查验中就部分地区无自付二 - "个人自费": format( - Decimal( - response["data"].get("personalExpense", "0.00") - ).quantize( - Decimal("0.00"), - rounding=ROUND_HALF_UP, - ), - ".2f", - ), - "住院日期": ( - parse(date.split("-")[0]).strftime("%Y-%m-%d") - if ( - date := response["data"].get( - "hospitalizationDate" - ) - ) - else None - ), # 深圳快瞳票据查验中就收费票据住院日期格式为%Y%m%d-%Y%m%d,即住院日期-出院日期 - "出院日期": ( - parse(date.split("-")[1]).strftime("%Y-%m-%d") - if date - else None - ), - "医疗机构类型": response["data"]["institutionsType"], - "项目": [ + "明细项": [ { "名称": item["itemName"], - "规格": item[ - "medical_level" - ], # 甲类无自付、乙类有自付、丙类全自付 - "单位": item["unit"], - "数量": format( - Decimal(item["number"]).quantize( - Decimal("0.00"), - rounding=ROUND_HALF_UP, - ), - ".2f", + "数量": Decimal(item["number"]).quantize( + Decimal("0.00"), + rounding=ROUND_HALF_UP, ), - "金额": format( - Decimal(item["totalAmount"]).quantize( - Decimal("0.00"), - rounding=ROUND_HALF_UP, - ), - ".2f", + "金额": Decimal(item["totalAmount"]).quantize( + Decimal("0.00"), + rounding=ROUND_HALF_UP, ), } - for item in response["data"]["feedetails"] + for item in response["data"]["feeitems"] ], - } - ) - # 若查验为假票或无法查验则 - else: - if response.get("status") == 400 and ( - response.get("code") == 10100 or response.get("code") == 10001 - ): - receipt["查验结果"] = "假票" - else: - receipt["查验结果"] = "无法查验" - - try: - - match image_type: - case "增值税发票": - try: - # 请求深圳快瞳增值税发票识别接口 - response = globals()["http_client"].post( - url=( - url := "https://ai.inspirvision.cn/s/api/ocr/vatInvoice" - ), - headers={"X-RequestId-Header": image_guid}, - data={ - "token": globals()[ - "authenticator" - ].get_token(servicer="szkt"), - "imgBase64": f"data:image/{image_format};base64,{image_base64}", - }, - guid=hashlib.md5( - (url + image_guid).encode("utf-8") - ) - .hexdigest() - .upper(), - ) - # 若深圳快瞳增值税发票识别响应非成功则返回None - if not ( - response.get("status") == 200 - and response.get("code") == 0 - ): - return None - - extraction = { - "票据类型": ( - invoice_type := ( - data := { - item["desc"]: item["value"] - for item in response["data"] - } - ).get("发票类型") - ), - "票据号码": (number := data.get("发票号码")), - "票据代码": data.get("发票代码"), - "开票日期": ( - datetime.strptime( - date, "%Y年%m月%d日" - ).strftime("%Y-%m-%d") - if re.match( - r"\d{4}年\d{1,2}月\d{1,2}日", - (date := data.get("开票日期")), - ) - else date - ), - "校验码": ( - check_code - if (check_code := data.get("校验码")) - else number - ), # 若校验码为空则默认为票据号码 - "收款方": data.get("销售方名称"), - "付款方": data.get("购买方名称"), - "票据金额": format( - Decimal( - data.get("小写金额").replace("¥", "") - if invoice_type == "电子发票(普通发票)" - else data.get("合计金额(小写)") - ).quantize( - Decimal("0.00"), - rounding=ROUND_HALF_UP, - ), - ".2f", - ), - "备注": ( - remark - if (remark := data.get("备注")) - else None - ), - "项目": ( - [ - { - "名称": name, - "规格": ( - specification - if specification - else None - ), - "单位": unit if unit else None, - "数量": ( - format( - Decimal(quantity).quantize( - Decimal("0.00"), - rounding=ROUND_HALF_UP, - ), - ".2f", - ) - if quantity - else None - ), - "金额": format( - ( - Decimal(amount) - + Decimal(tax) - ).quantize( - Decimal("0.00"), - rounding=ROUND_HALF_UP, - ), - ".2f", # 价税合计,保留两位小数 - ), - } - for name, specification, unit, quantity, amount, tax in zip( - [ - component["value"] - for component in response[ - "data" - ] - if re.match( - r"^项目名称(\d+)?$", - component["desc"], - ) - ], - [ - component["value"] - for component in response[ - "data" - ] - if re.match( - r"^规格型号(\d+)?$", - component["desc"], - ) - ], - [ - component["value"] - for component in response[ - "data" - ] - if re.match( - r"^单位(\d+)?$", - component["desc"], - ) - ], - [ - component["value"] - for component in response[ - "data" - ] - if re.match( - r"^数量(\d+)?$", - component["desc"], - ) - ], - [ - component["value"] - for component in response[ - "data" - ] - if re.match( - r"^金额(\d+)?$", - component["desc"], - ) - ], - [ - component["value"] - for component in response[ - "data" - ] - if re.match( - r"^税额(\d+)?$", - component["desc"], - ) - ], - ) - ] - if invoice_type == "电子发票(普通发票)" - else [ - { - "名称": name, - "数量": format( - Decimal(quantity).quantize( - Decimal("0.00"), - rounding=ROUND_HALF_UP, - ), - "0.2f", - ), - "金额": format( - Decimal(amount).quantize( - Decimal("0.00"), - rounding=ROUND_HALF_UP, - ), - ".2f", - ), - } - for name, quantity, amount in zip( - [ - component["value"] - for component in response[ - "data" - ] - if re.match( - r"^项目名称明细(\d+)?$", - component["desc"], - ) - ], - [ - component["value"] - for component in response[ - "data" - ] - if re.match( - r"^项目数量明细(\d+)?$", - component["desc"], - ) - ], - [ - component["value"] - for component in response[ - "data" - ] - if re.match( - r"^项目金额明细(\d+)?$", - component["desc"], - ) - ], - ) - ] - ), - "查验状态": "无法查验", - } - except: - pass - # 请求深圳快瞳收费票据识别接口 - response = globals()["http_client"].post( - url=(url := "https://ai.inspirvision.cn/s/api/ocr/medical"), - headers={"X-RequestId-Header": image_guid}, - data={ - "token": globals()["authenticator"].get_token( - servicer="szkt" - ), - "imgBase64": f"data:image/{image_format};base64,{image_base64}", - }, - guid=hashlib.md5((url + image_guid).encode("utf-8")) - .hexdigest() - .upper(), - ) - # 若深圳快瞳收费票据识别响应非成功则返回NONE - if not ( - response.get("status") == 200 and response.get("code") == 0 - ): - return None - - extraction = { - "票据类型": ( - "门诊收费票据" - if response["data"]["insured"]["receipt_outpatient"] - else "住院收费票据" - ), - "票据号码": ( - receipt := ( - response["data"]["insured"]["receipt_outpatient"] - or response["data"]["insured"][ - "receipt_hospitalization" - ] - )["receipts"][0] - )["receipt_no"][ - "value" - ], # 默认提取门诊/住院收费票据的第一张票据 - "票据代码": receipt["global_detail"]["invoice_code"][ - "value" - ], - "开票日期": receipt["global_detail"]["invoice_date"][ - "value" - ], # 深圳快瞳收费票据识别中就开票日期格式为%Y-%m-%d - "校验码": fuzzy_match( - target="校验码", - components=receipt["global_detail"]["region_specific"], - specify_key="name", - return_key="word.value", - ), - "收款方": receipt["hospital_name"]["value"], - "付款方": receipt["name"]["value"], - "票据金额": format( - Decimal(receipt["total_amount"]["value"]).quantize( + "个人自费": Decimal( + response["data"]["personalExpense"] + if response["data"]["personalExpense"] + else Decimal("0.00") + ).quantize( Decimal("0.00"), rounding=ROUND_HALF_UP, ), - ".2f", - ), - "医保支付": ( - format( - Decimal(field["value"]).quantize( - Decimal("0.00"), - rounding=ROUND_HALF_UP, - ), - ".2f", - ) - if isinstance( - (field := receipt.get("medicare_pay")), dict - ) - else None - ), - "其它支付": format( - ( - Decimal(value).quantize( - Decimal("0.00"), - rounding=ROUND_HALF_UP, + "个人自付": Decimal( + response["data"]["personalPay"] + if response["data"]["personalPay"] + else Decimal("0.00") + ).quantize( + Decimal("0.00"), + rounding=ROUND_HALF_UP, + ), + "医保支付": ( + Decimal(response["data"]["medicarePay"]) + if response["data"]["medicarePay"] + else Decimal("0.00") + + Decimal( + response["data"]["otherPayment"] + if response["data"]["otherPayment"] + else Decimal("0.00") ) - if ( - value := fuzzy_match( - target="其它支付", - components=receipt.get( - "global_detail", {} - ).get("pay_list", []), - specify_key="name", - return_key="word.value", - ) + ).quantize( + Decimal("0.00"), + rounding=ROUND_HALF_UP, + ), # 包括医保统筹基金支付和其它支付(例如,退休补充支付) + } + ) + # 若查验结果为假票或无法查验则再请求深圳快瞳票据识别接口接整合至赔案档案 + else: + receipt["查验结果"] = ( + "假票" + if response.get("status") == 400 + and (response.get("code") == 10100 or response.get("code") == 10001) + else "无法查验" + ) # 假票:查无此票或查验成功五要素不一致 + + match image["影像件类型"]: + case "增值税发票": + # 请求深圳快瞳增值税发票识别接口 + response = http_client.post( + url=( + url := "https://ai.inspirvision.cn/s/api/ocr/vatInvoice" + ), + headers={ + "X-RequestId-Header": image["影像件唯一标识"] + }, # 以影像件唯一标识作为请求唯一标识,用于双方联查 + data={ + "token": authenticator.get_token( + servicer="szkt" + ), # 获取深圳快瞳访问令牌 + "imgBase64": f"data:image/{image["影像件格式"].lstrip(".")};base64,{image["影像件BASE64编码"]}", + }, + guid=md5((url + image["影像件唯一标识"]).encode("utf-8")) + .hexdigest() + .upper(), + ) + # TODO: 若响应非成功则流转至人工处理 + if not ( + response.get("status") == 200 and response.get("code") == 0 + ): + raise RuntimeError("请求深圳快瞳增值税发票识别接口发生异常") + + # noinspection PyTypeChecker + receipt.update( + { + "票据号": query_value(response["data"], "发票号码"), + "票据代码": query_value(response["data"], "发票代码"), + "开票日期": datetime.strptime( + query_value(response["data"], "开票日期"), + "%Y年%m月%d日", + ), + "校验码": query_value(response["data"], "校验码"), + "开票金额": Decimal( + query_value(response["data"], "小写金额").replace( + "¥", "" + ) + ).quantize( + Decimal("0.00"), + rounding=ROUND_HALF_UP, + ), + "姓名": query_value(response["data"], "购买方名称"), + "购药及就医机构": query_value( + response["data"], "销售方名称" + ), + "明细项": parse_items(response["data"]), + "备注": query_value(response["data"], "备注"), + } + ) + case "医疗门诊收费票据" | "医疗住院收费票据": + # 请求深圳快瞳医疗收费票据识别接口 + response = http_client.post( + url=(url := "https://ai.inspirvision.cn/s/api/ocr/medical"), + headers={ + "X-RequestId-Header": image["影像件唯一标识"] + }, # 以影像件唯一标识作为请求唯一标识,用于双方联查 + data={ + "token": authenticator.get_token( + servicer="szkt" + ), # 获取深圳快瞳访问令牌 + "imgBase64": f"data:image/{image["影像件格式"].lstrip(".")};base64,{image["影像件BASE64编码"]}", + }, + guid=md5((url + image["影像件唯一标识"]).encode("utf-8")) + .hexdigest() + .upper(), + ) + # TODO: 若响应非成功则流转至人工处理 + if not ( + response.get("status") == 200 and response.get("code") == 0 + ): + raise RuntimeError( + "请求深圳快瞳医疗收费票据识别接口发生异常" + ) + + # noinspection PyTypeChecker + receipt.update( + { + "票据号": ( + receipt := ( + response["data"]["insured"][ + ( + "receipt_hospitalization" + if image["影像件类型"] + == "医疗门诊收费票据" + else "receipt_outpatient" + ) + ] + )["receipts"][0] + )["receipt_no"][ + "value" + ], # 默认为第一张票据 + "票据代码": receipt["global_detail"]["invoice_code"][ + "value" + ], + "开票日期": datetime.strptime( + receipt["global_detail"]["invoice_date"]["value"], + "%Y-%m-%d", + ), + "校验码": query_value( + receipt["global_detail"]["region_specific"], + "校验码", + ), + "开票金额": Decimal( + receipt["total_amount"]["value"] + ).quantize( + Decimal("0.00"), + rounding=ROUND_HALF_UP, + ), + "姓名": receipt["name"]["value"], + "购药及就医机构": receipt["hospital_name"]["value"], + "明细项": [ + { + "名称": field["value"], + "数量": Decimal(field["value"]).quantize( + Decimal("0.00"), + rounding=( + ROUND_HALF_UP + if isinstance( + (field := item["number"]), dict + ) + else None + ), + ), + "金额": ( + format( + Decimal(field["value"]).quantize( + Decimal("0.00"), + rounding=ROUND_HALF_UP, + ), + ".2f", + ) + if isinstance( + (field := item["total_amount"]), dict + ) + else None + ), + } + for item in receipt["feeitems"] + ], + "医保支付": ( + format( + Decimal(field["value"]).quantize( + Decimal("0.00"), + rounding=ROUND_HALF_UP, + ), + ".2f", + ) + if isinstance( + (field := receipt.get("medicare_pay")), dict ) else None ), - ".2f", - ), - "个人自付": ( - format( - Decimal(field["value"]).quantize( - Decimal("0.00"), - rounding=ROUND_HALF_UP, + "其它支付": format( + ( + Decimal(value).quantize( + Decimal("0.00"), + rounding=ROUND_HALF_UP, + ) + if ( + value := fuzzy_match( + target="其它支付", + components=receipt.get( + "global_detail", {} + ).get("pay_list", []), + specify_key="name", + return_key="word.value", + ) + ) + else None ), ".2f", - ) - if isinstance((field := receipt.get("self_pay")), dict) - else None - ), - "自付一": ( - format( - Decimal(field["value"]).quantize( - Decimal("0.00"), - rounding=ROUND_HALF_UP, - ), - ".2f", - ) - if isinstance( - field := (receipt.get("self_pay_one")), dict - ) - else None - ), - "自付二": ( - format( - Decimal(field["value"]).quantize( - Decimal("0.00"), - rounding=ROUND_HALF_UP, - ), - ".2f", - ) - if isinstance( - field := (receipt.get("self_pay_two")), dict - ) - else None - ), - "个人自费": ( - format( - Decimal(field["value"]).quantize( - Decimal("0.00"), - rounding=ROUND_HALF_UP, - ), - ".2f", - ) - if isinstance(field := (receipt.get("self_cost")), dict) - else None - ), - "住院日期": ( - datetime.strptime(field["value"], "%Y%m%d").strftime( - "%Y-%m-%d" - ) - if isinstance(field := (receipt.get("starttime")), dict) - else None - ), - "出院日期": ( - datetime.strptime(field["value"], "%Y%m%d").strftime( - "%Y-%m-%d" - ) - if isinstance(field := (receipt.get("endtime")), dict) - else None - ), - "医疗机构类型": receipt["others"][ - "medical_institution_type" - ]["value"], - "项目": [ - { - "名称": ( - field["value"] - if isinstance( - (field := item["item_name"]), dict - ) - else None - ), - "规格": ( - field["value"] - if isinstance( - (field := item["specifications"]), dict - ) - else None - ), - "单位": ( - field["value"] - if isinstance((field := item["unit"]), dict) - else None - ), - "数量": ( - format( - Decimal(field["value"]).quantize( - Decimal("0.00"), - rounding=ROUND_HALF_UP, - ), - ".2f", - ) - if isinstance((field := item["number"]), dict) - else None - ), - "金额": ( - format( - Decimal(field["value"]).quantize( - Decimal("0.00"), - rounding=ROUND_HALF_UP, - ), - ".2f", - ) - if isinstance( - (field := item["total_amount"]), dict - ) - else None - ), - } - for item in receipt["feeitems"] - ], - "查验状态": "无法查验", - } - return extraction - except: - return None + ), + "个人自付": ( + format( + Decimal(field["value"]).quantize( + Decimal("0.00"), + rounding=ROUND_HALF_UP, + ), + ".2f", + ) + if isinstance( + (field := receipt.get("self_pay")), dict + ) + else None + ), + "自付一": ( + format( + Decimal(field["value"]).quantize( + Decimal("0.00"), + rounding=ROUND_HALF_UP, + ), + ".2f", + ) + if isinstance( + field := (receipt.get("self_pay_one")), dict + ) + else None + ), + "自付二": ( + format( + Decimal(field["value"]).quantize( + Decimal("0.00"), + rounding=ROUND_HALF_UP, + ), + ".2f", + ) + if isinstance( + field := (receipt.get("self_pay_two")), dict + ) + else None + ), + "个人自费": ( + format( + Decimal(field["value"]).quantize( + Decimal("0.00"), + rounding=ROUND_HALF_UP, + ), + ".2f", + ) + if isinstance( + field := (receipt.get("self_cost")), dict + ) + else None + ), + "住院日期": ( + datetime.strptime( + field["value"], "%Y%m%d" + ).strftime("%Y-%m-%d") + if isinstance( + field := (receipt.get("starttime")), dict + ) + else None + ), + "出院日期": ( + datetime.strptime( + field["value"], "%Y%m%d" + ).strftime("%Y-%m-%d") + if isinstance( + field := (receipt.get("endtime")), dict + ) + else None + ), + "医疗机构类型": receipt["others"][ + "medical_institution_type" + ]["value"], + } + ) # noinspection PyShadowingNames def bank_card_recognize(image) -> None: