From 2ba89f571ea03dec773979473f2fc45990e7c175 Mon Sep 17 00:00:00 2001 From: liubiren Date: Mon, 29 Dec 2025 20:41:38 +0800 Subject: [PATCH] =?UTF-8?q?=E6=97=A5=E5=B8=B8=E6=9B=B4=E6=96=B0=20from=20N?= =?UTF-8?q?UC?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- 票据理赔自动化/image.py | 2182 +++++++++++++++++++-------------------- 票据理赔自动化/main.py | 7 - 2 files changed, 1079 insertions(+), 1110 deletions(-) diff --git a/票据理赔自动化/image.py b/票据理赔自动化/image.py index 457078f..130b065 100644 --- a/票据理赔自动化/image.py +++ b/票据理赔自动化/image.py @@ -24,6 +24,204 @@ authenticator = Authenticator() http_client = HTTPClient(timeout=300, cache_enabled=True) # 使用缓存 +# noinspection PyShadowingNames +def image_read( + image_path: Path, +) -> Optional[numpy.ndarray | None]: + """ + 打开并读取影像件 + :param image_path: 影像件路径(path对象) + :return: 影像件数据(numpy.ndarray对象) + """ + # noinspection PyBroadException + try: + # 打开并读取影像件(默认转为单通道灰度图) + image_ndarray = cv2.imread(image_path.as_posix(), cv2.IMREAD_GRAYSCALE) + if image_ndarray is None: + raise + return image_ndarray + except Exception as exception: + raise RuntimeError(f"打开并读取影像件发生异常:{str(exception)}") + + +# noinspection PyShadowingNames +def image_serialize(image_format: str, image_ndarray: numpy.ndarray) -> str: + """ + 生成影像件唯一标识 + :param image_format: 影像件格式 + :param image_ndarray: 影像件数据 + :return: 影像件唯一标识 + """ + success, image_ndarray_encoded = cv2.imencode(image_format, image_ndarray) + if not success or image_ndarray_encoded is None: + raise RuntimeError("编码影像件发生异常") + + # 转为字节流并生成影像件唯一标识 + image_guid = md5(image_ndarray_encoded.tobytes()).hexdigest().upper() + return image_guid + + +# noinspection PyShadowingNames +def image_compress( + image_format: str, + image_ndarray: numpy.ndarray, + image_size_specified: float = 2.0, +) -> Optional[str]: + """ + 压缩影像件 + :param image_format: 影像件格式 + :param image_ndarray: 影像件数据 + :param image_size_specified: 指定压缩影像件大小,单位为兆字节(MB) + :return: 压缩后影像件BASE64编码 + """ + # 转为字节 + image_size_specified = image_size_specified * 1024 * 1024 + + # 通过调整影像件质量和尺寸达到压缩影像件目的(先调整影像件质量再调整影像件尺寸) + for quality in range(100, 50, -10): + image_ndarray_copy = image_ndarray.copy() + for _ in range(10): + success, image_ndarray_encoded = cv2.imencode( + image_format, + image_ndarray_copy, + params=( + [cv2.IMWRITE_PNG_COMPRESSION, 10 - quality // 10] + if image_format == "png" + else [cv2.IMWRITE_JPEG_QUALITY, quality] + ), + ) + if not success or image_ndarray_encoded is None: + break + + # 影像件BASE64编码 + image_base64 = b64encode(image_ndarray_encoded.tobytes()).decode("utf-8") + if len(image_base64) <= image_size_specified: + return image_base64 + + image_ndarray_copy = cv2.resize( + image_ndarray_copy, + ( + int(image_ndarray_copy.shape[0] * 0.95), + int(image_ndarray_copy.shape[1] * 0.95), + ), + interpolation=cv2.INTER_AREA, + ) + # 若调整影像件尺寸后宽/高小于350像素则终止循环 + if min(image_ndarray_copy.shape[:2]) < 350: + break + + return None + + +# noinspection PyShadowingNames +def calculate_age(report_time: datetime, birth_date: datetime) -> int: + """ + 根据报案时间计算周岁 + :param report_time: 报案时间 + :param birth_date: 出生日期 + :return 周岁 + """ + age = report_time.year - birth_date.year + + return ( + age - 1 + if (report_time.month, report_time.day) + < ( + birth_date.month, + birth_date.day, + ) + else age + ) # 若报案时间的月日小于生成日期的月日则前推一年 + + +# TODO: 后续添加居民身份证(国徽面)和居民身份证(头像面)合并 +# noinspection PyShadowingNames +def identity_card_recognize(image, insurer_company) -> None: + """ + 识别居民身份证并整合至赔案档案 + :param image: 影像件 + :param insurer_company: 保险分公司 + :return: 无 + """ + # 请求深圳快瞳居民身份证识别接口 + response = http_client.post( + url=(url := "https://ai.inspirvision.cn/s/api/ocr/identityCard"), + headers={ + "X-RequestId-Header": image["image_guid"] + }, # 以影像件唯一标识作为请求唯一标识,用于双方联查 + data={ + "token": authenticator.get_token(servicer="szkt"), # 获取深圳快瞳访问令牌 + "imgBase64": f"data:image/{image["image_format"].lstrip(".")};base64,{image["image_base64"]}", # 影像件BASE64编码嵌入数据统一资源标识符 + }, # 深圳快瞳支持同时识别居民国徽面和头像面 + guid=md5((url + image["image_guid"]).encode("utf-8")).hexdigest().upper(), + ) + # TODO: 若请求深圳快瞳居民身份证识别接口发生异常则流转至人工处理 + if not (response.get("status") == 200 and response.get("code") == 0): + raise + + if image["image_type"] in [ + "居民身份证(国徽、头像面)", + "居民身份证(头像面)", + ]: + # noinspection PyTypeChecker + dossier["insured_person_layer"].update( + { + "insured_person": ( + insured_person := response["data"]["name"] + ), # 被保险人 + "identity_type": (identity_type := "居民身份证"), # 证件类型 + "identity_number": ( + indentity_number := response["data"]["idNo"] + ), # 证件号码 + "gender": response["data"]["sex"], # 性别 + "birth_date": ( + birth_date := datetime.strptime( + response["data"]["birthday"], "%Y-%m-%d" + ) + ), # 出生日期,转为日期时间(datetime对象),格式默认为%Y-%m-%d + "age": calculate_age( + dossier["report_layer"]["report_time"], birth_date + ), # 年龄 + "province": ( + residential_address := parse_location(response["data"]["address"]) + ).get( + "province" + ), # 就住址解析为所在省、市、区和详细地址 + "city": residential_address.get("city"), + "district": residential_address.get("county"), + "detailed_address": residential_address.get("detail"), + } + ) + + # 根据保险分公司、被保险人、证件类型、证件号码和出险时间查询个单 + dossier["insured_persons_layer"] = master_data.query_liabilities( + insurer_company, + insured_person, + identity_type, + indentity_number, + dossier["report_layer"]["report_time"].strftime("%Y-%m-%d"), + ) + + if image["image_type"] in [ + "居民身份证(国徽、头像面)", + "居民身份证(国徽面)", + ]: + # noinspection PyTypeChecker + dossier["insured_person_layer"].update( + { + "commencement_date": datetime.strptime( + (period := response["data"]["validDate"].split("-"))[0], + "%Y.%m.%d", + ), # 就有效期限解析为有效起期和有效止期。其中,若有效止期为长期则默认为9999-12-31 + "termination_date": ( + datetime(9999, 12, 31) + if period[1] == "长期" + else datetime.strptime(period[1], "%Y.%m.%d") + ), + } + ) + + # noinspection PyShadowingNames def image_classify(image_index: int, image_path: Path) -> Optional[Tuple[str, str]]: """ @@ -32,95 +230,6 @@ def image_classify(image_index: int, image_path: Path) -> Optional[Tuple[str, st :param image_path: 影像件路径(path对象) :return: 无 """ - - # noinspection PyShadowingNames - def image_read( - image_path: Path, - ) -> Optional[numpy.ndarray | None]: - """ - 打开并读取影像件 - :param image_path: 影像件路径(path对象) - :return: 影像件数据(numpy.ndarray对象) - """ - # noinspection PyBroadException - try: - # 打开并读取影像件(默认转为单通道灰度图) - image_ndarray = cv2.imread(image_path.as_posix(), cv2.IMREAD_GRAYSCALE) - if image_ndarray is None: - raise - return image_ndarray - except Exception as exception: - raise RuntimeError(f"打开并读取影像件发生异常:{str(exception)}") - - # noinspection PyShadowingNames - def image_serialize(image_format: str, image_ndarray: numpy.ndarray) -> str: - """ - 生成影像件唯一标识 - :param image_format: 影像件格式 - :param image_ndarray: 影像件数据 - :return: 影像件唯一标识 - """ - success, image_ndarray_encoded = cv2.imencode(image_format, image_ndarray) - if not success or image_ndarray_encoded is None: - raise RuntimeError("编码影像件发生异常") - - # 转为字节流并生成影像件唯一标识 - image_guid = md5(image_ndarray_encoded.tobytes()).hexdigest().upper() - return image_guid - - # noinspection PyShadowingNames - def image_compress( - image_format: str, - image_ndarray: numpy.ndarray, - image_size_specified: float = 2.0, - ) -> Optional[str]: - """ - 压缩影像件 - :param image_format: 影像件格式 - :param image_ndarray: 影像件数据 - :param image_size_specified: 指定压缩影像件大小,单位为兆字节(MB) - :return: 压缩后影像件BASE64编码 - """ - # 转为字节 - image_size_specified = image_size_specified * 1024 * 1024 - - # 通过调整影像件质量和尺寸达到压缩影像件目的(先调整影像件质量再调整影像件尺寸) - for quality in range(100, 50, -10): - image_ndarray_copy = image_ndarray.copy() - for _ in range(10): - success, image_ndarray_encoded = cv2.imencode( - image_format, - image_ndarray_copy, - params=( - [cv2.IMWRITE_PNG_COMPRESSION, 10 - quality // 10] - if image_format == "png" - else [cv2.IMWRITE_JPEG_QUALITY, quality] - ), - ) - if not success or image_ndarray_encoded is None: - break - - # 影像件BASE64编码 - image_base64 = b64encode(image_ndarray_encoded.tobytes()).decode( - "utf-8" - ) - if len(image_base64) <= image_size_specified: - return image_base64 - - image_ndarray_copy = cv2.resize( - image_ndarray_copy, - ( - int(image_ndarray_copy.shape[0] * 0.95), - int(image_ndarray_copy.shape[1] * 0.95), - ), - interpolation=cv2.INTER_AREA, - ) - # 若调整影像件尺寸后宽/高小于350像素则终止循环 - if min(image_ndarray_copy.shape[:2]) < 350: - break - - return None - # 打开并读取影像件 image_ndarray = image_read(image_path) image_index = f"{image_index:02d}" @@ -221,6 +330,887 @@ def image_classify(image_index: int, image_path: Path) -> Optional[Tuple[str, st ) +# noinspection PyShadowingNames +def mlm_recognize(image, schema) -> Optional[Dict[str, Any]]: + """ + 使用多模态大模型就理赔申请书进行光学字符识别并结构化识别结果 + :param image: 影像件 + :param schema: JSON格式 + :return: 结构化后识别结果 + """ + # 请求火山引擎多模态大模型接口并就消息内容JSON反序列化 + response = http_client.post( + url="https://ark.cn-beijing.volces.com/api/v3/chat/completions", + headers={ + "Authorization": "Bearer 2c28ab07-888c-45be-84a2-fc4b2cb5f3f2", + "Content-Type": "application/json; charset=utf-8", + }, + json=( + json_ := { + "model": "doubao-seed-1-6-251015", + "messages": [ + { + "role": "user", + "content": [ + { + "type": "image_url", + "image_url": { + "url": f"data:image/{image["image_format"].lstrip(".")};base64,{image["image_base64"]}" + }, # 影像件BASE64编码嵌入数据统一资源标识符 + }, + { + "type": "text", + "text": "请就理赔申请书进行光学字符识别、结构化识别结果并返回符合Schema的JSON数据", + }, + ], + } + ], + "temperature": 0.2, # 采样温度,基于strict平衡稳定和容错 + "thinking": { + "type": "disabled", + }, # 不使用深度思考能力 + "response_format": { + "type": "json_schema", + "json_schema": { + "name": "就理赔申请书进行光学字符识别并结构化识别结果", + "schema": schema, + "strict": True, # 启用严格遵循模式 + }, + }, + } + ), + guid=md5( + json.dumps( + json_, + sort_keys=True, + ensure_ascii=False, + ).encode("utf-8") + ) + .hexdigest() + .upper(), + ) + + # 就响应中消息内容JSON反序列化 + # noinspection PyBroadException + try: + # noinspection PyTypeChecker + return json.loads(response["choices"][0]["message"]["content"]) + except: + return None + + +# noinspection PyShadowingNames +def boc_application_recognize(image: str) -> None: + """ + 识别中银保险有限公司的理赔申请书并整合至赔案档案 + :param image: 影像件 + :return: 无 + """ + # JSON格式 + schema = { + "type": "object", + "description": "识别结果对象", + "properties": { + "申请人": { + "type": "string", + "description": "申请人,若无数据则为空字符串", + }, + "性别": { + "type": "string", + "description": "性别,若无数据则为空字符串", + }, + "年龄": { + "type": "string", + "description": "年龄,若无数据则为空字符串", + }, + "手机": { + "type": "string", + "description": "手机,若无数据则为空字符串", + }, + "所属分支行及部门": { + "type": "string", + "description": "所属分支行及部门,若无数据则为空字符串", + }, + "身份证号码": { + "type": "string", + "description": "身份证号码,若无数据则为空字符串", + }, + "就诊记录": { + "type": "array", + "description": "所有就诊记录数组", + "items": { + "type": "object", + "description": "每条就诊记录对象", + "properties": { + "就诊序号": { + "type": "string", + "description": "就诊序号,若无数据则为空字符串", + }, + "发票日期": { + "type": "string", + "description": "发票日期,若无数据则为空字符串,若有数据则格式为YYYY/MM/DD", + }, + "发票上的就诊医院/药店": { + "type": "string", + "description": "发票上的就诊医院/药店,若无数据则为空字符串", + }, + "票据张数": { + "type": "string", + "description": "票据张数,若无数据则为空字符串", + }, + "票据金额": { + "type": "string", + "description": "票据金额,若无数据则为空字符串,若有数据则保留两位小数", + }, + "诊断": { + "type": "string", + "description": "诊断,若无数据则为空字符串", + }, + }, + "required": [ + "发票日期", + "发票上的就诊医院/药店", + "诊断", + ], # 就诊记录必须字段 + "additionalProperties": False, # 禁止就就诊记录新增属性 + }, + }, + "票据金额合计": { + "type": "string", + "description": "票据金额合计,若无数据则为空字符串,若有数据则保留两位小数", + }, + "开户银行": { + "type": "string", + "description": "开户银行,若无数据则为空字符串,请注意开户银行可能为多行", + }, + "户名": { + "type": "string", + "description": "户名,若无数据则为空字符串", + }, + "账号": { + "type": "string", + "description": "账号,若无数据则为空字符串", + }, + }, + "required": [ + "申请人", + "手机", + "身份证号码", + "就诊记录", + "开户银行", + "户名", + "账号", + ], # JSON结构必须字段 + "additionalProperties": False, # 禁止就JSON结构新增属性 + } + + # 使用多模态大模型就理赔申请书进行光学字符识别并结构化识别结果 + recognition = mlm_recognize(image, schema) + # TODO: 若识别中银保险有限公司的理赔申请书并整合至赔案档案发生异常则流转至人工处理 + if not recognition: + raise + dossier["insured_person_layer"].update( + { + "phone_number": recognition["手机"], + "account": recognition["户名"], + "account_bank": recognition["开户银行"], + "account_number": recognition["账号"], + } + ) + + +# noinspection PyShadowingNames +def application_recognize(image, insurer_company) -> None: + """ + 识别理赔申请书并整合至赔案档案 + :param image: 影像件 + :param insurer_company: 保险分公司 + :return: 无 + """ + + # 根据保险分公司匹配处理方法 + match insurer_company: + # 中银保险有限公司 + case _ if insurer_company.startswith("中银保险有限公司"): + boc_application_recognize(image) + + +# noinspection PyShadowingNames +def fuzzy_match(contents: list, key: str) -> Optional[str]: + """ + 根据内容列表(基于深圳快瞳增值税发票和医疗收费票据识别结果)模糊匹配键名 + :param contents: 内容列表 + :param key: 键名 + :return 值 + """ + # 若内容列表为空值则返回None + if not contents: + return None + + # noinspection PyInconsistentReturns + match contents[0].keys(): + # 对应深圳快瞳增值税发票识别结果 + case _ if "desc" in contents[0].keys(): + for content in contents: + if content["desc"] == key: + return content["value"] if content["value"] else None + + candidates = [] + for content in contents: + candidates.append( + ( + content["value"], + fuzz.WRatio( + content["desc"], key, force_ascii=False + ), # 基于加权莱文斯坦距离算法计算所有键名和指定键名的相似度 + ) + ) + + return ( + (result[0] if result[0] else None) + if (result := max(candidates, key=lambda x: x[1]))[1] >= 80 + else None + ) # 返回似度>=80且最大的值 + + # 对应深圳快瞳医疗收费票据识别结果 + case _ if "name" in contents[0].keys(): + for content in contents: + if content["name"] == key: + return ( + content["word"]["value"] if content["word"]["value"] else None + ) + + candidates = [] + for content in contents: + candidates.append( + ( + content["word"]["value"], + fuzz.WRatio( + content["name"], key, force_ascii=False + ), # 基于加权莱文斯坦距离算法计算所有键名和指定键名的相似度 + ) + ) + + return ( + (result[0] if result[0] else None) + if (result := max(candidates, key=lambda x: x[1]))[1] >= 80 + else None + ) # 返回>=80且最大的相似度的值 + + +def parse_item(item: str) -> Tuple[str, Optional[str]]: + """ + 根据明细项解析明细项类别和具体内容,并根据具体内容查询药品/医疗服务 + :param item: 明细项 + return 明细项类别和药品/医疗服务 + """ + if match := re.match( + r"^\*(?P.*?)\*(?P.*)$", + item, + ): + return match.group("category"), master_data.query_medicine( + match.group("specific") + ) + # 一般增值税发票明细项格式形如*{category}*{specific},其中category为明细项类别,例如中成药;specific为明细项具体内容,例如[同仁堂]金贵肾气水蜜丸 300丸/瓶,需要据此查询药品。其它格式则将明细项内容作为明细项类别,药品为空值 + else: + return item, None + + +# noinspection PyShadowingNames +def receipt_recognize(image, insurer_company) -> None: + """ + 识别票据并整合至赔案档案 + :param image: 影像件 + :param insurer_company: 保险分公司 + :return: 空 + """ + # 初始化票据数据 + receipt = {"image_index": image["image_index"]} + # 请求深圳快瞳票据查验接口(兼容增值税发票、医疗门诊/住院收费票据) + response = http_client.post( + url=(url := "https://ai.inspirvision.cn/s/api/ocr/invoiceCheckAll"), + headers={ + "X-RequestId-Header": image["image_guid"] + }, # 以影像件唯一标识作为请求唯一标识,用于双方联查 + data={ + "token": authenticator.get_token(servicer="szkt"), # 获取深圳快瞳访问令牌 + "imgBase64": f"data:image/{image["image_format"].lstrip(".")};base64,{image["image_base64"]}", # 影像件BASE64编码嵌入数据统一资源标识符 + }, + guid=md5((url + image["image_guid"]).encode("utf-8")).hexdigest().upper(), + ) + # 若查验状态为真票或红票则直接整合至赔案档案 + if response.get("status") == 200 and response.get("code") == 10000: + # noinspection PyTypeChecker + match response["data"]["productCode"]: + # 增值税发票,目前深圳快瞳支持全电发票和全电纸质发票、区块链发票和增值税发票查验 + case "003082": + # noinspection PyTypeChecker + receipt.update( + { + "verification": ( + "真票" + if response["data"]["details"]["invoiceTypeNo"] == "0" + else "红票" + ), # 红票为状态为失控、作废、已红冲、部分红冲和全额红冲的票据 + "number": response["data"]["details"]["number"], + "code": ( + response["data"]["details"]["code"] + if response["data"]["details"]["code"] + else None + ), + "date": datetime.strptime( + response["data"]["details"]["date"], "%Y年%m月%d日" + ), # 转为日期时间(datetime对象) + "verification_code": response["data"]["details"]["check_code"], + "amount": Decimal( + response["data"]["details"]["total"] + ).quantize( + Decimal("0.00"), + rounding=ROUND_HALF_UP, + ), # 深圳快瞳票据查验接口中开票金额由字符串转为Decimal,保留两位小数 + "payer": response["data"]["details"]["buyer"], + "institution": response["data"]["details"]["seller"], + "items": [ + { + "item": item["name"], + "quantity": ( + Decimal(item["quantity"]).quantize( + Decimal("0.00"), + rounding=ROUND_HALF_UP, + ) + if item["quantity"] + else Decimal("0.00") + ), # 深圳快瞳票据查验接口中明细单位由空字符转为None,若非空字符由字符串转为Decimal,保留两位小数 + "amount": ( + Decimal(item["total"]) + Decimal(item["tax"]) + ).quantize( + Decimal("0.00"), + rounding=ROUND_HALF_UP, + ), # 深圳快瞳票据查验接口中明细的金额和税额由字符串转为Decimal,保留两位小数,并求和 + } + for item in response["data"]["details"]["items"] + ], + "remarks": ( + response["data"]["details"]["remark"] + if response["data"]["details"]["remark"] + else None + ), + } + ) + # 医疗门诊、住院收费票据 + case "003081": + # noinspection PyTypeChecker + receipt.update( + { + "verification": ( + "真票" + if response["data"]["flushedRed"] == "true" + else "红票" + ), + "number": response["data"]["billNumber"], + "code": response["data"]["billCode"], + "date": datetime.strptime( + response["data"]["invoiceDate"], "%Y-%m-%d %H:%M:%S" + ), # 转为日期时间(datetime对象) + "admission_date": ( + datetime.strptime( + response["data"]["hospitalizationDate"].split("-")[0], + "%Y%m%d", + ) + if response["data"]["hospitalizationDate"] + else None + ), # 深圳快瞳票据查验接口中住院日期解析为入院日期和出院日期 + "discharge_date": ( + datetime.strptime( + response["data"]["hospitalizationDate"].split("-")[1], + "%Y%m%d", + ) + if response["data"]["hospitalizationDate"] + else None + ), + "verification_code": response["data"]["checkCode"], + "amount": Decimal(response["data"]["amount"]).quantize( + Decimal("0.00"), + rounding=ROUND_HALF_UP, + ), + "payer": response["data"]["payer"], + "institution": response["data"]["receivablesInstitution"], + "items": [ + { + "item": item["itemName"], + "quantity": Decimal(item["number"]).quantize( + Decimal("0.00"), + rounding=ROUND_HALF_UP, + ), + "amount": Decimal(item["totalAmount"]).quantize( + Decimal("0.00"), + rounding=ROUND_HALF_UP, + ), + } + for item in response["data"]["feeitems"] + ], + "personal_self_payment": Decimal( + response["data"]["personalExpense"] + if response["data"]["personalExpense"] + else Decimal("0.00") + ).quantize( + Decimal("0.00"), + rounding=ROUND_HALF_UP, + ), + "non_medical_payment": Decimal( + response["data"]["personalPay"] + if response["data"]["personalPay"] + else Decimal("0.00") + ).quantize( + Decimal("0.00"), + rounding=ROUND_HALF_UP, + ), + "medical_payment": ( + Decimal(response["data"]["medicarePay"]) + if response["data"]["medicarePay"] + else Decimal("0.00") + + Decimal( + response["data"]["otherPayment"] + if response["data"]["otherPayment"] + else Decimal("0.00") + ) + ).quantize( + Decimal("0.00"), + rounding=ROUND_HALF_UP, + ), # 包括医保统筹基金支付和其它支付(例如,退休补充支付) + } + ) + # 若查验状态为假票或无法查验则再请求深圳快瞳票据识别接口接整合至赔案档案 + else: + receipt["verification"] = ( + "假票" + if response.get("status") == 400 + and (response.get("code") == 10100 or response.get("code") == 10001) + else "无法查验" + ) # 假票:查无此票或查验成功五要素不一致 + + match image["image_type"]: + case "增值税发票": + # 请求深圳快瞳增值税发票识别接口 + response = http_client.post( + url=(url := "https://ai.inspirvision.cn/s/api/ocr/vatInvoice"), + headers={ + "X-RequestId-Header": image["image_guid"] + }, # 以影像件唯一标识作为请求唯一标识,用于双方联查 + data={ + "token": authenticator.get_token( + servicer="szkt" + ), # 获取深圳快瞳访问令牌 + "imgBase64": f"data:image/{image["image_format"].lstrip(".")};base64,{image["image_base64"]}", # 影像件BASE64编码嵌入数据统一资源标识符 + }, + guid=md5((url + image["image_guid"]).encode("utf-8")) + .hexdigest() + .upper(), + ) + # TODO: 若请求深圳快瞳增值税发票识别接口发生异常则流转至人工处理 + if not (response.get("status") == 200 and response.get("code") == 0): + raise + + match fuzzy_match(response["data"], "发票类型"): + case "电子发票(普通发票)": + # noinspection PyTypeChecker + receipt.update( + { + "number": fuzzy_match(response["data"], "发票号码"), + "code": fuzzy_match(response["data"], "发票代码"), + "date": datetime.strptime( + fuzzy_match(response["data"], "开票日期"), + "%Y年%m月%d日", + ), + "verification_code": fuzzy_match( + response["data"], "校验码" + ), + "amount": Decimal( + fuzzy_match(response["data"], "小写金额").replace( + "¥", "" + ) + ).quantize( + Decimal("0.00"), + rounding=ROUND_HALF_UP, + ), + "payer": fuzzy_match(response["data"], "购买方名称"), + "institution": fuzzy_match( + response["data"], "销售方名称" + ), + "items": [ + { + "item": name, + "quantity": Decimal(quantity).quantize( + Decimal("0.00"), + rounding=ROUND_HALF_UP, + ), + "amount": ( + Decimal(amount) + Decimal(tax) + ).quantize( + Decimal("0.00"), + rounding=ROUND_HALF_UP, + ), # 深圳快瞳票据识别接口中明细的金额和税额由字符串转为Decimal,保留两位小数,并求和 + } + for name, quantity, amount, tax in zip( + [ + x["value"] + for x in response["data"] + if re.match( + r"^项目名称(\d+)?$", + x["desc"], + ) + ], + [ + x["value"] + for x in response["data"] + if re.match( + r"^数量(\d+)?$", + x["desc"], + ) + ], + [ + x["value"] + for x in response["data"] + if re.match( + r"^金额(\d+)?$", + x["desc"], + ) + ], + [ + x["value"] + for x in response["data"] + if re.match( + r"^税额(\d+)?$", + x["desc"], + ) + ], + ) + ], + "remarks": fuzzy_match(response["data"], "备注"), + } + ) + case "增值税普通发票(卷票)": + # noinspection PyTypeChecker + receipt.update( + { + "number": fuzzy_match(response["data"], "发票号码"), + "code": fuzzy_match(response["data"], "发票代码"), + "date": datetime.strptime( + fuzzy_match(response["data"], "开票日期"), + "%Y-%m-%d", + ), + "verification_code": fuzzy_match( + response["data"], "校验码" + ), + "amount": Decimal( + fuzzy_match( + response["data"], "合计金额(小写)" + ).replace("¥", "") + ).quantize( + Decimal("0.00"), + rounding=ROUND_HALF_UP, + ), + "payer": fuzzy_match(response["data"], "购买方名称"), + "institution": fuzzy_match( + response["data"], "销售方名称" + ), + "items": [ + { + "item": name, + "quantity": Decimal(quantity).quantize( + Decimal("0.00"), + rounding=ROUND_HALF_UP, + ), + "amount": Decimal(amount).quantize( + Decimal("0.00"), + rounding=ROUND_HALF_UP, + ), # 深圳快瞳票据识别接口中明细的金额和税额由字符串转为Decimal,保留两位小数,并求和 + } + for name, quantity, amount in zip( + [ + x["value"] + for x in response["data"] + if re.match( + r"^项目名称明细(\d+)?$", + x["desc"], + ) + ], + [ + x["value"] + for x in response["data"] + if re.match( + r"^项目数量明细(\d+)?$", + x["desc"], + ) + ], + [ + x["value"] + for x in response["data"] + if re.match( + r"^项目金额明细(\d+)?$", + x["desc"], + ) + ], + ) + ], + "remarks": fuzzy_match(response["data"], "备注"), + } + ) + case "医疗门诊收费票据" | "医疗住院收费票据": + # 请求深圳快瞳医疗收费票据识别接口 + response = http_client.post( + url=(url := "https://ai.inspirvision.cn/s/api/ocr/medical"), + headers={ + "X-RequestId-Header": image["image_guid"] + }, # 以影像件唯一标识作为请求唯一标识,用于双方联查 + data={ + "token": authenticator.get_token( + servicer="szkt" + ), # 获取深圳快瞳访问令牌 + "imgBase64": f"data:image/{image["image_format"].lstrip(".")};base64,{image["image_base64"]}", # 影像件BASE64编码嵌入数据统一资源标识符 + }, + guid=md5((url + image["image_guid"]).encode("utf-8")) + .hexdigest() + .upper(), + ) + # TODO: 若请求深圳快瞳医疗收费票据识别接口发生异常则流转至人工处理 + if not (response.get("status") == 200 and response.get("code") == 0): + raise + + # noinspection PyTypeChecker + receipt.update( + { + "number": ( + receipt := ( + response["data"]["insured"][ + ( + "receipt_hospitalization" + if image["image_type"] == "医疗门诊收费票据" + else "receipt_outpatient" + ) + ] + )["receipts"][0] + )["receipt_no"][ + "value" + ], # 默认为第一张票据 + "code": receipt["global_detail"]["invoice_code"]["value"], + "date": datetime.strptime( + receipt["global_detail"]["invoice_date"]["value"], + "%Y-%m-%d", + ), + "admission_date": ( + datetime.strptime(receipt["starttime"]["value"], "%Y-%m-%d") + if isinstance(receipt["starttime"], dict) + else None + ), + "discharge_date": ( + datetime.strptime(receipt["endtime"]["value"], "%Y-%m-%d") + if isinstance(receipt["endtime"], dict) + else None + ), + "verification_code": fuzzy_match( + receipt["global_detail"]["region_specific"], + "校验码", + ), + "amount": Decimal(receipt["total_amount"]["value"]).quantize( + Decimal("0.00"), + rounding=ROUND_HALF_UP, + ), + "payer": receipt["name"]["value"], + "institution": receipt["hospital_name"]["value"], + "items": [ + { + "item": ( + item["item"]["value"] + if isinstance(item["item"], dict) + else None + ), + "quantity": Decimal( + item["number"]["value"] + if isinstance(item["number"], dict) + else Decimal("1.00") + ).quantize( + Decimal("0.00"), + rounding=ROUND_HALF_UP, + ), + "amount": Decimal( + item["total_amount"]["value"] + if isinstance(item["total_amount"], dict) + else Decimal("1.00") + ).quantize( + Decimal("0.00"), + rounding=ROUND_HALF_UP, + ), + } + for item in receipt["feeitems"] + if isinstance(item, dict) + ], + "personal_self_payment": ( + Decimal(receipt["self_cost"]["value"]).quantize( + Decimal("0.00"), + rounding=ROUND_HALF_UP, + ) + ), + "non_medical_payment": ( + Decimal(receipt["self_pay"]["value"]).quantize( + Decimal("0.00"), + rounding=ROUND_HALF_UP, + ) + ), + "medical_payment": ( + Decimal( + receipt["medicare_pay"]["value"] + ) # 医保基金统筹支付 + + ( + Decimal(receipt["addition_pay"]["value"]) + if isinstance(receipt["addition_pay"], dict) + else Decimal("0.00") + ) # 附加支付 + + ( + Decimal(receipt["third_pay"]["value"]) + if isinstance(receipt["third_pay"], dict) + else Decimal("0.00") + ) # 第三方支付 + ).quantize( + Decimal("0.00"), + rounding=ROUND_HALF_UP, + ), + } + ) + + # 根据购药及就医机构查询购药及就医机构类型 + receipt["institution_type"] = master_data.query_institution_type( + receipt["institution"] + ) + + # 根据影像件类型和购药及就医机构类型匹配处理方法 + match (image["image_type"], receipt["institution_type"]): + case ("增值税发票", "药店"): + items = ( + pandas.DataFrame(receipt["items"]) + .groupby("item") # 就相同明细项合并数量和金额 + .agg(quantity=("quantity", "sum"), amount=("amount", "sum")) + .loc[ + lambda dataframe: dataframe["amount"] != 0 + ] # 仅保留金额非0的明细项 + .reset_index() + .pipe( + lambda dataframe: dataframe.join( + dataframe["item"] + .apply( + parse_item + ) # 根据明细项解析明细项类别和具体内容,并根据具体内容查询药品/医疗服务 + .apply( + pandas.Series + ) # 就明细项类别和药品/医疗服务元组展开为两列 + .rename(columns={0: "category", 1: "medicine"}) + ) + ) + .assign( + reasonable_amount=lambda dataframe: dataframe.apply( + lambda row: Decimal( + rule_engine.evaluate( + decision="扣除明细项不合理费用", + inputs={ + "insurer_company": insurer_company, + "category": row["category"], + "medicine": row["medicine"], + "amount": row["amount"], + }, + )["reasonable_amount"] + ).quantize( + Decimal("0.00"), + rounding=ROUND_HALF_UP, + ), + axis="columns", + ) + ) # 扣除明细项不合理费用 + ) + + receipt.update( + { + "payer": ( + dossier["insured_person_layer"]["insured_person"] + if dossier["insured_person_layer"]["insured_person"] + in receipt["payer"] + else None + ), # 出险人 + "accident": "药店购药", # 出险事故 + "diagnosis": "购药拟诊", # 医疗诊断 + "personal_self_payment": Decimal("0.00"), # 个人自费金额 + "non_medical_payment": Decimal("0.00"), # 个人自付金额 + "medical_payment": Decimal("0.00"), # 医保支付金额 + "unreasonable_amount": Decimal( + receipt["amount"] - items["reasonable_amount"].sum() + ).quantize( + Decimal("0.00"), + rounding=ROUND_HALF_UP, + ), # 不合理金额 + "reasonable_amount": Decimal( + items["reasonable_amount"].sum() + ).quantize( + Decimal("0.00"), + rounding=ROUND_HALF_UP, + ), # 合理金额 + "items": items.to_dict("records"), + } + ) + # TODO: 后续完善就购药及就医类型为门诊就诊(私立医院)处理 + case ("增值税发票", "私立医院"): + receipt["购药及就医类型"] = "门诊就医" + # TODO: 后续完善就购药及就医类型为门诊就诊(公立医院)处理 + case ("医疗门诊收费票据", "公立医院"): + receipt["购药及就医类型"] = "门诊就医" + # TODO: 后续完善就购药及就医类型为住院治疗处理 + case ("医疗住院收费票据", "公立医院"): + receipt["购药及就医类型"] = "住院治疗" + # TODO: 若根据影像件类型和购药及就医机构类型匹配购药及就医类型发生异常则流转至人工处理 + case _: + raise RuntimeError( + "根据影像件类型和购药及就医机构类型匹配购药及就医类型发生异常" + ) + + dossier["receipts_layer"].append(receipt) + + +# noinspection PyShadowingNames +def bank_card_recognize(image) -> None: + """ + 识别银行卡并整合至赔案档案 + :param image: 影像件 + :return: 空 + """ + # 请求深圳快瞳银行卡识别接口 + response = http_client.post( + url=(url := "https://ai.inspirvision.cn/s/api/ocr/bankCard"), + headers={ + "X-RequestId-Header": image["image_guid"] + }, # 以影像件唯一标识作为请求唯一标识,用于双方联查 + data={ + "token": authenticator.get_token(servicer="szkt"), # 获取深圳快瞳访问令牌 + "imgBase64": f"data:image/{image["image_format"].lstrip(".")};base64,{image["image_base64"]}", # 影像件BASE64编码嵌入数据统一资源标识符 + }, + guid=md5((url + image["image_guid"]).encode("utf-8")).hexdigest().upper(), + ) + # TODO: 若响应非成功则流转至人工处理 + if not ( + response.get("status") == 200 + and response.get("code") == 0 + and response.get("data", {}).get("bankCardType") + == 1 # # 实际作业亦仅支持借记卡 + ): + raise RuntimeError("请求深圳快瞳银行卡识别接口发生异常或非借记卡") + # noinspection PyTypeChecker + dossier["insured_person_layer"].update( + { + "phone_number": None, + "account": None, + "account_bank": response["data"]["bankInfo"], + "account_number": response["data"]["cardNo"].replace(" ", ""), + } + ) + + # noinspection PyShadowingNames def image_recognize( image, @@ -232,1020 +1222,6 @@ def image_recognize( :param insurer_company: 保险分公司 :return: 无 """ - - # TODO: 后续添加居民身份证(国徽面)和居民身份证(头像面)合并 - # noinspection PyShadowingNames - def identity_card_recognize(image, insurer_company) -> None: - """ - 识别居民身份证并整合至赔案档案 - :param image: 影像件 - :param insurer_company: 保险分公司 - :return: 无 - """ - - # noinspection PyShadowingNames - def calculate_age(report_time: datetime, birth_date: datetime) -> int: - """ - 根据报案时间计算周岁 - :param report_time: 报案时间 - :param birth_date: 出生日期 - :return 周岁 - """ - age = report_time.year - birth_date.year - - return ( - age - 1 - if (report_time.month, report_time.day) - < ( - birth_date.month, - birth_date.day, - ) - else age - ) # 若报案时间的月日小于生成日期的月日则前推一年 - - # 请求深圳快瞳居民身份证识别接口 - response = http_client.post( - url=(url := "https://ai.inspirvision.cn/s/api/ocr/identityCard"), - headers={ - "X-RequestId-Header": image["image_guid"] - }, # 以影像件唯一标识作为请求唯一标识,用于双方联查 - data={ - "token": authenticator.get_token( - servicer="szkt" - ), # 获取深圳快瞳访问令牌 - "imgBase64": f"data:image/{image["image_format"].lstrip(".")};base64,{image["image_base64"]}", # 影像件BASE64编码嵌入数据统一资源标识符 - }, # 深圳快瞳支持同时识别居民国徽面和头像面 - guid=md5((url + image["image_guid"]).encode("utf-8")).hexdigest().upper(), - ) - # TODO: 若请求深圳快瞳居民身份证识别接口发生异常则流转至人工处理 - if not (response.get("status") == 200 and response.get("code") == 0): - raise - - if image["image_type"] in [ - "居民身份证(国徽、头像面)", - "居民身份证(头像面)", - ]: - # noinspection PyTypeChecker - dossier["insured_person_layer"].update( - { - "insured_person": ( - insured_person := response["data"]["name"] - ), # 被保险人 - "identity_type": (identity_type := "居民身份证"), # 证件类型 - "identity_number": ( - indentity_number := response["data"]["idNo"] - ), # 证件号码 - "gender": response["data"]["sex"], # 性别 - "birth_date": ( - birth_date := datetime.strptime( - response["data"]["birthday"], "%Y-%m-%d" - ) - ), # 出生日期,转为日期时间(datetime对象),格式默认为%Y-%m-%d - "age": calculate_age( - dossier["report_layer"]["report_time"], birth_date - ), # 年龄 - "province": ( - residential_address := parse_location( - response["data"]["address"] - ) - ).get( - "province" - ), # 就住址解析为所在省、市、区和详细地址 - "city": residential_address.get("city"), - "district": residential_address.get("county"), - "detailed_address": residential_address.get("detail"), - } - ) - - # 根据保险分公司、被保险人、证件类型、证件号码和出险时间查询个单 - dossier["insured_persons_layer"] = master_data.query_liabilities( - insurer_company, - insured_person, - identity_type, - indentity_number, - dossier["report_layer"]["report_time"].strftime("%Y-%m-%d"), - ) - - if image["image_type"] in [ - "居民身份证(国徽、头像面)", - "居民身份证(国徽面)", - ]: - # noinspection PyTypeChecker - dossier["insured_person_layer"].update( - { - "commencement_date": datetime.strptime( - (period := response["data"]["validDate"].split("-"))[0], - "%Y.%m.%d", - ), # 就有效期限解析为有效起期和有效止期。其中,若有效止期为长期则默认为9999-12-31 - "termination_date": ( - datetime(9999, 12, 31) - if period[1] == "长期" - else datetime.strptime(period[1], "%Y.%m.%d") - ), - } - ) - - # noinspection PyShadowingNames - def application_recognize(image, insurer_company) -> None: - """ - 识别理赔申请书并整合至赔案档案 - :param image: 影像件 - :param insurer_company: 保险分公司 - :return: 无 - """ - - # noinspection PyShadowingNames - def mlm_recognize(image, schema) -> Optional[Dict[str, Any]]: - """ - 使用多模态大模型就理赔申请书进行光学字符识别并结构化识别结果 - :param image: 影像件 - :param schema: JSON格式 - :return: 结构化后识别结果 - """ - # 请求火山引擎多模态大模型接口并就消息内容JSON反序列化 - response = http_client.post( - url="https://ark.cn-beijing.volces.com/api/v3/chat/completions", - headers={ - "Authorization": "Bearer 2c28ab07-888c-45be-84a2-fc4b2cb5f3f2", - "Content-Type": "application/json; charset=utf-8", - }, - json=( - json_ := { - "model": "doubao-seed-1-6-251015", - "messages": [ - { - "role": "user", - "content": [ - { - "type": "image_url", - "image_url": { - "url": f"data:image/{image["image_format"].lstrip(".")};base64,{image["image_base64"]}" - }, # 影像件BASE64编码嵌入数据统一资源标识符 - }, - { - "type": "text", - "text": "请就理赔申请书进行光学字符识别、结构化识别结果并返回符合Schema的JSON数据", - }, - ], - } - ], - "temperature": 0.2, # 采样温度,基于strict平衡稳定和容错 - "thinking": { - "type": "disabled", - }, # 不使用深度思考能力 - "response_format": { - "type": "json_schema", - "json_schema": { - "name": "就理赔申请书进行光学字符识别并结构化识别结果", - "schema": schema, - "strict": True, # 启用严格遵循模式 - }, - }, - } - ), - guid=md5( - json.dumps( - json_, - sort_keys=True, - ensure_ascii=False, - ).encode("utf-8") - ) - .hexdigest() - .upper(), - ) - - # 就响应中消息内容JSON反序列化 - # noinspection PyBroadException - try: - # noinspection PyTypeChecker - return json.loads(response["choices"][0]["message"]["content"]) - except: - return None - - # noinspection PyShadowingNames - def boc_application_recognize(image: str) -> None: - """ - 识别中银保险有限公司的理赔申请书并整合至赔案档案 - :param image: 影像件 - :return: 无 - """ - # JSON格式 - schema = { - "type": "object", - "description": "识别结果对象", - "properties": { - "申请人": { - "type": "string", - "description": "申请人,若无数据则为空字符串", - }, - "性别": { - "type": "string", - "description": "性别,若无数据则为空字符串", - }, - "年龄": { - "type": "string", - "description": "年龄,若无数据则为空字符串", - }, - "手机": { - "type": "string", - "description": "手机,若无数据则为空字符串", - }, - "所属分支行及部门": { - "type": "string", - "description": "所属分支行及部门,若无数据则为空字符串", - }, - "身份证号码": { - "type": "string", - "description": "身份证号码,若无数据则为空字符串", - }, - "就诊记录": { - "type": "array", - "description": "所有就诊记录数组", - "items": { - "type": "object", - "description": "每条就诊记录对象", - "properties": { - "就诊序号": { - "type": "string", - "description": "就诊序号,若无数据则为空字符串", - }, - "发票日期": { - "type": "string", - "description": "发票日期,若无数据则为空字符串,若有数据则格式为YYYY/MM/DD", - }, - "发票上的就诊医院/药店": { - "type": "string", - "description": "发票上的就诊医院/药店,若无数据则为空字符串", - }, - "票据张数": { - "type": "string", - "description": "票据张数,若无数据则为空字符串", - }, - "票据金额": { - "type": "string", - "description": "票据金额,若无数据则为空字符串,若有数据则保留两位小数", - }, - "诊断": { - "type": "string", - "description": "诊断,若无数据则为空字符串", - }, - }, - "required": [ - "发票日期", - "发票上的就诊医院/药店", - "诊断", - ], # 就诊记录必须字段 - "additionalProperties": False, # 禁止就就诊记录新增属性 - }, - }, - "票据金额合计": { - "type": "string", - "description": "票据金额合计,若无数据则为空字符串,若有数据则保留两位小数", - }, - "开户银行": { - "type": "string", - "description": "开户银行,若无数据则为空字符串,请注意开户银行可能为多行", - }, - "户名": { - "type": "string", - "description": "户名,若无数据则为空字符串", - }, - "账号": { - "type": "string", - "description": "账号,若无数据则为空字符串", - }, - }, - "required": [ - "申请人", - "手机", - "身份证号码", - "就诊记录", - "开户银行", - "户名", - "账号", - ], # JSON结构必须字段 - "additionalProperties": False, # 禁止就JSON结构新增属性 - } - - # 使用多模态大模型就理赔申请书进行光学字符识别并结构化识别结果 - recognition = mlm_recognize(image, schema) - # TODO: 若识别中银保险有限公司的理赔申请书并整合至赔案档案发生异常则流转至人工处理 - if not recognition: - raise - dossier["insured_person_layer"].update( - { - "phone_number": recognition["手机"], - "account": recognition["户名"], - "account_bank": recognition["开户银行"], - "account_number": recognition["账号"], - } - ) - - # 根据保险分公司匹配处理方法 - match insurer_company: - # 中银保险有限公司 - case _ if insurer_company.startswith("中银保险有限公司"): - boc_application_recognize(image) - - # noinspection PyShadowingNames - def receipt_recognize(image, insurer_company) -> None: - """ - 识别票据并整合至赔案档案 - :param image: 影像件 - :param insurer_company: 保险分公司 - :return: 空 - """ - - # noinspection PyShadowingNames - def fuzzy_match(contents: list, key: str) -> Optional[str]: - """ - 根据内容列表(基于深圳快瞳增值税发票和医疗收费票据识别结果)模糊匹配键名 - :param contents: 内容列表 - :param key: 键名 - :return 值 - """ - # 若内容列表为空值则返回None - if not contents: - return None - - # noinspection PyInconsistentReturns - match contents[0].keys(): - # 对应深圳快瞳增值税发票识别结果 - case _ if "desc" in contents[0].keys(): - for content in contents: - if content["desc"] == key: - return content["value"] if content["value"] else None - - candidates = [] - for content in contents: - candidates.append( - ( - content["value"], - fuzz.WRatio( - content["desc"], key, force_ascii=False - ), # 基于加权莱文斯坦距离算法计算所有键名和指定键名的相似度 - ) - ) - - return ( - (result[0] if result[0] else None) - if (result := max(candidates, key=lambda x: x[1]))[1] >= 80 - else None - ) # 返回似度>=80且最大的值 - - # 对应深圳快瞳医疗收费票据识别结果 - case _ if "name" in contents[0].keys(): - for content in contents: - if content["name"] == key: - return ( - content["word"]["value"] - if content["word"]["value"] - else None - ) - - candidates = [] - for content in contents: - candidates.append( - ( - content["word"]["value"], - fuzz.WRatio( - content["name"], key, force_ascii=False - ), # 基于加权莱文斯坦距离算法计算所有键名和指定键名的相似度 - ) - ) - - return ( - (result[0] if result[0] else None) - if (result := max(candidates, key=lambda x: x[1]))[1] >= 80 - else None - ) # 返回>=80且最大的相似度的值 - - def parse_item(item: str) -> Tuple[str, Optional[str]]: - """ - 根据明细项解析明细项类别和具体内容,并根据具体内容查询药品/医疗服务 - :param item: 明细项 - return 明细项类别和药品/医疗服务 - """ - if match := re.match( - r"^\*(?P.*?)\*(?P.*)$", - item, - ): - return match.group("category"), master_data.query_medicine( - match.group("specific") - ) - # 一般增值税发票明细项格式形如*{category}*{specific},其中category为明细项类别,例如中成药;specific为明细项具体内容,例如[同仁堂]金贵肾气水蜜丸 300丸/瓶,需要据此查询药品。其它格式则将明细项内容作为明细项类别,药品为空值 - else: - return item, None - - # 初始化票据数据 - receipt = {"image_index": image["image_index"]} - # 请求深圳快瞳票据查验接口(兼容增值税发票、医疗门诊/住院收费票据) - response = http_client.post( - url=(url := "https://ai.inspirvision.cn/s/api/ocr/invoiceCheckAll"), - headers={ - "X-RequestId-Header": image["image_guid"] - }, # 以影像件唯一标识作为请求唯一标识,用于双方联查 - data={ - "token": authenticator.get_token( - servicer="szkt" - ), # 获取深圳快瞳访问令牌 - "imgBase64": f"data:image/{image["image_format"].lstrip(".")};base64,{image["image_base64"]}", # 影像件BASE64编码嵌入数据统一资源标识符 - }, - guid=md5((url + image["image_guid"]).encode("utf-8")).hexdigest().upper(), - ) - # 若查验状态为真票或红票则直接整合至赔案档案 - if response.get("status") == 200 and response.get("code") == 10000: - # noinspection PyTypeChecker - match response["data"]["productCode"]: - # 增值税发票,目前深圳快瞳支持全电发票和全电纸质发票、区块链发票和增值税发票查验 - case "003082": - # noinspection PyTypeChecker - receipt.update( - { - "verification": ( - "真票" - if response["data"]["details"]["invoiceTypeNo"] == "0" - else "红票" - ), # 红票为状态为失控、作废、已红冲、部分红冲和全额红冲的票据 - "number": response["data"]["details"]["number"], - "code": ( - response["data"]["details"]["code"] - if response["data"]["details"]["code"] - else None - ), - "date": datetime.strptime( - response["data"]["details"]["date"], "%Y年%m月%d日" - ), # 转为日期时间(datetime对象) - "verification_code": response["data"]["details"][ - "check_code" - ], - "amount": Decimal( - response["data"]["details"]["total"] - ).quantize( - Decimal("0.00"), - rounding=ROUND_HALF_UP, - ), # 深圳快瞳票据查验接口中开票金额由字符串转为Decimal,保留两位小数 - "payer": response["data"]["details"]["buyer"], - "institution": response["data"]["details"]["seller"], - "items": [ - { - "item": item["name"], - "quantity": ( - Decimal(item["quantity"]).quantize( - Decimal("0.00"), - rounding=ROUND_HALF_UP, - ) - if item["quantity"] - else Decimal("0.00") - ), # 深圳快瞳票据查验接口中明细单位由空字符转为None,若非空字符由字符串转为Decimal,保留两位小数 - "amount": ( - Decimal(item["total"]) + Decimal(item["tax"]) - ).quantize( - Decimal("0.00"), - rounding=ROUND_HALF_UP, - ), # 深圳快瞳票据查验接口中明细的金额和税额由字符串转为Decimal,保留两位小数,并求和 - } - for item in response["data"]["details"]["items"] - ], - "remarks": ( - response["data"]["details"]["remark"] - if response["data"]["details"]["remark"] - else None - ), - } - ) - # 医疗门诊、住院收费票据 - case "003081": - # noinspection PyTypeChecker - receipt.update( - { - "verification": ( - "真票" - if response["data"]["flushedRed"] == "true" - else "红票" - ), - "number": response["data"]["billNumber"], - "code": response["data"]["billCode"], - "date": datetime.strptime( - response["data"]["invoiceDate"], "%Y-%m-%d %H:%M:%S" - ), # 转为日期时间(datetime对象) - "admission_date": ( - datetime.strptime( - response["data"]["hospitalizationDate"].split("-")[ - 0 - ], - "%Y%m%d", - ) - if response["data"]["hospitalizationDate"] - else None - ), # 深圳快瞳票据查验接口中住院日期解析为入院日期和出院日期 - "discharge_date": ( - datetime.strptime( - response["data"]["hospitalizationDate"].split("-")[ - 1 - ], - "%Y%m%d", - ) - if response["data"]["hospitalizationDate"] - else None - ), - "verification_code": response["data"]["checkCode"], - "amount": Decimal(response["data"]["amount"]).quantize( - Decimal("0.00"), - rounding=ROUND_HALF_UP, - ), - "payer": response["data"]["payer"], - "institution": response["data"]["receivablesInstitution"], - "items": [ - { - "item": item["itemName"], - "quantity": Decimal(item["number"]).quantize( - Decimal("0.00"), - rounding=ROUND_HALF_UP, - ), - "amount": Decimal(item["totalAmount"]).quantize( - Decimal("0.00"), - rounding=ROUND_HALF_UP, - ), - } - for item in response["data"]["feeitems"] - ], - "personal_self_payment": Decimal( - response["data"]["personalExpense"] - if response["data"]["personalExpense"] - else Decimal("0.00") - ).quantize( - Decimal("0.00"), - rounding=ROUND_HALF_UP, - ), - "non_medical_payment": Decimal( - response["data"]["personalPay"] - if response["data"]["personalPay"] - else Decimal("0.00") - ).quantize( - Decimal("0.00"), - rounding=ROUND_HALF_UP, - ), - "medical_payment": ( - Decimal(response["data"]["medicarePay"]) - if response["data"]["medicarePay"] - else Decimal("0.00") - + Decimal( - response["data"]["otherPayment"] - if response["data"]["otherPayment"] - else Decimal("0.00") - ) - ).quantize( - Decimal("0.00"), - rounding=ROUND_HALF_UP, - ), # 包括医保统筹基金支付和其它支付(例如,退休补充支付) - } - ) - # 若查验状态为假票或无法查验则再请求深圳快瞳票据识别接口接整合至赔案档案 - else: - receipt["verification"] = ( - "假票" - if response.get("status") == 400 - and (response.get("code") == 10100 or response.get("code") == 10001) - else "无法查验" - ) # 假票:查无此票或查验成功五要素不一致 - - match image["image_type"]: - case "增值税发票": - # 请求深圳快瞳增值税发票识别接口 - response = http_client.post( - url=(url := "https://ai.inspirvision.cn/s/api/ocr/vatInvoice"), - headers={ - "X-RequestId-Header": image["image_guid"] - }, # 以影像件唯一标识作为请求唯一标识,用于双方联查 - data={ - "token": authenticator.get_token( - servicer="szkt" - ), # 获取深圳快瞳访问令牌 - "imgBase64": f"data:image/{image["image_format"].lstrip(".")};base64,{image["image_base64"]}", # 影像件BASE64编码嵌入数据统一资源标识符 - }, - guid=md5((url + image["image_guid"]).encode("utf-8")) - .hexdigest() - .upper(), - ) - # TODO: 若请求深圳快瞳增值税发票识别接口发生异常则流转至人工处理 - if not ( - response.get("status") == 200 and response.get("code") == 0 - ): - raise - - match fuzzy_match(response["data"], "发票类型"): - case "电子发票(普通发票)": - # noinspection PyTypeChecker - receipt.update( - { - "number": fuzzy_match(response["data"], "发票号码"), - "code": fuzzy_match(response["data"], "发票代码"), - "date": datetime.strptime( - fuzzy_match(response["data"], "开票日期"), - "%Y年%m月%d日", - ), - "verification_code": fuzzy_match( - response["data"], "校验码" - ), - "amount": Decimal( - fuzzy_match( - response["data"], "小写金额" - ).replace("¥", "") - ).quantize( - Decimal("0.00"), - rounding=ROUND_HALF_UP, - ), - "payer": fuzzy_match( - response["data"], "购买方名称" - ), - "institution": fuzzy_match( - response["data"], "销售方名称" - ), - "items": [ - { - "item": name, - "quantity": Decimal(quantity).quantize( - Decimal("0.00"), - rounding=ROUND_HALF_UP, - ), - "amount": ( - Decimal(amount) + Decimal(tax) - ).quantize( - Decimal("0.00"), - rounding=ROUND_HALF_UP, - ), # 深圳快瞳票据识别接口中明细的金额和税额由字符串转为Decimal,保留两位小数,并求和 - } - for name, quantity, amount, tax in zip( - [ - x["value"] - for x in response["data"] - if re.match( - r"^项目名称(\d+)?$", - x["desc"], - ) - ], - [ - x["value"] - for x in response["data"] - if re.match( - r"^数量(\d+)?$", - x["desc"], - ) - ], - [ - x["value"] - for x in response["data"] - if re.match( - r"^金额(\d+)?$", - x["desc"], - ) - ], - [ - x["value"] - for x in response["data"] - if re.match( - r"^税额(\d+)?$", - x["desc"], - ) - ], - ) - ], - "remarks": fuzzy_match(response["data"], "备注"), - } - ) - case "增值税普通发票(卷票)": - # noinspection PyTypeChecker - receipt.update( - { - "number": fuzzy_match(response["data"], "发票号码"), - "code": fuzzy_match(response["data"], "发票代码"), - "date": datetime.strptime( - fuzzy_match(response["data"], "开票日期"), - "%Y-%m-%d", - ), - "verification_code": fuzzy_match( - response["data"], "校验码" - ), - "amount": Decimal( - fuzzy_match( - response["data"], "合计金额(小写)" - ).replace("¥", "") - ).quantize( - Decimal("0.00"), - rounding=ROUND_HALF_UP, - ), - "payer": fuzzy_match( - response["data"], "购买方名称" - ), - "institution": fuzzy_match( - response["data"], "销售方名称" - ), - "items": [ - { - "item": name, - "quantity": Decimal(quantity).quantize( - Decimal("0.00"), - rounding=ROUND_HALF_UP, - ), - "amount": Decimal(amount).quantize( - Decimal("0.00"), - rounding=ROUND_HALF_UP, - ), # 深圳快瞳票据识别接口中明细的金额和税额由字符串转为Decimal,保留两位小数,并求和 - } - for name, quantity, amount in zip( - [ - x["value"] - for x in response["data"] - if re.match( - r"^项目名称明细(\d+)?$", - x["desc"], - ) - ], - [ - x["value"] - for x in response["data"] - if re.match( - r"^项目数量明细(\d+)?$", - x["desc"], - ) - ], - [ - x["value"] - for x in response["data"] - if re.match( - r"^项目金额明细(\d+)?$", - x["desc"], - ) - ], - ) - ], - "remarks": fuzzy_match(response["data"], "备注"), - } - ) - case "医疗门诊收费票据" | "医疗住院收费票据": - # 请求深圳快瞳医疗收费票据识别接口 - response = http_client.post( - url=(url := "https://ai.inspirvision.cn/s/api/ocr/medical"), - headers={ - "X-RequestId-Header": image["image_guid"] - }, # 以影像件唯一标识作为请求唯一标识,用于双方联查 - data={ - "token": authenticator.get_token( - servicer="szkt" - ), # 获取深圳快瞳访问令牌 - "imgBase64": f"data:image/{image["image_format"].lstrip(".")};base64,{image["image_base64"]}", # 影像件BASE64编码嵌入数据统一资源标识符 - }, - guid=md5((url + image["image_guid"]).encode("utf-8")) - .hexdigest() - .upper(), - ) - # TODO: 若请求深圳快瞳医疗收费票据识别接口发生异常则流转至人工处理 - if not ( - response.get("status") == 200 and response.get("code") == 0 - ): - raise - - # noinspection PyTypeChecker - receipt.update( - { - "number": ( - receipt := ( - response["data"]["insured"][ - ( - "receipt_hospitalization" - if image["image_type"] == "医疗门诊收费票据" - else "receipt_outpatient" - ) - ] - )["receipts"][0] - )["receipt_no"][ - "value" - ], # 默认为第一张票据 - "code": receipt["global_detail"]["invoice_code"]["value"], - "date": datetime.strptime( - receipt["global_detail"]["invoice_date"]["value"], - "%Y-%m-%d", - ), - "admission_date": ( - datetime.strptime( - receipt["starttime"]["value"], "%Y-%m-%d" - ) - if isinstance(receipt["starttime"], dict) - else None - ), - "discharge_date": ( - datetime.strptime( - receipt["endtime"]["value"], "%Y-%m-%d" - ) - if isinstance(receipt["endtime"], dict) - else None - ), - "verification_code": fuzzy_match( - receipt["global_detail"]["region_specific"], - "校验码", - ), - "amount": Decimal( - receipt["total_amount"]["value"] - ).quantize( - Decimal("0.00"), - rounding=ROUND_HALF_UP, - ), - "payer": receipt["name"]["value"], - "institution": receipt["hospital_name"]["value"], - "items": [ - { - "item": ( - item["item"]["value"] - if isinstance(item["item"], dict) - else None - ), - "quantity": Decimal( - item["number"]["value"] - if isinstance(item["number"], dict) - else Decimal("1.00") - ).quantize( - Decimal("0.00"), - rounding=ROUND_HALF_UP, - ), - "amount": Decimal( - item["total_amount"]["value"] - if isinstance(item["total_amount"], dict) - else Decimal("1.00") - ).quantize( - Decimal("0.00"), - rounding=ROUND_HALF_UP, - ), - } - for item in receipt["feeitems"] - if isinstance(item, dict) - ], - "personal_self_payment": ( - Decimal(receipt["self_cost"]["value"]).quantize( - Decimal("0.00"), - rounding=ROUND_HALF_UP, - ) - ), - "non_medical_payment": ( - Decimal(receipt["self_pay"]["value"]).quantize( - Decimal("0.00"), - rounding=ROUND_HALF_UP, - ) - ), - "medical_payment": ( - Decimal( - receipt["medicare_pay"]["value"] - ) # 医保基金统筹支付 - + ( - Decimal(receipt["addition_pay"]["value"]) - if isinstance(receipt["addition_pay"], dict) - else Decimal("0.00") - ) # 附加支付 - + ( - Decimal(receipt["third_pay"]["value"]) - if isinstance(receipt["third_pay"], dict) - else Decimal("0.00") - ) # 第三方支付 - ).quantize( - Decimal("0.00"), - rounding=ROUND_HALF_UP, - ), - } - ) - - # 根据购药及就医机构查询购药及就医机构类型 - receipt["institution_type"] = master_data.query_institution_type( - receipt["institution"] - ) - - # 根据影像件类型和购药及就医机构类型匹配处理方法 - match (image["image_type"], receipt["institution_type"]): - case ("增值税发票", "药店"): - items = ( - pandas.DataFrame(receipt["items"]) - .groupby("item") # 就相同明细项合并数量和金额 - .agg(quantity=("quantity", "sum"), amount=("amount", "sum")) - .loc[ - lambda dataframe: dataframe["amount"] != 0 - ] # 仅保留金额非0的明细项 - .reset_index() - .pipe( - lambda dataframe: dataframe.join( - dataframe["item"] - .apply( - parse_item - ) # 根据明细项解析明细项类别和具体内容,并根据具体内容查询药品/医疗服务 - .apply( - pandas.Series - ) # 就明细项类别和药品/医疗服务元组展开为两列 - .rename(columns={0: "category", 1: "medicine"}) - ) - ) - .assign( - reasonable_amount=lambda dataframe: dataframe.apply( - lambda row: Decimal( - rule_engine.evaluate( - decision="扣除明细项不合理费用", - inputs={ - "insurer_company": insurer_company, - "category": row["category"], - "medicine": row["medicine"], - "amount": row["amount"], - }, - )["reasonable_amount"] - ).quantize( - Decimal("0.00"), - rounding=ROUND_HALF_UP, - ), - axis="columns", - ) - ) # 扣除明细项不合理费用 - ) - - receipt.update( - { - "payer": ( - dossier["insured_person_layer"]["insured_person"] - if dossier["insured_person_layer"]["insured_person"] - in receipt["payer"] - else None - ), # 出险人 - "accident": "药店购药", # 出险事故 - "diagnosis": "购药拟诊", # 医疗诊断 - "personal_self_payment": Decimal("0.00"), # 个人自费金额 - "non_medical_payment": Decimal("0.00"), # 个人自付金额 - "medical_payment": Decimal("0.00"), # 医保支付金额 - "unreasonable_amount": Decimal( - receipt["amount"] - items["reasonable_amount"].sum() - ).quantize( - Decimal("0.00"), - rounding=ROUND_HALF_UP, - ), # 不合理金额 - "reasonable_amount": Decimal( - items["reasonable_amount"].sum() - ).quantize( - Decimal("0.00"), - rounding=ROUND_HALF_UP, - ), # 合理金额 - "items": items.to_dict("records"), - } - ) - # TODO: 后续完善就购药及就医类型为门诊就诊(私立医院)处理 - case ("增值税发票", "私立医院"): - receipt["购药及就医类型"] = "门诊就医" - # TODO: 后续完善就购药及就医类型为门诊就诊(公立医院)处理 - case ("医疗门诊收费票据", "公立医院"): - receipt["购药及就医类型"] = "门诊就医" - # TODO: 后续完善就购药及就医类型为住院治疗处理 - case ("医疗住院收费票据", "公立医院"): - receipt["购药及就医类型"] = "住院治疗" - # TODO: 若根据影像件类型和购药及就医机构类型匹配购药及就医类型发生异常则流转至人工处理 - case _: - raise RuntimeError( - "根据影像件类型和购药及就医机构类型匹配购药及就医类型发生异常" - ) - - dossier["receipts_layer"].append(receipt) - - # noinspection PyShadowingNames - def bank_card_recognize(image) -> None: - """ - 识别银行卡并整合至赔案档案 - :param image: 影像件 - :return: 空 - """ - # 请求深圳快瞳银行卡识别接口 - response = http_client.post( - url=(url := "https://ai.inspirvision.cn/s/api/ocr/bankCard"), - headers={ - "X-RequestId-Header": image["image_guid"] - }, # 以影像件唯一标识作为请求唯一标识,用于双方联查 - data={ - "token": authenticator.get_token( - servicer="szkt" - ), # 获取深圳快瞳访问令牌 - "imgBase64": f"data:image/{image["image_format"].lstrip(".")};base64,{image["image_base64"]}", # 影像件BASE64编码嵌入数据统一资源标识符 - }, - guid=md5((url + image["image_guid"]).encode("utf-8")).hexdigest().upper(), - ) - # TODO: 若响应非成功则流转至人工处理 - if not ( - response.get("status") == 200 - and response.get("code") == 0 - and response.get("data", {}).get("bankCardType") - == 1 # # 实际作业亦仅支持借记卡 - ): - raise RuntimeError("请求深圳快瞳银行卡识别接口发生异常或非借记卡") - # noinspection PyTypeChecker - dossier["insured_person_layer"].update( - { - "phone_number": None, - "account": None, - "account_bank": response["data"]["bankInfo"], - "account_number": response["data"]["cardNo"].replace(" ", ""), - } - ) - # 基于影像件识别使能规则评估影像件是否识别 if not rule_engine.evaluate( decision="影像件识别使能", diff --git a/票据理赔自动化/main.py b/票据理赔自动化/main.py index a21152d..9bcee26 100644 --- a/票据理赔自动化/main.py +++ b/票据理赔自动化/main.py @@ -15,18 +15,11 @@ from jinja2 import Environment, FileSystemLoader from common import dossier, rule_engine from image import image_classify from image import image_recognize -from utils.client import Authenticator, HTTPClient # ------------------------- # 主逻辑 # ------------------------- if __name__ == "__main__": - - # 实例化认证器 - authenticator = Authenticator() - # 实例化请求客户端 - http_client = HTTPClient(timeout=300, cache_enabled=True) # 使用缓存 - # 初始化工作目录路径 workplace_path = Path("directory") workplace_path.mkdir(parents=True, exist_ok=True) # 若工作目录不存在则创建