diff --git a/剪映脚本生成自动化/main.py b/剪映脚本生成自动化/main.py index f5e3e72..0b417b5 100644 --- a/剪映脚本生成自动化/main.py +++ b/剪映脚本生成自动化/main.py @@ -617,7 +617,7 @@ class GenerateDraft: # ======================== 调用示例(使用抽象后的方法) ======================== -def execute_workflow(): +def direct(): """生成剪映草稿""" # 实例化 draft = GenerateDraft( diff --git a/票据理赔自动化/abandoned.py b/票据理赔自动化/abandoned.py deleted file mode 100644 index 7c25a5a..0000000 --- a/票据理赔自动化/abandoned.py +++ /dev/null @@ -1,194 +0,0 @@ -def general_text_recognize(image) -> str: - """ - 通用文本识别 - :param image: 影像件 - :return: 识别文本 - """ - # 请求深圳快瞳通用文本识别接口 - response = http_client.post( - url=(url := "https://ai.inspirvision.cn/s/api/ocr/general"), - headers={ - "X-RequestId-Header": image["影像件唯一标识"] - }, # 以影像件唯一标识作为请求唯一标识,用于双方联查 - data={ - "token": authenticator.get_token(servicer="szkt"), # 获取深圳快瞳访问令牌 - "imgBase64": f"data:image/{image["影像件格式"].lstrip(".")};base64,{image["影像件BASE64编码"]}", - }, - guid=md5((url + image["影像件唯一标识"]).encode("utf-8")).hexdigest().upper(), - ) - # TODO: 若响应非成功则流转至人工处理 - if not (response.get("status") == 200 and response.get("code") == 0): - raise RuntimeError("请求深圳快瞳通用文本识别接口发生异常") - - blocks = [] - for block in response["data"]: - # noinspection PyTypeChecker - blocks.append( - [ - int(block["itemPolygon"]["x"]), # 文本块左上角的X坐标 - int(block["itemPolygon"]["y"]), # 文本块左上角的Y坐标 - int(block["itemPolygon"]["height"]), # 文本块左上角的高度 - block["value"], # 文本块的文本内容 - ] - ) - # 使用俄罗斯方块方法整理文本块,先按照文本块的Y坐标升序(从上到下) - blocks.sort(key=lambda x: x[1]) - - lines = [] - for idx, block in enumerate(blocks[1:]): - if idx == 0: - line = [blocks[0]] - continue - # 若当前文本块的Y坐标和当前文本行的平均Y坐标差值小于阈值则归为同一文本行,否则另起一文本行(分行) - if ( - block[1] - numpy.array([e[1] for e in line]).mean() - < numpy.array([e[2] for e in line]).mean() - ): - line.append(block) - else: - lines.append(line) - line = [block] - lines.append(line) - - blocks = [] - for line in lines: - blocks.extend( - [re.sub(r"\s", "", x[3]) for x in sorted(line, key=lambda x: x[0])] - ) # 按照文本块的X坐标升序(从左到右)并去除文本块的文本内容中所有空字符 - return "\n".join(blocks) - - class JiojioTokenizer: - """中文分词器""" - - def __init__(self): - # 初始化jiojio分词器 - # noinspection PyBroadException - try: - jiojio.init() - except: - raise RuntimeError("初始化jiojio分词器发生异常") - - # noinspection PyShadowingNames - @staticmethod - def callback(text: str, flags: int, cursor) -> None: - """ - 分词回调函数 - :param text: 待分词文本 - :param flags: FTS5分词场景标记位 - :param cursor: FTS5分词回传游标 - return 无 - """ - if not text or not isinstance(text, str): - return - - tokens = [] - begin_idx = 0 # 当前分词开始索引 - for word in jiojio.cut(text): - if word.strip() == "": - begin_idx += len(word) - continue - tokens.append( - (word, begin_idx, end_idx := begin_idx + len(word)) - ) # SQLite FTS5要求回传分词语音文本开始和结束索引 - begin_idx = end_idx - - for token, begin_idx, end_idx in tokens: - cursor.send((token, begin_idx, end_idx)) - - # 实例化jiojio分词器 - self.threads.jiojio_tokenizer = self.JiojioTokenizer() - - # 创建分词器方法 - def create_tokenizer_module(tokenizer): - class JiojioTokenizerModule: - """创建jiojio分词器方法""" - - # noinspection PyShadowingNames - @staticmethod - def tokenize(text: str, flags: int, cursor) -> None: - tokenizer.callback(text, flags, cursor) - - return JiojioTokenizerModule() - - self.threads.connection.create_module( - "jiojio_fts5_module", - create_tokenizer_module(self.threads.jiojio_tokenizer), - ) - - self.threads.connection.execute( - """ - CREATE VIRTUAL TABLE IF NOT EXISTS jiojio_tokenizer USING fts5tokenizer(jiojio_fts5_module) - """ - ) - - -{ - "code": 0, - "status": 200, - "message": "success", - "serialNo": "3a08935648632621760512", - "data": [ - {"desc": "金额", "value": "175.22"}, - { - "desc": "项目名称", - "value": "*化学药品制剂*[海露]玻璃酸钠滴眼液0.1%*10ml支/盒", - }, - {"desc": "数量", "value": "2"}, - {"desc": "规格型号", "value": ""}, - {"desc": "税额", "value": "22.78"}, - {"desc": "税率", "value": "13%"}, - {"desc": "单位", "value": ""}, - {"desc": "单价", "value": "87.61"}, - {"desc": "金额1", "value": "-69.42"}, - { - "desc": "项目名称1", - "value": "*化学药品制剂*[海露]玻璃酸钠滴眼液0.1%*10ml/支/盒", - }, - {"desc": "数量1", "value": ""}, - {"desc": "规格型号1", "value": ""}, - {"desc": "税额1", "value": "-9.02"}, - {"desc": "税率1", "value": "13%"}, - {"desc": "单位1", "value": ""}, - {"desc": "单价1", "value": ""}, - {"desc": "发票名称", "value": "电子发票(普通发票)"}, - {"desc": "全电票标签", "value": ""}, - {"desc": "发票号码", "value": "25447200000045325946"}, - {"desc": "开票日期", "value": "2025年01月20日"}, - {"desc": "购买方名称", "value": "唐敏华"}, - {"desc": "购买方识别号", "value": ""}, - {"desc": "销售方名称", "value": "广州美团大药房有限公司"}, - {"desc": "销售方识别号", "value": "91440100MAC1CAJH27"}, - {"desc": "合计金额", "value": "¥105.80"}, - {"desc": "合计税额", "value": "¥13.76"}, - {"desc": "金额小计", "value": ""}, - {"desc": "税额小计", "value": ""}, - {"desc": "价税合计(大写)", "value": "壹佰壹拾玖圆伍角陆分"}, - {"desc": "小写金额", "value": "¥119.56"}, - {"desc": "备注", "value": ""}, - {"desc": "开票人", "value": "张景景"}, - {"desc": "发票类型", "value": "电子发票(普通发票)"}, - {"desc": "监制章存在性判断", "value": "True"}, - {"desc": "总页数", "value": ""}, - {"desc": "当前页数", "value": ""}, - ], -} - -""" - - - - - - - -with open(f"dossiers/{case_number}.html", "w", encoding="utf-8") as file: - file.write( - template.render( - { - "dossier": dossier, - } - ) - ) - - -""" diff --git a/票据理赔自动化/common.py b/票据理赔自动化/common.py new file mode 100644 index 0000000..d2fc51f --- /dev/null +++ b/票据理赔自动化/common.py @@ -0,0 +1,22 @@ +# -*- coding: utf-8 -*- + +from pathlib import Path + +from masterdata import MasterData +from utils.rule_engine import RuleEngine + +# 初始化赔案档案(保险公司将提供投保公司、保险分公司和报案时间等,TPA作业系统签收后生成赔案号) +dossier = { + "report_layer": {}, # 报案层 + "images_layer": [], # 影像件层 + "insured_person_layer": {}, # 出险人层 + "insured_persons_layer": [], # 被保险人层 + "receipts_layer": [], # 票据层 + "adjustment_layer": {}, # 理算层 +} + +# 实例化主数据 +master_data = MasterData() + +# 实例化规则引擎 +rule_engine = RuleEngine(rules_path=Path("rules")) diff --git a/票据理赔自动化/database.db b/票据理赔自动化/database.db index dff7ab8..dc3147a 100644 Binary files a/票据理赔自动化/database.db and b/票据理赔自动化/database.db differ diff --git a/票据理赔自动化/image.py b/票据理赔自动化/image.py new file mode 100644 index 0000000..457078f --- /dev/null +++ b/票据理赔自动化/image.py @@ -0,0 +1,1278 @@ +# -*- coding: utf-8 -*- + +import json +import re +from base64 import b64encode +from datetime import datetime +from decimal import Decimal, ROUND_HALF_UP +from hashlib import md5 +from pathlib import Path +from typing import Optional, Tuple, Dict, Any + +import cv2 +import numpy +import pandas +from fuzzywuzzy import fuzz +from jionlp import parse_location + +from common import dossier, master_data, rule_engine +from utils.client import Authenticator, HTTPClient + +# 实例化认证器 +authenticator = Authenticator() +# 实例化请求客户端 +http_client = HTTPClient(timeout=300, cache_enabled=True) # 使用缓存 + + +# noinspection PyShadowingNames +def image_classify(image_index: int, image_path: Path) -> Optional[Tuple[str, str]]: + """ + 分类影像件并旋正 + :param image_index: 影像件编号 + :param image_path: 影像件路径(path对象) + :return: 无 + """ + + # noinspection PyShadowingNames + def image_read( + image_path: Path, + ) -> Optional[numpy.ndarray | None]: + """ + 打开并读取影像件 + :param image_path: 影像件路径(path对象) + :return: 影像件数据(numpy.ndarray对象) + """ + # noinspection PyBroadException + try: + # 打开并读取影像件(默认转为单通道灰度图) + image_ndarray = cv2.imread(image_path.as_posix(), cv2.IMREAD_GRAYSCALE) + if image_ndarray is None: + raise + return image_ndarray + except Exception as exception: + raise RuntimeError(f"打开并读取影像件发生异常:{str(exception)}") + + # noinspection PyShadowingNames + def image_serialize(image_format: str, image_ndarray: numpy.ndarray) -> str: + """ + 生成影像件唯一标识 + :param image_format: 影像件格式 + :param image_ndarray: 影像件数据 + :return: 影像件唯一标识 + """ + success, image_ndarray_encoded = cv2.imencode(image_format, image_ndarray) + if not success or image_ndarray_encoded is None: + raise RuntimeError("编码影像件发生异常") + + # 转为字节流并生成影像件唯一标识 + image_guid = md5(image_ndarray_encoded.tobytes()).hexdigest().upper() + return image_guid + + # noinspection PyShadowingNames + def image_compress( + image_format: str, + image_ndarray: numpy.ndarray, + image_size_specified: float = 2.0, + ) -> Optional[str]: + """ + 压缩影像件 + :param image_format: 影像件格式 + :param image_ndarray: 影像件数据 + :param image_size_specified: 指定压缩影像件大小,单位为兆字节(MB) + :return: 压缩后影像件BASE64编码 + """ + # 转为字节 + image_size_specified = image_size_specified * 1024 * 1024 + + # 通过调整影像件质量和尺寸达到压缩影像件目的(先调整影像件质量再调整影像件尺寸) + for quality in range(100, 50, -10): + image_ndarray_copy = image_ndarray.copy() + for _ in range(10): + success, image_ndarray_encoded = cv2.imencode( + image_format, + image_ndarray_copy, + params=( + [cv2.IMWRITE_PNG_COMPRESSION, 10 - quality // 10] + if image_format == "png" + else [cv2.IMWRITE_JPEG_QUALITY, quality] + ), + ) + if not success or image_ndarray_encoded is None: + break + + # 影像件BASE64编码 + image_base64 = b64encode(image_ndarray_encoded.tobytes()).decode( + "utf-8" + ) + if len(image_base64) <= image_size_specified: + return image_base64 + + image_ndarray_copy = cv2.resize( + image_ndarray_copy, + ( + int(image_ndarray_copy.shape[0] * 0.95), + int(image_ndarray_copy.shape[1] * 0.95), + ), + interpolation=cv2.INTER_AREA, + ) + # 若调整影像件尺寸后宽/高小于350像素则终止循环 + if min(image_ndarray_copy.shape[:2]) < 350: + break + + return None + + # 打开并读取影像件 + image_ndarray = image_read(image_path) + image_index = f"{image_index:02d}" + image_format = image_path.suffix.lower() # 影像件格式 + + # 生成影像件唯一标识 + # noinspection PyTypeChecker + image_guid = image_serialize(image_format, image_ndarray) + + # 压缩影像件 + image_base64 = image_compress( + image_format, image_ndarray, image_size_specified=2 + ) # 深圳快瞳要求影像件BASE64编码后大小小于等于2兆字节 + # TODO: 若压缩影像件发生异常则流转至人工处理 + if not image_base64: + raise + + # 请求深圳快瞳影像件分类接口 + response = http_client.post( + url=(url := "https://ai.inspirvision.cn/s/api/ocr/genalClassify"), + headers={ + "X-RequestId-Header": image_guid + }, # 以影像件唯一标识作为请求唯一标识,用于双方联查 + data={ + "token": authenticator.get_token(servicer="szkt"), # 获取深圳快瞳访问令牌 + "imgBase64": f"data:image/{image_format.lstrip(".")};base64,{image_base64}", # 影像件BASE64编码嵌入数据统一资源标识符 + }, + guid=md5((url + image_guid).encode("utf-8")).hexdigest().upper(), + ) + # TODO: 若响应非成功则流转至人工处理 + if not (response.get("status") == 200 and response.get("code") == 0): + raise + + # 匹配影像件类型 + # noinspection PyTypeChecker + match (response["data"]["flag"], response["data"]["type"]): + case (14, _): + image_type = "居民户口簿" + case (7, "idcard-front-back"): + image_type = "居民身份证(国徽、头像面)" + case (7, "idcard-front"): + image_type = "居民身份证(国徽面)" + case (7, "idcard-back"): + image_type = "居民身份证(头像面)" + case (11, _): + image_type = "中国港澳台地区及境外护照" + case (8, _): + image_type = "银行卡" + case (4, _): + image_type = "增值税发票" + case (1, _): + image_type = "医疗费用清单" + case (5, _): + image_type = "医疗门诊收费票据" + case (3, _): + image_type = "医疗住院收费票据" + case (18, _): + image_type = "理赔申请书" + case _: + image_type = "其它" + + # 匹配影像件方向 + # noinspection PyTypeChecker + image_orientation = { + "0": "0度", + "90": "顺时针90度", + "180": "180度", + "270": "逆时针90度", + }.get(response["data"]["angle"], "0度") + # 若影像件方向非0度则旋正 + if image_orientation != "0度": + image_ndarray = cv2.rotate( + image_ndarray, + { + "顺时针90度": cv2.ROTATE_90_COUNTERCLOCKWISE, # 逆时针旋转90度 + "180度": cv2.ROTATE_180, # 旋转180度 + "逆时针90度": cv2.ROTATE_90_CLOCKWISE, # 顺时针旋转90度 + }[image_orientation], + ) + # 旋正后再次压缩影像件 + image_base64 = image_compress( + image_format, image_ndarray, image_size_specified=2 + ) + # TODO: 若旋正后再次压缩影像件发生异常则流转至人工处理 + if not image_base64: + raise + + dossier["images_layer"].append( + { + "image_index": image_index, + "image_path": image_path.as_posix(), + "image_name": image_path.stem, + "image_format": image_format, + "image_guid": image_guid, + "image_base64": image_base64, + "image_type": image_type, + } + ) + + +# noinspection PyShadowingNames +def image_recognize( + image, + insurer_company, +) -> None: + """ + 识别影像件并整合至赔案档案 + :param image: 影像件 + :param insurer_company: 保险分公司 + :return: 无 + """ + + # TODO: 后续添加居民身份证(国徽面)和居民身份证(头像面)合并 + # noinspection PyShadowingNames + def identity_card_recognize(image, insurer_company) -> None: + """ + 识别居民身份证并整合至赔案档案 + :param image: 影像件 + :param insurer_company: 保险分公司 + :return: 无 + """ + + # noinspection PyShadowingNames + def calculate_age(report_time: datetime, birth_date: datetime) -> int: + """ + 根据报案时间计算周岁 + :param report_time: 报案时间 + :param birth_date: 出生日期 + :return 周岁 + """ + age = report_time.year - birth_date.year + + return ( + age - 1 + if (report_time.month, report_time.day) + < ( + birth_date.month, + birth_date.day, + ) + else age + ) # 若报案时间的月日小于生成日期的月日则前推一年 + + # 请求深圳快瞳居民身份证识别接口 + response = http_client.post( + url=(url := "https://ai.inspirvision.cn/s/api/ocr/identityCard"), + headers={ + "X-RequestId-Header": image["image_guid"] + }, # 以影像件唯一标识作为请求唯一标识,用于双方联查 + data={ + "token": authenticator.get_token( + servicer="szkt" + ), # 获取深圳快瞳访问令牌 + "imgBase64": f"data:image/{image["image_format"].lstrip(".")};base64,{image["image_base64"]}", # 影像件BASE64编码嵌入数据统一资源标识符 + }, # 深圳快瞳支持同时识别居民国徽面和头像面 + guid=md5((url + image["image_guid"]).encode("utf-8")).hexdigest().upper(), + ) + # TODO: 若请求深圳快瞳居民身份证识别接口发生异常则流转至人工处理 + if not (response.get("status") == 200 and response.get("code") == 0): + raise + + if image["image_type"] in [ + "居民身份证(国徽、头像面)", + "居民身份证(头像面)", + ]: + # noinspection PyTypeChecker + dossier["insured_person_layer"].update( + { + "insured_person": ( + insured_person := response["data"]["name"] + ), # 被保险人 + "identity_type": (identity_type := "居民身份证"), # 证件类型 + "identity_number": ( + indentity_number := response["data"]["idNo"] + ), # 证件号码 + "gender": response["data"]["sex"], # 性别 + "birth_date": ( + birth_date := datetime.strptime( + response["data"]["birthday"], "%Y-%m-%d" + ) + ), # 出生日期,转为日期时间(datetime对象),格式默认为%Y-%m-%d + "age": calculate_age( + dossier["report_layer"]["report_time"], birth_date + ), # 年龄 + "province": ( + residential_address := parse_location( + response["data"]["address"] + ) + ).get( + "province" + ), # 就住址解析为所在省、市、区和详细地址 + "city": residential_address.get("city"), + "district": residential_address.get("county"), + "detailed_address": residential_address.get("detail"), + } + ) + + # 根据保险分公司、被保险人、证件类型、证件号码和出险时间查询个单 + dossier["insured_persons_layer"] = master_data.query_liabilities( + insurer_company, + insured_person, + identity_type, + indentity_number, + dossier["report_layer"]["report_time"].strftime("%Y-%m-%d"), + ) + + if image["image_type"] in [ + "居民身份证(国徽、头像面)", + "居民身份证(国徽面)", + ]: + # noinspection PyTypeChecker + dossier["insured_person_layer"].update( + { + "commencement_date": datetime.strptime( + (period := response["data"]["validDate"].split("-"))[0], + "%Y.%m.%d", + ), # 就有效期限解析为有效起期和有效止期。其中,若有效止期为长期则默认为9999-12-31 + "termination_date": ( + datetime(9999, 12, 31) + if period[1] == "长期" + else datetime.strptime(period[1], "%Y.%m.%d") + ), + } + ) + + # noinspection PyShadowingNames + def application_recognize(image, insurer_company) -> None: + """ + 识别理赔申请书并整合至赔案档案 + :param image: 影像件 + :param insurer_company: 保险分公司 + :return: 无 + """ + + # noinspection PyShadowingNames + def mlm_recognize(image, schema) -> Optional[Dict[str, Any]]: + """ + 使用多模态大模型就理赔申请书进行光学字符识别并结构化识别结果 + :param image: 影像件 + :param schema: JSON格式 + :return: 结构化后识别结果 + """ + # 请求火山引擎多模态大模型接口并就消息内容JSON反序列化 + response = http_client.post( + url="https://ark.cn-beijing.volces.com/api/v3/chat/completions", + headers={ + "Authorization": "Bearer 2c28ab07-888c-45be-84a2-fc4b2cb5f3f2", + "Content-Type": "application/json; charset=utf-8", + }, + json=( + json_ := { + "model": "doubao-seed-1-6-251015", + "messages": [ + { + "role": "user", + "content": [ + { + "type": "image_url", + "image_url": { + "url": f"data:image/{image["image_format"].lstrip(".")};base64,{image["image_base64"]}" + }, # 影像件BASE64编码嵌入数据统一资源标识符 + }, + { + "type": "text", + "text": "请就理赔申请书进行光学字符识别、结构化识别结果并返回符合Schema的JSON数据", + }, + ], + } + ], + "temperature": 0.2, # 采样温度,基于strict平衡稳定和容错 + "thinking": { + "type": "disabled", + }, # 不使用深度思考能力 + "response_format": { + "type": "json_schema", + "json_schema": { + "name": "就理赔申请书进行光学字符识别并结构化识别结果", + "schema": schema, + "strict": True, # 启用严格遵循模式 + }, + }, + } + ), + guid=md5( + json.dumps( + json_, + sort_keys=True, + ensure_ascii=False, + ).encode("utf-8") + ) + .hexdigest() + .upper(), + ) + + # 就响应中消息内容JSON反序列化 + # noinspection PyBroadException + try: + # noinspection PyTypeChecker + return json.loads(response["choices"][0]["message"]["content"]) + except: + return None + + # noinspection PyShadowingNames + def boc_application_recognize(image: str) -> None: + """ + 识别中银保险有限公司的理赔申请书并整合至赔案档案 + :param image: 影像件 + :return: 无 + """ + # JSON格式 + schema = { + "type": "object", + "description": "识别结果对象", + "properties": { + "申请人": { + "type": "string", + "description": "申请人,若无数据则为空字符串", + }, + "性别": { + "type": "string", + "description": "性别,若无数据则为空字符串", + }, + "年龄": { + "type": "string", + "description": "年龄,若无数据则为空字符串", + }, + "手机": { + "type": "string", + "description": "手机,若无数据则为空字符串", + }, + "所属分支行及部门": { + "type": "string", + "description": "所属分支行及部门,若无数据则为空字符串", + }, + "身份证号码": { + "type": "string", + "description": "身份证号码,若无数据则为空字符串", + }, + "就诊记录": { + "type": "array", + "description": "所有就诊记录数组", + "items": { + "type": "object", + "description": "每条就诊记录对象", + "properties": { + "就诊序号": { + "type": "string", + "description": "就诊序号,若无数据则为空字符串", + }, + "发票日期": { + "type": "string", + "description": "发票日期,若无数据则为空字符串,若有数据则格式为YYYY/MM/DD", + }, + "发票上的就诊医院/药店": { + "type": "string", + "description": "发票上的就诊医院/药店,若无数据则为空字符串", + }, + "票据张数": { + "type": "string", + "description": "票据张数,若无数据则为空字符串", + }, + "票据金额": { + "type": "string", + "description": "票据金额,若无数据则为空字符串,若有数据则保留两位小数", + }, + "诊断": { + "type": "string", + "description": "诊断,若无数据则为空字符串", + }, + }, + "required": [ + "发票日期", + "发票上的就诊医院/药店", + "诊断", + ], # 就诊记录必须字段 + "additionalProperties": False, # 禁止就就诊记录新增属性 + }, + }, + "票据金额合计": { + "type": "string", + "description": "票据金额合计,若无数据则为空字符串,若有数据则保留两位小数", + }, + "开户银行": { + "type": "string", + "description": "开户银行,若无数据则为空字符串,请注意开户银行可能为多行", + }, + "户名": { + "type": "string", + "description": "户名,若无数据则为空字符串", + }, + "账号": { + "type": "string", + "description": "账号,若无数据则为空字符串", + }, + }, + "required": [ + "申请人", + "手机", + "身份证号码", + "就诊记录", + "开户银行", + "户名", + "账号", + ], # JSON结构必须字段 + "additionalProperties": False, # 禁止就JSON结构新增属性 + } + + # 使用多模态大模型就理赔申请书进行光学字符识别并结构化识别结果 + recognition = mlm_recognize(image, schema) + # TODO: 若识别中银保险有限公司的理赔申请书并整合至赔案档案发生异常则流转至人工处理 + if not recognition: + raise + dossier["insured_person_layer"].update( + { + "phone_number": recognition["手机"], + "account": recognition["户名"], + "account_bank": recognition["开户银行"], + "account_number": recognition["账号"], + } + ) + + # 根据保险分公司匹配处理方法 + match insurer_company: + # 中银保险有限公司 + case _ if insurer_company.startswith("中银保险有限公司"): + boc_application_recognize(image) + + # noinspection PyShadowingNames + def receipt_recognize(image, insurer_company) -> None: + """ + 识别票据并整合至赔案档案 + :param image: 影像件 + :param insurer_company: 保险分公司 + :return: 空 + """ + + # noinspection PyShadowingNames + def fuzzy_match(contents: list, key: str) -> Optional[str]: + """ + 根据内容列表(基于深圳快瞳增值税发票和医疗收费票据识别结果)模糊匹配键名 + :param contents: 内容列表 + :param key: 键名 + :return 值 + """ + # 若内容列表为空值则返回None + if not contents: + return None + + # noinspection PyInconsistentReturns + match contents[0].keys(): + # 对应深圳快瞳增值税发票识别结果 + case _ if "desc" in contents[0].keys(): + for content in contents: + if content["desc"] == key: + return content["value"] if content["value"] else None + + candidates = [] + for content in contents: + candidates.append( + ( + content["value"], + fuzz.WRatio( + content["desc"], key, force_ascii=False + ), # 基于加权莱文斯坦距离算法计算所有键名和指定键名的相似度 + ) + ) + + return ( + (result[0] if result[0] else None) + if (result := max(candidates, key=lambda x: x[1]))[1] >= 80 + else None + ) # 返回似度>=80且最大的值 + + # 对应深圳快瞳医疗收费票据识别结果 + case _ if "name" in contents[0].keys(): + for content in contents: + if content["name"] == key: + return ( + content["word"]["value"] + if content["word"]["value"] + else None + ) + + candidates = [] + for content in contents: + candidates.append( + ( + content["word"]["value"], + fuzz.WRatio( + content["name"], key, force_ascii=False + ), # 基于加权莱文斯坦距离算法计算所有键名和指定键名的相似度 + ) + ) + + return ( + (result[0] if result[0] else None) + if (result := max(candidates, key=lambda x: x[1]))[1] >= 80 + else None + ) # 返回>=80且最大的相似度的值 + + def parse_item(item: str) -> Tuple[str, Optional[str]]: + """ + 根据明细项解析明细项类别和具体内容,并根据具体内容查询药品/医疗服务 + :param item: 明细项 + return 明细项类别和药品/医疗服务 + """ + if match := re.match( + r"^\*(?P.*?)\*(?P.*)$", + item, + ): + return match.group("category"), master_data.query_medicine( + match.group("specific") + ) + # 一般增值税发票明细项格式形如*{category}*{specific},其中category为明细项类别,例如中成药;specific为明细项具体内容,例如[同仁堂]金贵肾气水蜜丸 300丸/瓶,需要据此查询药品。其它格式则将明细项内容作为明细项类别,药品为空值 + else: + return item, None + + # 初始化票据数据 + receipt = {"image_index": image["image_index"]} + # 请求深圳快瞳票据查验接口(兼容增值税发票、医疗门诊/住院收费票据) + response = http_client.post( + url=(url := "https://ai.inspirvision.cn/s/api/ocr/invoiceCheckAll"), + headers={ + "X-RequestId-Header": image["image_guid"] + }, # 以影像件唯一标识作为请求唯一标识,用于双方联查 + data={ + "token": authenticator.get_token( + servicer="szkt" + ), # 获取深圳快瞳访问令牌 + "imgBase64": f"data:image/{image["image_format"].lstrip(".")};base64,{image["image_base64"]}", # 影像件BASE64编码嵌入数据统一资源标识符 + }, + guid=md5((url + image["image_guid"]).encode("utf-8")).hexdigest().upper(), + ) + # 若查验状态为真票或红票则直接整合至赔案档案 + if response.get("status") == 200 and response.get("code") == 10000: + # noinspection PyTypeChecker + match response["data"]["productCode"]: + # 增值税发票,目前深圳快瞳支持全电发票和全电纸质发票、区块链发票和增值税发票查验 + case "003082": + # noinspection PyTypeChecker + receipt.update( + { + "verification": ( + "真票" + if response["data"]["details"]["invoiceTypeNo"] == "0" + else "红票" + ), # 红票为状态为失控、作废、已红冲、部分红冲和全额红冲的票据 + "number": response["data"]["details"]["number"], + "code": ( + response["data"]["details"]["code"] + if response["data"]["details"]["code"] + else None + ), + "date": datetime.strptime( + response["data"]["details"]["date"], "%Y年%m月%d日" + ), # 转为日期时间(datetime对象) + "verification_code": response["data"]["details"][ + "check_code" + ], + "amount": Decimal( + response["data"]["details"]["total"] + ).quantize( + Decimal("0.00"), + rounding=ROUND_HALF_UP, + ), # 深圳快瞳票据查验接口中开票金额由字符串转为Decimal,保留两位小数 + "payer": response["data"]["details"]["buyer"], + "institution": response["data"]["details"]["seller"], + "items": [ + { + "item": item["name"], + "quantity": ( + Decimal(item["quantity"]).quantize( + Decimal("0.00"), + rounding=ROUND_HALF_UP, + ) + if item["quantity"] + else Decimal("0.00") + ), # 深圳快瞳票据查验接口中明细单位由空字符转为None,若非空字符由字符串转为Decimal,保留两位小数 + "amount": ( + Decimal(item["total"]) + Decimal(item["tax"]) + ).quantize( + Decimal("0.00"), + rounding=ROUND_HALF_UP, + ), # 深圳快瞳票据查验接口中明细的金额和税额由字符串转为Decimal,保留两位小数,并求和 + } + for item in response["data"]["details"]["items"] + ], + "remarks": ( + response["data"]["details"]["remark"] + if response["data"]["details"]["remark"] + else None + ), + } + ) + # 医疗门诊、住院收费票据 + case "003081": + # noinspection PyTypeChecker + receipt.update( + { + "verification": ( + "真票" + if response["data"]["flushedRed"] == "true" + else "红票" + ), + "number": response["data"]["billNumber"], + "code": response["data"]["billCode"], + "date": datetime.strptime( + response["data"]["invoiceDate"], "%Y-%m-%d %H:%M:%S" + ), # 转为日期时间(datetime对象) + "admission_date": ( + datetime.strptime( + response["data"]["hospitalizationDate"].split("-")[ + 0 + ], + "%Y%m%d", + ) + if response["data"]["hospitalizationDate"] + else None + ), # 深圳快瞳票据查验接口中住院日期解析为入院日期和出院日期 + "discharge_date": ( + datetime.strptime( + response["data"]["hospitalizationDate"].split("-")[ + 1 + ], + "%Y%m%d", + ) + if response["data"]["hospitalizationDate"] + else None + ), + "verification_code": response["data"]["checkCode"], + "amount": Decimal(response["data"]["amount"]).quantize( + Decimal("0.00"), + rounding=ROUND_HALF_UP, + ), + "payer": response["data"]["payer"], + "institution": response["data"]["receivablesInstitution"], + "items": [ + { + "item": item["itemName"], + "quantity": Decimal(item["number"]).quantize( + Decimal("0.00"), + rounding=ROUND_HALF_UP, + ), + "amount": Decimal(item["totalAmount"]).quantize( + Decimal("0.00"), + rounding=ROUND_HALF_UP, + ), + } + for item in response["data"]["feeitems"] + ], + "personal_self_payment": Decimal( + response["data"]["personalExpense"] + if response["data"]["personalExpense"] + else Decimal("0.00") + ).quantize( + Decimal("0.00"), + rounding=ROUND_HALF_UP, + ), + "non_medical_payment": Decimal( + response["data"]["personalPay"] + if response["data"]["personalPay"] + else Decimal("0.00") + ).quantize( + Decimal("0.00"), + rounding=ROUND_HALF_UP, + ), + "medical_payment": ( + Decimal(response["data"]["medicarePay"]) + if response["data"]["medicarePay"] + else Decimal("0.00") + + Decimal( + response["data"]["otherPayment"] + if response["data"]["otherPayment"] + else Decimal("0.00") + ) + ).quantize( + Decimal("0.00"), + rounding=ROUND_HALF_UP, + ), # 包括医保统筹基金支付和其它支付(例如,退休补充支付) + } + ) + # 若查验状态为假票或无法查验则再请求深圳快瞳票据识别接口接整合至赔案档案 + else: + receipt["verification"] = ( + "假票" + if response.get("status") == 400 + and (response.get("code") == 10100 or response.get("code") == 10001) + else "无法查验" + ) # 假票:查无此票或查验成功五要素不一致 + + match image["image_type"]: + case "增值税发票": + # 请求深圳快瞳增值税发票识别接口 + response = http_client.post( + url=(url := "https://ai.inspirvision.cn/s/api/ocr/vatInvoice"), + headers={ + "X-RequestId-Header": image["image_guid"] + }, # 以影像件唯一标识作为请求唯一标识,用于双方联查 + data={ + "token": authenticator.get_token( + servicer="szkt" + ), # 获取深圳快瞳访问令牌 + "imgBase64": f"data:image/{image["image_format"].lstrip(".")};base64,{image["image_base64"]}", # 影像件BASE64编码嵌入数据统一资源标识符 + }, + guid=md5((url + image["image_guid"]).encode("utf-8")) + .hexdigest() + .upper(), + ) + # TODO: 若请求深圳快瞳增值税发票识别接口发生异常则流转至人工处理 + if not ( + response.get("status") == 200 and response.get("code") == 0 + ): + raise + + match fuzzy_match(response["data"], "发票类型"): + case "电子发票(普通发票)": + # noinspection PyTypeChecker + receipt.update( + { + "number": fuzzy_match(response["data"], "发票号码"), + "code": fuzzy_match(response["data"], "发票代码"), + "date": datetime.strptime( + fuzzy_match(response["data"], "开票日期"), + "%Y年%m月%d日", + ), + "verification_code": fuzzy_match( + response["data"], "校验码" + ), + "amount": Decimal( + fuzzy_match( + response["data"], "小写金额" + ).replace("¥", "") + ).quantize( + Decimal("0.00"), + rounding=ROUND_HALF_UP, + ), + "payer": fuzzy_match( + response["data"], "购买方名称" + ), + "institution": fuzzy_match( + response["data"], "销售方名称" + ), + "items": [ + { + "item": name, + "quantity": Decimal(quantity).quantize( + Decimal("0.00"), + rounding=ROUND_HALF_UP, + ), + "amount": ( + Decimal(amount) + Decimal(tax) + ).quantize( + Decimal("0.00"), + rounding=ROUND_HALF_UP, + ), # 深圳快瞳票据识别接口中明细的金额和税额由字符串转为Decimal,保留两位小数,并求和 + } + for name, quantity, amount, tax in zip( + [ + x["value"] + for x in response["data"] + if re.match( + r"^项目名称(\d+)?$", + x["desc"], + ) + ], + [ + x["value"] + for x in response["data"] + if re.match( + r"^数量(\d+)?$", + x["desc"], + ) + ], + [ + x["value"] + for x in response["data"] + if re.match( + r"^金额(\d+)?$", + x["desc"], + ) + ], + [ + x["value"] + for x in response["data"] + if re.match( + r"^税额(\d+)?$", + x["desc"], + ) + ], + ) + ], + "remarks": fuzzy_match(response["data"], "备注"), + } + ) + case "增值税普通发票(卷票)": + # noinspection PyTypeChecker + receipt.update( + { + "number": fuzzy_match(response["data"], "发票号码"), + "code": fuzzy_match(response["data"], "发票代码"), + "date": datetime.strptime( + fuzzy_match(response["data"], "开票日期"), + "%Y-%m-%d", + ), + "verification_code": fuzzy_match( + response["data"], "校验码" + ), + "amount": Decimal( + fuzzy_match( + response["data"], "合计金额(小写)" + ).replace("¥", "") + ).quantize( + Decimal("0.00"), + rounding=ROUND_HALF_UP, + ), + "payer": fuzzy_match( + response["data"], "购买方名称" + ), + "institution": fuzzy_match( + response["data"], "销售方名称" + ), + "items": [ + { + "item": name, + "quantity": Decimal(quantity).quantize( + Decimal("0.00"), + rounding=ROUND_HALF_UP, + ), + "amount": Decimal(amount).quantize( + Decimal("0.00"), + rounding=ROUND_HALF_UP, + ), # 深圳快瞳票据识别接口中明细的金额和税额由字符串转为Decimal,保留两位小数,并求和 + } + for name, quantity, amount in zip( + [ + x["value"] + for x in response["data"] + if re.match( + r"^项目名称明细(\d+)?$", + x["desc"], + ) + ], + [ + x["value"] + for x in response["data"] + if re.match( + r"^项目数量明细(\d+)?$", + x["desc"], + ) + ], + [ + x["value"] + for x in response["data"] + if re.match( + r"^项目金额明细(\d+)?$", + x["desc"], + ) + ], + ) + ], + "remarks": fuzzy_match(response["data"], "备注"), + } + ) + case "医疗门诊收费票据" | "医疗住院收费票据": + # 请求深圳快瞳医疗收费票据识别接口 + response = http_client.post( + url=(url := "https://ai.inspirvision.cn/s/api/ocr/medical"), + headers={ + "X-RequestId-Header": image["image_guid"] + }, # 以影像件唯一标识作为请求唯一标识,用于双方联查 + data={ + "token": authenticator.get_token( + servicer="szkt" + ), # 获取深圳快瞳访问令牌 + "imgBase64": f"data:image/{image["image_format"].lstrip(".")};base64,{image["image_base64"]}", # 影像件BASE64编码嵌入数据统一资源标识符 + }, + guid=md5((url + image["image_guid"]).encode("utf-8")) + .hexdigest() + .upper(), + ) + # TODO: 若请求深圳快瞳医疗收费票据识别接口发生异常则流转至人工处理 + if not ( + response.get("status") == 200 and response.get("code") == 0 + ): + raise + + # noinspection PyTypeChecker + receipt.update( + { + "number": ( + receipt := ( + response["data"]["insured"][ + ( + "receipt_hospitalization" + if image["image_type"] == "医疗门诊收费票据" + else "receipt_outpatient" + ) + ] + )["receipts"][0] + )["receipt_no"][ + "value" + ], # 默认为第一张票据 + "code": receipt["global_detail"]["invoice_code"]["value"], + "date": datetime.strptime( + receipt["global_detail"]["invoice_date"]["value"], + "%Y-%m-%d", + ), + "admission_date": ( + datetime.strptime( + receipt["starttime"]["value"], "%Y-%m-%d" + ) + if isinstance(receipt["starttime"], dict) + else None + ), + "discharge_date": ( + datetime.strptime( + receipt["endtime"]["value"], "%Y-%m-%d" + ) + if isinstance(receipt["endtime"], dict) + else None + ), + "verification_code": fuzzy_match( + receipt["global_detail"]["region_specific"], + "校验码", + ), + "amount": Decimal( + receipt["total_amount"]["value"] + ).quantize( + Decimal("0.00"), + rounding=ROUND_HALF_UP, + ), + "payer": receipt["name"]["value"], + "institution": receipt["hospital_name"]["value"], + "items": [ + { + "item": ( + item["item"]["value"] + if isinstance(item["item"], dict) + else None + ), + "quantity": Decimal( + item["number"]["value"] + if isinstance(item["number"], dict) + else Decimal("1.00") + ).quantize( + Decimal("0.00"), + rounding=ROUND_HALF_UP, + ), + "amount": Decimal( + item["total_amount"]["value"] + if isinstance(item["total_amount"], dict) + else Decimal("1.00") + ).quantize( + Decimal("0.00"), + rounding=ROUND_HALF_UP, + ), + } + for item in receipt["feeitems"] + if isinstance(item, dict) + ], + "personal_self_payment": ( + Decimal(receipt["self_cost"]["value"]).quantize( + Decimal("0.00"), + rounding=ROUND_HALF_UP, + ) + ), + "non_medical_payment": ( + Decimal(receipt["self_pay"]["value"]).quantize( + Decimal("0.00"), + rounding=ROUND_HALF_UP, + ) + ), + "medical_payment": ( + Decimal( + receipt["medicare_pay"]["value"] + ) # 医保基金统筹支付 + + ( + Decimal(receipt["addition_pay"]["value"]) + if isinstance(receipt["addition_pay"], dict) + else Decimal("0.00") + ) # 附加支付 + + ( + Decimal(receipt["third_pay"]["value"]) + if isinstance(receipt["third_pay"], dict) + else Decimal("0.00") + ) # 第三方支付 + ).quantize( + Decimal("0.00"), + rounding=ROUND_HALF_UP, + ), + } + ) + + # 根据购药及就医机构查询购药及就医机构类型 + receipt["institution_type"] = master_data.query_institution_type( + receipt["institution"] + ) + + # 根据影像件类型和购药及就医机构类型匹配处理方法 + match (image["image_type"], receipt["institution_type"]): + case ("增值税发票", "药店"): + items = ( + pandas.DataFrame(receipt["items"]) + .groupby("item") # 就相同明细项合并数量和金额 + .agg(quantity=("quantity", "sum"), amount=("amount", "sum")) + .loc[ + lambda dataframe: dataframe["amount"] != 0 + ] # 仅保留金额非0的明细项 + .reset_index() + .pipe( + lambda dataframe: dataframe.join( + dataframe["item"] + .apply( + parse_item + ) # 根据明细项解析明细项类别和具体内容,并根据具体内容查询药品/医疗服务 + .apply( + pandas.Series + ) # 就明细项类别和药品/医疗服务元组展开为两列 + .rename(columns={0: "category", 1: "medicine"}) + ) + ) + .assign( + reasonable_amount=lambda dataframe: dataframe.apply( + lambda row: Decimal( + rule_engine.evaluate( + decision="扣除明细项不合理费用", + inputs={ + "insurer_company": insurer_company, + "category": row["category"], + "medicine": row["medicine"], + "amount": row["amount"], + }, + )["reasonable_amount"] + ).quantize( + Decimal("0.00"), + rounding=ROUND_HALF_UP, + ), + axis="columns", + ) + ) # 扣除明细项不合理费用 + ) + + receipt.update( + { + "payer": ( + dossier["insured_person_layer"]["insured_person"] + if dossier["insured_person_layer"]["insured_person"] + in receipt["payer"] + else None + ), # 出险人 + "accident": "药店购药", # 出险事故 + "diagnosis": "购药拟诊", # 医疗诊断 + "personal_self_payment": Decimal("0.00"), # 个人自费金额 + "non_medical_payment": Decimal("0.00"), # 个人自付金额 + "medical_payment": Decimal("0.00"), # 医保支付金额 + "unreasonable_amount": Decimal( + receipt["amount"] - items["reasonable_amount"].sum() + ).quantize( + Decimal("0.00"), + rounding=ROUND_HALF_UP, + ), # 不合理金额 + "reasonable_amount": Decimal( + items["reasonable_amount"].sum() + ).quantize( + Decimal("0.00"), + rounding=ROUND_HALF_UP, + ), # 合理金额 + "items": items.to_dict("records"), + } + ) + # TODO: 后续完善就购药及就医类型为门诊就诊(私立医院)处理 + case ("增值税发票", "私立医院"): + receipt["购药及就医类型"] = "门诊就医" + # TODO: 后续完善就购药及就医类型为门诊就诊(公立医院)处理 + case ("医疗门诊收费票据", "公立医院"): + receipt["购药及就医类型"] = "门诊就医" + # TODO: 后续完善就购药及就医类型为住院治疗处理 + case ("医疗住院收费票据", "公立医院"): + receipt["购药及就医类型"] = "住院治疗" + # TODO: 若根据影像件类型和购药及就医机构类型匹配购药及就医类型发生异常则流转至人工处理 + case _: + raise RuntimeError( + "根据影像件类型和购药及就医机构类型匹配购药及就医类型发生异常" + ) + + dossier["receipts_layer"].append(receipt) + + # noinspection PyShadowingNames + def bank_card_recognize(image) -> None: + """ + 识别银行卡并整合至赔案档案 + :param image: 影像件 + :return: 空 + """ + # 请求深圳快瞳银行卡识别接口 + response = http_client.post( + url=(url := "https://ai.inspirvision.cn/s/api/ocr/bankCard"), + headers={ + "X-RequestId-Header": image["image_guid"] + }, # 以影像件唯一标识作为请求唯一标识,用于双方联查 + data={ + "token": authenticator.get_token( + servicer="szkt" + ), # 获取深圳快瞳访问令牌 + "imgBase64": f"data:image/{image["image_format"].lstrip(".")};base64,{image["image_base64"]}", # 影像件BASE64编码嵌入数据统一资源标识符 + }, + guid=md5((url + image["image_guid"]).encode("utf-8")).hexdigest().upper(), + ) + # TODO: 若响应非成功则流转至人工处理 + if not ( + response.get("status") == 200 + and response.get("code") == 0 + and response.get("data", {}).get("bankCardType") + == 1 # # 实际作业亦仅支持借记卡 + ): + raise RuntimeError("请求深圳快瞳银行卡识别接口发生异常或非借记卡") + # noinspection PyTypeChecker + dossier["insured_person_layer"].update( + { + "phone_number": None, + "account": None, + "account_bank": response["data"]["bankInfo"], + "account_number": response["data"]["cardNo"].replace(" ", ""), + } + ) + + # 基于影像件识别使能规则评估影像件是否识别 + if not rule_engine.evaluate( + decision="影像件识别使能", + inputs={ + "insurer_company": insurer_company, + "image_type": image["image_type"], + }, + )["recognize_enabled"]: + return + + # 根据影像件类型匹配影像件识别方法 + match image["image_type"]: + # TODO: 后续添加居民户口簿识别和整合方法 + case "居民户口簿": + raise RuntimeError("暂不支持居民户口簿") + case "居民身份证(国徽、头像面)" | "居民身份证(国徽面)" | "居民身份证(头像面)": + # 居民身份证识别并整合至赔案档案 + identity_card_recognize(image, insurer_company) + # TODO: 后续添加居民户口簿识别和整合方法 + case "中国港澳台地区及境外护照": + raise RuntimeError("暂不支持中国港澳台地区及境外护照") + # TODO: 暂仅支持增值税发票识别且购药及就医类型为药店购药整合至赔案档案,后续逐步添加 + case "理赔申请书": + application_recognize(image, insurer_company) + case "增值税发票" | "医疗门诊收费票据" | "医疗住院收费票据": + # 票据识别并整合至赔案档案 + receipt_recognize(image, insurer_company) + case "银行卡": + # 银行卡识别并整合至赔案档案 + bank_card_recognize(image) diff --git a/票据理赔自动化/main.py b/票据理赔自动化/main.py index 29f67e4..a21152d 100644 --- a/票据理赔自动化/main.py +++ b/票据理赔自动化/main.py @@ -5,24 +5,17 @@ 功能清单 https://liubiren.feishu.cn/docx/WFjTdBpzroUjQvxxrNIcKvGnneh?from=from_copylink """ -import json -import re -from base64 import b64encode from datetime import datetime -from decimal import Decimal, ROUND_HALF_UP -from hashlib import md5 from pathlib import Path -from typing import Any, Dict, List, Optional, Tuple +from typing import Any, Dict, List -import cv2 -import numpy import pandas -from fuzzywuzzy import fuzz from jinja2 import Environment, FileSystemLoader -from jionlp import parse_location -from utils.client import Authenticator, HTTPClient, SQLiteClient -from utils.rule_engine import RuleEngine +from common import dossier, rule_engine +from image import image_classify +from image import image_recognize +from utils.client import Authenticator, HTTPClient # ------------------------- # 主逻辑 @@ -38,205 +31,6 @@ if __name__ == "__main__": workplace_path = Path("directory") workplace_path.mkdir(parents=True, exist_ok=True) # 若工作目录不存在则创建 - # 实例化规则引擎 - rule_engine = RuleEngine(rules_path=Path("rules")) - - class MasterData(SQLiteClient): - """主数据""" - - def __init__(self): - """ - 初始化主数据 - """ - # 初始化SQLite客户端 - super().__init__(database="database.db") - try: - with self: - # 初始化被保险人表 - self._execute( - sql=""" - CREATE TABLE IF NOT EXISTS insured_persons - ( - --团单号 - group_policy TEXT NOT NULL, - --个单号 - person_policy TEXT NOT NULL, - --保险分公司 - insurer_company TEXT NOT NULL, - --主被保险人 - master_insured_person TEXT NOT NULL, - --被保险人 - insured_person TEXT NOT NULL, - --证件类型 - identity_type TEXT NOT NULL, - --证件号码 - identity_number TEXT NOT NULL, - --与主被保险人关系,包括本人和附属(附属包括配偶、父母和子女等) - relationship TEXT NOT NULL, - --保险起期(取个单和团单起期最大值) - commencement_date TEXT NOT NULL, - --保险止期(取个单和团单止期最小值) - termination_date TEXT NOT NULL, - --联合主键(被保险人+证件类型+证件号码+保险分公司) - PRIMARY KEY (insurer_company, insured_person, identity_type, - identity_number) - ) - """ - ) - # 初始化购药及就医机构表 - self._execute( - sql=""" - CREATE TABLE IF NOT EXISTS institutions - ( - --购药及就医机构 - institution TEXT PRIMARY KEY, - --购药及就医机构类型 - institution_type TEXT NOT NULL, - --所在省 - province TEXT NOT NULL, - --所在市 - city TEXT NOT NULL - ) - """ - ) - # 初始化药品表 - self._execute( - sql=""" - CREATE TABLE IF NOT EXISTS medicines - ( - --药品/医疗服务 - medicine TEXT PRIMARY KEY - ) - """ - ) - except Exception as exception: - raise RuntimeError(f"初始化数据库发生异常:{str(exception)}") - - # noinspection PyShadowingNames - def query_institution_type(self, institution: str) -> Optional[str]: - """ - 根据购药及就医机构查询购药及就医机构类型 - :param institution: 购药及就医机构 - :return: 购药及就医机构类型 - """ - # noinspection PyBroadException - try: - with self: - # noinspection SqlResolve - result = self._query_one( - sql=""" - SELECT institution_type - FROM institutions - WHERE institution = ? - """, - parameters=(institution,), - ) - if result: - return result["institution_type"] - raise - # TODO: 若根据购药及就医机构查询购药及就医机构类型发生异常则流转至主数据人工处理 - except Exception: - raise - - # noinspection PyShadowingNames - def query_insured_persons( - self, - insurer_company: str, - insured_person: str, - identity_type: str, - identity_number: str, - report_date: str, - ) -> Optional[List[Dict[str, Any]]]: - """ - 根据保险分公司、被保险人、证件类型、证件号码和出险时间查询个单(列表,若夫妻同在一家投保公司且互为附加被保险人,则一方的持有二张个单) - :param insurer_company: 保险分公司 - :param insured_person: 被保险人 - :param identity_type: 证件类型 - :param identity_number: 证件号码 - :param report_date: 报案时间 - :return: 个单列表 - """ - # noinspection PyBroadException - try: - with self: - # noinspection SqlResolve - result = self._query_all( - sql=""" - SELECT group_policy, - person_policy, - master_insured_person, - insured_person, - relationship, - commencement_date, - termination_date - FROM insured_persons - WHERE insurer_company = ? - AND insured_person = ? - AND identity_type = ? - AND identity_number = ? - AND ? BETWEEN commencement_date AND termination_date - ORDER BY termination_date - """, - parameters=( - insurer_company, - insured_person, - identity_type, - identity_number, - report_date, - ), - ) - if result: - return [ - { - k: ( - datetime.strptime(v, "%Y-%m-%d") - if k in ["commencement_date", "termination_date"] - else v - ) # 若为保险起期、止期则转为日期时间(datetime对象) - for k, v in e.items() - } - for e in result - ] # 将保险起期和保险止期转为日期(datetime对象) - raise - # TODO: 若根据保险分公司、被保险人、证件类型、证件号码和出险时间查询被保险人发生异常则流转至主数据人工处理 - except: - raise - - # noinspection PyShadowingNames - def query_medicine( - self, - content: str, - ) -> Optional[str]: - """ - 根据明细项中具体内容查询药品/医疗服务 - :param content: 明细项具体内容 - :return: 药品/医疗服务 - """ - # TODO: 暂仅支持查询药品、通过药品/医疗服务包含明细项中具体内容查询 - # noinspection PyBroadException - try: - with self: - # noinspection SqlResolve - result = self._query_all( - sql=""" - SELECT medicine - FROM medicines - WHERE ? LIKE '%' || medicine || '%' - """, - parameters=(content,), - ) - if result: - return max(result, key=lambda x: len(x["medicine"]))[ - "medicine" - ] # 返回最大长度的药品/医疗服务 - raise - # TODO: 若根据明细项中具体内容查询药品/医疗服务发生异常则流转至主数据人工处理 - except Exception: - raise - - # 实例化主数据 - master_data = MasterData() - # 实例化JINJA2环境 environment = Environment(loader=FileSystemLoader(".")) # 添加DATE过滤器 @@ -251,1337 +45,113 @@ if __name__ == "__main__": # ------------------------- # noinspection PyShadowingNames - def image_classify(image_index: int, image_path: Path) -> Optional[Tuple[str, str]]: + def case_adjust() -> None: """ - 分类影像件并旋正 - :param image_index: 影像件编号 - :param image_path: 影像件路径(path对象) + 理算赔案并整合至赔案档案 :return: 无 """ - # noinspection PyShadowingNames - def image_read( - image_path: Path, - ) -> Optional[numpy.ndarray | None]: + def receipt_adjust(row: pandas.Series) -> List[Dict[str, Any]]: """ - 打开并读取影像件 - :param image_path: 影像件路径(path对象) - :return: 影像件数据(numpy.ndarray对象) + 票据理算 + :param row: 票据 + :return: 理算记录 """ - # noinspection PyBroadException - try: - # 打开并读取影像件(默认转为单通道灰度图) - image_ndarray = cv2.imread(image_path.as_posix(), cv2.IMREAD_GRAYSCALE) - if image_ndarray is None: - raise - return image_ndarray - except Exception as exception: - raise RuntimeError(f"打开并读取影像件发生异常:{str(exception)}") + date = row["date"] + current_type = row["就诊类型"] + current_amount = row["合理金额"] + remaining_claim = current_amount + claim_details = [] - # noinspection PyShadowingNames - def image_serialize(image_format: str, image_ndarray: numpy.ndarray) -> str: - """ - 生成影像件唯一标识 - :param image_format: 影像件格式 - :param image_ndarray: 影像件数据 - :return: 影像件唯一标识 - """ - success, image_ndarray_encoded = cv2.imencode(image_format, image_ndarray) - if not success or image_ndarray_encoded is None: - raise RuntimeError("编码影像件发生异常") + if current_amount <= 0: + return [] - # 转为字节流并生成影像件唯一标识 - image_guid = md5(image_ndarray_encoded.tobytes()).hexdigest().upper() - return image_guid - - # noinspection PyShadowingNames - def image_compress( - image_format, image_ndarray, image_size_specified=2 - ) -> Optional[str]: - """ - 压缩影像件 - :param image_format: 影像件格式 - :param image_ndarray: 影像件数据 - :param image_size_specified: 指定压缩影像件大小,单位为兆字节(MB) - :return: 压缩后影像件BASE64编码 - """ - # 转为字节 - image_size_specified = image_size_specified * 1024 * 1024 - - # 通过调整影像件质量和尺寸达到压缩影像件目的(先调整影像件质量再调整影像件尺寸) - for quality in range(100, 50, -10): - image_ndarray_copy = image_ndarray.copy() - for _ in range(10): - success, image_ndarray_encoded = cv2.imencode( - image_format, - image_ndarray_copy, - params=( - [cv2.IMWRITE_PNG_COMPRESSION, 10 - quality // 10] - if image_format == "png" - else [cv2.IMWRITE_JPEG_QUALITY, quality] - ), - ) - if not success or image_ndarray_encoded is None: - break - - # 影像件BASE64编码 - image_base64 = b64encode(image_ndarray_encoded.tobytes()).decode( - "utf-8" - ) - if len(image_base64) <= image_size_specified: - return image_base64 - - image_ndarray_copy = cv2.resize( - image_ndarray_copy, - ( - int(image_ndarray_copy.shape[0] * 0.95), - int(image_ndarray_copy.shape[1] * 0.95), - ), - interpolation=cv2.INTER_AREA, - ) - # 若调整影像件尺寸后宽/高小于350像素则终止循环 - if min(image_ndarray_copy.shape[:2]) < 350: - break - - return None - - # 打开并读取影像件 - image_ndarray = image_read(image_path) - image_index = f"{image_index:02d}" - image_format = image_path.suffix.lower() # 影像件格式 - - # 生成影像件唯一标识 - # noinspection PyTypeChecker - image_guid = image_serialize(image_format, image_ndarray) - - # 压缩影像件 - image_base64 = image_compress( - image_format, image_ndarray, image_size_specified=2 - ) # 深圳快瞳要求影像件BASE64编码后大小小于等于2兆字节 - # TODO: 若压缩影像件发生异常则流转至人工处理 - if not image_base64: - raise - - # 请求深圳快瞳影像件分类接口 - response = http_client.post( - url=(url := "https://ai.inspirvision.cn/s/api/ocr/genalClassify"), - headers={ - "X-RequestId-Header": image_guid - }, # 以影像件唯一标识作为请求唯一标识,用于双方联查 - data={ - "token": authenticator.get_token( - servicer="szkt" - ), # 获取深圳快瞳访问令牌 - "imgBase64": f"data:image/{image_format.lstrip(".")};base64,{image_base64}", # 影像件BASE64编码嵌入数据统一资源标识符 - }, - guid=md5((url + image_guid).encode("utf-8")).hexdigest().upper(), - ) - # TODO: 若响应非成功则流转至人工处理 - if not (response.get("status") == 200 and response.get("code") == 0): - raise - - # 匹配影像件类型 - # noinspection PyTypeChecker - match (response["data"]["flag"], response["data"]["type"]): - case (14, _): - image_type = "居民户口簿" - case (7, "idcard-front-back"): - image_type = "居民身份证(国徽、头像面)" - case (7, "idcard-front"): - image_type = "居民身份证(国徽面)" - case (7, "idcard-back"): - image_type = "居民身份证(头像面)" - case (11, _): - image_type = "中国港澳台地区及境外护照" - case (8, _): - image_type = "银行卡" - case (4, _): - image_type = "增值税发票" - case (1, _): - image_type = "医疗费用清单" - case (5, _): - image_type = "医疗门诊收费票据" - case (3, _): - image_type = "医疗住院收费票据" - case (18, _): - image_type = "理赔申请书" - case _: - image_type = "其它" - - # 匹配影像件方向 - # noinspection PyTypeChecker - image_orientation = { - "0": "0度", - "90": "顺时针90度", - "180": "180度", - "270": "逆时针90度", - }.get(response["data"]["angle"], "0度") - # 若影像件方向非0度则旋正 - if image_orientation != "0度": - image_ndarray = cv2.rotate( - image_ndarray, - { - "顺时针90度": cv2.ROTATE_90_COUNTERCLOCKWISE, # 逆时针旋转90度 - "180度": cv2.ROTATE_180, # 旋转180度 - "逆时针90度": cv2.ROTATE_90_CLOCKWISE, # 顺时针旋转90度 - }[image_orientation], + # 筛选有效保单并排序 + valid_rules = sorted( + [ + r + for r in policy_rules + if current_type in r["就诊类型"] + and r["生效日期"] <= current_date <= r["失效日期"] + and r["剩余额度"] > 0.0 + ], + key=lambda x: x["剩余额度"], + reverse=True, ) - # 旋正后再次压缩影像件 - image_base64 = image_compress( - image_format, image_ndarray, image_size_specified=2 - ) - # TODO: 若旋正后再次压缩影像件发生异常则流转至人工处理 - if not image_base64: - raise - dossier["images_layer"].append( - { - "image_index": image_index, - "image_path": image_path.as_posix(), - "image_name": image_path.stem, - "image_format": image_format, - "image_guid": image_guid, - "image_base64": image_base64, - "image_type": image_type, - } - ) + # 循环分摊赔付,生成分明细列表 + for rule in valid_rules: + if remaining_claim <= 0.0: + break - # noinspection PyShadowingNames - def image_recognize( - image, - insurer_company, - ) -> None: - """ - 识别影像件并整合至赔案档案 - :param image: 影像件 - :param insurer_company: 保险分公司 - :return: 无 - """ + pay_ratio = rule["赔付比例"] + rule_name = rule["责任名称"] + remaining_quota = rule["剩余额度"] - # TODO: 后续添加居民身份证(国徽面)和居民身份证(头像面)合并 - # noinspection PyShadowingNames - def identity_card_recognize(image, insurer_company) -> None: - """ - 识别居民身份证并整合至赔案档案 - :param image: 影像件 - :param insurer_company: 保险分公司 - :return: 无 - """ + max_payable = remaining_claim * pay_ratio + actual_pay = min(remaining_quota, max_payable) - # noinspection PyShadowingNames - def calculate_age(report_time: datetime, birth_date: datetime) -> int: - """ - 根据报案时间计算周岁 - :param report_time: 报案时间 - :param birth_date: 出生日期 - :return 周岁 - """ - age = report_time.year - birth_date.year - - return ( - age - 1 - if (report_time.month, report_time.day) - < ( - birth_date.month, - birth_date.day, - ) - else age - ) # 若报案时间的月日小于生成日期的月日则前推一年 - - # 请求深圳快瞳居民身份证识别接口 - response = http_client.post( - url=(url := "https://ai.inspirvision.cn/s/api/ocr/identityCard"), - headers={ - "X-RequestId-Header": image["image_guid"] - }, # 以影像件唯一标识作为请求唯一标识,用于双方联查 - data={ - "token": authenticator.get_token( - servicer="szkt" - ), # 获取深圳快瞳访问令牌 - "imgBase64": f"data:image/{image["image_format"].lstrip(".")};base64,{image["image_base64"]}", # 影像件BASE64编码嵌入数据统一资源标识符 - }, # 深圳快瞳支持同时识别居民国徽面和头像面 - guid=md5((url + image["image_guid"]).encode("utf-8")) - .hexdigest() - .upper(), - ) - # TODO: 若请求深圳快瞳居民身份证识别接口发生异常则流转至人工处理 - if not (response.get("status") == 200 and response.get("code") == 0): - raise - - if image["image_type"] in [ - "居民身份证(国徽、头像面)", - "居民身份证(头像面)", - ]: - # noinspection PyTypeChecker - dossier["insured_person_layer"].update( - { - "insured_person": ( - insured_person := response["data"]["name"] - ), # 被保险人 - "identity_type": (identity_type := "居民身份证"), # 证件类型 - "identity_number": ( - indentity_number := response["data"]["idNo"] - ), # 证件号码 - "gender": response["data"]["sex"], # 性别 - "birth_date": ( - birth_date := datetime.strptime( - response["data"]["birthday"], "%Y-%m-%d" - ) - ), # 出生日期,转为日期时间(datetime对象),格式默认为%Y-%m-%d - "age": calculate_age( - dossier["report_layer"]["report_time"], birth_date - ), # 年龄 - "province": ( - residential_address := parse_location( - response["data"]["address"] - ) - ).get( - "province" - ), # 就住址解析为所在省、市、区和详细地址 - "city": residential_address.get("city"), - "district": residential_address.get("county"), - "detailed_address": residential_address.get("detail"), + if actual_pay > 0.0: + corresponding_actual_amount = actual_pay / pay_ratio + # 构建明细字典(字段与后续DataFrame列对应) + detail = { + "就诊类型": current_type, + "就诊合理金额": current_amount, + "保单责任名称": rule_name, + "保单赔付比例": pay_ratio, + "保单本次赔付金额": round(actual_pay, 2), + "本次对应合理金额部分": round(corresponding_actual_amount, 2), + "保单赔付后剩余额度": round(remaining_quota - actual_pay, 2), } - ) + claim_details.append(detail) - # 根据保险分公司、被保险人、证件类型、证件号码和出险时间查询个单 - dossier["person_policies_layer"] = master_data.query_insured_persons( - insurer_company, - insured_person, - identity_type, - indentity_number, - dossier["report_layer"]["report_time"].strftime("%Y-%m-%d"), - ) + # 更新保单额度和剩余待赔付金额 + rule["剩余额度"] -= actual_pay + remaining_claim -= corresponding_actual_amount - if image["image_type"] in [ - "居民身份证(国徽、头像面)", - "居民身份证(国徽面)", - ]: - # noinspection PyTypeChecker - dossier["insured_person_layer"].update( - { - "commencement_date": datetime.strptime( - (period := response["data"]["validDate"].split("-"))[0], - "%Y.%m.%d", - ), # 就有效期限解析为有效起期和有效止期。其中,若有效止期为长期则默认为9999-12-31 - "termination_date": ( - datetime(9999, 12, 31) - if period[1] == "长期" - else datetime.strptime(period[1], "%Y.%m.%d") - ), - } - ) + return claim_details - # noinspection PyShadowingNames - def application_recognize(image, insurer_company) -> None: - """ - 识别理赔申请书并整合至赔案档案 - :param image: 影像件 - :param insurer_company: 保险分公司 - :return: 无 - """ - - # noinspection PyShadowingNames - def mlm_recognize(image, schema) -> Optional[Dict[str, Any]]: - """ - 使用多模态大模型就理赔申请书进行光学字符识别并结构化识别结果 - :param image: 影像件 - :param schema: JSON格式 - :return: 结构化后识别结果 - """ - # 请求火山引擎多模态大模型接口并就消息内容JSON反序列化 - response = http_client.post( - url="https://ark.cn-beijing.volces.com/api/v3/chat/completions", - headers={ - "Authorization": "Bearer 2c28ab07-888c-45be-84a2-fc4b2cb5f3f2", - "Content-Type": "application/json; charset=utf-8", - }, - json=( - json_ := { - "model": "doubao-seed-1-6-251015", - "messages": [ - { - "role": "user", - "content": [ - { - "type": "image_url", - "image_url": { - "url": f"data:image/{image["image_format"].lstrip(".")};base64,{image["image_base64"]}" - }, # 影像件BASE64编码嵌入数据统一资源标识符 - }, - { - "type": "text", - "text": "请就理赔申请书进行光学字符识别、结构化识别结果并返回符合Schema的JSON数据", - }, - ], - } - ], - "temperature": 0.2, # 采样温度,基于strict平衡稳定和容错 - "thinking": { - "type": "disabled", - }, # 不使用深度思考能力 - "response_format": { - "type": "json_schema", - "json_schema": { - "name": "就理赔申请书进行光学字符识别并结构化识别结果", - "schema": schema, - "strict": True, # 启用严格遵循模式 - }, - }, - } - ), - guid=md5( - json.dumps( - json_, - sort_keys=True, - ensure_ascii=False, - ).encode("utf-8") - ) - .hexdigest() - .upper(), - ) - - # 就响应中消息内容JSON反序列化 - # noinspection PyBroadException - try: - # noinspection PyTypeChecker - return json.loads(response["choices"][0]["message"]["content"]) - except: - return None - - # noinspection PyShadowingNames - def boc_application_recognize(image: str) -> None: - """ - 识别中银保险有限公司的理赔申请书并整合至赔案档案 - :param image: 影像件 - :return: 无 - """ - # JSON格式 - schema = { - "type": "object", - "description": "识别结果对象", - "properties": { - "申请人": { - "type": "string", - "description": "申请人,若无数据则为空字符串", - }, - "性别": { - "type": "string", - "description": "性别,若无数据则为空字符串", - }, - "年龄": { - "type": "string", - "description": "年龄,若无数据则为空字符串", - }, - "手机": { - "type": "string", - "description": "手机,若无数据则为空字符串", - }, - "所属分支行及部门": { - "type": "string", - "description": "所属分支行及部门,若无数据则为空字符串", - }, - "身份证号码": { - "type": "string", - "description": "身份证号码,若无数据则为空字符串", - }, - "就诊记录": { - "type": "array", - "description": "所有就诊记录数组", - "items": { - "type": "object", - "description": "每条就诊记录对象", - "properties": { - "就诊序号": { - "type": "string", - "description": "就诊序号,若无数据则为空字符串", - }, - "发票日期": { - "type": "string", - "description": "发票日期,若无数据则为空字符串,若有数据则格式为YYYY/MM/DD", - }, - "发票上的就诊医院/药店": { - "type": "string", - "description": "发票上的就诊医院/药店,若无数据则为空字符串", - }, - "票据张数": { - "type": "string", - "description": "票据张数,若无数据则为空字符串", - }, - "票据金额": { - "type": "string", - "description": "票据金额,若无数据则为空字符串,若有数据则保留两位小数", - }, - "诊断": { - "type": "string", - "description": "诊断,若无数据则为空字符串", - }, - }, - "required": [ - "发票日期", - "发票上的就诊医院/药店", - "诊断", - ], # 就诊记录必须字段 - "additionalProperties": False, # 禁止就就诊记录新增属性 - }, - }, - "票据金额合计": { - "type": "string", - "description": "票据金额合计,若无数据则为空字符串,若有数据则保留两位小数", - }, - "开户银行": { - "type": "string", - "description": "开户银行,若无数据则为空字符串,请注意开户银行可能为多行", - }, - "户名": { - "type": "string", - "description": "户名,若无数据则为空字符串", - }, - "账号": { - "type": "string", - "description": "账号,若无数据则为空字符串", - }, - }, - "required": [ - "申请人", - "手机", - "身份证号码", - "就诊记录", - "开户银行", - "户名", - "账号", - ], # JSON结构必须字段 - "additionalProperties": False, # 禁止就JSON结构新增属性 - } - - # 使用多模态大模型就理赔申请书进行光学字符识别并结构化识别结果 - recognition = mlm_recognize(image, schema) - # TODO: 若识别中银保险有限公司的理赔申请书并整合至赔案档案发生异常则流转至人工处理 - if not recognition: - raise - dossier["insured_person_layer"].update( - { - "phone_number": recognition["手机"], - "account": recognition["户名"], - "account_bank": recognition["开户银行"], - "account_number": recognition["账号"], - } - ) - - # 根据保险分公司匹配处理方法 - match insurer_company: - # 中银保险有限公司 - case _ if insurer_company.startswith("中银保险有限公司"): - boc_application_recognize(image) - - # noinspection PyShadowingNames - def receipt_recognize(image, insurer_company) -> None: - """ - 识别票据并整合至赔案档案 - :param image: 影像件 - :param insurer_company: 保险分公司 - :return: 空 - """ - - # noinspection PyShadowingNames - def fuzzy_match(contents: list, key: str) -> Optional[str]: - """ - 根据内容列表(基于深圳快瞳增值税发票和医疗收费票据识别结果)模糊匹配键名 - :param contents: 内容列表 - :param key: 键名 - :return 值 - """ - # 若内容列表为空值则返回None - if not contents: - return None - - # noinspection PyInconsistentReturns - match contents[0].keys(): - # 对应深圳快瞳增值税发票识别结果 - case _ if "desc" in contents[0].keys(): - for content in contents: - if content["desc"] == key: - return content["value"] if content["value"] else None - - candidates = [] - for content in contents: - candidates.append( - ( - content["value"], - fuzz.WRatio( - content["desc"], key, force_ascii=False - ), # 基于加权莱文斯坦距离算法计算所有键名和指定键名的相似度 - ) - ) - - return ( - (result[0] if result[0] else None) - if (result := max(candidates, key=lambda x: x[1]))[1] >= 80 - else None - ) # 返回似度>=80且最大的值 - - # 对应深圳快瞳医疗收费票据识别结果 - case _ if "name" in contents[0].keys(): - for content in contents: - if content["name"] == key: - return ( - content["word"]["value"] - if content["word"]["value"] - else None - ) - - candidates = [] - for content in contents: - candidates.append( - ( - content["word"]["value"], - fuzz.WRatio( - content["name"], key, force_ascii=False - ), # 基于加权莱文斯坦距离算法计算所有键名和指定键名的相似度 - ) - ) - - return ( - (result[0] if result[0] else None) - if (result := max(candidates, key=lambda x: x[1]))[1] >= 80 - else None - ) # 返回>=80且最大的相似度的值 - - def parse_item(item: str) -> Tuple[str, Optional[str]]: - """ - 根据明细项解析明细项类别和具体内容,并根据具体内容查询药品/医疗服务 - :param item: 明细项 - return 明细项类别和药品/医疗服务 - """ - if match := re.match( - r"^\*(?P.*?)\*(?P.*)$", - item, - ): - return match.group("category"), master_data.query_medicine( - match.group("specific") - ) - # 一般增值税发票明细项格式形如*{category}*{specific},其中category为明细项类别,例如中成药;specific为明细项具体内容,例如[同仁堂]金贵肾气水蜜丸 300丸/瓶,需要据此查询药品。其它格式则将明细项内容作为明细项类别,药品为空值 - else: - return item, None - - # 初始化票据数据 - receipt = {"image_index": image["image_index"]} - # 请求深圳快瞳票据查验接口(兼容增值税发票、医疗门诊/住院收费票据) - response = http_client.post( - url=(url := "https://ai.inspirvision.cn/s/api/ocr/invoiceCheckAll"), - headers={ - "X-RequestId-Header": image["image_guid"] - }, # 以影像件唯一标识作为请求唯一标识,用于双方联查 - data={ - "token": authenticator.get_token( - servicer="szkt" - ), # 获取深圳快瞳访问令牌 - "imgBase64": f"data:image/{image["image_format"].lstrip(".")};base64,{image["image_base64"]}", # 影像件BASE64编码嵌入数据统一资源标识符 - }, - guid=md5((url + image["image_guid"]).encode("utf-8")) - .hexdigest() - .upper(), - ) - # 若查验状态为真票或红票则直接整合至赔案档案 - if response.get("status") == 200 and response.get("code") == 10000: - # noinspection PyTypeChecker - match response["data"]["productCode"]: - # 增值税发票,目前深圳快瞳支持全电发票和全电纸质发票、区块链发票和增值税发票查验 - case "003082": - # noinspection PyTypeChecker - receipt.update( - { - "verification": ( - "真票" - if response["data"]["details"]["invoiceTypeNo"] - == "0" - else "红票" - ), # 红票为状态为失控、作废、已红冲、部分红冲和全额红冲的票据 - "number": response["data"]["details"]["number"], - "code": ( - response["data"]["details"]["code"] - if response["data"]["details"]["code"] - else None - ), - "date": datetime.strptime( - response["data"]["details"]["date"], "%Y年%m月%d日" - ), # 转为日期时间(datetime对象) - "verification_code": response["data"]["details"][ - "check_code" - ], - "amount": Decimal( - response["data"]["details"]["total"] - ).quantize( - Decimal("0.00"), - rounding=ROUND_HALF_UP, - ), # 深圳快瞳票据查验接口中开票金额由字符串转为Decimal,保留两位小数 - "payer": response["data"]["details"]["buyer"], - "institution": response["data"]["details"]["seller"], - "items": [ - { - "item": item["name"], - "quantity": ( - Decimal(item["quantity"]).quantize( - Decimal("0.00"), - rounding=ROUND_HALF_UP, - ) - if item["quantity"] - else Decimal("0.00") - ), # 深圳快瞳票据查验接口中明细单位由空字符转为None,若非空字符由字符串转为Decimal,保留两位小数 - "amount": ( - Decimal(item["total"]) - + Decimal(item["tax"]) - ).quantize( - Decimal("0.00"), - rounding=ROUND_HALF_UP, - ), # 深圳快瞳票据查验接口中明细的金额和税额由字符串转为Decimal,保留两位小数,并求和 - } - for item in response["data"]["details"]["items"] - ], - "remarks": ( - response["data"]["details"]["remark"] - if response["data"]["details"]["remark"] - else None - ), - } - ) - # 医疗门诊、住院收费票据 - case "003081": - # noinspection PyTypeChecker - receipt.update( - { - "verification": ( - "真票" - if response["data"]["flushedRed"] == "true" - else "红票" - ), - "number": response["data"]["billNumber"], - "code": response["data"]["billCode"], - "date": datetime.strptime( - response["data"]["invoiceDate"], "%Y-%m-%d %H:%M:%S" - ), # 转为日期时间(datetime对象) - "admission_date": ( - datetime.strptime( - response["data"]["hospitalizationDate"].split( - "-" - )[0], - "%Y%m%d", - ) - if response["data"]["hospitalizationDate"] - else None - ), # 深圳快瞳票据查验接口中住院日期解析为入院日期和出院日期 - "discharge_date": ( - datetime.strptime( - response["data"]["hospitalizationDate"].split( - "-" - )[1], - "%Y%m%d", - ) - if response["data"]["hospitalizationDate"] - else None - ), - "verification_code": response["data"]["checkCode"], - "amount": Decimal(response["data"]["amount"]).quantize( - Decimal("0.00"), - rounding=ROUND_HALF_UP, - ), - "payer": response["data"]["payer"], - "institution": response["data"][ - "receivablesInstitution" - ], - "items": [ - { - "item": item["itemName"], - "quantity": Decimal(item["number"]).quantize( - Decimal("0.00"), - rounding=ROUND_HALF_UP, - ), - "amount": Decimal(item["totalAmount"]).quantize( - Decimal("0.00"), - rounding=ROUND_HALF_UP, - ), - } - for item in response["data"]["feeitems"] - ], - "personal_self_payment": Decimal( - response["data"]["personalExpense"] - if response["data"]["personalExpense"] - else Decimal("0.00") - ).quantize( - Decimal("0.00"), - rounding=ROUND_HALF_UP, - ), - "non_medical_payment": Decimal( - response["data"]["personalPay"] - if response["data"]["personalPay"] - else Decimal("0.00") - ).quantize( - Decimal("0.00"), - rounding=ROUND_HALF_UP, - ), - "medical_payment": ( - Decimal(response["data"]["medicarePay"]) - if response["data"]["medicarePay"] - else Decimal("0.00") - + Decimal( - response["data"]["otherPayment"] - if response["data"]["otherPayment"] - else Decimal("0.00") - ) - ).quantize( - Decimal("0.00"), - rounding=ROUND_HALF_UP, - ), # 包括医保统筹基金支付和其它支付(例如,退休补充支付) - } - ) - # 若查验状态为假票或无法查验则再请求深圳快瞳票据识别接口接整合至赔案档案 - else: - receipt["verification"] = ( - "假票" - if response.get("status") == 400 - and (response.get("code") == 10100 or response.get("code") == 10001) - else "无法查验" - ) # 假票:查无此票或查验成功五要素不一致 - - match image["image_type"]: - case "增值税发票": - # 请求深圳快瞳增值税发票识别接口 - response = http_client.post( - url=( - url := "https://ai.inspirvision.cn/s/api/ocr/vatInvoice" - ), - headers={ - "X-RequestId-Header": image["image_guid"] - }, # 以影像件唯一标识作为请求唯一标识,用于双方联查 - data={ - "token": authenticator.get_token( - servicer="szkt" - ), # 获取深圳快瞳访问令牌 - "imgBase64": f"data:image/{image["image_format"].lstrip(".")};base64,{image["image_base64"]}", # 影像件BASE64编码嵌入数据统一资源标识符 - }, - guid=md5((url + image["image_guid"]).encode("utf-8")) - .hexdigest() - .upper(), - ) - # TODO: 若请求深圳快瞳增值税发票识别接口发生异常则流转至人工处理 - if not ( - response.get("status") == 200 and response.get("code") == 0 - ): - raise - - match fuzzy_match(response["data"], "发票类型"): - case "电子发票(普通发票)": - # noinspection PyTypeChecker - receipt.update( - { - "number": fuzzy_match( - response["data"], "发票号码" - ), - "code": fuzzy_match( - response["data"], "发票代码" - ), - "date": datetime.strptime( - fuzzy_match(response["data"], "开票日期"), - "%Y年%m月%d日", - ), - "verification_code": fuzzy_match( - response["data"], "校验码" - ), - "amount": Decimal( - fuzzy_match( - response["data"], "小写金额" - ).replace("¥", "") - ).quantize( - Decimal("0.00"), - rounding=ROUND_HALF_UP, - ), - "payer": fuzzy_match( - response["data"], "购买方名称" - ), - "institution": fuzzy_match( - response["data"], "销售方名称" - ), - "items": [ - { - "item": name, - "quantity": Decimal(quantity).quantize( - Decimal("0.00"), - rounding=ROUND_HALF_UP, - ), - "amount": ( - Decimal(amount) + Decimal(tax) - ).quantize( - Decimal("0.00"), - rounding=ROUND_HALF_UP, - ), # 深圳快瞳票据识别接口中明细的金额和税额由字符串转为Decimal,保留两位小数,并求和 - } - for name, quantity, amount, tax in zip( - [ - x["value"] - for x in response["data"] - if re.match( - r"^项目名称(\d+)?$", - x["desc"], - ) - ], - [ - x["value"] - for x in response["data"] - if re.match( - r"^数量(\d+)?$", - x["desc"], - ) - ], - [ - x["value"] - for x in response["data"] - if re.match( - r"^金额(\d+)?$", - x["desc"], - ) - ], - [ - x["value"] - for x in response["data"] - if re.match( - r"^税额(\d+)?$", - x["desc"], - ) - ], - ) - ], - "remarks": fuzzy_match( - response["data"], "备注" - ), - } - ) - case "增值税普通发票(卷票)": - # noinspection PyTypeChecker - receipt.update( - { - "number": fuzzy_match( - response["data"], "发票号码" - ), - "code": fuzzy_match( - response["data"], "发票代码" - ), - "date": datetime.strptime( - fuzzy_match(response["data"], "开票日期"), - "%Y-%m-%d", - ), - "verification_code": fuzzy_match( - response["data"], "校验码" - ), - "amount": Decimal( - fuzzy_match( - response["data"], "合计金额(小写)" - ).replace("¥", "") - ).quantize( - Decimal("0.00"), - rounding=ROUND_HALF_UP, - ), - "payer": fuzzy_match( - response["data"], "购买方名称" - ), - "institution": fuzzy_match( - response["data"], "销售方名称" - ), - "items": [ - { - "item": name, - "quantity": Decimal(quantity).quantize( - Decimal("0.00"), - rounding=ROUND_HALF_UP, - ), - "amount": Decimal(amount).quantize( - Decimal("0.00"), - rounding=ROUND_HALF_UP, - ), # 深圳快瞳票据识别接口中明细的金额和税额由字符串转为Decimal,保留两位小数,并求和 - } - for name, quantity, amount in zip( - [ - x["value"] - for x in response["data"] - if re.match( - r"^项目名称明细(\d+)?$", - x["desc"], - ) - ], - [ - x["value"] - for x in response["data"] - if re.match( - r"^项目数量明细(\d+)?$", - x["desc"], - ) - ], - [ - x["value"] - for x in response["data"] - if re.match( - r"^项目金额明细(\d+)?$", - x["desc"], - ) - ], - ) - ], - "remarks": fuzzy_match( - response["data"], "备注" - ), - } - ) - case "医疗门诊收费票据" | "医疗住院收费票据": - # 请求深圳快瞳医疗收费票据识别接口 - response = http_client.post( - url=(url := "https://ai.inspirvision.cn/s/api/ocr/medical"), - headers={ - "X-RequestId-Header": image["image_guid"] - }, # 以影像件唯一标识作为请求唯一标识,用于双方联查 - data={ - "token": authenticator.get_token( - servicer="szkt" - ), # 获取深圳快瞳访问令牌 - "imgBase64": f"data:image/{image["image_format"].lstrip(".")};base64,{image["image_base64"]}", # 影像件BASE64编码嵌入数据统一资源标识符 - }, - guid=md5((url + image["image_guid"]).encode("utf-8")) - .hexdigest() - .upper(), - ) - # TODO: 若请求深圳快瞳医疗收费票据识别接口发生异常则流转至人工处理 - if not ( - response.get("status") == 200 and response.get("code") == 0 - ): - raise - - # noinspection PyTypeChecker - receipt.update( - { - "number": ( - receipt := ( - response["data"]["insured"][ - ( - "receipt_hospitalization" - if image["image_type"] - == "医疗门诊收费票据" - else "receipt_outpatient" - ) - ] - )["receipts"][0] - )["receipt_no"][ - "value" - ], # 默认为第一张票据 - "code": receipt["global_detail"]["invoice_code"][ - "value" - ], - "date": datetime.strptime( - receipt["global_detail"]["invoice_date"]["value"], - "%Y-%m-%d", - ), - "admission_date": ( - datetime.strptime( - receipt["starttime"]["value"], "%Y-%m-%d" - ) - if isinstance(receipt["starttime"], dict) - else None - ), - "discharge_date": ( - datetime.strptime( - receipt["endtime"]["value"], "%Y-%m-%d" - ) - if isinstance(receipt["endtime"], dict) - else None - ), - "verification_code": fuzzy_match( - receipt["global_detail"]["region_specific"], - "校验码", - ), - "amount": Decimal( - receipt["total_amount"]["value"] - ).quantize( - Decimal("0.00"), - rounding=ROUND_HALF_UP, - ), - "payer": receipt["name"]["value"], - "institution": receipt["hospital_name"]["value"], - "items": [ - { - "item": ( - item["item"]["value"] - if isinstance(item["item"], dict) - else None - ), - "quantity": Decimal( - item["number"]["value"] - if isinstance(item["number"], dict) - else Decimal("1.00") - ).quantize( - Decimal("0.00"), - rounding=ROUND_HALF_UP, - ), - "amount": Decimal( - item["total_amount"]["value"] - if isinstance(item["total_amount"], dict) - else Decimal("1.00") - ).quantize( - Decimal("0.00"), - rounding=ROUND_HALF_UP, - ), - } - for item in receipt["feeitems"] - if isinstance(item, dict) - ], - "personal_self_payment": ( - Decimal(receipt["self_cost"]["value"]).quantize( - Decimal("0.00"), - rounding=ROUND_HALF_UP, - ) - ), - "non_medical_payment": ( - Decimal(receipt["self_pay"]["value"]).quantize( - Decimal("0.00"), - rounding=ROUND_HALF_UP, - ) - ), - "medical_payment": ( - Decimal( - receipt["medicare_pay"]["value"] - ) # 医保基金统筹支付 - + ( - Decimal(receipt["addition_pay"]["value"]) - if isinstance(receipt["addition_pay"], dict) - else Decimal("0.00") - ) # 附加支付 - + ( - Decimal(receipt["third_pay"]["value"]) - if isinstance(receipt["third_pay"], dict) - else Decimal("0.00") - ) # 第三方支付 - ).quantize( - Decimal("0.00"), - rounding=ROUND_HALF_UP, - ), - } - ) - - # 根据购药及就医机构查询购药及就医机构类型 - receipt["institution_type"] = master_data.query_institution_type( - receipt["institution"] - ) - - # 根据影像件类型和购药及就医机构类型匹配处理方法 - match (image["image_type"], receipt["institution_type"]): - case ("增值税发票", "药店"): - items = ( - pandas.DataFrame(receipt["items"]) - .groupby("item") # 就相同明细项合并数量和金额 - .agg(quantity=("quantity", "sum"), amount=("amount", "sum")) - .loc[ - lambda dataframe: dataframe["amount"] != 0 - ] # 仅保留金额非0的明细项 - .reset_index() - .pipe( - lambda dataframe: dataframe.join( - dataframe["item"] - .apply( - parse_item - ) # 根据明细项解析明细项类别和具体内容,并根据具体内容查询药品/医疗服务 - .apply( - pandas.Series - ) # 就明细项类别和药品/医疗服务元组展开为两列 - .rename(columns={0: "category", 1: "medicine"}) - ) - ) - .assign( - reasonable_amount=lambda dataframe: dataframe.apply( - lambda row: Decimal( - rule_engine.evaluate( - decision="扣除明细项不合理费用", - inputs={ - "insurer_company": insurer_company, - "category": row["category"], - "medicine": row["medicine"], - "amount": row["amount"], - }, - )["reasonable_amount"] - ).quantize( - Decimal("0.00"), - rounding=ROUND_HALF_UP, - ), - axis="columns", - ) - ) # 扣除明细项不合理费用 - ) - - receipt.update( - { - "payer": ( - dossier["insured_person_layer"]["insured_person"] - if dossier["insured_person_layer"]["insured_person"] - in receipt["payer"] - else None - ), - "accident": "药店购药", - "diagnosis": "购药拟诊", - "occurrence_date": receipt["date"], - "end_date": receipt["date"], - "personal_self_payment": Decimal("0.00"), - "non_medical_payment": Decimal("0.00"), - "medical_payment": Decimal("0.00"), - "unreasonable_amount": Decimal( - receipt["amount"] - items["reasonable_amount"].sum() - ).quantize( - Decimal("0.00"), - rounding=ROUND_HALF_UP, - ), - "reasonable_amount": Decimal( - items["reasonable_amount"].sum() - ).quantize( - Decimal("0.00"), - rounding=ROUND_HALF_UP, - ), - "items": items.to_dict("records"), - } - ) - # TODO: 后续完善就购药及就医类型为门诊就诊(私立医院)处理 - case ("增值税发票", "私立医院"): - receipt["购药及就医类型"] = "门诊就医" - # TODO: 后续完善就购药及就医类型为门诊就诊(公立医院)处理 - case ("医疗门诊收费票据", "公立医院"): - receipt["购药及就医类型"] = "门诊就医" - # TODO: 后续完善就购药及就医类型为住院治疗处理 - case ("医疗住院收费票据", "公立医院"): - receipt["购药及就医类型"] = "住院治疗" - # TODO: 若根据影像件类型和购药及就医机构类型匹配购药及就医类型发生异常则流转至人工处理 - case _: - raise RuntimeError( - "根据影像件类型和购药及就医机构类型匹配购药及就医类型发生异常" - ) - - dossier["receipts_layer"].append(receipt) - - # noinspection PyShadowingNames - def bank_card_recognize(image) -> None: - """ - 识别银行卡并整合至赔案档案 - :param image: 影像件 - :return: 空 - """ - # 请求深圳快瞳银行卡识别接口 - response = http_client.post( - url=(url := "https://ai.inspirvision.cn/s/api/ocr/bankCard"), - headers={ - "X-RequestId-Header": image["image_guid"] - }, # 以影像件唯一标识作为请求唯一标识,用于双方联查 - data={ - "token": authenticator.get_token( - servicer="szkt" - ), # 获取深圳快瞳访问令牌 - "imgBase64": f"data:image/{image["image_format"].lstrip(".")};base64,{image["image_base64"]}", # 影像件BASE64编码嵌入数据统一资源标识符 - }, - guid=md5((url + image["image_guid"]).encode("utf-8")) - .hexdigest() - .upper(), - ) - # TODO: 若响应非成功则流转至人工处理 - if not ( - response.get("status") == 200 - and response.get("code") == 0 - and response.get("data", {}).get("bankCardType") - == 1 # # 实际作业亦仅支持借记卡 - ): - raise RuntimeError("请求深圳快瞳银行卡识别接口发生异常或非借记卡") - # noinspection PyTypeChecker - dossier["insured_person_layer"].update( - { - "phone_number": None, - "account": None, - "account_bank": response["data"]["bankInfo"], - "account_number": response["data"]["cardNo"].replace(" ", ""), - } - ) - - # 基于影像件识别使能规则评估影像件是否识别 - if not rule_engine.evaluate( - decision="影像件识别使能", - inputs={ - "insurer_company": insurer_company, - "image_type": image["image_type"], - }, - )["recognize_enabled"]: - return - - # 根据影像件类型匹配影像件识别方法 - match image["image_type"]: - # TODO: 后续添加居民户口簿识别和整合方法 - case "居民户口簿": - raise RuntimeError("暂不支持居民户口簿") - case ( - "居民身份证(国徽、头像面)" | "居民身份证(国徽面)" | "居民身份证(头像面)" - ): - # 居民身份证识别并整合至赔案档案 - identity_card_recognize(image, insurer_company) - # TODO: 后续添加居民户口簿识别和整合方法 - case "中国港澳台地区及境外护照": - raise RuntimeError("暂不支持中国港澳台地区及境外护照") - # TODO: 暂仅支持增值税发票识别且购药及就医类型为药店购药整合至赔案档案,后续逐步添加 - case "理赔申请书": - application_recognize(image, insurer_company) - case "增值税发票" | "医疗门诊收费票据" | "医疗住院收费票据": - # 票据识别并整合至赔案档案 - receipt_recognize(image, insurer_company) - case "银行卡": - # 银行卡识别并整合至赔案档案 - bank_card_recognize(image) - - # noinspection PyShadowingNames - def claim_adjust() -> None: - """ - 赔案理算 - :return: 无 - """ - - def select_person_policy(): - pass - - # 基于据拒付规则评估,根据评估结果匹配处理方法 + # 基于据拒付规则评估 if not (result := rule_engine.evaluate(decision="拒付", inputs=dossier)): - # TODO: 若评估结果为空值则流转至人工处理 + # TODO: 若评估结果为空值(保险分公司未配置拒付规则)则流转至人工处理 raise dossier["adjustment_layer"].update( { - "conclusion": result["conclusion"], - "explanation": result["explanation"], + "conclusion": result["conclusion"], # 理赔结论 + "explanation": result["explanation"], # 结论说明 } ) + if result["conclusion"] == "拒付": + return - receipts = pandas.DataFrame(dossier["receipts_layer"]) - print(receipts["end_date"]) + adjustments = ( + pandas.DataFrame(dossier["receipts_layer"]).assing( + adjustments=lambda dataframe: dataframe.apply( + receipt_adjust, axis="columns" + ) + ) + ).explode("adjustments", ignore_index=True) + print(adjustments) # 遍历工作目录中赔案目录并创建赔案档案(模拟自动化域就待自动化任务创建理赔档案) - for claim_path in [x for x in workplace_path.iterdir() if x.is_dir()]: + for case_path in [x for x in workplace_path.iterdir() if x.is_dir()]: # 初始化赔案档案(保险公司将提供投保公司、保险分公司和报案时间等,TPA作业系统签收后生成赔案号) - dossier = { - "report_layer": { + dossier["report_layer"].update( + { + "report_time": datetime(2025, 7, 25, 12, 0, 0), # 指定报案时间 + "case_number": case_path.stem, # 设定:赔案目录名称为赔案号 "insurer_company": ( insurer_company := "中银保险有限公司苏州分公司" ), # 指定保险分公司 - "report_time": datetime(2025, 7, 25, 12, 0, 0), # 指定报案时间 - "claim_number": claim_path.stem, # 设定:赔案目录名称为赔案号 - }, # 报案层 - "images_layer": [], # 影像件层 - "insured_person_layer": {}, # 出险人层 - "person_policies_layer": [], # 个单层 - "receipts_layer": [], # 票据层 - "adjustment_layer": {}, # 理算层 - } - + } + ) # 遍历赔案目录中影像件 for image_index, image_path in enumerate( sorted( [ x - for x in claim_path.glob(pattern="*") + for x in case_path.glob(pattern="*") if x.is_file() and x.suffix.lower() in [".jpg", ".jpeg", ".png"] ], # 实际作业亦仅支持JPG、JPEG或PNG key=lambda x: x.stat().st_ctime, # 根据影像件创建时间顺序排序 @@ -1591,7 +161,7 @@ if __name__ == "__main__": # 分类影像件并旋正(较初审自动化无使能检查) image_classify(image_index, image_path) - # 就影像件层按照影像件类型排序 + # 就影像件层按照影像件类型指定排序 dossier["images_layer"].sort( key=lambda x: [ "居民户口簿", @@ -1608,6 +178,7 @@ if __name__ == "__main__": "其它", ].index(x["image_type"]) ) + # 遍历影像件层中影像件 for image in dossier["images_layer"]: # 识别影像件并整合至赔案档案 @@ -1615,10 +186,15 @@ if __name__ == "__main__": image, insurer_company, ) - # 就票据层按照事故止期和票据号顺序排序 - dossier["receipts_layer"].sort(key=lambda x: (x["end_date"], x["number"])) - claim_adjust() + # 就票据层按照开票日期和票据号顺序排序 + dossier["receipts_layer"].sort(key=lambda x: (x["date"], x["number"])) + + print(dossier["insured_persons_layer"]) + exit() + + # 理算 + case_adjust() print(dossier["adjustment_layer"]) @@ -1627,7 +203,7 @@ if __name__ == "__main__": print(dossier["report_layer"]) print(dossier["insured_person_layer"]) - print(dossier["person_policies_layer"]) + print(dossier["insured_persons_layer"]) dossier.pop("images_layer") dossier.pop("receipts_layer") diff --git a/票据理赔自动化/masterdata.py b/票据理赔自动化/masterdata.py new file mode 100644 index 0000000..8921ea4 --- /dev/null +++ b/票据理赔自动化/masterdata.py @@ -0,0 +1,321 @@ +# -*- coding: utf-8 -*- + +from datetime import datetime +from decimal import Decimal, ROUND_HALF_UP +from typing import Any, Dict, List, Optional + +from utils.client import SQLiteClient + + +class MasterData(SQLiteClient): + """主数据""" + + def __init__(self): + """ + 初始化主数据 + """ + # 初始化SQLite客户端 + super().__init__(database="database.db") + try: + with self: + # 初始化团单表 + self._execute( + sql=""" + CREATE TABLE IF NOT EXISTS group_policies + ( + --团单唯一标识 + guid TEXT PRIMARY KEY, + --团单号 + group_policy TEXT NOT NULL, + --保险分公司名称 + insurer_company TEXT NOT NULL, + --保险起期 + commencement_date TEXT NOT NULL, + --保险止期 + termination_date TEXT NOT NULL + ) + """ + ) + # 初始化个单表 + self._execute( + sql=""" + CREATE TABLE IF NOT EXISTS person_policies + ( + --个单唯一标识 + guid TEXT PRIMARY KEY, + --个单号 + person_policy TEXT NOT NULL, + --保险起期 + commencement_date TEXT NOT NULL, + --保险止期 + termination_date TEXT NOT NULL, + --团单唯一标识,用于联查团案 + group_policy_guid TEXT NOT NULL + ) + """ + ) + # 初始化被保险人表,保司推送赔案时,一般无团单号,需先根据保险分公司名称、被保险人姓名、证件类型和证件号码查询被保人,再在票据理算时根据事故起期确定个单和相应责任 + self._execute( + sql=""" + CREATE TABLE IF NOT EXISTS insured_persons + ( + --被保险人唯一标识 + guid TEXT PRIMARY KEY, + --被保险人姓名 + insured_person TEXT NOT NULL, + --证件类型 + identity_type TEXT NOT NULL, + --证件号码 + identity_number TEXT NOT NULL, + --与主被保险人关系,包括本人、父母、配偶和子女等 + relationship TEXT NOT NULL, + --个单唯一标识,用于联查个单 + person_policy_guid TEXT NOT NULL + + ) + """ + ) + # 初始化责任表 + self._execute( + sql=""" + CREATE TABLE IF NOT EXISTS liabilities + ( + --责任唯一标识 + guid TEXT PRIMARY KEY, + --责任名称 + liability TEXT NOT NULL, + --出险事故 + accident TEXT NOT NULL, + --个人自费理算比例 + personal_self_ratio TEXT NOT NULL, + --个人自付理算比例 + non_medical_ratio TEXT NOT NULL, + --合理理算比例 + reasonable_ratio TEXT NOT NULL, + --理算保单唯一标识 + adjust_policy_guid TEXT NOT NULL, + --个单唯一标识 + person_policy_guid TEXT NOT NULL + ) + """ + ) + # 初始化保额变动表 + self._execute( + sql=""" + CREATE TABLE IF NOT EXISTS coverage_changes + ( + --保额变动唯一标识 + guid TEXT PRIMARY KEY, + --变动类型,包括承保和理算等 + change_type TEXT NOT NULL, + --变动前金额 + before_change_amount TEXT NOT NULL, + --变动金额 + change_amount TEXT NOT NULL, + --变动后金额 + after_change_amount TEXT NOT NULL, + --变动时间 + change_time TEXT NOT NULL, + --变动保单唯一标识 + change_policy_guid TEXT NOT NULL + ) + """ + ) + # 初始化购药及就医机构表 + self._execute( + sql=""" + CREATE TABLE IF NOT EXISTS institutions + ( + --购药及就医机构 + institution TEXT PRIMARY KEY, + --购药及就医机构类型 + institution_type TEXT NOT NULL, + --所在省 + province TEXT NOT NULL, + --所在市 + city TEXT NOT NULL + ) + """ + ) + # 初始化药品表 + self._execute( + sql=""" + CREATE TABLE IF NOT EXISTS medicines + ( + --药品/医疗服务 + medicine TEXT PRIMARY KEY + ) + """ + ) + except Exception as exception: + raise RuntimeError(f"初始化数据库发生异常:{str(exception)}") + + # noinspection PyShadowingNames + def query_liabilities( + self, + insurer_company: str, + insured_person: str, + identity_type: str, + identity_number: str, + report_date: str, + ) -> Optional[List[Dict[str, Any]]]: + """ + 根据保险分公司名称、被保险人姓名、证件类型、证件号码和出险时间查询责任列表 + :param insurer_company: 保险分公司名称 + :param insured_person: 被保险人姓名 + :param identity_type: 证件类型 + :param identity_number: 证件号码 + :param report_date: 报案时间 + :return: 责任列表 + """ + # noinspection PyBroadException + try: + with self: + # noinspection SqlResolve + result = self._query_all( + sql=""" + SELECT group_policies.group_policy, + group_policies.insurer_company, + person_policies.person_policy, + person_policy_coverage_changes.after_change_amount AS remaining_amount, + master_insured_persons.insured_person AS master_insured_person, + insured_persons.insured_person, + insured_persons.identity_type, + insured_persons.identity_number, + insured_persons.relationship, + MAX(group_policies.commencement_date, + person_policies.commencement_date) AS commencement_date, + MIN(group_policies.termination_date, + person_policies.termination_date) AS termination_date, + liabilities.liability, + liabilities.accident, + liabilities.personal_self_ratio, + liabilities.non_medical_ratio, + liabilities.reasonable_ratio, + liabilities.adjust_policy_guid + FROM insured_persons + INNER JOIN insured_persons master_insured_persons + ON person_policies.guid = master_insured_persons.person_policy_guid + AND master_insured_persons.relationship = "本人" + INNER JOIN person_policies + ON insured_persons.person_policy_guid = person_policies.guid + INNER JOIN group_policies + ON person_policies.group_policy_guid = group_policies.guid + INNER JOIN liabilities + ON person_policies.guid = liabilities.person_policy_guid + INNER JOIN coverage_changes person_policy_coverage_changes + ON person_policies.guid = + person_policy_coverage_changes.change_policy_guid + AND + person_policy_coverage_changes.change_time = (SELECT MAX(change_time) + FROM coverage_changes + WHERE change_policy_guid = person_policies.guid) + INNER JOIN coverage_changes + ON liabilities.adjust_policy_guid = coverage_changes.change_policy_guid + AND coverage_changes.change_time = (SELECT MAX(change_time) + FROM coverage_changes + WHERE liabilities.adjust_policy_guid = change_policy_guid) + WHERE group_policies.insurer_company = ? + AND insured_persons.insured_person = ? + AND insured_persons.identity_type = ? + AND insured_persons.identity_number = ? + AND ? BETWEEN group_policies.commencement_date AND group_policies.termination_date + AND ? BETWEEN person_policies.commencement_date AND person_policies.termination_date + AND CAST(coverage_changes.after_change_amount AS REAL) > 0 + """, + parameters=( + insurer_company, + insured_person, + identity_type, + identity_number, + report_date, + report_date, + ), + ) + if result: + return [ + { + k: ( + datetime.strptime(v, "%Y-%m-%d") + if k in ["commencement_date", "termination_date"] + else ( + Decimal(v).quantize( + Decimal("0.00"), + rounding=ROUND_HALF_UP, + ) + if k + in [ + "remaining_amount", + "personal_self_ratio", + "non_medical_ratio", + "reasonable_ratio", + ] + else v + ) + ) # 就保险起期、止期则转为日期时间(datetime对象),个人自费比例、个人自付比例和合理比例转为小数(decimal对象) + for k, v in e.items() + } + for e in result + ] # 将保险起期和保险止期转为日期(datetime对象) + raise RuntimeError("查无数据") + # TODO: 若根据保险分公司名称、被保险人姓名、证件类型、证件号码和出险时间查询被保险人发生异常则流转至主数据人工处理 + except Exception as exception: + raise RuntimeError(f"{str(exception)}") + + # noinspection PyShadowingNames + def query_institution_type(self, institution: str) -> Optional[str]: + """ + 根据购药及就医机构查询购药及就医机构类型 + :param institution: 购药及就医机构 + :return: 购药及就医机构类型 + """ + # noinspection PyBroadException + try: + with self: + # noinspection SqlResolve + result = self._query_one( + sql=""" + SELECT institution_type + FROM institutions + WHERE institution = ? + """, + parameters=(institution,), + ) + if result: + return result["institution_type"] + raise + # TODO: 若根据购药及就医机构查询购药及就医机构类型发生异常则流转至主数据人工处理 + except Exception: + raise + + # noinspection PyShadowingNames + def query_medicine( + self, + content: str, + ) -> Optional[str]: + """ + 根据明细项中具体内容查询药品/医疗服务 + :param content: 明细项具体内容 + :return: 药品/医疗服务 + """ + # TODO: 暂仅支持查询药品、通过药品/医疗服务包含明细项中具体内容查询 + # noinspection PyBroadException + try: + with self: + # noinspection SqlResolve + result = self._query_all( + sql=""" + SELECT medicine + FROM medicines + WHERE ? LIKE '%' || medicine || '%' + """, + parameters=(content,), + ) + if result: + return max(result, key=lambda x: len(x["medicine"]))[ + "medicine" + ] # 返回最大长度的药品/医疗服务 + raise + # TODO: 若根据明细项中具体内容查询药品/医疗服务发生异常则流转至主数据人工处理 + except Exception: + raise