Python/票据理赔自动化/image.py

1279 lines
61 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# -*- coding: utf-8 -*-
import json
import re
from base64 import b64encode
from datetime import datetime
from decimal import Decimal, ROUND_HALF_UP
from hashlib import md5
from pathlib import Path
from typing import Optional, Tuple, Dict, Any
import cv2
import numpy
import pandas
from fuzzywuzzy import fuzz
from jionlp import parse_location
from common import dossier, master_data, rule_engine
from utils.client import Authenticator, HTTPClient
# Instantiate the authenticator; every request below obtains its access token from it
authenticator = Authenticator()
# Instantiate the shared HTTP client (timeout=300, presumably seconds — TODO confirm)
http_client = HTTPClient(timeout=300, cache_enabled=True) # response caching is enabled
# noinspection PyShadowingNames
def image_classify(image_index: int, image_path: Path) -> Optional[Tuple[str, str]]:
"""
分类影像件并旋正
:param image_index: 影像件编号
:param image_path: 影像件路径path对象
:return: 无
"""
# noinspection PyShadowingNames
def image_read(
image_path: Path,
) -> Optional[numpy.ndarray | None]:
"""
打开并读取影像件
:param image_path: 影像件路径path对象
:return: 影像件数据numpy.ndarray对象
"""
# noinspection PyBroadException
try:
# 打开并读取影像件(默认转为单通道灰度图)
image_ndarray = cv2.imread(image_path.as_posix(), cv2.IMREAD_GRAYSCALE)
if image_ndarray is None:
raise
return image_ndarray
except Exception as exception:
raise RuntimeError(f"打开并读取影像件发生异常:{str(exception)}")
# noinspection PyShadowingNames
def image_serialize(image_format: str, image_ndarray: numpy.ndarray) -> str:
"""
生成影像件唯一标识
:param image_format: 影像件格式
:param image_ndarray: 影像件数据
:return: 影像件唯一标识
"""
success, image_ndarray_encoded = cv2.imencode(image_format, image_ndarray)
if not success or image_ndarray_encoded is None:
raise RuntimeError("编码影像件发生异常")
# 转为字节流并生成影像件唯一标识
image_guid = md5(image_ndarray_encoded.tobytes()).hexdigest().upper()
return image_guid
# noinspection PyShadowingNames
def image_compress(
image_format: str,
image_ndarray: numpy.ndarray,
image_size_specified: float = 2.0,
) -> Optional[str]:
"""
压缩影像件
:param image_format: 影像件格式
:param image_ndarray: 影像件数据
:param image_size_specified: 指定压缩影像件大小单位为兆字节MB
:return: 压缩后影像件BASE64编码
"""
# 转为字节
image_size_specified = image_size_specified * 1024 * 1024
# 通过调整影像件质量和尺寸达到压缩影像件目的(先调整影像件质量再调整影像件尺寸)
for quality in range(100, 50, -10):
image_ndarray_copy = image_ndarray.copy()
for _ in range(10):
success, image_ndarray_encoded = cv2.imencode(
image_format,
image_ndarray_copy,
params=(
[cv2.IMWRITE_PNG_COMPRESSION, 10 - quality // 10]
if image_format == "png"
else [cv2.IMWRITE_JPEG_QUALITY, quality]
),
)
if not success or image_ndarray_encoded is None:
break
# 影像件BASE64编码
image_base64 = b64encode(image_ndarray_encoded.tobytes()).decode(
"utf-8"
)
if len(image_base64) <= image_size_specified:
return image_base64
image_ndarray_copy = cv2.resize(
image_ndarray_copy,
(
int(image_ndarray_copy.shape[0] * 0.95),
int(image_ndarray_copy.shape[1] * 0.95),
),
interpolation=cv2.INTER_AREA,
)
# 若调整影像件尺寸后宽/高小于350像素则终止循环
if min(image_ndarray_copy.shape[:2]) < 350:
break
return None
# 打开并读取影像件
image_ndarray = image_read(image_path)
image_index = f"{image_index:02d}"
image_format = image_path.suffix.lower() # 影像件格式
# 生成影像件唯一标识
# noinspection PyTypeChecker
image_guid = image_serialize(image_format, image_ndarray)
# 压缩影像件
image_base64 = image_compress(
image_format, image_ndarray, image_size_specified=2
) # 深圳快瞳要求影像件BASE64编码后大小小于等于2兆字节
# TODO: 若压缩影像件发生异常则流转至人工处理
if not image_base64:
raise
# 请求深圳快瞳影像件分类接口
response = http_client.post(
url=(url := "https://ai.inspirvision.cn/s/api/ocr/genalClassify"),
headers={
"X-RequestId-Header": image_guid
}, # 以影像件唯一标识作为请求唯一标识,用于双方联查
data={
"token": authenticator.get_token(servicer="szkt"), # 获取深圳快瞳访问令牌
"imgBase64": f"data:image/{image_format.lstrip(".")};base64,{image_base64}", # 影像件BASE64编码嵌入数据统一资源标识符
},
guid=md5((url + image_guid).encode("utf-8")).hexdigest().upper(),
)
# TODO: 若响应非成功则流转至人工处理
if not (response.get("status") == 200 and response.get("code") == 0):
raise
# 匹配影像件类型
# noinspection PyTypeChecker
match (response["data"]["flag"], response["data"]["type"]):
case (14, _):
image_type = "居民户口簿"
case (7, "idcard-front-back"):
image_type = "居民身份证(国徽、头像面)"
case (7, "idcard-front"):
image_type = "居民身份证(国徽面)"
case (7, "idcard-back"):
image_type = "居民身份证(头像面)"
case (11, _):
image_type = "中国港澳台地区及境外护照"
case (8, _):
image_type = "银行卡"
case (4, _):
image_type = "增值税发票"
case (1, _):
image_type = "医疗费用清单"
case (5, _):
image_type = "医疗门诊收费票据"
case (3, _):
image_type = "医疗住院收费票据"
case (18, _):
image_type = "理赔申请书"
case _:
image_type = "其它"
# 匹配影像件方向
# noinspection PyTypeChecker
image_orientation = {
"0": "0度",
"90": "顺时针90度",
"180": "180度",
"270": "逆时针90度",
}.get(response["data"]["angle"], "0度")
# 若影像件方向非0度则旋正
if image_orientation != "0度":
image_ndarray = cv2.rotate(
image_ndarray,
{
"顺时针90度": cv2.ROTATE_90_COUNTERCLOCKWISE, # 逆时针旋转90度
"180度": cv2.ROTATE_180, # 旋转180度
"逆时针90度": cv2.ROTATE_90_CLOCKWISE, # 顺时针旋转90度
}[image_orientation],
)
# 旋正后再次压缩影像件
image_base64 = image_compress(
image_format, image_ndarray, image_size_specified=2
)
# TODO: 若旋正后再次压缩影像件发生异常则流转至人工处理
if not image_base64:
raise
dossier["images_layer"].append(
{
"image_index": image_index,
"image_path": image_path.as_posix(),
"image_name": image_path.stem,
"image_format": image_format,
"image_guid": image_guid,
"image_base64": image_base64,
"image_type": image_type,
}
)
# noinspection PyShadowingNames
def image_recognize(
    image: Dict[str, Any],
    insurer_company: str,
) -> None:
    """
    Recognize an image and merge the result into the claim dossier.

    :param image: image record; the code below reads its ``image_type``,
        ``image_index``, ``image_guid``, ``image_format`` and ``image_base64`` keys
    :param insurer_company: insurer branch company
    :return: None
    """
    # TODO: later merge the emblem-side and portrait-side resident ID card images
# noinspection PyShadowingNames
def identity_card_recognize(image: Dict[str, Any], insurer_company: str) -> None:
    """
    Recognize a resident ID card and merge the result into the claim dossier.

    :param image: image record
    :param insurer_company: insurer branch company
    :return: None
    """
# noinspection PyShadowingNames
def calculate_age(report_time: datetime, birth_date: datetime) -> int:
    """
    Compute the age in completed years as of the report time.

    :param report_time: report time
    :param birth_date: date of birth
    :return: age in completed years
    """
    years_elapsed = report_time.year - birth_date.year
    # The birthday has not been reached yet in the report year when the
    # report's (month, day) sorts before the birth date's (month, day)
    birthday_pending = (report_time.month, report_time.day) < (
        birth_date.month,
        birth_date.day,
    )
    if birthday_pending:
        return years_elapsed - 1
    return years_elapsed
# Request the Shenzhen Kuaitong resident ID card recognition endpoint
# (the service recognizes the emblem side and the portrait side together)
media_subtype = image["image_format"].lstrip(".")
response = http_client.post(
    url=(url := "https://ai.inspirvision.cn/s/api/ocr/identityCard"),
    # The image identifier doubles as the request identifier for joint tracing
    headers={"X-RequestId-Header": image["image_guid"]},
    data={
        "token": authenticator.get_token(servicer="szkt"),
        # BASE64 text embedded in a data URI
        "imgBase64": f"data:image/{media_subtype};base64,{image['image_base64']}",
    },
    guid=md5((url + image["image_guid"]).encode("utf-8")).hexdigest().upper(),
)
# TODO: route to manual handling when the recognition request fails
if not (response.get("status") == 200 and response.get("code") == 0):
    # Explicit error instead of a bare ``raise`` with no active exception
    raise RuntimeError("请求深圳快瞳居民身份证识别接口发生异常")
card = response["data"]

# Fields read when the portrait side is present
if image["image_type"] in (
    "居民身份证(国徽、头像面)",
    "居民身份证(头像面)",
):
    insured_person = card["name"]
    identity_type = "居民身份证"
    identity_number = card["idNo"]
    # The birthday is parsed with the %Y-%m-%d format
    birth_date = datetime.strptime(card["birthday"], "%Y-%m-%d")
    report_time = dossier["report_layer"]["report_time"]
    # Split the residential address into province, city, district and detail
    residential_address = parse_location(card["address"])
    # noinspection PyTypeChecker
    dossier["insured_person_layer"].update(
        {
            "insured_person": insured_person,  # insured person
            "identity_type": identity_type,  # identity document type
            "identity_number": identity_number,  # identity document number
            "gender": card["sex"],  # gender
            "birth_date": birth_date,  # date of birth
            "age": calculate_age(report_time, birth_date),  # age
            "province": residential_address.get("province"),
            "city": residential_address.get("city"),
            "district": residential_address.get("county"),
            "detailed_address": residential_address.get("detail"),
        }
    )
    # Query the individual policies by insurer branch, insured person,
    # identity type, identity number and the report date.
    # NOTE(review): the original comment says "accident time" but the report
    # time is what is passed — confirm which one the query expects.
    dossier["insured_persons_layer"] = master_data.query_liabilities(
        insurer_company,
        insured_person,
        identity_type,
        identity_number,
        report_time.strftime("%Y-%m-%d"),
    )

# Fields read when the emblem side is present
if image["image_type"] in (
    "居民身份证(国徽、头像面)",
    "居民身份证(国徽面)",
):
    # The validity period is split on "-" into a start and an end part
    period = card["validDate"].split("-")
    # noinspection PyTypeChecker
    dossier["insured_person_layer"].update(
        {
            "commencement_date": datetime.strptime(period[0], "%Y.%m.%d"),
            # A "长期" (long-term) end is stored as 9999-12-31
            "termination_date": (
                datetime(9999, 12, 31)
                if period[1] == "长期"
                else datetime.strptime(period[1], "%Y.%m.%d")
            ),
        }
    )
# noinspection PyShadowingNames
def application_recognize(image: Dict[str, Any], insurer_company: str) -> None:
    """
    Recognize a claim application form and merge the result into the claim dossier.

    Only insurers whose name starts with "中银保险有限公司" are handled; for any
    other insurer the function does nothing.

    :param image: image record
    :param insurer_company: insurer branch company
    :return: None
    :raises RuntimeError: if the form cannot be recognized
    """

    # noinspection PyShadowingNames
    def mlm_recognize(
        image: Dict[str, Any], schema: Dict[str, Any]
    ) -> Optional[Dict[str, Any]]:
        """
        Run OCR on the claim application form with a multimodal large model
        and return the structured result.

        :param image: image record
        :param schema: JSON schema the model output must follow
        :return: the parsed JSON content of the first choice, or None when the
            response lacks that content or it is not valid JSON
        """
        import os

        # SECURITY: a credential is hard-coded in source control. It can be
        # overridden through the ARK_API_KEY environment variable; the literal
        # remains only as a backward-compatible fallback and should be
        # rotated and removed.
        api_key = os.environ.get(
            "ARK_API_KEY", "2c28ab07-888c-45be-84a2-fc4b2cb5f3f2"
        )
        media_subtype = image["image_format"].lstrip(".")
        payload = {
            "model": "doubao-seed-1-6-251015",
            "messages": [
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "image_url",
                            # BASE64 text embedded in a data URI
                            "image_url": {
                                "url": f"data:image/{media_subtype};base64,{image['image_base64']}"
                            },
                        },
                        {
                            "type": "text",
                            "text": "请就理赔申请书进行光学字符识别、结构化识别结果并返回符合Schema的JSON数据",
                        },
                    ],
                }
            ],
            "temperature": 0.2,  # sampling temperature
            "thinking": {"type": "disabled"},  # deep thinking is turned off
            "response_format": {
                "type": "json_schema",
                "json_schema": {
                    "name": "就理赔申请书进行光学字符识别并结构化识别结果",
                    "schema": schema,
                    "strict": True,  # strict schema adherence
                },
            },
        }
        # Request the Volcengine multimodal model endpoint; the request
        # identifier is the MD5 of the canonical (key-sorted) JSON payload
        response = http_client.post(
            url="https://ark.cn-beijing.volces.com/api/v3/chat/completions",
            headers={
                "Authorization": f"Bearer {api_key}",
                "Content-Type": "application/json; charset=utf-8",
            },
            json=payload,
            guid=md5(
                json.dumps(payload, sort_keys=True, ensure_ascii=False).encode("utf-8")
            )
            .hexdigest()
            .upper(),
        )
        # Deserialize the message content; only the failures a malformed
        # response can cause are caught (the original bare ``except:`` also
        # swallowed KeyboardInterrupt and SystemExit)
        try:
            # noinspection PyTypeChecker
            return json.loads(response["choices"][0]["message"]["content"])
        except (KeyError, IndexError, TypeError, ValueError):
            return None

    # noinspection PyShadowingNames
    def boc_application_recognize(image: Dict[str, Any]) -> None:
        """
        Recognize the claim application form of 中银保险有限公司 and merge the
        contact and bank account fields into the claim dossier.

        :param image: image record
        :return: None
        :raises RuntimeError: if the model returns no usable result
        """

        def text_field(description: str) -> Dict[str, str]:
            """Build a string-typed schema property with the given description."""
            return {"type": "string", "description": description}

        # JSON schema of the expected recognition result
        schema = {
            "type": "object",
            "description": "识别结果对象",
            "properties": {
                "申请人": text_field("申请人,若无数据则为空字符串"),
                "性别": text_field("性别,若无数据则为空字符串"),
                "年龄": text_field("年龄,若无数据则为空字符串"),
                "手机": text_field("手机,若无数据则为空字符串"),
                "所属分支行及部门": text_field(
                    "所属分支行及部门,若无数据则为空字符串"
                ),
                "身份证号码": text_field("身份证号码,若无数据则为空字符串"),
                "就诊记录": {
                    "type": "array",
                    "description": "所有就诊记录数组",
                    "items": {
                        "type": "object",
                        "description": "每条就诊记录对象",
                        "properties": {
                            "就诊序号": text_field("就诊序号,若无数据则为空字符串"),
                            "发票日期": text_field(
                                "发票日期若无数据则为空字符串若有数据则格式为YYYY/MM/DD"
                            ),
                            "发票上的就诊医院/药店": text_field(
                                "发票上的就诊医院/药店,若无数据则为空字符串"
                            ),
                            "票据张数": text_field("票据张数,若无数据则为空字符串"),
                            "票据金额": text_field(
                                "票据金额,若无数据则为空字符串,若有数据则保留两位小数"
                            ),
                            "诊断": text_field("诊断,若无数据则为空字符串"),
                        },
                        # Mandatory fields of a visit record
                        "required": [
                            "发票日期",
                            "发票上的就诊医院/药店",
                            "诊断",
                        ],
                        # No extra properties on a visit record
                        "additionalProperties": False,
                    },
                },
                "票据金额合计": text_field(
                    "票据金额合计,若无数据则为空字符串,若有数据则保留两位小数"
                ),
                "开户银行": text_field(
                    "开户银行,若无数据则为空字符串,请注意开户银行可能为多行"
                ),
                "户名": text_field("户名,若无数据则为空字符串"),
                "账号": text_field("账号,若无数据则为空字符串"),
            },
            # Mandatory top-level fields
            "required": [
                "申请人",
                "手机",
                "身份证号码",
                "就诊记录",
                "开户银行",
                "户名",
                "账号",
            ],
            # No extra top-level properties
            "additionalProperties": False,
        }
        recognition = mlm_recognize(image, schema)
        # TODO: route to manual handling when recognition fails
        if not recognition:
            # Explicit error instead of a bare ``raise`` with no active exception
            raise RuntimeError("识别中银保险有限公司的理赔申请书发生异常")
        dossier["insured_person_layer"].update(
            {
                "phone_number": recognition["手机"],
                "account": recognition["户名"],
                "account_bank": recognition["开户银行"],
                "account_number": recognition["账号"],
            }
        )

    # Pick the handler by insurer branch company
    if insurer_company.startswith("中银保险有限公司"):
        boc_application_recognize(image)
# noinspection PyShadowingNames
def receipt_recognize(image: Dict[str, Any], insurer_company: str) -> None:
    """
    Recognize a receipt and merge the result into the claim dossier.

    :param image: image record
    :param insurer_company: insurer branch company
    :return: None
    """
# noinspection PyShadowingNames
def fuzzy_match(contents: list, key: str) -> Optional[str]:
    """
    Look up the value whose field name matches *key* in a recognition result.

    Two entry layouts are supported, chosen from the first entry:
    ``{"desc": name, "value": value}`` (VAT invoice recognition) and
    ``{"name": name, "word": {"value": value}}`` (medical receipt
    recognition). An exact name match wins; otherwise the entry with the
    highest ``fuzz.WRatio`` similarity is used when that similarity is at
    least 80.

    :param contents: list of recognition entries
    :param key: field name to look up
    :return: the matched value, or None when *contents* is empty, the layout
        is unknown, nothing is similar enough, or the matched value is empty
    """
    if not contents:
        return None

    # Choose the accessors once from the layout of the first entry
    if "desc" in contents[0]:
        name_field = "desc"

        def value_of(content):
            return content["value"]

    elif "name" in contents[0]:
        name_field = "name"

        def value_of(content):
            return content["word"]["value"]

    else:
        return None

    # An exact name match takes precedence over similarity scoring
    for content in contents:
        if content[name_field] == key:
            return value_of(content) or None

    # Otherwise score every field name against the key; max() keeps the
    # first entry among equal scores
    best_score, best_content = max(
        (
            (fuzz.WRatio(content[name_field], key, force_ascii=False), content)
            for content in contents
        ),
        key=lambda scored: scored[0],
    )
    if best_score < 80:
        return None
    return value_of(best_content) or None
def parse_item(item: str) -> Tuple[str, Optional[str]]:
    """
    Split a line item into its category and the medicine/medical service
    looked up from its specific content.

    VAT invoice line items usually look like ``*{category}*{specific}``:
    the category is returned as-is and the specific content is passed to
    ``master_data.query_medicine``. Any other text is returned unchanged as
    the category with None as the medicine.

    :param item: line item text
    :return: tuple of (category, medicine/medical service or None)
    """
    parsed = re.match(
        r"^\*(?P<category>.*?)\*(?P<specific>.*)$",
        item,
    )
    # Guard clause: text that does not follow the starred layout
    if parsed is None:
        return item, None
    category, specific = parsed.group("category", "specific")
    return category, master_data.query_medicine(specific)
# 初始化票据数据
receipt = {"image_index": image["image_index"]}
# 请求深圳快瞳票据查验接口(兼容增值税发票、医疗门诊/住院收费票据)
response = http_client.post(
url=(url := "https://ai.inspirvision.cn/s/api/ocr/invoiceCheckAll"),
headers={
"X-RequestId-Header": image["image_guid"]
}, # 以影像件唯一标识作为请求唯一标识,用于双方联查
data={
"token": authenticator.get_token(
servicer="szkt"
), # 获取深圳快瞳访问令牌
"imgBase64": f"data:image/{image["image_format"].lstrip(".")};base64,{image["image_base64"]}", # 影像件BASE64编码嵌入数据统一资源标识符
},
guid=md5((url + image["image_guid"]).encode("utf-8")).hexdigest().upper(),
)
# 若查验状态为真票或红票则直接整合至赔案档案
if response.get("status") == 200 and response.get("code") == 10000:
# noinspection PyTypeChecker
match response["data"]["productCode"]:
# 增值税发票,目前深圳快瞳支持全电发票和全电纸质发票、区块链发票和增值税发票查验
case "003082":
# noinspection PyTypeChecker
receipt.update(
{
"verification": (
"真票"
if response["data"]["details"]["invoiceTypeNo"] == "0"
else "红票"
), # 红票为状态为失控、作废、已红冲、部分红冲和全额红冲的票据
"number": response["data"]["details"]["number"],
"code": (
response["data"]["details"]["code"]
if response["data"]["details"]["code"]
else None
),
"date": datetime.strptime(
response["data"]["details"]["date"], "%Y年%m月%d"
), # 转为日期时间datetime对象
"verification_code": response["data"]["details"][
"check_code"
],
"amount": Decimal(
response["data"]["details"]["total"]
).quantize(
Decimal("0.00"),
rounding=ROUND_HALF_UP,
), # 深圳快瞳票据查验接口中开票金额由字符串转为Decimal保留两位小数
"payer": response["data"]["details"]["buyer"],
"institution": response["data"]["details"]["seller"],
"items": [
{
"item": item["name"],
"quantity": (
Decimal(item["quantity"]).quantize(
Decimal("0.00"),
rounding=ROUND_HALF_UP,
)
if item["quantity"]
else Decimal("0.00")
), # 深圳快瞳票据查验接口中明细单位由空字符转为None若非空字符由字符串转为Decimal保留两位小数
"amount": (
Decimal(item["total"]) + Decimal(item["tax"])
).quantize(
Decimal("0.00"),
rounding=ROUND_HALF_UP,
), # 深圳快瞳票据查验接口中明细的金额和税额由字符串转为Decimal保留两位小数并求和
}
for item in response["data"]["details"]["items"]
],
"remarks": (
response["data"]["details"]["remark"]
if response["data"]["details"]["remark"]
else None
),
}
)
# 医疗门诊、住院收费票据
case "003081":
# noinspection PyTypeChecker
receipt.update(
{
"verification": (
"真票"
if response["data"]["flushedRed"] == "true"
else "红票"
),
"number": response["data"]["billNumber"],
"code": response["data"]["billCode"],
"date": datetime.strptime(
response["data"]["invoiceDate"], "%Y-%m-%d %H:%M:%S"
), # 转为日期时间datetime对象
"admission_date": (
datetime.strptime(
response["data"]["hospitalizationDate"].split("-")[
0
],
"%Y%m%d",
)
if response["data"]["hospitalizationDate"]
else None
), # 深圳快瞳票据查验接口中住院日期解析为入院日期和出院日期
"discharge_date": (
datetime.strptime(
response["data"]["hospitalizationDate"].split("-")[
1
],
"%Y%m%d",
)
if response["data"]["hospitalizationDate"]
else None
),
"verification_code": response["data"]["checkCode"],
"amount": Decimal(response["data"]["amount"]).quantize(
Decimal("0.00"),
rounding=ROUND_HALF_UP,
),
"payer": response["data"]["payer"],
"institution": response["data"]["receivablesInstitution"],
"items": [
{
"item": item["itemName"],
"quantity": Decimal(item["number"]).quantize(
Decimal("0.00"),
rounding=ROUND_HALF_UP,
),
"amount": Decimal(item["totalAmount"]).quantize(
Decimal("0.00"),
rounding=ROUND_HALF_UP,
),
}
for item in response["data"]["feeitems"]
],
"personal_self_payment": Decimal(
response["data"]["personalExpense"]
if response["data"]["personalExpense"]
else Decimal("0.00")
).quantize(
Decimal("0.00"),
rounding=ROUND_HALF_UP,
),
"non_medical_payment": Decimal(
response["data"]["personalPay"]
if response["data"]["personalPay"]
else Decimal("0.00")
).quantize(
Decimal("0.00"),
rounding=ROUND_HALF_UP,
),
"medical_payment": (
Decimal(response["data"]["medicarePay"])
if response["data"]["medicarePay"]
else Decimal("0.00")
+ Decimal(
response["data"]["otherPayment"]
if response["data"]["otherPayment"]
else Decimal("0.00")
)
).quantize(
Decimal("0.00"),
rounding=ROUND_HALF_UP,
), # 包括医保统筹基金支付和其它支付(例如,退休补充支付)
}
)
# 若查验状态为假票或无法查验则再请求深圳快瞳票据识别接口接整合至赔案档案
else:
receipt["verification"] = (
"假票"
if response.get("status") == 400
and (response.get("code") == 10100 or response.get("code") == 10001)
else "无法查验"
) # 假票:查无此票或查验成功五要素不一致
match image["image_type"]:
case "增值税发票":
# 请求深圳快瞳增值税发票识别接口
response = http_client.post(
url=(url := "https://ai.inspirvision.cn/s/api/ocr/vatInvoice"),
headers={
"X-RequestId-Header": image["image_guid"]
}, # 以影像件唯一标识作为请求唯一标识,用于双方联查
data={
"token": authenticator.get_token(
servicer="szkt"
), # 获取深圳快瞳访问令牌
"imgBase64": f"data:image/{image["image_format"].lstrip(".")};base64,{image["image_base64"]}", # 影像件BASE64编码嵌入数据统一资源标识符
},
guid=md5((url + image["image_guid"]).encode("utf-8"))
.hexdigest()
.upper(),
)
# TODO: 若请求深圳快瞳增值税发票识别接口发生异常则流转至人工处理
if not (
response.get("status") == 200 and response.get("code") == 0
):
raise
match fuzzy_match(response["data"], "发票类型"):
case "电子发票(普通发票)":
# noinspection PyTypeChecker
receipt.update(
{
"number": fuzzy_match(response["data"], "发票号码"),
"code": fuzzy_match(response["data"], "发票代码"),
"date": datetime.strptime(
fuzzy_match(response["data"], "开票日期"),
"%Y年%m月%d",
),
"verification_code": fuzzy_match(
response["data"], "校验码"
),
"amount": Decimal(
fuzzy_match(
response["data"], "小写金额"
).replace("¥", "")
).quantize(
Decimal("0.00"),
rounding=ROUND_HALF_UP,
),
"payer": fuzzy_match(
response["data"], "购买方名称"
),
"institution": fuzzy_match(
response["data"], "销售方名称"
),
"items": [
{
"item": name,
"quantity": Decimal(quantity).quantize(
Decimal("0.00"),
rounding=ROUND_HALF_UP,
),
"amount": (
Decimal(amount) + Decimal(tax)
).quantize(
Decimal("0.00"),
rounding=ROUND_HALF_UP,
), # 深圳快瞳票据识别接口中明细的金额和税额由字符串转为Decimal保留两位小数并求和
}
for name, quantity, amount, tax in zip(
[
x["value"]
for x in response["data"]
if re.match(
r"^项目名称(\d+)?$",
x["desc"],
)
],
[
x["value"]
for x in response["data"]
if re.match(
r"^数量(\d+)?$",
x["desc"],
)
],
[
x["value"]
for x in response["data"]
if re.match(
r"^金额(\d+)?$",
x["desc"],
)
],
[
x["value"]
for x in response["data"]
if re.match(
r"^税额(\d+)?$",
x["desc"],
)
],
)
],
"remarks": fuzzy_match(response["data"], "备注"),
}
)
case "增值税普通发票(卷票)":
# noinspection PyTypeChecker
receipt.update(
{
"number": fuzzy_match(response["data"], "发票号码"),
"code": fuzzy_match(response["data"], "发票代码"),
"date": datetime.strptime(
fuzzy_match(response["data"], "开票日期"),
"%Y-%m-%d",
),
"verification_code": fuzzy_match(
response["data"], "校验码"
),
"amount": Decimal(
fuzzy_match(
response["data"], "合计金额(小写)"
).replace("¥", "")
).quantize(
Decimal("0.00"),
rounding=ROUND_HALF_UP,
),
"payer": fuzzy_match(
response["data"], "购买方名称"
),
"institution": fuzzy_match(
response["data"], "销售方名称"
),
"items": [
{
"item": name,
"quantity": Decimal(quantity).quantize(
Decimal("0.00"),
rounding=ROUND_HALF_UP,
),
"amount": Decimal(amount).quantize(
Decimal("0.00"),
rounding=ROUND_HALF_UP,
), # 深圳快瞳票据识别接口中明细的金额和税额由字符串转为Decimal保留两位小数并求和
}
for name, quantity, amount in zip(
[
x["value"]
for x in response["data"]
if re.match(
r"^项目名称明细(\d+)?$",
x["desc"],
)
],
[
x["value"]
for x in response["data"]
if re.match(
r"^项目数量明细(\d+)?$",
x["desc"],
)
],
[
x["value"]
for x in response["data"]
if re.match(
r"^项目金额明细(\d+)?$",
x["desc"],
)
],
)
],
"remarks": fuzzy_match(response["data"], "备注"),
}
)
case "医疗门诊收费票据" | "医疗住院收费票据":
# 请求深圳快瞳医疗收费票据识别接口
response = http_client.post(
url=(url := "https://ai.inspirvision.cn/s/api/ocr/medical"),
headers={
"X-RequestId-Header": image["image_guid"]
}, # 以影像件唯一标识作为请求唯一标识,用于双方联查
data={
"token": authenticator.get_token(
servicer="szkt"
), # 获取深圳快瞳访问令牌
"imgBase64": f"data:image/{image["image_format"].lstrip(".")};base64,{image["image_base64"]}", # 影像件BASE64编码嵌入数据统一资源标识符
},
guid=md5((url + image["image_guid"]).encode("utf-8"))
.hexdigest()
.upper(),
)
# TODO: 若请求深圳快瞳医疗收费票据识别接口发生异常则流转至人工处理
if not (
response.get("status") == 200 and response.get("code") == 0
):
raise
# noinspection PyTypeChecker
receipt.update(
{
"number": (
receipt := (
response["data"]["insured"][
(
"receipt_hospitalization"
if image["image_type"] == "医疗门诊收费票据"
else "receipt_outpatient"
)
]
)["receipts"][0]
)["receipt_no"][
"value"
], # 默认为第一张票据
"code": receipt["global_detail"]["invoice_code"]["value"],
"date": datetime.strptime(
receipt["global_detail"]["invoice_date"]["value"],
"%Y-%m-%d",
),
"admission_date": (
datetime.strptime(
receipt["starttime"]["value"], "%Y-%m-%d"
)
if isinstance(receipt["starttime"], dict)
else None
),
"discharge_date": (
datetime.strptime(
receipt["endtime"]["value"], "%Y-%m-%d"
)
if isinstance(receipt["endtime"], dict)
else None
),
"verification_code": fuzzy_match(
receipt["global_detail"]["region_specific"],
"校验码",
),
"amount": Decimal(
receipt["total_amount"]["value"]
).quantize(
Decimal("0.00"),
rounding=ROUND_HALF_UP,
),
"payer": receipt["name"]["value"],
"institution": receipt["hospital_name"]["value"],
"items": [
{
"item": (
item["item"]["value"]
if isinstance(item["item"], dict)
else None
),
"quantity": Decimal(
item["number"]["value"]
if isinstance(item["number"], dict)
else Decimal("1.00")
).quantize(
Decimal("0.00"),
rounding=ROUND_HALF_UP,
),
"amount": Decimal(
item["total_amount"]["value"]
if isinstance(item["total_amount"], dict)
else Decimal("1.00")
).quantize(
Decimal("0.00"),
rounding=ROUND_HALF_UP,
),
}
for item in receipt["feeitems"]
if isinstance(item, dict)
],
"personal_self_payment": (
Decimal(receipt["self_cost"]["value"]).quantize(
Decimal("0.00"),
rounding=ROUND_HALF_UP,
)
),
"non_medical_payment": (
Decimal(receipt["self_pay"]["value"]).quantize(
Decimal("0.00"),
rounding=ROUND_HALF_UP,
)
),
"medical_payment": (
Decimal(
receipt["medicare_pay"]["value"]
) # 医保基金统筹支付
+ (
Decimal(receipt["addition_pay"]["value"])
if isinstance(receipt["addition_pay"], dict)
else Decimal("0.00")
) # 附加支付
+ (
Decimal(receipt["third_pay"]["value"])
if isinstance(receipt["third_pay"], dict)
else Decimal("0.00")
) # 第三方支付
).quantize(
Decimal("0.00"),
rounding=ROUND_HALF_UP,
),
}
)
# Look up the institution type from the pharmacy / medical institution name
receipt["institution_type"] = master_data.query_institution_type(
    receipt["institution"]
)
# Pick the handling by (image type, institution type)
match (image["image_type"], receipt["institution_type"]):
    case ("增值税发票", "药店"):
        items = (
            pandas.DataFrame(receipt["items"])
            .groupby("item") # merge quantity and amount of identical line items
            .agg(quantity=("quantity", "sum"), amount=("amount", "sum"))
            .loc[
                lambda dataframe: dataframe["amount"] != 0
            ] # keep only line items whose amount is not 0
            .reset_index()
            .pipe(
                lambda dataframe: dataframe.join(
                    dataframe["item"]
                    .apply(
                        parse_item
                    ) # parse each line item into (category, medicine/medical service)
                    .apply(
                        pandas.Series
                    ) # expand the tuple into two columns
                    .rename(columns={0: "category", 1: "medicine"})
                )
            )
            .assign(
                reasonable_amount=lambda dataframe: dataframe.apply(
                    lambda row: Decimal(
                        rule_engine.evaluate(
                            decision="扣除明细项不合理费用",
                            inputs={
                                "insurer_company": insurer_company,
                                "category": row["category"],
                                "medicine": row["medicine"],
                                "amount": row["amount"],
                            },
                        )["reasonable_amount"]
                    ).quantize(
                        Decimal("0.00"),
                        rounding=ROUND_HALF_UP,
                    ),
                    axis="columns",
                )
            ) # per-row reasonable amount returned by the rule engine decision
        )
        receipt.update(
            {
                "payer": (
                    dossier["insured_person_layer"]["insured_person"]
                    if dossier["insured_person_layer"]["insured_person"]
                    in receipt["payer"]
                    else None
                ), # the insured person when their name occurs in the payer text, else None
                "accident": "药店购药", # accident
                "diagnosis": "购药拟诊", # medical diagnosis
                "personal_self_payment": Decimal("0.00"), # personal self-funded amount
                "non_medical_payment": Decimal("0.00"), # personal co-payment amount
                "medical_payment": Decimal("0.00"), # medical insurance payment amount
                "unreasonable_amount": Decimal(
                    receipt["amount"] - items["reasonable_amount"].sum()
                ).quantize(
                    Decimal("0.00"),
                    rounding=ROUND_HALF_UP,
                ), # unreasonable amount: receipt amount minus the summed reasonable amounts
                "reasonable_amount": Decimal(
                    items["reasonable_amount"].sum()
                ).quantize(
                    Decimal("0.00"),
                    rounding=ROUND_HALF_UP,
                ), # reasonable amount
                "items": items.to_dict("records"),
            }
        )
    # TODO: complete the handling of outpatient visits at private hospitals
    case ("增值税发票", "私立医院"):
        receipt["购药及就医类型"] = "门诊就医"
    # TODO: complete the handling of outpatient visits at public hospitals
    case ("医疗门诊收费票据", "公立医院"):
        receipt["购药及就医类型"] = "门诊就医"
    # TODO: complete the handling of hospitalization
    case ("医疗住院收费票据", "公立医院"):
        receipt["购药及就医类型"] = "住院治疗"
    # TODO: route to manual handling when no combination matches
    case _:
        raise RuntimeError(
            "根据影像件类型和购药及就医机构类型匹配购药及就医类型发生异常"
        )
# Append the finished receipt to the dossier
dossier["receipts_layer"].append(receipt)
# noinspection PyShadowingNames
def bank_card_recognize(image) -> None:
    """
    Recognize a bank card and merge the result into the claim dossier.

    :param image: image record
    :return: None
    :raises RuntimeError: if the request does not succeed or the recognized
        card type is not 1
    """
    endpoint = "https://ai.inspirvision.cn/s/api/ocr/bankCard"
    image_guid = image["image_guid"]
    media_subtype = image["image_format"].lstrip(".")
    # Request the Shenzhen Kuaitong bank card recognition endpoint
    response = http_client.post(
        url=endpoint,
        # The image identifier doubles as the request identifier for joint tracing
        headers={"X-RequestId-Header": image_guid},
        data={
            "token": authenticator.get_token(servicer="szkt"),
            # BASE64 text embedded in a data URI
            "imgBase64": f"data:image/{media_subtype};base64,{image['image_base64']}",
        },
        guid=md5((endpoint + image_guid).encode("utf-8")).hexdigest().upper(),
    )
    # TODO: route to manual handling when the response is not successful
    request_succeeded = response.get("status") == 200 and response.get("code") == 0
    # Card type 1 is the debit card; only debit cards are supported in practice
    is_debit_card = (
        request_succeeded and response.get("data", {}).get("bankCardType") == 1
    )
    if not is_debit_card:
        raise RuntimeError("请求深圳快瞳银行卡识别接口发生异常或非借记卡")
    card = response["data"]
    # NOTE(review): phone_number and account are overwritten with None here,
    # which discards values an earlier recognition may have stored — confirm
    # this is intended.
    # noinspection PyTypeChecker
    dossier["insured_person_layer"].update(
        {
            "phone_number": None,
            "account": None,
            "account_bank": card["bankInfo"],
            "account_number": card["cardNo"].replace(" ", ""),
        }
    )
# 基于影像件识别使能规则评估影像件是否识别
if not rule_engine.evaluate(
decision="影像件识别使能",
inputs={
"insurer_company": insurer_company,
"image_type": image["image_type"],
},
)["recognize_enabled"]:
return
# 根据影像件类型匹配影像件识别方法
match image["image_type"]:
# TODO: 后续添加居民户口簿识别和整合方法
case "居民户口簿":
raise RuntimeError("暂不支持居民户口簿")
case "居民身份证(国徽、头像面)" | "居民身份证(国徽面)" | "居民身份证(头像面)":
# 居民身份证识别并整合至赔案档案
identity_card_recognize(image, insurer_company)
# TODO: 后续添加居民户口簿识别和整合方法
case "中国港澳台地区及境外护照":
raise RuntimeError("暂不支持中国港澳台地区及境外护照")
# TODO: 暂仅支持增值税发票识别且购药及就医类型为药店购药整合至赔案档案,后续逐步添加
case "理赔申请书":
application_recognize(image, insurer_company)
case "增值税发票" | "医疗门诊收费票据" | "医疗住院收费票据":
# 票据识别并整合至赔案档案
receipt_recognize(image, insurer_company)
case "银行卡":
# 银行卡识别并整合至赔案档案
bank_card_recognize(image)