Python/票据理赔自动化/image.py

1258 lines
55 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# -*- coding: utf-8 -*-
"""
影像件处理模块
"""
from base64 import b64encode
from datetime import datetime
from decimal import Decimal, ROUND_HALF_UP
from hashlib import md5
import json
from pathlib import Path
import re
import sys
from typing import Any, Dict, List, Optional, Tuple
import cv2
from fuzzywuzzy import fuzz
from jionlp import parse_location
import numpy
import pandas
from common import masterdata, rules_engine
sys.path.append(Path(__file__).parent.parent.as_posix())
from utils.authenticator import Authenticator
from utils.request import Request
# Instantiate the authenticator (used below to obtain vendor access tokens)
authenticator = Authenticator()
# Instantiate the shared HTTP request client
request = Request(timeout=300, cache_enabled=True) # response caching enabled
def image_classify(image_index: int, image_path: Path, dossier: Dict[str, Any]) -> None:
    """
    Classify one image, rotate it upright and register it in the claim dossier.

    The image is read, fingerprinted and compressed, then sent to the Shenzhen
    Kuaitong classification endpoint. The returned ``flag``/``type`` pair is mapped
    to a document-type label and the returned ``angle`` decides whether the image
    is rotated (and re-compressed) before being stored.

    :param image_index: sequence number of the image; formatted as two digits for the key
    :param image_path: path of the image file
    :param dossier: claim dossier; ``dossier["images_layer"]`` is updated in place
    :return: None
    :raises RuntimeError: when the endpoint does not answer with status 200 and code 0
    """
    pixels = image_read(image_path=image_path)
    image_format = image_path.suffix.lower()
    # The fingerprint is taken before any rotation
    image_guid = image_serialize(image_format=image_format, image_ndarray=pixels)
    # Per the original note, the vendor expects a BASE64 payload of at most 2 MB
    image_base64 = image_compress(
        image_format=image_format, image_ndarray=pixels, image_size_specified=2
    )

    url = "https://ai.inspirvision.cn/s/api/ocr/genalClassify"
    format_name = image_format.lstrip(".")
    form = {
        "token": authenticator.get_token(servicer="szkt"),
        # Image format and BASE64 payload embedded in a data URI
        "imgBase64": f"data:image/{format_name};base64,{image_base64}",
    }
    response = request.post(
        url=url,
        # The image fingerprint doubles as the request id for cross-checking
        headers={"X-RequestId-Header": image_guid},
        data=form,
        guid=md5((url + image_guid).encode("utf-8")).hexdigest().upper(),
    )
    if response.get("status") != 200 or response.get("code") != 0:
        raise RuntimeError("请求深圳快瞳影像件分类接口发生异常")

    # Map the (flag, type) pair of the response to a document-type label
    flag = response["data"]["flag"]
    kind = response["data"]["type"]
    id_card_faces = (
        ("idcard-front-back", "居民身份证(国徽、头像面)"),
        ("idcard-front", "居民身份证(国徽面)"),
        ("idcard-back", "居民身份证(头像面)"),
    )
    labels_by_flag = (
        (14, "居民户口簿"),
        (11, "中国港澳台地区及境外护照"),
        (8, "银行卡"),
        (4, "增值税发票"),
        (1, "医疗费用清单"),
        (5, "医疗门诊收费票据"),
        (3, "医疗住院收费票据"),
        (18, "理赔申请书"),
    )
    image_type = "其它"
    if flag == 7:
        for face, label in id_card_faces:
            if kind == face:
                image_type = label
                break
    else:
        for code, label in labels_by_flag:
            if flag == code:
                image_type = label
                break

    # Map the reported angle to an orientation label; unknown angles count as upright
    orientations = {
        "0": "0度",
        "90": "顺时针90度",
        "180": "180度",
        "270": "逆时针90度",
    }
    image_orientation = orientations.get(response["data"]["angle"], "0度")
    if image_orientation != "0度":
        # Apply the inverse rotation to bring the image upright
        corrections = {
            "顺时针90度": cv2.ROTATE_90_COUNTERCLOCKWISE,
            "180度": cv2.ROTATE_180,
            "逆时针90度": cv2.ROTATE_90_CLOCKWISE,
        }
        pixels = cv2.rotate(pixels, corrections[image_orientation])
        # Compress again so the stored payload reflects the rotated image
        image_base64 = image_compress(image_format, pixels, image_size_specified=2)

    # Register the classified image, keyed by its two-digit index
    entry = {
        "image_path": image_path.as_posix(),
        "image_name": image_path.name,
        "image_format": image_format,
        "image_guid": image_guid,
        "image_base64": image_base64,
        "image_type": image_type,
        "image_classified": "",
    }
    dossier["images_layer"][f"{image_index:02d}"] = entry
def image_read(image_path: Path) -> numpy.ndarray:
    """
    Read an image file and decode it as a single-channel grayscale array.

    The file bytes are loaded with ``numpy.fromfile`` and decoded with
    ``cv2.imdecode`` instead of ``cv2.imread``; the original author's note says
    this is because ``cv2.imread`` copes poorly with non-ASCII paths on Windows.

    :param image_path: path of the image file
    :return: decoded grayscale image array
    :raises RuntimeError: when the file cannot be read or decoded
    """
    try:
        raw_bytes = numpy.fromfile(file=image_path, dtype=numpy.uint8)
        decoded = cv2.imdecode(buf=raw_bytes, flags=cv2.IMREAD_GRAYSCALE)
        if decoded is not None:
            return decoded
        # A None result is reported through the same wrapper as any other failure
        raise RuntimeError("影像件不存在")
    except Exception as exception:
        raise RuntimeError(f"读取影像件发生异常:{str(exception)}") from exception
def image_serialize(image_format: str, image_ndarray: numpy.ndarray) -> str:
    """
    Derive the unique identifier of an image from its encoded bytes.

    :param image_format: image format passed to ``cv2.imencode`` as the extension
    :param image_ndarray: image array to encode
    :return: upper-case hexadecimal MD5 digest of the encoded bytes
    :raises RuntimeError: when the encoder reports failure or yields no buffer
    """
    encoded_ok, encoded = cv2.imencode(ext=image_format, img=image_ndarray)
    if encoded_ok and encoded is not None:
        # The digest of the encoded byte stream serves as the image identifier
        return md5(encoded.tobytes()).hexdigest().upper()
    raise RuntimeError("影像件编码发生异常")
def image_compress(
    image_format: str,
    image_ndarray: numpy.ndarray,
    image_size_specified: float = 2.0,
) -> str:
    """
    Compress an image until its BASE64 encoding fits the requested size.

    For each quality step (100, 90, ..., 60) the image is encoded and, while the
    BASE64 text is still too large, shrunk to 95% of its size up to ten times.
    Shrinking stops early once the shorter side would drop below 350 pixels, and
    the next quality step starts again from the full-size image.

    Fixes relative to the previous implementation:
    * ``cv2.resize`` takes ``dsize`` as (width, height); the old code passed
      (height, width), which transposed the aspect ratio on every shrink step.
    * The PNG branch compared the format with ``"png"`` although callers pass the
      dotted suffix (``".png"``), so the PNG compression parameter was never used.

    :param image_format: image format as a file extension, e.g. ``".jpg"`` or ``".png"``
    :param image_ndarray: image array to compress; it is not modified
    :param image_size_specified: size limit of the BASE64 text in mebibytes, default 2
    :return: BASE64 text of the compressed image
    :raises RuntimeError: when no quality/size combination fits the limit
    """
    size_limit = image_size_specified * 1_048_576
    is_png = image_format.lower().lstrip(".") == "png"

    for quality in range(100, 50, -10):
        # PNG takes a compression level (0-9) instead of a JPEG quality
        encode_params = (
            [cv2.IMWRITE_PNG_COMPRESSION, 10 - quality // 10]
            if is_png
            else [cv2.IMWRITE_JPEG_QUALITY, quality]
        )
        candidate = image_ndarray.copy()
        for _ in range(10):
            success, encoded = cv2.imencode(
                ext=image_format, img=candidate, params=encode_params
            )
            if not success or encoded is None:
                break
            image_base64 = b64encode(encoded.tobytes()).decode("utf-8")
            if len(image_base64) <= size_limit:
                return image_base64
            # Still too large: shrink both sides to 95%, keeping the aspect ratio
            height, width = candidate.shape[:2]
            candidate = cv2.resize(
                src=candidate,
                dsize=(int(width * 0.95), int(height * 0.95)),
                interpolation=cv2.INTER_AREA,
            )
            if min(candidate.shape[:2]) < 350:
                break
    raise RuntimeError("压缩影像件发生异常")
def image_recognize(
    image_index: str,
    image: Dict[str, Any],
    insurer_company: str,
    dossier: Dict[str, Any],
) -> None:
    """
    Dispatch one classified image to its recognizer and record the outcome.

    The rules engine is consulted first; when recognition is not enabled for this
    insurer/image type the image is marked accordingly and nothing else happens.

    :param image_index: key of the image inside ``dossier["images_layer"]``
    :param image: image record produced by classification
    :param insurer_company: insurer branch company name
    :param dossier: claim dossier, updated in place
    :return: None
    :raises RuntimeError: when no recognizer is configured for the image type
    """
    enablement = rules_engine.evaluate(
        decision="影像件识别使能",
        inputs={
            "insurer_company": insurer_company,
            "image_type": image["image_type"],
        },
    )
    if not enablement["recognize_enabled"]:
        dossier["images_layer"][image_index]["image_recognized"] = "否,无需识别"
        return

    image_type = image["image_type"]
    identity_card_types = (
        "居民身份证(国徽、头像面)",
        "居民身份证(国徽面)",
        "居民身份证(头像面)",
    )
    receipt_types = ("增值税发票", "医疗门诊收费票据", "医疗住院收费票据")
    if image_type in identity_card_types:
        # Resident identity card
        identity_card_recognize(
            image=image, insurer_company=insurer_company, dossier=dossier
        )
    elif image_type == "银行卡":
        # Bank card
        bank_card_recognize(image=image, dossier=dossier)
    elif image_type == "理赔申请书":
        # Claim application form
        application_recognize(
            image=image, insurer_company=insurer_company, dossier=dossier
        )
    elif image_type in receipt_types:
        # VAT invoice or medical receipt
        receipt_recognize(
            image_index=image_index,
            image=image,
            insurer_company=insurer_company,
            dossier=dossier,
        )
    else:
        raise RuntimeError("影像件类型未配置影像件识别方法")
    dossier["images_layer"][image_index]["image_recognized"] = ""
def identity_card_recognize(
    image: Dict[str, Any], insurer_company: str, dossier: Dict[str, Any]
) -> None:
    """
    Recognize a resident identity card and merge the result into the dossier.

    Images containing the portrait side fill the insured person's personal data
    and trigger the liability lookup; images containing the national-emblem side
    fill the validity period of the card.

    :param image: image record (reads ``image_guid``, ``image_format``,
        ``image_base64`` and ``image_type``)
    :param insurer_company: insurer branch company name
    :param dossier: claim dossier, updated in place
    :return: None
    :raises RuntimeError: when the endpoint does not answer with status 200 and code 0
    """
    url = "https://ai.inspirvision.cn/s/api/ocr/identityCard"
    # The image fingerprint doubles as the request id for cross-checking
    request_headers = {"X-RequestId-Header": image["image_guid"]}
    access_token = authenticator.get_token(servicer="szkt")
    format_name = image["image_format"].lstrip(".")
    request_data = {
        "token": access_token,
        # Image format and BASE64 payload embedded in a data URI
        "imgBase64": f"data:image/{format_name};base64,{image['image_base64']}",
    }
    request_guid = md5((url + image["image_guid"]).encode("utf-8")).hexdigest().upper()
    response = request.post(
        url=url, headers=request_headers, data=request_data, guid=request_guid
    )
    if response.get("status") != 200 or response.get("code") != 0:
        raise RuntimeError("请求深圳快瞳居民身份证识别接口发生异常")

    if image["image_type"] in ("居民身份证(国徽、头像面)", "居民身份证(头像面)"):
        person_layer = dossier["insured_person_layer"]
        insured_person = response["data"]["name"]
        identity_type = "居民身份证"
        identity_number = response["data"]["idNo"]
        gender = response["data"]["sex"]
        # Birthday arrives as text in %Y-%m-%d form
        birth_date = datetime.strptime(response["data"]["birthday"], "%Y-%m-%d")
        age = calculate_age(
            report_time=dossier["report_layer"]["report_time"],
            birth_date=birth_date,
        )
        # Split the residential address into province / city / county / detail
        residential_address = parse_location(
            location_text=response["data"]["address"]
        )
        person_layer.update(
            {
                "insured_person": insured_person,
                "identity_type": identity_type,
                "identity_number": identity_number,
                "gender": gender,
                "birth_date": birth_date,
                "age": age,
                "province": residential_address.get("province"),
                "city": residential_address.get("city"),
                "district": residential_address.get("county"),
                "detailed_address": residential_address.get("detail"),
            }
        )
        # Look up the insured person's liabilities as of the report date
        dossier["liabilities_layer"] = masterdata.query_liabilities(
            insurer_company=insurer_company,
            insured_person=insured_person,
            identity_type=identity_type,
            identity_number=identity_number,
            report_date=dossier["report_layer"]["report_time"].strftime("%Y-%m-%d"),
        )

    if image["image_type"] in ("居民身份证(国徽、头像面)", "居民身份证(国徽面)"):
        person_layer = dossier["insured_person_layer"]
        # Validity period is "start-end"; a long-term end date becomes 9999-12-31
        period = response["data"]["validDate"].split("-")
        commencement_date = datetime.strptime(period[0], "%Y.%m.%d")
        if period[1] == "长期":
            termination_date = datetime(9999, 12, 31)
        else:
            termination_date = datetime.strptime(period[1], "%Y.%m.%d")
        person_layer.update(
            {
                "commencement_date": commencement_date,
                "termination_date": termination_date,
            }
        )
def calculate_age(report_time: datetime, birth_date: datetime) -> int:
    """
    Compute the age in completed years at the time of the report.

    :param report_time: time the claim was reported
    :param birth_date: date of birth
    :return: age in completed years
    """
    years = report_time.year - birth_date.year
    # One year less while this year's birthday is still ahead of the report date
    birthday_still_ahead = (report_time.month, report_time.day) < (
        birth_date.month,
        birth_date.day,
    )
    if birthday_still_ahead:
        return years - 1
    return years
def mlm_recognize(image: Dict[str, Any], schema: Dict[str, Any]) -> Dict[str, Any]:
    """
    Run OCR on a claim application form with a multimodal model and return
    the result structured according to ``schema``.

    :param image: image record (reads ``image_format`` and ``image_base64``)
    :param schema: JSON schema the model output must follow
    :return: the model's message content parsed from JSON
    :raises RuntimeError: when the message content cannot be extracted or parsed
    """
    format_name = image["image_format"].lstrip(".")
    # Image format and BASE64 payload embedded in a data URI
    data_uri = f"data:image/{format_name};base64,{image['image_base64']}"
    payload = {
        "model": "doubao-seed-1-6-251015",
        "messages": [
            {
                "role": "user",
                "content": [
                    {"type": "image_url", "image_url": {"url": data_uri}},
                    {
                        "type": "text",
                        "text": "请就理赔申请书进行光学字符识别、结构化识别结果并返回符合Schema的JSON数据",
                    },
                ],
            }
        ],
        "temperature": 0.2,
        # Deep-thinking mode is switched off
        "thinking": {"type": "disabled"},
        "response_format": {
            "type": "json_schema",
            "json_schema": {
                "name": "就理赔申请书进行光学字符识别并结构化识别结果",
                "schema": schema,
                "strict": True,  # strict schema adherence
            },
        },
    }
    # The request guid is the MD5 of the canonical (key-sorted) JSON payload
    canonical_payload = json.dumps(payload, sort_keys=True, ensure_ascii=False)
    response = request.post(
        url="https://ark.cn-beijing.volces.com/api/v3/chat/completions",
        headers={
            # NOTE(review): hard-coded credential; consider moving it to
            # configuration or a secret store and rotating the key
            "Authorization": "Bearer 2c28ab07-888c-45be-84a2-fc4b2cb5f3f2",
            "Content-Type": "application/json; charset=utf-8",
        },
        json=payload,
        guid=md5(canonical_payload.encode("utf-8")).hexdigest().upper(),
    )
    # Parse the JSON text carried in the first choice's message content
    try:
        return json.loads(s=response["choices"][0]["message"]["content"])
    except Exception as exception:
        raise RuntimeError("请求火山引擎多模态大模型接口发生异常") from exception
def boc_application_recognize(image: Dict[str, Any], dossier: Dict[str, Any]) -> None:
    """
    Recognize a Bank of China Insurance claim application form and merge the
    contact and bank-account fields into the dossier.

    :param image: image record of the application form
    :param dossier: claim dossier; ``dossier["insured_person_layer"]`` is updated
    :return: None
    """

    def text_field(description: str) -> Dict[str, str]:
        # Every leaf of the schema is a string property with a description
        return {"type": "string", "description": description}

    # Schema of a single visit record
    visit_record = {
        "type": "object",
        "description": "每条就诊记录对象",
        "properties": {
            "就诊序号": text_field("就诊序号,若无数据则为空字符串"),
            "发票日期": text_field(
                "发票日期若无数据则为空字符串若有数据则格式为YYYY/MM/DD"
            ),
            "发票上的就诊医院/药店": text_field(
                "发票上的就诊医院/药店,若无数据则为空字符串"
            ),
            "票据张数": text_field("票据张数,若无数据则为空字符串"),
            "票据金额": text_field(
                "票据金额,若无数据则为空字符串,若有数据则保留两位小数"
            ),
            "诊断": text_field("诊断,若无数据则为空字符串"),
        },
        # Mandatory fields of a visit record
        "required": [
            "发票日期",
            "发票上的就诊医院/药店",
            "诊断",
        ],
        "additionalProperties": False,
    }
    # Schema of the whole recognition result
    schema = {
        "type": "object",
        "description": "识别结果对象",
        "properties": {
            "申请人": text_field("申请人,若无数据则为空字符串"),
            "性别": text_field("性别,若无数据则为空字符串"),
            "年龄": text_field("年龄,若无数据则为空字符串"),
            "手机": text_field("手机,若无数据则为空字符串"),
            "所属分支行及部门": text_field("所属分支行及部门,若无数据则为空字符串"),
            "身份证号码": text_field("身份证号码,若无数据则为空字符串"),
            "就诊记录": {
                "type": "array",
                "description": "所有就诊记录数组",
                "items": visit_record,
            },
            "票据金额合计": text_field(
                "票据金额合计,若无数据则为空字符串,若有数据则保留两位小数"
            ),
            "开户银行": text_field(
                "开户银行,若无数据则为空字符串,请注意开户银行可能为多行"
            ),
            "户名": text_field("户名,若无数据则为空字符串"),
            "账号": text_field("账号,若无数据则为空字符串"),
        },
        # Mandatory top-level fields
        "required": [
            "申请人",
            "手机",
            "身份证号码",
            "就诊记录",
            "开户银行",
            "户名",
            "账号",
        ],
        "additionalProperties": False,
    }

    recognition = mlm_recognize(image=image, schema=schema)
    # Only the contact and account fields are carried into the dossier
    contact_and_account = {
        "phone_number": recognition["手机"],
        "account": recognition["户名"],
        "account_bank": recognition["开户银行"],
        "account_number": recognition["账号"],
    }
    dossier["insured_person_layer"].update(contact_and_account)
def application_recognize(
    image: Dict[str, Any], insurer_company: str, dossier: Dict[str, Any]
) -> None:
    """
    Recognize a claim application form with the handler of the given insurer.

    Only insurers whose name starts with "中银保险有限公司" have a handler; for any
    other insurer the function returns without touching the dossier.

    :param image: image record of the application form
    :param insurer_company: insurer branch company name
    :param dossier: claim dossier, updated in place by the handler
    :return: None
    """
    # Bank of China Insurance
    if insurer_company.startswith("中银保险有限公司"):
        boc_application_recognize(image=image, dossier=dossier)
def receipt_recognize(
image_index: str,
image: Dict[str, Any],
insurer_company: str,
dossier: Dict[str, Any],
) -> None:
"""
识别票据并整合至赔案档案
:param image_index: 影像件编号
:param image: 影像件
:param insurer_company: 保险分公司
:param dossier: 赔案档案
:return: 空
"""
# 初始化票据数据
receipt = {"image_index": image_index, "image_path": image["image_path"]}
# 请求深圳快瞳票据查验接口(兼容增值税发票、医疗门诊/住院收费票据)
response = request.post(
url=(url := "https://ai.inspirvision.cn/s/api/ocr/invoiceCheckAll"),
headers={
"X-RequestId-Header": image["image_guid"]
}, # 以影像件唯一标识作为请求唯一标识,用于双方联查
data={
"token": authenticator.get_token(servicer="szkt"), # 获取深圳快瞳访问令牌
"imgBase64": f"data:image/{image["image_format"].lstrip(".")};base64,{image["image_base64"]}", # 将影像件格式和BASE64编码嵌入数据统一资源标识符
},
guid=md5(string=(url + image["image_guid"]).encode("utf-8"))
.hexdigest()
.upper(),
)
# 若查验状态为真票或红票则直接整合至赔案档案
if response.get("status") == 200 and response.get("code") == 10000:
match response["data"]["productCode"]:
# 增值税发票,目前深圳快瞳支持全电发票和全电纸质发票、区块链发票和增值税发票查验
case "003082":
receipt.update(
{
"verification": (
"真票"
if response["data"]["details"]["invoiceTypeNo"] == "0"
else "红票"
), # 查验状态,红票对应查验状态为失控、作废、已红冲、部分红冲和全额红冲
"number": response["data"]["details"]["number"], # 票据号
"code": (
response["data"]["details"]["code"]
if response["data"]["details"]["code"]
else None
), # 票据代码
"date": datetime.strptime(
response["data"]["details"]["date"], "%Y年%m月%d"
), # 开票日期
"check_code": response["data"]["details"][
"check_code"
], # 校验码
"amount": Decimal(
response["data"]["details"]["total"]
).quantize(
Decimal("0.00"),
rounding=ROUND_HALF_UP,
), # 开票金额
"payer": response["data"]["details"]["buyer"], # 出险人
"institution": response["data"]["details"][
"seller"
], # 购药及就医机构
"items": [
{
"item": item["name"],
"quantity": (
Decimal(item["quantity"]).quantize(
Decimal("0.00"),
rounding=ROUND_HALF_UP,
)
if item["quantity"]
else Decimal("0.00")
),
"amount": (
Decimal(item["total"]) + Decimal(item["tax"])
).quantize(
Decimal("0.00"),
rounding=ROUND_HALF_UP,
),
}
for item in response["data"]["details"]["items"]
],
"remarks": (
response["data"]["details"]["remark"]
if response["data"]["details"]["remark"]
else None
),
}
)
# 医疗门诊、住院收费票据
case "003081":
receipt.update(
{
"verification": (
"真票"
if response["data"]["flushedRed"] == "true"
else "红票"
),
"number": response["data"]["billNumber"],
"code": response["data"]["billCode"],
"date": datetime.strptime(
response["data"]["invoiceDate"], "%Y-%m-%d %H:%M:%S"
), # 转为日期时间datetime对象
"admission_date": (
datetime.strptime(
response["data"]["hospitalizationDate"].split("-")[0],
"%Y%m%d",
)
if response["data"]["hospitalizationDate"]
else None
), # 深圳快瞳票据查验接口中住院日期解析为入院日期和出院日期
"discharge_date": (
datetime.strptime(
response["data"]["hospitalizationDate"].split("-")[1],
"%Y%m%d",
)
if response["data"]["hospitalizationDate"]
else None
),
"check_code": response["data"]["checkCode"],
"amount": Decimal(response["data"]["amount"]).quantize(
Decimal("0.00"),
rounding=ROUND_HALF_UP,
),
"payer": response["data"]["payer"],
"institution": response["data"]["receivablesInstitution"],
"items": [
{
"item": item["itemName"],
"quantity": Decimal(item["number"]).quantize(
Decimal("0.00"),
rounding=ROUND_HALF_UP,
),
"amount": Decimal(item["totalAmount"]).quantize(
Decimal("0.00"),
rounding=ROUND_HALF_UP,
),
}
for item in response["data"]["feeitems"]
],
"personal_self_payment": Decimal(
response["data"]["personalExpense"]
if response["data"]["personalExpense"]
else Decimal("0.00")
).quantize(
Decimal("0.00"),
rounding=ROUND_HALF_UP,
),
"non_medical_payment": Decimal(
response["data"]["personalPay"]
if response["data"]["personalPay"]
else Decimal("0.00")
).quantize(
Decimal("0.00"),
rounding=ROUND_HALF_UP,
),
"medical_payment": (
Decimal(response["data"]["medicarePay"])
if response["data"]["medicarePay"]
else Decimal("0.00")
+ Decimal(
response["data"]["otherPayment"]
if response["data"]["otherPayment"]
else Decimal("0.00")
)
).quantize(
Decimal("0.00"),
rounding=ROUND_HALF_UP,
), # 包括医保统筹基金支付和其它支付(例如,退休补充支付)
}
)
# 若查验状态为假票或无法查验则再请求深圳快瞳票据识别接口接整合至赔案档案
else:
receipt["verification"] = (
"假票"
if response.get("status") == 400
and (response.get("code") == 10100 or response.get("code") == 10001)
else "无法查验"
) # 假票:查无此票或查验成功五要素不一致
match image["image_type"]:
case "增值税发票":
# 请求深圳快瞳增值税发票识别接口
response = request.post(
url=(url := "https://ai.inspirvision.cn/s/api/ocr/vatInvoice"),
headers={
"X-RequestId-Header": image["image_guid"]
}, # 以影像件唯一标识作为请求唯一标识,用于双方联查
data={
"token": authenticator.get_token(
servicer="szkt"
), # 获取深圳快瞳访问令牌
"imgBase64": f"data:image/{image["image_format"].lstrip(".")};base64,{image["image_base64"]}", # 影像件BASE64编码嵌入数据统一资源标识符
},
guid=md5((url + image["image_guid"]).encode("utf-8"))
.hexdigest()
.upper(),
)
if not (response.get("status") == 200 and response.get("code") == 0):
raise RuntimeError("请求深圳快瞳增值税发票识别接口发生异常")
match fuzzy_match(response["data"], "发票类型"):
case "电子发票(普通发票)":
receipt.update(
{
"number": fuzzy_match(response["data"], "发票号码"),
"code": fuzzy_match(response["data"], "发票代码"),
"date": datetime.strptime(
fuzzy_match(response["data"], "开票日期"),
"%Y年%m月%d",
),
"check_code": fuzzy_match(response["data"], "校验码"),
"amount": Decimal(
fuzzy_match(response["data"], "小写金额").replace(
"¥", ""
)
).quantize(
Decimal("0.00"),
rounding=ROUND_HALF_UP,
),
"payer": fuzzy_match(response["data"], "购买方名称"),
"institution": fuzzy_match(
response["data"], "销售方名称"
),
"items": [
{
"item": name,
"quantity": Decimal(quantity).quantize(
Decimal("0.00"),
rounding=ROUND_HALF_UP,
),
"amount": (
Decimal(amount) + Decimal(tax)
).quantize(
Decimal("0.00"),
rounding=ROUND_HALF_UP,
), # 深圳快瞳票据识别接口中明细的金额和税额由字符串转为Decimal保留两位小数并求和
}
for name, quantity, amount, tax in zip(
[
x["value"]
for x in response["data"]
if re.match(
r"^项目名称(\d+)?$",
x["desc"],
)
],
[
x["value"]
for x in response["data"]
if re.match(
r"^数量(\d+)?$",
x["desc"],
)
],
[
x["value"]
for x in response["data"]
if re.match(
r"^金额(\d+)?$",
x["desc"],
)
],
[
x["value"]
for x in response["data"]
if re.match(
r"^税额(\d+)?$",
x["desc"],
)
],
)
],
"remarks": fuzzy_match(response["data"], "备注"),
}
)
case "增值税普通发票(卷票)":
receipt.update(
{
"number": fuzzy_match(response["data"], "发票号码"),
"code": fuzzy_match(response["data"], "发票代码"),
"date": datetime.strptime(
fuzzy_match(response["data"], "开票日期"),
"%Y-%m-%d",
),
"check_code": fuzzy_match(response["data"], "校验码"),
"amount": Decimal(
fuzzy_match(
response["data"], "合计金额(小写)"
).replace("¥", "")
).quantize(
Decimal("0.00"),
rounding=ROUND_HALF_UP,
),
"payer": fuzzy_match(response["data"], "购买方名称"),
"institution": fuzzy_match(
response["data"], "销售方名称"
),
"items": [
{
"item": name,
"quantity": Decimal(quantity).quantize(
Decimal("0.00"),
rounding=ROUND_HALF_UP,
),
"amount": Decimal(amount).quantize(
Decimal("0.00"),
rounding=ROUND_HALF_UP,
),
}
for name, quantity, amount in zip(
[
x["value"]
for x in response["data"]
if re.match(
r"^项目名称明细(\d+)?$",
x["desc"],
)
],
[
x["value"]
for x in response["data"]
if re.match(
r"^项目数量明细(\d+)?$",
x["desc"],
)
],
[
x["value"]
for x in response["data"]
if re.match(
r"^项目金额明细(\d+)?$",
x["desc"],
)
],
)
],
"remarks": fuzzy_match(response["data"], "备注"),
}
)
case "医疗门诊收费票据" | "医疗住院收费票据":
# 请求深圳快瞳医疗收费票据识别接口
response = request.post(
url=(url := "https://ai.inspirvision.cn/s/api/ocr/medical"),
headers={
"X-RequestId-Header": image["image_guid"]
}, # 以影像件唯一标识作为请求唯一标识,用于双方联查
data={
"token": authenticator.get_token(
servicer="szkt"
), # 获取深圳快瞳访问令牌
"imgBase64": f"data:image/{image["image_format"].lstrip(".")};base64,{image["image_base64"]}", # 影像件BASE64编码嵌入数据统一资源标识符
},
guid=md5((url + image["image_guid"]).encode("utf-8"))
.hexdigest()
.upper(),
)
if not (response.get("status") == 200 and response.get("code") == 0):
raise
receipt.update(
{
"number": (
receipt := (
response["data"]["insured"][
(
"receipt_hospitalization"
if image["image_type"] == "医疗门诊收费票据"
else "receipt_outpatient"
)
]
)["receipts"][0]
)["receipt_no"][
"value"
], # 默认为第一张票据
"code": receipt["global_detail"]["invoice_code"]["value"],
"date": datetime.strptime(
receipt["global_detail"]["invoice_date"]["value"],
"%Y-%m-%d",
),
"admission_date": (
datetime.strptime(receipt["starttime"]["value"], "%Y-%m-%d")
if isinstance(receipt["starttime"], dict)
else None
),
"discharge_date": (
datetime.strptime(receipt["endtime"]["value"], "%Y-%m-%d")
if isinstance(receipt["endtime"], dict)
else None
),
"check_code": fuzzy_match(
receipt["global_detail"]["region_specific"],
"校验码",
),
"amount": Decimal(receipt["total_amount"]["value"]).quantize(
Decimal("0.00"),
rounding=ROUND_HALF_UP,
),
"payer": receipt["name"]["value"],
"institution": receipt["hospital_name"]["value"],
"items": [
{
"item": (
item["item"]["value"]
if isinstance(item["item"], dict)
else None
),
"quantity": Decimal(
item["number"]["value"]
if isinstance(item["number"], dict)
else Decimal("1.00")
).quantize(
Decimal("0.00"),
rounding=ROUND_HALF_UP,
),
"amount": Decimal(
item["total_amount"]["value"]
if isinstance(item["total_amount"], dict)
else Decimal("1.00")
).quantize(
Decimal("0.00"),
rounding=ROUND_HALF_UP,
),
}
for item in receipt["feeitems"]
if isinstance(item, dict)
],
"personal_self_payment": (
Decimal(receipt["self_cost"]["value"]).quantize(
Decimal("0.00"),
rounding=ROUND_HALF_UP,
)
),
"non_medical_payment": (
Decimal(receipt["self_pay"]["value"]).quantize(
Decimal("0.00"),
rounding=ROUND_HALF_UP,
)
),
"medical_payment": (
Decimal(
receipt["medicare_pay"]["value"]
) # 医保基金统筹支付
+ (
Decimal(receipt["addition_pay"]["value"])
if isinstance(receipt["addition_pay"], dict)
else Decimal("0.00")
) # 附加支付
+ (
Decimal(receipt["third_pay"]["value"])
if isinstance(receipt["third_pay"], dict)
else Decimal("0.00")
) # 第三方支付
).quantize(
Decimal("0.00"),
rounding=ROUND_HALF_UP,
),
}
)
# 根据购药及就医机构查询购药及就医机构类型
receipt["institution_type"] = masterdata.query_institution_type(
receipt["institution"]
)
# 根据影像件类型和购药及就医机构类型匹配处理方法
match (image["image_type"], receipt["institution_type"]):
case ("增值税发票", "药店"):
items = (
pandas.DataFrame(receipt["items"])
.groupby("item") # 就相同明细项合并数量和金额
.agg(quantity=("quantity", "sum"), amount=("amount", "sum"))
.loc[
lambda dataframe: dataframe["amount"] != 0
] # 仅保留金额非0的明细项
.reset_index()
.pipe(
lambda dataframe: dataframe.join(
dataframe["item"]
.apply(
parse_item
) # 根据明细项解析明细项类别和具体内容,并根据具体内容查询药品/医疗服务
.apply(
pandas.Series
) # 就明细项类别和药品/医疗服务元组展开为两列
.rename(columns={0: "category", 1: "medicine"})
)
)
.assign(
personal_self_payment=Decimal("0.00"), # 个人自费项
non_medical_payment=Decimal("0.00"), # 个人自付项
reasonable_amount=lambda dataframe: dataframe.apply(
lambda row: Decimal(
# 基于扣除明细项不合理费用决策规则评估
rules_engine.evaluate(
decision="扣除明细项不合理费用",
inputs={
"insurer_company": insurer_company,
"category": row["category"],
"medicine": row["medicine"],
"amount": row["amount"],
},
)["reasonable_amount"]
).quantize(
Decimal("0.00"),
rounding=ROUND_HALF_UP,
),
axis="columns",
), # 合理项
)
.assign(
total=lambda dataframe: dataframe["reasonable_amount"], # 合计
)
)
if items["amount"].sum() != receipt["amount"]:
raise RuntimeError("明细项金额合计与开票金额不一致")
receipt.update(
{
"payer": (
dossier["insured_person_layer"]["insured_person"]
if dossier["insured_person_layer"]["insured_person"]
in receipt["payer"]
else "非被保险人"
), # 出险人姓名
"accident": "药店购药", # 理赔类型
"diagnosis": "购药拟诊", # 医疗诊断
"personal_self_payment": Decimal("0.00"), # 个人自费金额
"non_medical_payment": Decimal("0.00"), # 个人自付金额
"medical_payment": Decimal("0.00"), # 医保支付金额
"unreasonable_amount": Decimal(
receipt["amount"] - items["reasonable_amount"].sum()
).quantize(
Decimal("0.00"),
rounding=ROUND_HALF_UP,
), # 不合理金额
"reasonable_amount": Decimal(
items["reasonable_amount"].sum()
).quantize(
Decimal("0.00"),
rounding=ROUND_HALF_UP,
), # 合理金额
"total": Decimal(items["reasonable_amount"].sum()).quantize(
Decimal("0.00"),
rounding=ROUND_HALF_UP,
), # 合计
"items": items.to_dict("records"),
}
)
case ("增值税发票", "私立医院"):
receipt["购药及就医类型"] = "门诊就医"
case ("医疗门诊收费票据", "公立医院"):
receipt["购药及就医类型"] = "门诊就医"
case ("医疗住院收费票据", "公立医院"):
receipt["购药及就医类型"] = "住院治疗"
case _:
raise RuntimeError(
"根据影像件类型和购药及就医机构类型匹配购药及就医类型发生异常"
)
dossier["receipts_layer"].append(receipt)
def fuzzy_match(contents: List[Dict[str, Any]], key: str) -> str:
    """
    Find the value whose label matches ``key`` in a vendor OCR result list.

    Two entry layouts are supported, detected from the first entry:
    ``{"desc": ..., "value": ...}`` (VAT invoice OCR) and
    ``{"name": ..., "word": {"value": ...}}`` (medical receipt OCR). An exact
    label match wins; otherwise the entry with the highest ``fuzz.WRatio``
    similarity is used when that similarity is at least 80.

    An empty ``contents`` list now yields ``""`` instead of raising IndexError.

    :param contents: list of OCR entries
    :param key: label to look for
    :return: the matched value, or ``""`` when nothing matches or the value is empty
    """
    if not contents:
        return ""

    first = contents[0]
    if "desc" in first:
        # VAT invoice OCR layout
        label_field = "desc"
        read_value = lambda entry: entry["value"]
    elif "name" in first:
        # Medical receipt OCR layout
        label_field = "name"
        read_value = lambda entry: entry["word"]["value"]
    else:
        return ""

    # An exact label match takes precedence over similarity
    for entry in contents:
        if entry[label_field] == key:
            return read_value(entry) or ""

    # Otherwise take the most similar label (first one on ties)
    best_value, best_score = max(
        (
            (read_value(entry), fuzz.WRatio(entry[label_field], key, force_ascii=False))
            for entry in contents
        ),
        key=lambda candidate: candidate[1],
    )
    if best_score >= 80:
        return best_value or ""
    return ""
def parse_item(item: str) -> Tuple[str, Optional[str]]:
    """
    Split an invoice line item into its category and the medicine it refers to.

    VAT invoice items usually look like ``*{category}*{specific}``, where the
    category is e.g. a medicine class and the specific part names the product;
    the specific part is passed to the medicine lookup. Any other text is
    returned as the category with no medicine.

    :param item: line item text
    :return: ``(category, medicine)``; ``medicine`` is ``None`` for other formats
    """
    parsed = re.match(
        r"^\*(?P<category>.*?)\*(?P<specific>.*)$",
        item,
    )
    if parsed is None:
        # Not in *category*specific form: the whole text is the category
        return item, None
    category = parsed.group("category")
    return category, masterdata.query_medicine(parsed.group("specific"))
def bank_card_recognize(image: Dict[str, Any], dossier: Dict[str, Any]) -> None:
    """
    Recognize a bank card and merge the account data into the dossier.

    :param image: image record (reads ``image_guid``, ``image_format`` and
        ``image_base64``)
    :param dossier: claim dossier; ``dossier["insured_person_layer"]`` is updated
    :return: None
    :raises RuntimeError: when the endpoint fails or the card type is not 1
        (debit card, per the original note)
    """
    url = "https://ai.inspirvision.cn/s/api/ocr/bankCard"
    image_guid = image["image_guid"]
    access_token = authenticator.get_token(servicer="szkt")
    format_name = image["image_format"].lstrip(".")
    response = request.post(
        url=url,
        # The image fingerprint doubles as the request id for cross-checking
        headers={"X-RequestId-Header": image_guid},
        data={
            "token": access_token,
            # Image format and BASE64 payload embedded in a data URI
            "imgBase64": f"data:image/{format_name};base64,{image['image_base64']}",
        },
        guid=md5((url + image["image_guid"]).encode("utf-8")).hexdigest().upper(),
    )
    # Reject failed calls and any card type other than 1
    if (
        response.get("status") != 200
        or response.get("code") != 0
        or response.get("data", {}).get("bankCardType") != 1
    ):
        raise RuntimeError("请求深圳快瞳银行卡识别接口发生异常或非借记卡")

    card = response["data"]
    account_fields = {
        "phone_number": None,
        "account": None,
        "account_bank": card["bankInfo"],
        # Card numbers may be printed in space-separated groups
        "account_number": card["cardNo"].replace(" ", ""),
    }
    dossier["insured_person_layer"].update(account_fields)