diff --git a/.idea/misc.xml b/.idea/misc.xml new file mode 100644 index 0000000..ba571ba --- /dev/null +++ b/.idea/misc.xml @@ -0,0 +1,4 @@ + + + + \ No newline at end of file diff --git a/API/main.py b/API/main.py new file mode 100644 index 0000000..e03cec0 --- /dev/null +++ b/API/main.py @@ -0,0 +1,516 @@ +# -*- coding: utf-8 -*- + +"""基于FastAPI和Uvicorn的RESTfulAPI服务""" + +from contextlib import asynccontextmanager +from datetime import date, datetime +from typing import AsyncGenerator, Literal, Optional, Union +from urllib.parse import quote_plus + +import pytz +from distance import levenshtein +from fastapi import Depends, FastAPI, HTTPException, Header, status +from fastapi.exceptions import RequestValidationError +from fastapi.responses import JSONResponse +from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer +from lunarcalendar import Lunar +from pydantic import BaseModel, Field, field_validator +from sqlalchemy import create_engine, select, text +from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker, create_async_engine +from sqlalchemy.ext.automap import automap_base + +# ------------------------- +# 应用初始化配置 +# ------------------------- + +# 创建FastAPI对象 +application = FastAPI( + title="api.liubiren.cloud", + description="刘弼仁工作室的请求服务中心,如需使用请联系liubiren@qq.com", + version="0.0.1", + swagger_ui_init_oauth={ + "clientId": "api-docs-client", + "scopes": {}, + "usePkceWithAuthorizationCodeGrant": True, + }, + redoc_url=None, +) + +# 配置全局安全策略(所有请求均需认证) +application.openapi_security = [{"BearerAuth": []}] + +# ------------------------- +# 请求和响应模型 +# ------------------------- + + +class Request(BaseModel): + """统一请求模型""" + + service: Literal["divination", "query_institution", "query_drug"] = Field( + ..., + description="服务标识,数据类型为枚举,必填", + json_schema_extra={ + "枚举描述": { + "divination": "小六壬速断", + "query_institution": "根据名称精准查询医药机构信息", + } + }, + ) + data: Union["DivinationRequest", "QueryInstitutionRequest", "QueryDrugRequest"] = ( + 
Field( + ..., + description="请求数据模型,根据服务标识传入相应的请求数据模型", + ) + ) + + # 根据服务标识校验请求数据模型 + # noinspection PyNestedDecorators + @field_validator("data") + @classmethod + def validate_data(cls, value, values): + service = values.data.get("service") + + if service == "divination" and not isinstance(value, DivinationRequest): + raise ValueError("小六壬速断服务需要 DivinationRequest 请求数据模型") + + if service == "query_institution" and not isinstance( + value, QueryInstitutionRequest + ): + raise ValueError( + "根据名称精准查询医药机构信息服务需要 QueryInstitutionRequest 请求数据模型" + ) + + if service == "query_drug" and not isinstance(value, QueryDrugRequest): + raise ValueError( + "根据类型和名称模糊查询药品信息服务需要 QueryDrugRequest 请求数据模型" + ) + + return value + + class Config: + json_schema_extra = { + "example": { + "service": "query_institution", + "data": {"name": "浙江大学医学院附属第一医院"}, + } + } + + +class Response(BaseModel): + """统一响应模型""" + + code: int = Field( + default=0, description="错误码,0表示成功,其它表示发生错误或异常" + ) + message: str = Field( + default="成功", + description="错误描述", + ) + data: Union[ + "DivinationResponse", "QueryInstitutionResponse", "QueryDrugResponse" + ] = Field(default=None, description="响应数据模型") + + +# noinspection PyUnusedLocal +@application.exception_handler(RequestValidationError) +async def validation_exception_handler(request: Request, error: RequestValidationError): + return JSONResponse( + status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, + content=Response( + code=422, + message="校验模型失败", + ).model_dump(), + ) + + +# noinspection PyUnusedLocal +@application.exception_handler(HTTPException) +async def http_exception_handler(request: Request, exception: HTTPException): + return JSONResponse( + status_code=exception.status_code, + content=Response( + code=exception.status_code, + message="请求发生异常", + ).model_dump(), + ) + + +# noinspection PyUnusedLocal +@application.exception_handler(Exception) +async def general_exception_handler(request: Request, exception: Exception): + return JSONResponse( + 
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + content=Response( + code=500, + message="服务内部发生异常", + ).model_dump(), + ) + + +class DivinationRequest(BaseModel): + """小六壬速断的请求数据模型""" + + pass + + +class DivinationResponse(BaseModel): + """小六壬速断的响应数据模型""" + + fallen_palace: str = Field( + ..., description="落宫,数据类型为字符串,非空", examples=["小吉"] + ) + divination_verse: str = Field( + ..., + description="卦辞,数据类型为字符串,非空", + examples=["小吉最吉昌,路上好商量,阴人来报喜,失物在坤方"], + ) + + +class QueryInstitutionRequest(BaseModel): + """根据名称精准查询医药机构信息的请求数据模型""" + + name: str = Field( + ..., + max_length=255, + description="医药机构名称,数据类型为字符串,非空", + examples=["浙江大学医学院附属第一医院"], + ) + + # noinspection PyNestedDecorators + @field_validator("name") + @classmethod + def validate_name(cls, value: str) -> str: + """删除名称前后空格""" + return value.strip() + + +class QueryInstitutionResponse(BaseModel): + """根据名称精准查询医药机构信息的响应数据模型""" + + name: str = Field( + ..., + description="机构名称,数据类型为字符串,非空", + examples=["浙江大学医学院附属第一医院"], + ) + province: str = Field( + ..., + description="机构所在省,数据类型为字符串,非空", + examples=["浙江省"], + ) + city: str = Field( + ..., + description="机构所在地,数据类型为字符串,非空", + examples=["杭州市"], + ) + type: str = Field( + ..., + description="机构类型,数据类型为字符串,非空", + examples=["医疗机构"], + ) + incurred: str = Field( + ..., + description="是否为医保定点机构,数据类型为字符串,非空", + examples=["是"], + ) + level: Optional[str] = Field( + None, + description="机构等级,数据类型为字符串,可空", + examples=["三级甲等"], + ) + + attribute: Optional[str] = Field( + None, + description="机构属性,数据类型为字符串,可空", + examples=["公立医院、非营利性医院"], + ) + + +class QueryDrugRequest(BaseModel): + """根据类型和名称模糊查询药品信息的请求数据模型""" + + type: Literal["西药", "中草药", "中成药"] = Field( + ..., + description="药品类型,数据类型为枚举,非空", + examples=["西药"], + ) + name: str = Field( + ..., + max_length=255, + description="药品名称,数据类型为字符串,非空", + examples=["[达悦宁]盐酸二甲双胍缓释片 0.5*30"], + ) + + # noinspection PyNestedDecorators + @field_validator("name") + @classmethod + def validate_name(cls, value: str) -> str: 
+ """删除名称前后空格""" + return value.strip() + + +class QueryDrugResponse(BaseModel): + """根据类型和名称模糊查询药品信息的响应数据模型""" + + name: str = Field( + ..., + description="药品名称,数据类型为字符串,非空", + examples=["盐酸二甲双胍缓释片"], + ) + + +# ------------------------- +# 依赖项与工具函数 +# ------------------------- + + +async def authenticate_headers( + credentials: HTTPAuthorizationCredentials = Depends(HTTPBearer()), + content_type: str = Header( + default="application/json; charset=utf-8", + description="媒体类型", + ), +) -> bool: + """校验请求头中Content-Type和Authorization Bearer token""" + + if "application/json" not in content_type: + raise HTTPException(status_code=415, detail="只接受JSON格式数据") + + if not credentials or credentials.credentials != "779E0501265CDF7B8124EB87199994B8": + raise HTTPException(status_code=403, detail="认证失败") + + return True + + +# 创建MySQL连接引擎(默认使用DATABASE数据库) +engine = create_async_engine( + url=f"mysql+asyncmy://root:{quote_plus('Te198752')}@cdb-7z9lzx4y.cd.tencentcdb.com:10039/database?charset=utf8", + pool_size=10, # 连接池常驻连接数 + max_overflow=10, # 连接池最大溢出连接数 + pool_recycle=3600, # 连接回收时间(秒) + pool_pre_ping=True, # 连接前验证有效性 +) + +# 创建ORM对象 +Base = automap_base() +Base.prepare( + autoload_with=create_engine( + f"mysql+pymysql://root:{quote_plus('Te198752')}@cdb-7z9lzx4y.cd.tencentcdb.com:10039/database?charset=utf8" + ) +) + + +# 初始化MySQL会话工厂 +AsyncSessionLocal = async_sessionmaker(bind=engine, expire_on_commit=False) + + +@asynccontextmanager +async def create_session() -> AsyncGenerator[AsyncSession, None]: + """数据库会话上下文管理器""" + async with AsyncSessionLocal() as session: + try: + yield session + await session.commit() + except: + await session.rollback() + raise + finally: + await session.close() + + +# ------------------------- +# 服务路由 +# ------------------------- + + +@application.post( + path="/", + dependencies=[Depends(authenticate_headers)], + response_model=Response, + response_description="响应成功", + responses={ + 200: { + "model": Response, + "content": { + 
"application/json": { + "example": { + "code": 200, + "message": "医药机构信息不存在", + "data": None, + } + } + }, + "description": "不存在", + }, + 422: { + "model": Response, + "content": { + "application/json": { + "example": {"code": 422, "message": "校验模型失败", "data": None} + } + }, + "description": "校验模型失败", + }, + 500: { + "model": Response, + "content": { + "application/json": { + "example": { + "code": 500, + "message": "服务内部发生异常", + "data": None, + } + } + }, + "description": "服务内部发生异常", + }, + }, + name="服务中心", + description="所有请求均由本中心提供响应服务", +) +async def service( + request: Request, +) -> Response: + + # 根据服务标识匹配服务 + # noinspection PyUnreachableCode + match request.service: + case "divination": + return await divination() + case "query_institution": + return await query_institution(request) + case "query_drug": + return await query_drug(request) + case _: + return Response(code=400, message="无效的服务标识") + + +async def divination() -> Response: + """小六壬速断""" + + # 起算日期时间 + starting = datetime.now(tz=pytz.timezone("Asia/Shanghai")) + # 起算日期转为农历 + lunar = Lunar.from_date(date(starting.year, starting.month, starting.day)) + + # 根据农历月日和时辰匹配落宫和卦辞 + divination = [ + { + "fallen_palace": "空亡", + "divination_verse": "空亡事不详,阴人少乖张,求财无利益,行人有灾殃", + }, + { + "fallen_palace": "大安", + "divination_verse": "大安事事昌,求财在坤方,失物去不远,宅舍保安康", + }, + { + "fallen_palace": "留连", + "divination_verse": "留连事难成,求谋日未明,官事只宜缓,去者未回程", + }, + { + "fallen_palace": "速喜", + "divination_verse": "速喜喜来临,求财向南行,失物申午未,寻人路上寻", + }, + { + "fallen_palace": "赤口", + "divination_verse": "赤口主口舌,是非要紧防,失物速速讨,行人有惊慌", + }, + { + "fallen_palace": "小吉", + "divination_verse": "小吉最吉昌,路上好商量,阴人来报喜,失物在坤方", + }, + ][ + (lunar.month + lunar.day + ((starting.hour + 3) // 2) % 12 + 4) % 6 + ] # 需先将24制小时转为时辰,再根据月、日和时辰数落宫 + + return Response( + data=DivinationResponse( + fallen_palace=divination["fallen_palace"], + divination_verse=divination["divination_verse"], + ) + ) + + +async def query_institution(request: Request) -> Response: + 
"""根据名称精准查询医药机构信息""" + + async with create_session() as session: + # noinspection PyTypeChecker + institution = ( + await session.execute( + select(Base.classes.institution) + .join(Base.classes.institution_alias) + .where(Base.classes.institution_alias.name == request.data.name) + ) + ).scalar_one_or_none() + if institution is None: + return Response(code=204, message="医药机构信息不存在") + + return Response( + data=QueryInstitutionResponse( + name=institution.name, + province=institution.province, + city=institution.city, + type=institution.type, + incurred=institution.incurred, + level=institution.level, + attribute=institution.attribute, + ) + ) + + +async def query_drug(request: Request) -> Response: + """根据类型和名称模型查询药品信息""" + + async with create_session() as session: + # 基于MySQL全文检索能力,召回与检索词高度相关的药品名称 + drugs_name = ( + ( + await session.execute( + text( + """ + SELECT name + FROM drug + WHERE MATCH(name) AGAINST(:name IN NATURAL LANGUAGE MODE) AND type = :type + ORDER BY + (name = :name) DESC, + MATCH(name) AGAINST(:name) DESC, + LENGTH(name) ASC + LIMIT 10 + """ + ).bindparams(type=request.data.type, name=request.data.name) + ) + ) + .scalars() + .all() + ) + + result = None + for drug_name in drugs_name: + # 若检索词包含药品名称则以此作为结果 + if drug_name in request.data.name: + result = drug_name + break + + # 若 + if result is None: + + round((1 - levenshtein(string1, string2) / (len(string1) + len(string2))) * 100) + + print(drugs_name) + + if not drugs_name: + return Response(code=204, message="药品信息不存在") + + return Response( + data=QueryDrugResponse( + name=drugs_name[0], + ) + ) + + +""" + + + +""" diff --git a/regions/main.py b/regions/main.py new file mode 100644 index 0000000..a6fed81 --- /dev/null +++ b/regions/main.py @@ -0,0 +1,197 @@ +# -*- coding: utf-8 -*- + +''' + +脚本说明: + +根据行政区划数据就待转化数据集进行转化 + +备注: + +行政区划数据集来源于 https://lbsyun.baidu.com/faq/api?title=webapi/download + +''' + +import re + +import json + +import numpy + +import pandas + +import time + +import os + 
+import sys + +sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__)))) + +from utils.pandas_extension import SaveAsExcel + +print('1、读取Excel并创建数据集...', end = '') + +try: + + #待转化数据集 + dataset = pandas.read_excel(io = 'dataset.xlsx', sheet_name = 'Sheet1') + + #行政区划数据集 + dataset_regions = pandas.read_excel(io = 'dataset.xlsx', sheet_name = 'Sheet2') + +except: + + print('读取Excel或创建数据集发生异常,脚本终止') + print() + + exit() + +print('已完成') +print() + +print('2、转化数据') +print() + +print('2.1 基于行政区划数据集生成省级、地级和县级行政区字典', end = '') + +#县级名称和县级编码 +regions = dataset_regions[['省级名称', '省级编码', '地级名称', '地级编码', '县级名称', '县级编码']].drop_duplicates() + +print('已完成') +print() + +print('2.2 遍历并转化数据...', end = '') + +dataset.replace(to_replace = {numpy.nan: pandas.NA, None: pandas.NA, '': pandas.NA}, inplace = True) + +for index, row in dataset.iterrows(): + + province_name = row['省名称'] + + if province_name is not pandas.NA: + + try: + + #根据省名称匹配省级行政区字典并填充省区划编码 + row['省区划编码'] = str(dataset_regions.loc[dataset_regions['省级名称'] == province_name, '省级编码'].iat[0]) + + except: + + row['省区划编码'] = '未查询到省区划编码' + + row['与区划编码对比结果'] = '待确定' + + #省编码数据类型转为字符 + row['省编码'] = str(int(row['省编码'])) + + else: + + row['省区划编码'] = '省名称为空' + + row['省编码'] = '省名称为空' + + row['省名称'] = '省名称为空' + + row['与区划编码对比结果'] = '待确定' + + city_name = row['市名称'] + + if row['区划类型'] != '省': + + if city_name is not pandas.NA: + + try: + + #根据市名称匹配地级行政区字典并填充市区划编码 + row['市区划编码'] = str(dataset_regions.loc[dataset_regions['地级名称'] == city_name, '地级编码'].iat[0]) + + except: + + row['市区划编码'] = '未查询到市区划编码' + + row['与区划编码对比结果'] = '待确定' + + #市编码数据类型转为字符 + row['市编码'] = str(int(row['市编码'])) + + else: + + row['市区划编码'] = '市名称为空' + + row['市编码'] = '市名称为空' + + row['市名称'] = '市名称为空' + + row['与区划编码对比结果'] = '待确定' + + else: + + row['市区划编码'] = '' + + row['市编码'] = '' + + row['市名称'] = '' + + region_name = row['区县名称'] + + if row['区划类型'] == '区县': + + if region_name is not pandas.NA: + + try: + + #根据区县名称匹配县级行政区字典并填充区县区划编码 + row['区县区划编码'] = 
str(regions.loc[regions['县级名称'] == region_name, '县级编码'].iat[0]) + + if row['省名称'] == '省名称为空' or row['市名称'] == '市名称为空': + + #若省名称或市名称为空则补充说明 + row['与区划编码对比说明'] = '该区县所属{}/{}'.format(str(regions.loc[regions['县级名称'] == region_name, '省级名称'].iat[0]), str(regions.loc[regions['县级名称'] == region_name, '地级名称'].iat[0])) + + except: + + row['区县区划编码'] = '未查询到区县区划编码' + + row['与区划编码对比结果'] = '待确定' + + #县编码数据类型转为字符 + row['区县编码'] = str(int(row['区县编码'])) + + else: + + row['区县区划编码'] = '区县名称为空' + + row['区县编码'] = '区县名称为空' + + row['区县名称'] = '区县名称为空' + + row['与区划编码对比结果'] = '待确定' + + else: + + row['区县区划编码'] = '' + + row['区县编码'] = '' + + row['区县名称'] = '' + + dataset.iloc[index] = row + +dataset.fillna(value = '', inplace = True) + +print('已完成') +print() + +print('正在保存为EXCEL...', end = '') + +SaveAsExcel(worksheets = [('Sheet1', dataset)], save_path = 'results.xlsx') + +print('已完成') +print() + +''' + +修改记录 + +''' \ No newline at end of file diff --git a/reports/scorecard_report/model_evaluation.html b/reports/scorecard_report/model_evaluation.html new file mode 100644 index 0000000..ef7506f --- /dev/null +++ b/reports/scorecard_report/model_evaluation.html @@ -0,0 +1,466 @@ + + + + + Awesome-pyecharts + + + + +
+ + + diff --git a/rfm/main.py b/rfm/main.py new file mode 100644 index 0000000..cbd2bca --- /dev/null +++ b/rfm/main.py @@ -0,0 +1,398 @@ +# -*- coding: utf-8 -*- + +if __name__ == "__main__": + + """ + 基于RFM模型生成数据分析报告 + """ + + # 导入模块 + + import pandas + + from datetime import datetime + + from decimal import Decimal, ROUND_HALF_UP + + import statistics + + from jinja2 import Environment, FileSystemLoader + + from utils.client import MySQLClient + + from utils.pandas_extension import DrawAsHTML + + # 函数说明:根据RFM编码映射为客户分类 + def map_classification(r_encoded, f_encoded, m_encoded): + + # 就R、F、M指标构建独热编码并匹配客户分类 + match f"{r_encoded}{f_encoded}{m_encoded}": + + case "000": + + classification = "流失客户" + + case "010": + + classification = "一般维持客户" + + case "100": + + classification = "新客户" + + case "110": + + classification = "潜力客户" + + case "001": + + classification = "重要挽留客户" + + case "101": + + classification = "重要深耕客户" + + case "011": + + classification = "重要唤回客户" + + case "111": + + classification = "重要价值客户" + + # noinspection PyUnboundLocalVariable + return classification + + print("1 加载数据集...", end="") + + client = MySQLClient(database="data_analysis") + + dataframe = client.execute_query( + sql="select 客户ID, 交易金额, 交易日期 from rfm_dataset" + ) # customer_id 客户ID STRING,trade_date 交易日期 DATETIME.DATE,trade_amount 交易金额 DECIMAL + + print("已完成") + + print("2 预处理,删除包含缺失值的样本和重复样本...", end="") + + # 删除包含缺失值的样本 + dataframe.dropna(inplace=True) + + # 删除重复样本(保留第一例重复样本、重置索引) + dataframe = dataframe.drop_duplicates(ignore_index=True, inplace=False) + + # 仅保留交易日期为2012和2013年的样本 + dataframe = dataframe[ + dataframe["交易日期"].apply(lambda x: x.year in [2012, 2013]) + ].reset_index( + drop=True + ) # 因交易日期数据类型为DATETIME.DATE非DATETIME64,故无法使用SERIES.DT.YEAR方法 + + sample_size = Decimal(dataframe.shape[0]).quantize(Decimal("0")) + + print("已完成") + + print("3 构建RFM...", end="") + + # 最远交易日期 + min_trade_date = dataframe["交易日期"].min() + + # 
R为最近一次交易日期距离样本中最远一次交易日期的天数(单位:日),DECIMAL;F为交易频率(单位;次),DECIMAL;M为交易金额(单位:元),DECIMAL。均正向化 + rfm = ( + dataframe.groupby(by="客户ID") + .agg( + R=( + "交易日期", + lambda x: Decimal((x.max() - min_trade_date).days).quantize( + Decimal("0"), rounding=ROUND_HALF_UP + ), + ), + F=( + "客户ID", + lambda x: Decimal(len(x)).quantize( + Decimal("0"), rounding=ROUND_HALF_UP + ), + ), + M=( + "交易金额", + lambda x: sum(x, Decimal("0")).quantize( + Decimal("0.00"), rounding=ROUND_HALF_UP + ), # 求和时指定初始值为DECIMAL("0") + ), + ) + .reset_index() + ) + + # 客户数 + customer_counts = Decimal(rfm.shape[0]).quantize(Decimal("0")) + + # 总交易金额 + trade_amounts = sum(rfm["M"], Decimal("0.00")).quantize(Decimal("0.00")) + + print("已完成") + + print("4 基于平均数将R、F和M分为低、高两个等级并组合为八种客户分类...", end="") + + # R、F和M的平均数,使用STATISTICS.MEAN统计平均值,保证精度 + # noinspection PyUnresolvedReferences + means = { + "R": statistics.mean(rfm["R"]).quantize( + Decimal("0.00"), rounding=ROUND_HALF_UP + ), + "F": statistics.mean(rfm["F"]).quantize( + Decimal("0.00"), rounding=ROUND_HALF_UP + ), + "M": statistics.mean(rfm["M"]).quantize( + Decimal("0.00"), rounding=ROUND_HALF_UP + ), + } + + rfm = rfm.assign( + 客户分类=lambda dataframe: dataframe.apply( + lambda row: map_classification( + r_encoded=0 if row["R"] <= means["R"] else 1, + f_encoded=0 if row["F"] <= means["F"] else 1, + m_encoded=0 if row["M"] <= means["M"] else 1, + ), + axis="columns", + ) + ) + + dataframe = dataframe.merge( + right=rfm[["客户ID", "客户分类"]], on="客户ID", how="left" + ) + + print("已完成") + + print("5 生成分析报告...", end="") + + draw = DrawAsHTML() + + # 生成数据预览 + draw.table( + dataframe=dataframe.sample(5), + file_name="数据预览.html", + ) + + # 客户分类维度 + customer_types = ( + rfm.groupby(by="客户分类") # 按照客户分类分组 + .agg( + R=( + "R", + lambda x: statistics.mean(x).quantize( + Decimal("0.00"), rounding=ROUND_HALF_UP + ), + ), # R平均值 + F=( + "F", + lambda x: statistics.mean(x).quantize( + Decimal("0.00"), rounding=ROUND_HALF_UP + ), + ), # F平均值 + M=( + "M", + lambda x: 
statistics.mean(x).quantize( + Decimal("0.00"), rounding=ROUND_HALF_UP + ), + ), # M平均值 + 客户占比=( + "客户分类", + lambda x: (Decimal(len(x)) / customer_counts * Decimal("100")).quantize( + Decimal("0.00"), rounding=ROUND_HALF_UP + ), + ), # 统计各客户分类的客户占比 + 交易金额占比=( + "M", + lambda x: Decimal( + sum(x, Decimal("0.00")) / trade_amounts * Decimal("100") + ).quantize(Decimal("0.00"), rounding=ROUND_HALF_UP), + ), # 统计各客户分类的交易金额占比 + ) + .reset_index() + ) + + # 生成客户分类分布 + draw.scatter( + dataframe=customer_types[["客户分类", "R", "F", "M"]], + xaxis_opts_min=475, + xaxis_opts_max=750, + file_name="客户分类分布.html", + ) + + # 生成客户占比 + draw.pie( + dataframe=customer_types[["客户分类", "客户占比"]].sort_values( + by="客户占比", ascending=False + ), # 按照客户占比降序 + file_name="客户占比.html", + ) + + # 生成交易金额占比 + draw.pie( + dataframe=customer_types[["客户分类", "交易金额占比"]].sort_values( + by="交易金额占比", ascending=False + ), # 按照交易金额占比降序 + file_name="交易金额占比.html", + ) + + report_backward = pandas.DataFrame( + data=[], columns=["客户分类", "窗口期", "客户数"] + ) + + for customer_type in customer_types["客户分类"]: + + for month in range(1, 13): + + # 窗口期,从2013-01至2013-12 + period = f"2013-{month:02d}" + + # 窗口期起期(向前滑动十二个月,包括当月) + period_start = ( + pandas.Period(value=f"2013-{month:02d}", freq="M") - 11 + ).start_time.date() + + # 窗口期止期 + period_end = pandas.Period( + value=f"2013-{month:02d}", freq="M" + ).end_time.date() + + # 指定客户分类窗口期内客户数 + customer_counts = dataframe.loc[ + (dataframe["客户分类"] == customer_type) + & (dataframe["交易日期"] >= period_start) + & (dataframe["交易日期"] <= period_end), + "客户ID", + ].nunique() + + report_backward.loc[report_backward.shape[0]] = [ + customer_type, + period, + customer_counts, + ] + + # 生成近十二个自然月客户数趋势 + draw.area( + dataframe=report_backward.groupby(by="窗口期", as_index=False).agg( + 客户数=("客户数", "sum") + ), + file_name="近十二个自然月客户数趋势.html", + yaxis_opts_min=1350, + ) + + report_backward = report_backward.loc[ + report_backward["客户分类"].isin( + ["新客户", "流失客户", "重要价值客户"] + ) # 仅考虑新客户、流水客户、重要价值客户 + 
].assign( + 总客户数=lambda x: x.groupby(by="窗口期")["客户数"].transform( + "sum" + ), # 统计窗口期总客户数并新增值各行 + 客户占比=lambda x: x.apply( + lambda y: ( + Decimal(y["客户数"]) / Decimal(y["总客户数"]) * Decimal("100") + ).quantize( + Decimal("0.00"), rounding=ROUND_HALF_UP + ), # 运算各项使用DECIMAL以控制精度 + axis="columns", + ), + ) + + # 生成近十二个自然月客户占比趋势(仅考虑新客户、流失客户和重要价值客户) + draw.bar( + dataframe=report_backward[ + ["客户分类", "窗口期", "客户占比"] + ], # 仅保留客户分类、窗口期和占比 + file_name="近十二个自然月客户占比趋势.html", + stack=True, + ) + + report_forward = ( + dataframe.assign( + 最早交易日期=lambda dataframe: dataframe.groupby(by="客户ID")[ + "交易日期" + ].transform("min"), + ) # 统计每位客户最早交易日期 + .assign( + 周期=lambda dataframe: ( + dataframe["交易日期"].apply(lambda x: x.year) + - dataframe["最早交易日期"].apply(lambda x: x.year) + ) + * 12 + + ( + dataframe["交易日期"].apply(lambda x: x.month) + - dataframe["最早交易日期"].apply(lambda x: x.month) + ) + ) # 每笔交易的交易日期和对客户最早交易日期的间隔作为周期,单位为月 + .assign( + 周期=lambda dataframe: dataframe["周期"].apply(lambda x: f"M+{x:02d}") + ) # 格式化周期 + .assign( + 群组=lambda dataframe: dataframe["最早交易日期"].apply( + lambda x: f"{x.year}-{x.month:02d}" + ) + ) # 截取最早交易日期的年月作为群组 + .groupby(by=["客户分类", "群组", "周期"], as_index=False) + .agg(客户数=("客户ID", "nunique")) # COHORT-ANALYSIS,群组-周期矩阵 + .groupby(by=["客户分类", "周期"], as_index=False) + .agg(客户数=("客户数", "sum")) # 统计各客户分类各周期用户数 + .assign( + 基准客户数=lambda dataframe: dataframe.groupby(by=["客户分类"])[ + "客户数" + ].transform("first") + ) + .assign( + 留存率=lambda dataframe: dataframe.apply( + lambda x: ( + Decimal(x["客户数"]) / Decimal(x["基准客户数"]) * Decimal("100") + ).quantize(Decimal("0.00"), rounding=ROUND_HALF_UP), + axis="columns", + ) + ) + .loc[ + lambda dataframe: dataframe["客户分类"].isin( + ["新客户", "流失客户", "重要价值客户"] + ) + & dataframe["周期"].isin( + [ + "M+01", + "M+02", + "M+03", + "M+04", + "M+05", + "M+06", + "M+07", + "M+08", + "M+09", + "M+10", + "M+11", + "M+12", + ] + ) + ] + .reset_index(drop=True) + ) + + # 生成近十二个自然月留存率趋势 + draw.line( + dataframe=report_forward[["客户分类", "周期", 
"留存率"]], + file_name="近十二个自然月留存率趋势.html", + ) + + # 获取报告模版 + template = Environment(loader=FileSystemLoader(".")).get_template("template.html") + + # 渲染模版 + rfm_report = template.render( + { + # 报告日期 + "report_date": datetime.now().strftime("%Y-%m-%d"), + "sample_size": sample_size, + } + ) + + with open("rfm_report.html", "w", encoding="utf8") as file: + + file.write(rfm_report) + + print("已完成") diff --git a/utils/logger.py b/utils/logger.py new file mode 100644 index 0000000..4ec7655 --- /dev/null +++ b/utils/logger.py @@ -0,0 +1,57 @@ +# -*- coding: utf-8 -*- + +""" + +基于LOGGING封装日志记录器 + +""" + +# 加载模块 + +import logging + +from logging.handlers import RotatingFileHandler + +""" + +类说明:封装日志记录器 + +""" + + +class Logger(object): + + def __init__(self, log_name: str): + + # 日志文件名称 + self.log_name = f"{log_name}.log" + + # 创建日志记录器 + self.logger = logging.getLogger(self.log_name) + self.logger.setLevel(logging.INFO) + + # 设置日志信息格式 + self.formatter = logging.Formatter( + "%(asctime)s - %(levelname)s - %(message)s", datefmt="%y-%m-%d %H:%M:%S" + ) + + # 控制台输出 + self.stream_handle = logging.StreamHandler() + self.stream_handle.setLevel("INFO") + self.stream_handle.setFormatter(self.formatter) + + # 文件输出 + self.file_handle = RotatingFileHandler( + filename=self.log_name, maxBytes=5 * 1024 * 1024, encoding="utf-8" + ) + self.file_handle.setLevel("INFO") + self.file_handle.setFormatter(self.formatter) + + # 添加控制台和文件日志记录 + if not self.logger.handlers: + self.logger.addHandler(self.stream_handle) + self.logger.addHandler(self.file_handle) + + def log(self, message): + + return self.logger.info(message) diff --git a/推荐系统/main.py b/推荐系统/main.py new file mode 100644 index 0000000..5a3badd --- /dev/null +++ b/推荐系统/main.py @@ -0,0 +1,986 @@ +# -*- coding: utf-8 -*- +''' +脚本说明: +推荐系统,召回阶段使用基于用户、物品协同过滤算法生成候选物品列表,精排阶段使用时序建模和多头注意力机制进一步精排最终形成推荐物品列表 +''' +# 导入模块 +from pydantic import BaseModel, Field, model_validator + +from typing import Literal + +from itertools import islice + +from 
operator import itemgetter + +import time + +import numpy + +from collections import deque, defaultdict + +import heapq + +from pyLSHash import LSHash + +''' + +编码区域 + +''' + + +# 数据模型:初始化参数 +class InitializationArguments(BaseModel): + + # 时间窗口(单位为天) + time_window: int = Field(default=30, ge=5, le=360) + + # 衰减兰布达因子 + decay_lambda: float = Field(default=0, ge=0.00, le=10) + + # 用户特征向量维度数 + attributes_dimensions: int = Field(default=10, ge=2.00, le=200) + + +# 数据模型:用户属性 +class Attributes(BaseModel): + + # 年龄,数据类型为整数 + age: int = Field(default=..., ge=1, le=99) + + # 性别 + gender: Literal['male', 'female'] = Field(default=...) + + # 市 + city: str = Field(default=...) + + # 职业 + occupation: str = Field(default=...) + + +# 数据模型:设置记录 +class SettingRecord(BaseModel): + + # 用户,数据类型为字符,无默认值,必填,六位 + user: str = Field(default=..., min_length=6, max_length=6) + + # 用户属性: + attributes: Attributes = Field(default=...) + + +# 数据模型:用户行为记录 +class BehaviorRecord(BaseModel): + + # 用户标识,数据类型为字符,无默认值,必填,六位 + user: str = Field(default=..., min_length=6, max_length=6) + + # 时间戳,数据类型为时间戳,必填 + timestamp: int = Field(default=...) + + # 行为类型,数据类型为列表,指定范围 + type: Literal['rating'] = Field(default=...) 
+ + # 物品标识,数据类型为字符,无默认值,必填,六位 + item: str = Field(default=..., min_length=6, max_length=6) + + # 评分,数据类型为整数或空,默认为空,大于等于1,小于等于5 + rating: int | None = Field(default=None, ge=1, le=5) + + # 校验规则:若行为类型为评分则评分非空 + @model_validator(mode='after') + def _validate_rating(self): + + if self.type == 'rating' and not self.rating: + + raise ValueError( + 'the rating parameter is required when type=rating') + + return self + + +# 原型级推荐系统 +class PrototypeRecommender: + + def __init__(self, **arguments): + + # 行为参数(不同行为赋予不同的的基础兴趣分数和衰减因子权重。其中,显式行为兴趣分数根据评分转化获得) + self.behavior_arguments = {'rating': (None, 1)} + + # 校验初始化参数数据模型并转为初始参数字典 + arguments = InitializationArguments(**arguments).model_dump() + + # 时间窗口,单位为天(将连续数据流切割为有限片段,平衡推荐系统实时性和运算效率) + self.time_window = arguments.get('time_window') + + # 衰减因子兰布达系数(时间窗口内兴趣分数随时间衰减程度) + self.decay_lambda = arguments.get('decay_lambda') + + # 用户特征向量维度数 + self.attributes_dimensions = arguments.get('attributes_dimensions') + + # 用户行为数据体(基于物品协同过滤的核心数据体) + self.behaviors = defaultdict(lambda: { + + # 兴趣分数列表(时间窗口内由物品标识和兴趣分数组成的字典),为最新兴趣分数,形如{'物品标识': '兴趣分数'} + 'scores': defaultdict(float), + + # 时间戳堆(时间窗口内由时间戳和物品标识组成的元组),例如('时间戳', '物品标识') + 'timestamps_heap': [], + + # 历史物品标识列表(默认最多保存200例历史物品标识,平衡推荐系统实时性和运算效率),形如'物品标识' + 'items_history': deque(maxlen=200) + + }) + + # 在计算物品标识-物品标识余弦相似度时可分解为分子部分和分母平方部分并在新增/更新用户行为时增量更新,以优化运算效率 + + # 计算物品标识-物品标识余弦相似度时分子部分 + self.items_similarity_numerator = defaultdict(float) + + # 计算物品标识-物品标识余弦相似度时分母平方部分 + self.items_similarity_denominator_square = defaultdict(float) + + # 物品标识倒排表,形如{'物品标识': ['用户标识']} + self.items_inversion = defaultdict(list) + + # 用户特征向量数据体(基于用户协同过滤的核心数据体,用户属性以独热编码方式保存) + self.attributes = defaultdict( + lambda: numpy.zeros(self.attributes_dimensions)) + + # 基于LSHash作为用户特征向量索引器(默认哈希值的二进制位数为8,哈希表数为2,哈希矩阵持久化路径) + self.attributes_indexer = LSHash( + hash_size=8, input_dim=self.attributes_dimensions, num_hashtables=2) + + # 处理用户属性记录 + def process_attribute_record(self, attribute_record: dict): + + # 
校验设置记录数据模型 + attribute_record = SettingRecord(**attribute_record).model_dump() + + user = attribute_record.get('user') + + for key, value in attribute_record.get('attributes').items(): + + # 若用户属性值非空 + if value: + + # 更新用户属性 + self.behaviors[user]['attributes'][key] = value + + return True + + # 处理用户行为记录 + def process_behavior_record(self, behavior_record: dict): + + # 校验行为记录数据模型,数据类型转为字典 + behavior_record = BehaviorRecord(**behavior_record).model_dump() + + # 用户标识 + user = behavior_record['user'] + + # 时间戳 + timestamp = behavior_record['timestamp'] + + # 行为类型 + type = behavior_record['type'] + + # 物品标识 + item = behavior_record['item'] + + # 评分,若行为类型为评分则评分必填,评分数据类型为整数,指定范围1~5 + rating = behavior_record['rating'] + + # 整理用户行为数据 + self._reorganize_behaviors(user=user) + + # 原兴趣分数 + score_past = self.behaviors[user]['scores'][item] + + # 现兴趣分数 + score = self._calculate_score( + timestamp=timestamp, type=type, rating=rating) + + # 若现兴趣分数大于原兴趣分数 + if score > score_past: + + # 更新兴趣分数列表 + self.behaviors[user]['scores'][item] = score + + # 更新时间戳堆 + heapq.heappush(self.behaviors[user] + ['timestamps_heap'], (timestamp, item)) + + # 更新历史物品标识列表 + self.behaviors[user]['items_history'].append(item) + + # 更新计算物品标识-物品标识余弦相似度的分子和分子平方部分 + self._update_items_similarity_components( + user=user, item=item, score_old=score_past, score_new=score) + + # 若用户标识在物品标识倒排表中索引为物品标识的用户标识列表 + if user not in self.items_inversion[item]: + + # 更新倒排表 + self.items_inversion[item].append(user) + + # 整理用户行为数据(基于时间窗口清理过期数据) + def _reorganize_behaviors(self, user): + + # 时间戳堆 + timestamps_heap = self.behaviors[user]['timestamps_heap'] + + # 若时间戳堆非空列表且现在距时间戳堆中第一元组的时间戳的时距大于时间窗口 + while timestamps_heap and (time.time() - timestamps_heap[0][0]) / 86400 > self.time_window: + + # 删除时间戳堆中第一元组并获取过期时间戳和过期物品标识 + timestamp_expired, item_expired = heapq.heappop(timestamps_heap) + + # 若过期物品标识的兴趣分数非空 + if self.behaviors[user]['scores'][item_expired]: + + # 获取过期物品标识的兴趣分数 + score_expired = 
self.behaviors[user]['scores'][item_expired] + + # 在兴趣分数列表删除索引为过期物品标识的项 + del self.behaviors[user]['scores'][item_expired] + + # 若过期物品标识在历史物品标识列表 + if item_expired in self.behaviors[user]['items_history']: + + # 在历史物品标识列表删除过期物品标识 + self.behaviors[user]['items_history'].remove(item_expired) + + # 更新更新计算物品标识-物品标识余弦相似度的分子和分子平方部分 + self._update_items_similarity_components( + + user=user, item=item_expired, score_old=score_expired, score_new=0) + + # 若用户标识在物品标识倒排表索引为过期物品标识的用户标识列表 + if user in self.items_inversion[item_expired]: + + # 在用户标识列表删除用户标识 + self.items_inversion[item_expired].remove(user) + + # 若物品标识倒排表中用户标识列表为空列表 + if not self.items_inversion[item_expired]: + + # 在物品标识倒排表删除索引为物品标识的项 + del self.items_inversion[item_expired] + + # 计算兴趣分数 + def _calculate_score(self, timestamp, type, rating): + + # 在时间窗口内,兴趣分数随时间以指数函数衰减 + + # 时距,单位为天 + time_interval = (time.time() - timestamp) / 86400 + + # 若时距大于时间窗口 + if time_interval > self.time_window: + + return 0 + + # 基础兴趣分数,衰减因子权重 + score_base, weight = self.behavior_arguments.get(type) + + # 若行为类型为评分 + if type == 'rating': + + # 基础兴趣分数经非线性转化为0.2至0.8 + score_base = 0.2 + 0.6 * (1 / (1 + numpy.exp(3 - rating))) + + # 加权后衰减因子兰布达系数 + decay_lambda_weighted = self.decay_lambda * weight + + # 基于指数函数计算兴趣评分 + score = score_base * \ + numpy.exp(0 - decay_lambda_weighted * time_interval) + + return score + + # 更新计算物品标识-物品标识余弦相似度的分子和分子平方部分 + def _update_items_similarity_components(self, user, item, score_old, score_new): + + for item_history in self.behaviors[user]['items_history']: + + if item_history != item: + + # 构建物品标识-物品标识的有序物品标识对 + pair = tuple(sorted((item_history, item))) + + self.items_similarity_numerator[pair] += ( + score_new - score_old) * self.behaviors[user]['scores'][item_history] + + self.items_similarity_denominator_square[item] += score_new**2 - score_old**2 + + # 生成推荐列表 + def generate_recommendations(self, user, k=10): + + # 推荐物品标识列表 + items_candidates = defaultdict(float) + + # 整理用户行为数据 + 
self._reorganize_behaviors(user=user) + + # 最大候选数 + maximum_candidates = 20 * k + + behaviors = self.behaviors[user] + + # 历史物品标识列表 + items_history = behaviors['items_history'] + + # 基于物品协同过滤算法生成的候选物品兴趣分数权重 + alpha_weight = 0.2 / \ + (1 + numpy.exp(0.05 * len(items_history) - 1.2)) + 0.65 + + # 基于物品协同过滤算法生成候选物品标识列表 + candidates_items = self._generate_items_candidates( + user=user, maximum_candidates=maximum_candidates) + + # 基于用户协同过滤算法生成候选物品标识列表 + candidates_users = self._generate_users_candidates( + user=user, maximum_candidates=maximum_candidates) + + # 合并基于物品协同过滤算法生成的候选物品标识列表和基于用户协同过滤算法生成候的选物品标识列表 + for item_candidate in candidates_items.keys() | candidates_users.keys(): + + items_candidates[item_candidate] = candidates_items[item_candidate] * \ + alpha_weight + \ + candidates_users[item_candidate] * (1 - alpha_weight) + + return dict(islice(sorted(items_candidates.items(), key=itemgetter(1), reverse=True), k)) + + # 基于物品协同过滤算法生成候选物品标识列表 + def _generate_items_candidates(self, user, maximum_candidates): + + # 召回物品标识列表 + items_recall = defaultdict(lambda: {'counts': 0, 'scores': 0}) + + behaviors = self.behaviors[user] + + # 历史物品标识列表作为启发物品标识列表 + items_heuristic = behaviors['items_history'] + + # 先通过启发式物品标识在物品标识倒排表查询索引为启发式物品标识的启发式用户标识,再通过启发式用户标识查询历史物品标识列表作为候选物品标识 + for item_heuristic in items_heuristic: + + for user_heuristic in self.items_inversion[item_heuristic]: + + # 若通过启发式物品标识在物品标识倒排表查询索引为启发式物品标识的启发式用户标识和用户标识不一致 + if user_heuristic != user: + + for item_recall in self.behaviors[user_heuristic]['items_history']: + + # 若召回物品标识不在启发物品标识列表 + if item_recall not in items_heuristic: + + items_recall[item_recall]['counts'] += 1 + + # 遍历启发式物品标识列表和召回物品标识列表(召回物品标识列表不可能包含启发式物品标识),计算余弦相似度 + for item_heuristic in items_heuristic: + + # 在物品标识倒排表查询索引为启发式物品标识的用户标识列表 + users_heuristic = self.items_inversion[item_heuristic] + + for item_recall in items_recall: + + # 在物品标识倒排表查询索引为召回物品标识的用户标识列表 + users_recall = self.items_inversion[item_recall] + + # 计算物品标识-物品标识余弦相似度时分母部分 + 
items_similarity_denominator = numpy.sqrt( + self.items_similarity_denominator_square[item_heuristic] * self.items_similarity_denominator_square[item_recall]) + + # 计算物品标识-物品标识余弦相似度时分母部分不为0 + if items_similarity_denominator != 0: + + # 构建物品标识-物品标识的有序物品标识对 + pair = tuple(sorted((item_heuristic, item_recall))) + + # 余弦相似度 + similarity = ( + self.items_similarity_numerator[pair] / items_similarity_denominator) + + else: + + similarity = 0 + + # 在物品标识倒排表查询索引为历史物品标识的用户标识列表和在物品标识倒排表查询索引为召回物品标识的用户标识列表共同用户标识列表 + users_common = list(set(users_heuristic) & set(users_recall)) + + # 抑制流行物品因子 + popularity_suppressed = len( + users_common) / numpy.sqrt(len(users_heuristic) * len(users_recall)) + + items_recall[item_recall]['scores'] += behaviors['scores'][item_heuristic] * \ + similarity * popularity_suppressed + + # 归一化候选物品标识列表 + candidates = self._normalize_candidates( + items_recall=items_recall, maximum_candidates=maximum_candidates) + + return candidates + + # 基于用户协同过滤算法生成候选物品标识列表 + def _generate_users_candidates(self, user, maximum_candidates): + + # 召回物品标识列表 + items_recall = defaultdict(lambda: {'counts': 0, 'scores': 0}) + + attributes = self.attributes[user] + + # 若用户特征向量非初始化特征向量 + if numpy.all(attributes != 0): + + # 基于LSHash查询与用户特征向量相似的用户标识作为召回用户标识 + for _, similarity, user_recall in self.attributes_indexer.query(query_vector=attributes, num_results=maximum_candidates, dist_func='cosine'): + + behaviors_recall = self.behaviors[user_recall] + + # 召回用户标识的历史物品标识作为召回物品标识 + for item_recall in behaviors_recall['items_history']: + + # 若召回物品标识不在历史物品标识列表 + if item_recall not in self.behaviors[user]['items_history']: + + items_recall[item_recall]['counts'] += 1 + + items_recall[item_recall]['scores'] += behaviors_recall['scores'][item_recall] * similarity + + # 归一化候选物品标识列表 + candidates = self._normalize_candidates( + items_recall=items_recall, maximum_candidates=maximum_candidates) + + return candidates + + # 归一化候选物品标识列表 + def _normalize_candidates(self, items_recall, 
maximum_candidates): + + # 候选物品标识列表 + candidates = defaultdict(float) + + # 若召回物品标识列表非空字典 + if items_recall: + + # 候选物品兴趣分数 + scores = [nest['scores'] for nest in items_recall.values()] + + # 候选物品相似分数最小值 + scores_minimum = min(scores, default=0) + + # 候选物品相似分数最大值 + scores_maximum = max(scores, default=0) + + # 值距 + scores_range = scores_maximum - scores_minimum + + # 若值距不为0 + if scores_range != 0: + + # 兴趣分数归一化 + for item_recall in items_recall: + + candidates[item_recall] = ( + (items_recall[item_recall]['scores'] - scores_minimum) / scores_range) * 0.6 + 0.2 + + else: + + # 兴趣分数默认为0.8 + for item_recall in items_recall: + + candidates[item_recall] = 0.8 + + # 根据兴趣分数倒序排序并截取 + candidates = dict(islice(sorted(candidates.items(), key=itemgetter( + + 1), reverse=True), maximum_candidates)) + + return candidates + + +if __name__ == "__main__": + + # 初始化引擎 + recommender = PrototypeRecommender() + + feedback_records = [ + {'user': 'aaaaaa', 'item': '111111', 'type': 'rating', + 'timestamp': int(time.time() - 3600), 'rating': 4}, + {'user': 'aaaaaa', 'item': '333333', 'type': 'rating', + 'timestamp': int(time.time() - 3200), 'rating': 4}, + {'user': 'bbbbbb', 'item': '333333', 'type': 'rating', + 'timestamp': int(time.time() - 3200), 'rating': 4}, + {'user': 'cccccc', 'item': '111111', 'type': 'rating', + 'timestamp': int(time.time() - 3200), 'rating': 5}, + {'user': 'cccccc', 'item': '222222', 'type': 'rating', + 'timestamp': int(time.time() - 3200), 'rating': 5}, + {'user': 'cccccc', 'item': '333333', 'type': 'rating', + 'timestamp': int(time.time() - 3200), 'rating': 3} + ] + + for feedback_record in feedback_records: + + recommender.process_behavior_record(behavior_record=feedback_record) + + a = recommender.generate_recommendations(user='cccccc') + + print(a) + + +exit() + +''' + + + + + + + +import numpy + +import pandas + +import zipcodes + +import re + +from collections import Counter + +from scipy.stats import chisquare + +from scipy.stats.contingency import 
association + +from sklearn.preprocessing import OneHotEncoder, StandardScaler + +import math + +from minisom import MiniSom + +from sklearn.cluster import KMeans + +import warnings + +#忽略警告 +warnings.simplefilter('ignore') + +import sys + +sys.path.append('..') + +from utils.pandas2chart import Pandas2chart + +from utils.algorithms import OptimalClusters + +#本脚本中所调用的函数 + +#提取性别特征时将特征值“M”映射为“male”,“F”映射为“female” +def Gender(element): + + match element: + + case 'M': + + return 'male' + + case 'F': + + return 'female' + + case default: + + return numpy.nan + +#提取年龄特征时将小于18岁映射为“under18”,大于等于18岁且小于等于24岁映射为“18~24”,大于等于25岁且小于等于34岁映射为“25~34”,大于等于35岁且小于等于44岁映射为“35~44”,大于等于45岁且小于等于54岁映射为“45~54”,大于54岁映射为“above54”” +def Age(element): + + match element: + + case age if age > 54: + + return 'above54' + + case age if age >= 45: + + return '45~54' + + case age if age >= 35: + + return '35~44' + + case age if age >= 25: + + return '25~34' + + case age if age >= 18: + + return '18~24' + + case age if age < 18: + + return 'under18' + + case default: + + return numpy.nan + +#提取职业特征时根据映射表映射 +def Occupation(element): + + match element: + + case 0: + + return 'other' + + case 1: + + return 'academic/educator' + + case 2: + + return 'artist' + + case 3: + + return 'clerical/admin' + + case 4: + + return 'college/grad student' + + case 5: + + return 'customer service' + + case 6: + + return 'doctor/health care' + + case 7: + + return 'executive/managerial' + + case 8: + + return 'farmer' + + case 9: + + return 'homemaker' + + case 10: + + return 'k-12 student' + + case 11: + + return 'lawyer' + + case 12: + + return 'programmer' + + case 13: + + return 'retired' + + case 14: + + return 'sales/marketing' + + case 15: + + return 'scientist' + + case 16: + + return 'self-employed' + + case 17: + + return 'technician/engineer' + + case 18: + + return 'tradesman/craftsman' + + case 19: + + return 'unemployed' + + case 20: + + return 'writer' + + case default: + + return numpy.nan + 
+#提取州级行政区特征时,根据邮政编码模糊查询州级行政区,若为空或多个则映射为“null”,否则为查询结果 +def State(element): + + #校验邮政编码格式 + if not re.match(r'^\d{4,5}$|^\d{5}-\d{4}$', element): + + #若邮政编码由9位数字组成,修改邮政编码格式,否则为“null” + if re.match(r'^\d{9}$', element): + + element = element[: 5] + '-' + element[-4: ] + + else: + + return numpy.nan + + #根据邮政编码模糊查询并解析州级行政区 + states = [element.get('state') for element in zipcodes.similar_to(element)] + + #若州级行政区数为1则将查询结果定义为州级行政区,否则为“null” + if len(set(states)) == 1: + + return states[0] + + else: + + return numpy.nan + +#提取最近最喜欢的电影体裁特征 +def Genres(series): + + #合并每位用户评价过的电影体裁并切割为单个 + genres = series.str.cat(sep = '|').split('|') + + #查询数量最多的电影体裁并返回 + return str.lower(Counter(genres).most_common(1)[0][0]) + +#独热编码特征名组合器 +def Combiner(feature, category): + + return str(feature) + ':' + str(category) + +#若本脚本被调用报错 +if __name__ != '__main__': + + print('本脚本不允许被调用') + print() + + exit() + +print('1 打开本地数据文件,读取数据集...', end = '') + +try: + + dataset_users = pandas.read_csv(filepath_or_buffer = './MovieLens10K/users.csv', low_memory = False) + + dataset_movies = pandas.read_csv(filepath_or_buffer = './MovieLens10K/movies.csv', low_memory = False) + + dataset_ratings = pandas.read_csv(filepath_or_buffer ='./MovieLens10K/ratings.csv', low_memory = False) + +except: + + print('读取失败,请检查数据文件是否存在或正确') + print() + + exit() + +print('已完成') +print() + +#评分数据集根据电影标识关联电影名称和体裁 +dataset_ratings = dataset_ratings.merge(right = dataset_movies[['movieId', 'title', 'genres']], how = 'left', on = 'movieId') + +#统计用户数 +users = dataset_users.shape[0] + +print('2 构建标签体系') +print() + +print('2.1 提取特征...', end = '') + +dataset = pandas.DataFrame(data = dataset_users['userId'].tolist(), columns = ['userId']) + +#提取性别特征 +dataset['gender'] = dataset_users['gender'].map(lambda element: Gender(element)) + +#提取年龄特征 +dataset['age'] = dataset_users['age'].map(lambda element: Age(element)) + +#提取职业特征 +dataset['occupation'] = dataset_users['occupation'].map(lambda element: Occupation(element)) + +#提取州级行政区特征 
+#dataset['state'] = dataset_users['zip'].map(lambda element: State(element)) + +#就评分数据集按照userId分组,统计每位用户最喜欢的体裁 +pivottable_ratings = dataset_ratings.groupby(by = 'userId').agg( + + #最喜欢的电影体裁 + genres = pandas.NamedAgg(column = 'genres', aggfunc = Genres) + +) + +pivottable_ratings.reset_index(inplace = True) + +#合并分箱后评分数和平均评分,另最喜欢的电影体裁 +dataset = dataset.merge(right = pivottable_ratings[['userId', 'genres']], how = 'left', on = 'userId') + +#删除用户ID和包含缺失值的样本 +dataset.pop('userId').dropna(inplace = True) + +print('已完成') +print() + +#统计样本数 +samples = dataset.shape[0] + +#获取特征名称 +independents = dataset.columns + +print('特征数据集中样本数为 %d 例,特征数为 %d 个。' % (samples, len(independents))) +print() + +print('2.2 检验各特征各项样本数是否符合均匀分布') +print() + +for independent in independents: + + #按照特征分组,统计各项样本数 + pivottable = dataset.groupby(by = independent).agg( + + samples = pandas.NamedAgg(column = independent, aggfunc = 'count') + + ) + + #检验各项样本数是否均匀分布 + statistic, probability = chisquare(f_obs = pivottable['samples'].to_numpy()) + + if probability < 0.05: + + print('特征 %s 各项样本数不符合均匀分布,卡方统计量为 %.2f,概率为 %.2f 。' % (independent, statistic, probability)) + print() + + else: + + print('特征 %s 各项样本数符合均匀分布,卡方统计量为 %.2f,概率为 %.2f 。' % (independent, statistic, probability)) + print() + + pivottable.reset_index(inplace = True) + + #按照样本数倒序排序 + pivottable.sort_values(by = 'samples', ascending = False, inplace = True) + + #若项数大于指定值,则将第(指定值)项至最后一项合并为一项,指定值为6 + if pivottable.shape[0] > 6: + + pivottable_marging = pivottable.iloc[: 5] + + #合并后的项名为others,统计第(指定值-1)项至最后一项样本数的和 + pivottable_marging.loc[pivottable_marging.shape[0]] = ['others', pivottable.iloc[5: , 1].sum()] + + else: + + pivottable_marging = pivottable + + #生成环形图 + Pandas2chart(dataset = pivottable_marging, type = 'circular', path = './reports/persona_report/circular_{}.html'.format(independent)) + +print('2.3 统计特征之间相关系数') +print() + +#用于保存特征之间克莱姆相关系数矩阵 +correlation_matrix = pandas.DataFrame(data = [], index = independents, columns = 
independents) + +#用于保存相关特征对 +correlation_pairs = [] + +for index, independent_index in enumerate(independents): + + for column, independent_column in enumerate(independents): + + #统计特征之间克莱姆相关系数 + statistic = round(association(observed = pandas.crosstab(index = dataset[independent_index], columns = dataset[independent_column])), 2) + + correlation_matrix.loc[independent_index, independent_column] = statistic + + #获取相关特征对 + if column > index and statistic >= 0.25: + + correlation_pairs.append({'independent': independent_index, 'independent_correlation': independent_column}) + +#生成相关系数矩阵热力图 +Pandas2chart(dataset = correlation_matrix, type = 'heatmap', path = './reports/persona_report/heatmap_correlation_matrix.html') + +print('3、构建用户细分群体') +print() + +print('3.1 独热编码特征并标准化...', end = '') + +#独热编码特征,用于决策树算法模型 +onehot_encoder = OneHotEncoder(sparse_output = False, handle_unknown = 'ignore', feature_name_combiner = Combiner).fit(X = dataset.to_numpy()) + +dataset_processing = pandas.DataFrame(data = onehot_encoder.transform(X = dataset.to_numpy()), columns = onehot_encoder.get_feature_names_out(input_features = independents)).astype(dtype = 'int') + +#独热编码特征 +dataset_preprocessing = OneHotEncoder(sparse_output = False, handle_unknown = 'ignore').fit_transform(X = dataset.to_numpy()) + +#标准化特征 +dataset_preprocessing = StandardScaler().fit_transform(X = dataset_preprocessing) + +print('已完成') +print() + +print('3.2 基于自我组织映射算法初步聚类...', end = '') + +#定义竞争层的长度和高度(经验值) +competitive_layer_length = competitive_layer_heigth = math.ceil(2.25 * math.pow(samples, 0.25)) + +#创建自我组织映射算法模型 +som = MiniSom(x = competitive_layer_length, y = competitive_layer_heigth, input_len = dataset_preprocessing.shape[1], sigma = math.sqrt(math.pow(competitive_layer_length, 2) + math.pow(competitive_layer_heigth, 2)), activation_distance = 'cosine', random_seed = 0) + +#初始化模型 +som.pca_weights_init(data = dataset_preprocessing) + +#训练模型 +som.train_batch(data = dataset_preprocessing, num_iteration = 10) 
+ +#获取各样本的竞争层中优胜点坐标 +dataset_preprocessing = [som.winner(record) for record in dataset_preprocessing] + +dataset_preprocessing = pandas.DataFrame(data = dataset_preprocessing, columns = ['axis_x', 'axis_y']) + +print('已完成') +print() + +print('3.3 就各样本的竞争层中优胜点坐标基于K均值算法再次聚类,使用间隔统计量评估聚类效果并确定最优聚类簇数...', end = '') + +#创建K均值算法模型并训练 +kmeans = KMeans(n_clusters = OptimalClusters(dataset_preprocessing.to_numpy()), n_init = 'auto').fit(dataset_preprocessing.to_numpy()) + +dataset_processing['cluster_label'] = kmeans.labels_ + +print('已完成') +print() + +print('3.4 基于决策树拟合聚类结果并输出聚类规则...', end = '') + + + + + + +print(dataset_processing['cluster_label'].max()) + + + + + + + + + + + + + +''' diff --git a/普康健康审核机器人/main.py b/普康健康审核机器人/main.py new file mode 100644 index 0000000..642672e --- /dev/null +++ b/普康健康审核机器人/main.py @@ -0,0 +1,2818 @@ +# -*- coding: utf-8 -*- + +''' +脚本说明: +本脚本基于SELENIUM实现普康健康自动审核,目前已实现登录、获取指定案件数据和案件审核(包括赔付和拒付) +作者:刘弼仁 +更新时间:2024-09-12 +备注: +1、在PAGEOBJECT尚未拆解自动审核动作 +''' + +#导入模块 + +import re + +import time + +import json + +from pageobject import PageObject + +import os + +import sys + +sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__)))) + +from utils.logger import Logger, fetch_exception + +#抽取内容配置项 +extractions = { + + '永诚审核页面': [ + + { + + 'field': '赔案号', + + 'field_xpath': '//*[@id="app"]/div/div/section/main/section[1]/form/div[4]/div[1]/div/div/div' + + }, + + { + + 'field': '报案保单号', + + 'field_xpath': '//*[@id="app"]/div/div/section/main/section[1]/div[3]/div[2]/div' + + }, + + { + + 'field': '报案方式', + + 'field_xpath': '//*[@id="app"]/div/div/section/main/section[1]/div[1]/div[3]/div[2]' + + }, + + { + + 'field': '复核人', + + 'field_xpath': '//*[@id="app"]/div/div/section/main/section[1]/div[1]/div[2]/div[5]/span[2]' + + }, + + { + + 'field': '出险人姓名', + + 'field_xpath': '//*[@id="app"]/div/div/section/main/section[1]/form/div[5]/div[1]/div/div/div/input' + + }, + + { + + 'field': '出险人证件类型', + + 'field_xpath': 
'//*[@id="app"]/div/div/section/main/section[1]/form/div[5]/div[2]/div/div/div/div[1]/input' + + }, + + { + + 'field': '出险人证件号', + + 'field_xpath': '//*[@id="app"]/div/div/section/main/section[1]/form/div[5]/div[3]/div/div/div/input' + + }, + + { + + 'field': '出险人证件有效期起期', + + 'field_xpath': '//*[@id="app"]/div/div/section/main/section[1]/form/div[5]/div[4]/div/div/div[1]/input[1]' + + }, + + { + + 'field': '出险人证件有效期止期', + + 'field_xpath': '//*[@id="app"]/div/div/section/main/section[1]/form/div[5]/div[4]/div/div/div[1]/input[2]' + + }, + + { + + 'field': '出险人与主被保险人关系', + + 'field_xpath': '//*[@id="app"]/div/div/section/main/section[1]/form/div[6]/div[3]/div/div/div/div[1]/input' + + }, + + { + + 'field': '主被保险人姓名', + + 'field_xpath': '//*[@id="app"]/div/div/section/main/section[1]/form/div[7]/div[1]/div/div/div/input' + + }, + + { + + 'field': '主被保险人证件类型', + + 'field_xpath': '//*[@id="app"]/div/div/section/main/section[1]/form/div[7]/div[2]/div/div/div/div[1]/input' + + }, + + { + + 'field': '主被保险人证件号', + + 'field_xpath': '//*[@id="app"]/div/div/section/main/section[1]/form/div[7]/div[3]/div/div/div/input' + + }, + + { + + 'field': '主被保险人证件有效期起期', + + 'field_xpath': '//*[@id="app"]/div/div/section/main/section[1]/form/div[7]/div[4]/div/div/div[1]/input[1]' + + }, + + { + + 'field': '主被保险人证件有效期止期', + + 'field_xpath': '//*[@id="app"]/div/div/section/main/section[1]/form/div[7]/div[4]/div/div/div[1]/input[2]' + + }, + + { + + 'field': '领款人姓名', + + 'field_xpath': '//*[@id="app"]/div/div/section/main/section[1]/form/div[11]/div[1]/div/div/div/input' + + }, + + { + + 'field': '领款人证件类型', + + 'field_xpath': '//*[@id="app"]/div/div/section/main/section[1]/form/div[11]/div[2]/div/div/div/div[1]/input' + + }, + + { + + 'field': '领款人证件号', + + 'field_xpath': '//*[@id="app"]/div/div/section/main/section[1]/form/div[11]/div[3]/div/div/div/input' + + }, + + { + + 'field': '领款人证件有效期起期', + + 'field_xpath': 
'//*[@id="app"]/div/div/section/main/section[1]/form/div[11]/div[4]/div/div/div[1]/input[1]' + + }, + + { + + 'field': '领款人证件有效期止期', + + 'field_xpath': '//*[@id="app"]/div/div/section/main/section[1]/form/div[11]/div[4]/div/div/div[1]/input[2]' + + }, + + { + + 'field': '开户行', + + 'field_xpath': '//*[@id="app"]/div/div/section/main/section[1]/form/div[12]/div[2]/div/div/div/div[1]/input' + + }, + + { + + 'field': '银行账户', + + 'field_xpath': '//*[@id="app"]/div/div/section/main/section[1]/form/div[12]/div[1]/div/div/div/input' + + }, + + { + + 'field': '联系电话', + + 'field_xpath': '//*[@id="app"]/div/div/section/main/section[1]/form/div[12]/div[3]/div/div/div/input' + + }, + + { + + 'field': '联系地址', + + 'field_xpath': '//*[@id="app"]/div/div/section/main/section[1]/form/div[12]/div[4]/div/div/div/input' + + }, + + { + + 'field': '出险地址', + + 'field_xpath': '//*[@id="app"]/div/div/section/main/section[1]/form/div[13]/div[2]/div/div/div/div[1]/input' + + }, + + { + + 'table': '保单信息', + + 'table_xpath': '//*[@id="app"]/div/div/section/main/section[1]/div[4]/div[3]/table/tbody', + + 'fields': [ + + { + + 'field': '保单号', + + 'field_xpath': '//*[@id="app"]/div/div/section/main/section[1]/div[4]/div[3]/table/tbody/tr[index]/td[2]' + + }, + + { + + 'field': '保险分公司', + + 'field_xpath': '//*[@id="app"]/div/div/section/main/section[1]/div[4]/div[3]/table/tbody/tr[index]/td[6]' + + }, + + { + + 'field': '出险人与主被保险人关系', + + 'field_xpath': '//*[@id="app"]/div/div/section/main/section[1]/div[4]/div[3]/table/tbody/tr[index]/td[9]' + + }, + + { + + 'field': '保障期', + + 'field_xpath': '//*[@id="app"]/div/div/section/main/section[1]/div[4]/div[3]/table/tbody/tr[index]/td[10]' + + } + + ] + + }, + + { + + 'field': '票据提示', + + 'field_xpath': '//*[@id="app"]/div/div/section/main/section[1]/div[5]/div[1]/div[2]' + + }, + + { + + 'table': '票据信息', + + 'table_xpath': '//*[@id="pane-first"]/div/div[3]/table/tbody', + + 'fields': [ + + { + + 'field': '票据序号', + + 'field_xpath': 
'//*[@id="pane-first"]/div/div[5]/div[2]/table/tbody/tr[index]/td[2]' + + }, + + { + + 'field': '票据号', + + 'field_xpath': '//*[@id="pane-first"]/div/div[5]/div[2]/table/tbody/tr[index]/td[4]' + + }, + + { + + 'field': '交款人', + + 'field_xpath': '//*[@id="pane-first"]/div/div[3]/table/tbody/tr[index]/td[6]' + + }, + + { + + 'field': '就诊类型', + + 'field_xpath': '//*[@id="pane-first"]/div/div[3]/table/tbody/tr[index]/td[7]' + + }, + + { + + 'field': '出险原因', + + 'field_xpath': '//*[@id="pane-first"]/div/div[3]/table/tbody/tr[index]/td[8]' + + }, + + { + + 'field': '医保标志', + + 'field_xpath': '//*[@id="pane-first"]/div/div[3]/table/tbody/tr[index]/td[10]' + + }, + + { + + 'field': '收款人', + + 'field_xpath': '//*[@id="pane-first"]/div/div[3]/table/tbody/tr[index]/td[11]' + + }, + + { + + 'field': '开具日期', + + 'field_xpath': '//*[@id="pane-first"]/div/div[3]/table/tbody/tr[index]/td[12]' + + }, + + { + + 'field': '诊断疾病', + + 'field_xpath': '//*[@id="pane-first"]/div/div[3]/table/tbody/tr[index]/td[17]' + + }, + + { + + 'field': '票据金额', + + 'field_xpath': '//*[@id="pane-first"]/div/div[3]/table/tbody/tr[index]/td[18]' + + }, + + { + + 'field': '合理金额', + + 'field_xpath': '//*[@id="pane-first"]/div/div[3]/table/tbody/tr[index]/td[19]' + + }, + + { + + 'field': '部分自费', + + 'field_xpath': '//*[@id="pane-first"]/div/div[3]/table/tbody/tr[index]/td[20]' + + }, + + { + + 'field': '全部自费', + + 'field_xpath': '//*[@id="pane-first"]/div/div[3]/table/tbody/tr[index]/td[21]' + + }, + + { + + 'field': '统筹金额', + + 'field_xpath': '//*[@id="pane-first"]/div/div[3]/table/tbody/tr[index]/td[22]' + + }, + + { + + 'field': '第三方赔付金额', + + 'field_xpath': '//*[@id="pane-first"]/div/div[3]/table/tbody/tr[index]/td[24]' + + }, + + { + + 'field': '不合理金额', + + 'field_xpath': '//*[@id="pane-first"]/div/div[3]/table/tbody/tr[index]/td[23]' + + }, + + { + + 'field': '关联责任', + + 'field_xpath': '//*[@id="pane-first"]/div/div[3]/table/tbody/tr[index]/td[9]' + + }, + + { + + 'field': '票据验真', + + 'field_xpath': 
'//*[@id="pane-first"]/div/div[3]/table/tbody/tr[index]/td[25]' + + }, + + { + + 'field': '票据备注', + + 'field_xpath': '//*[@id="pane-first"]/div/div[3]/table/tbody/tr[index]/td[27]' + + } + + ] + + }, + + { + + 'field': '理算第一行保额', + + 'field_xpath': '//*[@id="app"]/div/div/section/main/section[1]/div[5]/div[6]/div[3]/div[3]/table/tbody/tr[1]/td[4]' + + }, + + { + + 'field': '理算金额', + + 'field_xpath': '//*[@id="app"]/div/div/section/main/section[1]/div[5]/div[6]/form/div[1]/div/div/input' + + }, + + { + + 'field': '公账赔付金额', + + 'field_xpath': '//*[@id="app"]/div/div/section/main/section[1]/div[5]/div[6]/form/div[2]/div/div/input' + + }, + + { + + 'field': '个账赔付金额', + + 'field_xpath': '//*[@id="app"]/div/div/section/main/section[1]/div[5]/div[6]/form/div[3]/div/div/input' + + }, + + { + + 'field': '审核结论', + + 'field_xpath': '//*[@id="app"]/div/div/section/main/section[1]/div[5]/div[8]/form/div[1]/div/div/div/div/div[1]/input' + + } + + ], + + '瑞泰审核页面': [ + + { + + 'field': '赔案号', + + 'field_xpath': '//*[@id="app"]/div/div/section/main/section[1]/form/div[4]/div/div/div/div' + + }, + + { + + 'field': '报案保单号', + + 'field_xpath': '//*[@id="app"]/div/div/section/main/section[1]/div[3]/div[3]/div' + + }, + + { + + 'field': '报案方式', + + 'field_xpath': '//*[@id="app"]/div/div/section/main/section[1]/div[1]/div[3]/div[2]' + + }, + + { + + 'field': '复核人', + + 'field_xpath': '//*[@id="app"]/div/div/section/main/section[1]/div[1]/div[2]/div[5]/span[2]' + + }, + + { + + 'field': '出险人姓名', + + 'field_xpath': '//*[@id="app"]/div/div/section/main/section[1]/form/div[5]/div[1]/div/div/div/input' + + }, + + { + + 'field': '出险人证件类型', + + 'field_xpath': '//*[@id="app"]/div/div/section/main/section[1]/form/div[5]/div[2]/div/div/div/div[1]/input' + + }, + + { + + 'field': '出险人证件号', + + 'field_xpath': '//*[@id="app"]/div/div/section/main/section[1]/form/div[5]/div[3]/div/div/div/input' + + }, + + { + + 'field': '出险人证件有效期起期', + + 'field_xpath': 
'//*[@id="app"]/div/div/section/main/section[1]/form/div[5]/div[4]/div/div/div/div[1]/input' + + }, + + { + + 'field': '出险人证件有效期止期', + + 'field_xpath': '//*[@id="app"]/div/div/section/main/section[1]/form/div[5]/div[4]/div/div/div/div[2]/input' + + }, + + { + + 'field': '出险人与主被保险人关系', + + 'field_xpath': '//*[@id="app"]/div/div/section/main/section[1]/form/div[6]/div[3]/div/div/div/div[1]/input' + + }, + + { + + 'field': '主被保险人姓名', + + 'field_xpath': '//*[@id="app"]/div/div/section/main/section[1]/form/div[8]/div[1]/div/div/div/input' + + }, + + { + + 'field': '主被保险人证件类型', + + 'field_xpath': '//*[@id="app"]/div/div/section/main/section[1]/form/div[8]/div[2]/div/div/div/div[1]/input' + + }, + + { + + 'field': '主被保险人证件号', + + 'field_xpath': '//*[@id="app"]/div/div/section/main/section[1]/form/div[8]/div[3]/div/div/div/input' + + }, + + { + + 'field': '主被保险人证件有效期起期', + + 'field_xpath': '//*[@id="app"]/div/div/section/main/section[1]/form/div[8]/div[4]/div/div/div/div[1]/input' + + }, + + { + + 'field': '主被保险人证件有效期止期', + + 'field_xpath': '//*[@id="app"]/div/div/section/main/section[1]/form/div[8]/div[4]/div/div/div/div[2]/input' + + }, + + { + + 'field': '领款人姓名', + + 'field_xpath': '//*[@id="app"]/div/div/section/main/section[1]/form/div[11]/div[1]/div/div/div/input' + + }, + + { + + 'field': '领款人证件类型', + + 'field_xpath': '//*[@id="app"]/div/div/section/main/section[1]/form/div[11]/div[2]/div/div/div/div[1]/input' + + }, + + { + + 'field': '领款人证件号', + + 'field_xpath': '//*[@id="app"]/div/div/section/main/section[1]/form/div[11]/div[3]/div/div/div/input' + + }, + + { + + 'field': '领款人证件有效期起期', + + 'field_xpath': '//*[@id="app"]/div/div/section/main/section[1]/form/div[11]/div[4]/div/div/div/div[1]/input' + + }, + + { + + 'field': '领款人证件有效期止期', + + 'field_xpath': '//*[@id="app"]/div/div/section/main/section[1]/form/div[11]/div[4]/div/div/div/div[2]/input' + + }, + + { + + 'field': '开户行', + + 'field_xpath': 
'//*[@id="app"]/div/div/section/main/section[1]/form/div[12]/div[2]/div[1]/div/div/div[1]/input' + + }, + + { + + 'field': '开户行分行', + + 'field_xpath': '//*[@id="app"]/div/div/section/main/section[1]/form/div[13]/div[2]/div/div/div/input' + + }, + + { + + 'field': '开户行分行所在省市', + + 'field_xpath': '//*[@id="app"]/div/div/section/main/section[1]/form/div[13]/div[1]/div/div/div/div[1]/input' + + }, + + { + + 'field': '银行账户', + + 'field_xpath': '//*[@id="app"]/div/div/section/main/section[1]/form/div[12]/div[1]/div[1]/div/div/input' + + }, + + { + + 'field': '联系电话', + + 'field_xpath': '//*[@id="app"]/div/div/section/main/section[1]/form/div[12]/div[3]/div/div/div/input' + + }, + + { + + 'field': '联系地址', + + 'field_xpath': '//*[@id="app"]/div/div/section/main/section[1]/form/div[12]/div[4]/div/div/div/input' + + }, + + { + + 'field': '出险日期', + + 'field_xpath': '//*[@id="app"]/div/div/section/main/section[1]/form/div[7]/div[2]/div/div/div/input' + + }, + + { + + 'field': '出险地址', + + 'field_xpath': '//*[@id="app"]/div/div/section/main/section[1]/form/div[7]/div[3]/div/div/div/div[1]/input' + + }, + + { + + 'field': '出险类型', + + 'field_xpath': '//*[@id="app"]/div/div/section/main/section[1]/form/div[7]/div[1]/div/div/div/div[1]/input' + + }, + + { + + 'table': '保单信息', + + 'table_xpath': '//*[@id="app"]/div/div/section/main/section[1]/div[4]/div[3]/table/tbody', + + 'fields': [ + + { + + 'field': '保单号', + + 'field_xpath': '//*[@id="app"]/div/div/section/main/section[1]/div[4]/div[3]/table/tbody/tr[index]/td[2]' + + }, + + { + + 'field': '保险分公司', + + 'field_xpath': '//*[@id="app"]/div/div/section/main/section[1]/div[4]/div[3]/table/tbody/tr[index]/td[6]' + + }, + + { + + 'field': '出险人与主被保险人关系', + + 'field_xpath': '//*[@id="app"]/div/div/section/main/section[1]/div[4]/div[3]/table/tbody/tr[index]/td[9]' + + }, + + { + + 'field': '保障期', + + 'field_xpath': '//*[@id="app"]/div/div/section/main/section[1]/div[4]/div[3]/table/tbody/tr[index]/td[10]' + + } + + ] + + }, + + { + + 
'field': '票据提示', + + 'field_xpath': '//*[@id="app"]/div/div/section/main/section[1]/div[5]/div[1]/div[2]' + + }, + + { + + 'table': '票据信息', + + 'table_xpath': '//*[@id="pane-first"]/div/div[3]/table/tbody', + + 'fields': [ + + { + + 'field': '票据序号', + + 'field_xpath': '//*[@id="pane-first"]/div/div[3]/table/tbody/tr[index]/td[2]' + + }, + + { + + 'field': '票据号', + + 'field_xpath': '//*[@id="pane-first"]/div/div[3]/table/tbody/tr[index]/td[4]' + + }, + + { + + 'field': '交款人', + + 'field_xpath': '//*[@id="pane-first"]/div/div[3]/table/tbody/tr[index]/td[8]' + + }, + + { + + 'field': '就诊类型', + + 'field_xpath': '//*[@id="pane-first"]/div/div[3]/table/tbody/tr[index]/td[9]' + + }, + + { + + 'field': '医保标志', + + 'field_xpath': '//*[@id="pane-first"]/div/div[3]/table/tbody/tr[index]/td[11]' + + }, + + { + + 'field': '收款人', + + 'field_xpath': '//*[@id="pane-first"]/div/div[3]/table/tbody/tr[index]/td[14]' + + }, + + { + + 'field': '开具日期', + + 'field_xpath': '//*[@id="pane-first"]/div/div[3]/table/tbody/tr[index]/td[15]' + + }, + + { + + 'field': '诊断疾病', + + 'field_xpath': '//*[@id="pane-first"]/div/div[3]/table/tbody/tr[index]/td[18]' + + }, + + { + + 'field': '票据金额', + + 'field_xpath': '//*[@id="pane-first"]/div/div[3]/table/tbody/tr[index]/td[19]' + + }, + + { + + 'field': '合理金额', + + 'field_xpath': '//*[@id="pane-first"]/div/div[3]/table/tbody/tr[index]/td[20]' + + }, + + { + + 'field': '部分自费', + + 'field_xpath': '//*[@id="pane-first"]/div/div[3]/table/tbody/tr[index]/td[21]' + + }, + + { + + 'field': '全部自费', + + 'field_xpath': '//*[@id="pane-first"]/div/div[3]/table/tbody/tr[index]/td[22]' + + }, + + { + + 'field': '统筹金额', + + 'field_xpath': '//*[@id="pane-first"]/div/div[3]/table/tbody/tr[index]/td[23]' + + }, + + { + + 'field': '第三方赔付金额', + + 'field_xpath': '//*[@id="pane-first"]/div/div[3]/table/tbody/tr[index]/td[25]' + + }, + + { + + 'field': '不合理金额', + + 'field_xpath': '//*[@id="pane-first"]/div/div[3]/table/tbody/tr[index]/td[24]' + + }, + + { + + 'field': 
'关联责任', + + 'field_xpath': '//*[@id="pane-first"]/div/div[3]/table/tbody/tr[index]/td[10]' + + }, + + { + + 'field': '票据验真', + + 'field_xpath': '//*[@id="pane-first"]/div/div[3]/table/tbody/tr[index]/td[6]' + + }, + + { + + 'field': '票据备注', + + 'field_xpath': '//*[@id="pane-first"]/div/div[3]/table/tbody/tr[index]/td[26]' + + } + + ] + + }, + + { + + 'field': '理算第一行保额', + + 'field_xpath': '//*[@id="app"]/div/div/section/main/section[1]/div[5]/div[6]/div[3]/div[3]/table/tbody/tr[1]/td[4]' + + }, + + { + + 'field': '理算金额', + + 'field_xpath': '//*[@id="app"]/div/div/section/main/section[1]/div[5]/div[6]/form/div/div[1]/div/div/input' + + }, + + { + + 'field': '公账赔付金额', + + 'field_xpath': '//*[@id="app"]/div/div/section/main/section[1]/div[5]/div[6]/form/div/div[2]/div/div/input' + + }, + + { + + 'field': '个账赔付金额', + + 'field_xpath': '//*[@id="app"]/div/div/section/main/section[1]/div[5]/div[6]/form/div/div[3]/div/div/input' + + }, + + { + + 'field': '审核结论', + + 'field_xpath': '//*[@id="app"]/div/div/section/main/section[1]/div[5]/div[8]/form/div[1]/div/div/div/div/div[1]/input' + + } + + ] + +} + +#动作组配置项(固化为登录、赔案查询和赔案审核) +actions = { + + #登录运营管理平台动作组 + 'login': [ + + #在当前标签页打开普康运营管理平台链接 + { + + 'action_type': 'open_link', + + 'object': 'https://boss.pukangpay.com.cn/login' + + }, + + #等待至标签页标题为普康运营管理平台 + { + + 'action_type': 'wait_until', + + 'expected_condition': 'browser_tab_title_is', + + 'content': '普康运营管理平台' + + }, + + #点击切换邮箱验证按钮 + { + + 'action_type': 'click', + + 'object_name': '切换邮箱验证按钮', + + 'object': '//*[@id="app"]/div/form/div[4]/button' + + }, + + #输入登录账号 + { + + 'action_type': 'input', + + 'object_name': '登录账号', + + 'object': '//*[@id="app"]/div/form/div[2]/div/div/input' + + }, + + #点击发送邮件验证码按钮 + { + + 'action_type': 'click', + + 'object_name': '发送邮件验证码按钮', + + 'object': '//*[@id="app"]/div/form/div[3]/div/button' + + }, + + #输入邮箱验证码,目前通过飞书邮箱获取邮件验证码,但是不能同时获取多个 + { + + 'action_type': 'input', + + 'object_name': '邮件验证码', + + 'object': 
'//*[@id="app"]/div/form/div[3]/div/div/input', + + 'content': 'execute: FeishuMail().get_verification_code()' + + }, + + #点击登录按钮 + { + + 'action_type': 'click', + + 'object_name': '登录按钮', + + 'object': '//*[@id="app"]/div/form/button' + + }, + + #等待至欢迎页面的面包屑为首页 + { + + 'action_type': 'wait_until', + + 'expected_condition': 'element_text_is', + + 'object_name': '欢迎页面的面包屑', + + 'object': '//*[@id="breadcrumb-container"]/span/span/span[1]/span', + + 'content': '首页' + + } + + ], + + 'cases_query': [ + + #点击顶部菜单-新TPA + { + + 'action_type': 'click', + + 'object_name': '顶部菜单-新TPA', + + 'object': '//*[@id="app"]/div/div/div[2]/div/div[2]/ul/li[3]' + + }, + + #点击左侧菜单-综合查询 + { + + 'action_type': 'click', + + 'object_name': '左侧菜单-综合查询', + + 'object': '//*[@id="app"]/div/div/div[1]/div[2]/div[1]/div/ul/div[4]/li/div' + + }, + + #点击左侧菜单-综合查询-赔案查询 + { + + 'action_type': 'click', + + 'object_name': '左侧菜单-综合查询-赔案查询', + + 'object': '//*[@id="app"]/div/div/div[1]/div[2]/div[1]/div/ul/div[4]/li/ul/div[2]' + + }, + + #等待至赔案查询页面的面包屑为赔案查询 + { + + 'action_type': 'wait_until', + + 'expected_condition': 'element_text_is', + + 'object_name': '赔案查询页面的面包屑', + + 'object': '//*[@id="app"]/div/div/div/div[1]/div[1]/span/span[3]/span[1]/span', + + 'content': '赔案查询' + + }, + + #点击更多按钮 + { + + 'action_type': 'click', + + 'object_name': '更多按钮', + + 'object': '//*[@id="indexPage"]/div/form[2]/div/div/div/button[1][contains(@class,"success")]' + + }, + + #输入保险分公司 + { + + 'action_type': 'input', + + 'object_name': '保险分公司', + + 'object': '//*[@id="indexPage"]/div/form[1]/div[12]/div/div/input[contains(@placeholder,"请输入保险分公司")]' + + }, + + #点击查询按钮 + { + + 'action_type': 'click', + + 'object_name': '查询按钮', + + 'object': '//*[@id="indexPage"]/div/form[2]/div/div/div/button[2][contains(@class,"warning")]' + + }, + + #等待至查询表格行数不为0 + { + + 'action_type': 'wait_until', + + 'expected_condition': 'table_rows_is_not_zero', + + 'object_name': '查询表格', + + 'object': '(//*[@id="indexPage"]/div[3]/table/tbody)[1]' + 
+ }, + + #点击最后分页按钮并等待至当前分页第一行唯一标识与上一分页第一行唯一标识不相同 + { + + 'action_type': 'click', + + 'object_name': '最后分页按钮', + + 'object': '(//*[@id="indexPage"]/main/section/div/ul/li[last()])[1]', + + 'first_row_identifier_xpath': '//*[@id="indexPage"]/div[3]/table/tbody/tr[1]/td[2]' + + }, + + #重复执行任务至达到预期完成任务数 + { + + 'action_type': 'repeat', + + 'object_name': '点击查看按钮,打开审核页面以获取数据', + + #行唯一标识 + 'row_identifier_xpath': '(//*[@id="indexPage"]/div[3]/table/tbody)[1]/tr[index]/td[2]', + + 'table_xpath': '(//*[@id="indexPage"]/div[3]/table/tbody)[1]', + + 'button_next_xpath': '(//*[@id="indexPage"]/main/section/div/button[1])[1]', + + 'actions': [ + + #判断当前环节,若为指定内容则跳过,否则终止后续动作 + { + + 'action_type': 'cognize', + + 'cognized_condition': 'text_is', + + 'object_name': '当前环节', + + 'object': '(//*[@id="indexPage"]/div[3]/table/tbody)[1]/tr[index]/td[7]', + + #满足预期条件时执行动作 + 'meet': 'pass', + + #不满足预期条件时执行动作 + 'otherwies': 'break' + + }, + + #点击查看按钮并并切换至新标签页 + { + + 'action_type': 'click_and_switch', + + 'object_name': ['查看按钮', '赔案审核'], + + 'object': '//*[@id="indexPage"]/div[4]/div[2]/table/tbody/tr[index]/td[19]/div/button[1]' + + }, + + #等待至审核页面的面包屑为永诚审核页面 + { + + 'action_type': 'wait_until', + + 'expected_condition': 'element_text_is', + + 'object_name': '审核页面的面包屑', + + 'object': '//*[@id="app"]/div/div/div/div[1]/div[1]/span/span[3]/span[1]/span', + + 'content': '永诚审核页面' + + }, + + #等待至审核页面的赔案号非空 + { + + 'action_type': 'wait_until', + + 'expected_condition': 'element_text_is_not', + + 'object_name': '审核页面的赔案号', + + 'object': '//*[@id="app"]/div/div/section/main/section[1]/form/div[4]/div[1]/div/div/div', + + 'content': '' + + }, + + #点击查看影像件按钮并切换至新标签页(影像件标签页) + { + + 'action_type': 'click_and_switch', + + 'object_name': ['查看影像件按钮', '影像件'], + + 'object': '//*[@id="app"]/div/div/section/main/section[1]/div[1]/div[1]/div[1]/button[3]' + + }, + + #等待至影像件页面中张数非指定内容 + { + + 'action_type': 'wait_until', + + 'expected_condition': 'element_text_is_not', + + 'object_name': '影像件页面中张数', + + 
'object': '//*[@id="app"]/div/div/section/div/div[1]/div[1]/span[3]', + + 'content': '影像共计 0 张' + + }, + + #重复执行点击发票影像件并抽取内容任务 + { + + 'action_type': 'repeat', + + 'object_name': '点击顶部菜单-发票,显示每一张影像件以获取数据', + + 'table_xpath': '//*[@id="app"]/div/div/section/div/div[3]/div[3]/div[2]', + + 'row_identifier_xpath': '//*[@id="app"]/div/div/section/div/div[3]/div[3]/div[2]/div[index]', + + 'actions': [ + + #点击发票影像件 + { + + 'action_type': 'click', + + 'object_name': '发票影像件', + + 'object': '//*[@id="app"]/div/div/section/div/div[3]/div[3]/div[2]/div[index]', + + }, + + #抽取内容 + { + + 'action_type': 'extract', + + 'extractions': [ + + { + + 'field': '票据影像件序号', + + 'field_xpath': '//*[@id="app"]/div/div/section/div/div[1]/div[1]/span[4]' + + } + + ] + + } + + ] + + }, + + #点击申请书标签 + { + + 'action_type': 'click', + + 'object_name': '申请书标签', + + 'object': '//*[@id="tab-3"]', + + }, + + #重复执行点击申请书影像件并抽取内容任务 + { + + 'action_type': 'repeat', + + 'object_name': '点击顶部菜单-申请书,显示每一张影像件以获取数据', + + 'table_xpath': '//*[@id="app"]/div/div/section/div/div[3]/div[3]/div[2]', + + 'row_identifier_xpath': '//*[@id="app"]/div/div/section/div/div[3]/div[3]/div[2]/div[index]', + + 'actions': [ + + #点击申请书影像件 + { + + 'action_type': 'click', + + 'object_name': '申请书影像件', + + 'object': '//*[@id="app"]/div/div/section/div/div[3]/div[3]/div[2]/div[index]', + + }, + + #抽取内容 + { + + 'action_type': 'extract', + + 'extractions': [ + + { + + 'field': '申请书影像件序号', + + 'field_xpath': '//*[@id="app"]/div/div/section/div/div[1]/div[1]/span[4]' + + } + + ] + + } + + ] + + }, + + #点击身份证标签 + { + + 'action_type': 'click', + + 'object_name': '身份证标签', + + 'object': '//*[@id="tab-4"]', + + }, + + #重复执行点击身份证影像件并抽取内容任务 + { + + 'action_type': 'repeat', + + 'object_name': '点击顶部菜单-身份证资料,显示每一张影像件以获取数据', + + 'table_xpath': '//*[@id="app"]/div/div/section/div/div[3]/div[3]/div[2]', + + 'row_identifier_xpath': '//*[@id="app"]/div/div/section/div/div[3]/div[3]/div[2]/div[index]', + + 'actions': [ + + #点击身份证影像件 + { + + 
'action_type': 'click', + + 'object_name': '身份证影像件', + + 'object': '//*[@id="app"]/div/div/section/div/div[3]/div[3]/div[2]/div[index]', + + }, + + #抽取内容 + { + + 'action_type': 'extract', + + 'extractions': [ + + { + + 'field': '证件影像件类型', + + 'field_xpath': '//*[@id="app"]/div/div/section/div/div[1]/div[2]/div[1]/div/div[1]/input' + + } + + ] + + } + + ] + + }, + + #点击银行卡折标签 + { + + 'action_type': 'click', + + 'object_name': '银行卡折标签', + + 'object': '//*[@id="tab-5"]', + + }, + + #重复执行点击银行卡折影像件并抽取内容任务 + { + + 'action_type': 'repeat', + + 'object_name': '点击顶部菜单-其它,显示每一张影像件以获取数据', + + 'table_xpath': '//*[@id="app"]/div/div/section/div/div[3]/div[3]/div[2]', + + 'row_identifier_xpath': '//*[@id="app"]/div/div/section/div/div[3]/div[3]/div[2]/div[index]', + + 'actions': [ + + #点击银行卡折影像件 + { + + 'action_type': 'click', + + 'object_name': '银行卡折影像件', + + 'object': '//*[@id="app"]/div/div/section/div/div[3]/div[3]/div[2]/div[index]', + + }, + + #抽取内容 + { + + 'action_type': 'extract', + + 'extractions': [ + + { + + 'field': '银行卡折影像件序号', + + 'field_xpath': '//*[@id="app"]/div/div/section/div/div[1]/div[1]/span[4]' + + } + + ] + + } + + ] + + }, + + #点击未分类标签 + { + + 'action_type': 'click', + + 'object_name': '未分类标签', + + 'object': '//*[@id="tab-6"]', + + }, + + #重复执行点击未分类影像件并抽取内容任务 + { + + 'action_type': 'repeat', + + 'object_name': '点击顶部菜单-未分类,显示每一张影像件以获取数据', + + 'table_xpath': '//*[@id="app"]/div/div/section/div/div[3]/div[3]/div[2]', + + 'row_identifier_xpath': '//*[@id="app"]/div/div/section/div/div[3]/div[3]/div[2]/div[index]', + + 'actions': [ + + #点击未分类影像件 + { + + 'action_type': 'click', + + 'object_name': '未分类影像件', + + 'object': '//*[@id="app"]/div/div/section/div/div[3]/div[3]/div[2]/div[index]', + + }, + + #抽取内容 + { + + 'action_type': 'extract', + + 'extractions': [ + + { + + 'field': '未分类影像件序号', + + 'field_xpath': '//*[@id="app"]/div/div/section/div/div[1]/div[1]/span[4]' + + } + + ] + + } + + ] + + }, + + #关闭当前标签页并切换至上一标签页 + { + + 'action_type': 'close_and_switch', 
+ + 'object_name': ['影像件', '审核页面'] + + }, + + #抽取内容 + { + + 'action_type': 'extract' + + }, + + { + + 'action_type': 'close_and_switch', + + 'object_name': ['审核页面', '赔案查询'] + + }, + + { + + 'action_type': 'repeat_finish' + + } + + ], + + 'first_row_identifier_xpath': '(//*[@id="indexPage"]/div[3]/table/tbody)[1]/tr[1]/td[2]' + + }, + + { + + 'action_type': 'finish', + + } + + ], + + 'cases_audit': [ + + #点击顶部菜单“新TPA” + { + + 'action_type': 'click', + + 'object_name': '顶部菜单-新TPA', + + 'object': '//*[@id="app"]/div/div/div[2]/div/div[2]/ul/li[3]' + + }, + + #点击左侧菜单“理赔管理” + { + + 'action_type': 'click', + + 'object_name': '左侧菜单-理赔管理', + + 'object': '//*[@id="app"]/div/div/div[1]/div[2]/div[1]/div/ul/div[3]/li/div' + + }, + + #点击左侧菜单“理赔审核管理” + { + + 'action_type': 'click', + + 'object_name': '左侧菜单-理赔管理-赔案审核管理', + + 'object': '//*[@id="app"]/div/div/div[1]/div[2]/div[1]/div/ul/div[3]/li/ul/div[6]' + + }, + + #等待至赔案审核管理页面的面包屑为赔案审核管理 + { + + 'action_type': 'wait_until', + + 'expected_condition': 'element_text_is', + + 'object_name': '赔案审核管理页面的面包屑', + + 'object': '//*[@id="app"]/div/div/div/div[1]/div[1]/span/span[3]/span[1]/span', + + 'content': '赔案审核管理' + + }, + + #等待至查询表格行数不为0 + { + + 'action_type': 'wait_until', + + 'expected_condition': 'table_rows_is_not_zero', + + 'object_name': '查询表格', + + 'object': '(//*[@id="indexPage"]/div[3]/table/tbody)[1]' + + }, + + #重复动作组 + { + + 'action_type': 'repeat', + + 'object_name': '自动审核', + + 'droplist_more_xpaths': ['//*[@id="indexPage"]/main/section/div/span[2]/div/div[1]/input', '/html/body/div[4]/div[1]/div[1]/ul/li[5]'], + + 'row_xpath': '//*[@id="indexPage"]/div[3]/table/tbody/tr[index]', + + 'row_identifier_xpath': '//*[@id="indexPage"]/div[4]/div[2]/table/tbody/tr[1]/td[4]', + + 'table_xpath': '(//*[@id="indexPage"]/div[3]/table/tbody)[1]', + + 'actions': [ + + #点击审核按钮并切换至审核页面 + { + + 'action_type': 'click_and_switch', + + 'object_name': ['审核按钮', '审核页面'], + + 'object': 
'//*[@id="indexPage"]/div[5]/div[2]/table/tbody/tr[index]/td[20]/div/button', + + }, + + #等待至审核页面的面包屑为永诚审核页面 + { + + 'action_type': 'wait_until', + + 'expected_condition': 'element_text_is', + + 'object_name': '审核页面的面包屑', + + 'object': '//*[@id="app"]/div/div/div/div[1]/div[1]/span/span[3]/span[1]/span' + + }, + + #等待至审核页面中赔案号加载完成 + { + + 'action_type': 'wait_until', + + 'expected_condition': 'element_text_is_loaded', + + 'object_name': '赔案号' + + }, + + #抽取保单信息和票据信息用于选择保单,并将不在保期内发票的不合理金额调整为与票据金额相同 + { + + 'action_type': 'extract' + + }, + + #判断是否需要修改发票,若需要则点击修改信息按钮,遍历发票将需要修改的发票,点击修改按钮将不合理金额修改为和发票金额相同并点击确定按钮,点击保存按钮并点击确定按钮,否则跳过 + { + + 'action_type': 'cognize', + + 'cognized_condition': 'auto_audit' + + }, + + { + + 'action_type': 'close_and_switch', + + 'object_name': ['审核页面', '赔案查询'] + + }, + + { + + 'action_type': 'repeat_finish' + + } + + ] + + }, + + { + + 'action_type': 'finish', + + } + + ], + + #适用于瑞泰(oldmutual-chnenergy.com,耆卫保险和国家能源合资),考虑审核规则中无需用到影像件相关数据,所以去掉查看影像件相关动作 + 'cases_audit_oldmutual_chnenergy': [ + + { + + 'action_type': 'click', + + 'object_name': '新TPA', + + 'object': '//*[@id="app"]/div/div/div[2]/div/div[2]/ul/li[3]' + + }, + + { + + 'action_type': 'click', + + 'object_name': '理赔管理', + + 'object': '//*[@id="app"]/div/div/div[1]/div[2]/div[1]/div/ul/div[3]/li/div' + + }, + + { + + 'action_type': 'click', + + 'object_name': '赔案审核管理', + + 'object': '//*[@id="app"]/div/div/div[1]/div[2]/div[1]/div/ul/div[3]/li/ul/div[7]' + + }, + + { + + 'action_type': 'wait_until', + + 'expected_condition': 'element_text_is', + + 'object_name': '赔案审核管理页面的面包屑', + + 'object': '//*[@id="app"]/div/div/div/div[1]/div[1]/span/span[3]/span[1]/span', + + 'content': '赔案审核管理' + + }, + + { + + 'action_type': 'wait_until', + + 'expected_condition': 'table_rows_is_not_zero', + + 'object_name': '待审核赔案表格', + + 'object': '(//*[@id="indexPage"]/div[3]/table/tbody)[1]' + + }, + + { + + 'action_type': 'repeat', + + 'object_name': '自动审核', + + 'droplist_more_xpaths': 
class PKRobot(PageObject):
    """Page object wrapping the Pukang Health operations-management platform.

    Each public method patches the relevant action group of the module-level
    ``actions`` configuration with run-specific values (account, branch,
    XPaths, extraction rules) and then hands the group to ``self.translator``.

    NOTE: the action groups are patched *in place*, exactly as before, so
    values written by one call remain visible to later calls in the same
    process.
    """

    # Head-office names as they appear in the platform and in the job input.
    _INSURER_BOC = '中银保险有限公司'
    _INSURER_YONGCHENG = '永诚财产保险股份有限公司'
    _INSURER_RUITAI = '瑞泰人寿保险有限公司'

    # Head office -> login account typed into the login form.
    _LOGIN_ACCOUNTS = {
        _INSURER_BOC: 'AutomationTech001',
        _INSURER_YONGCHENG: 'AutomationTech002',
        _INSURER_RUITAI: 'AutomationTech003',
    }

    # Head office -> breadcrumb text expected on the audit page.
    _AUDIT_BREADCRUMBS = {
        _INSURER_BOC: '永诚审核页面',
        _INSURER_YONGCHENG: '永诚审核页面',
        _INSURER_RUITAI: '瑞泰审核',
    }

    # Head office -> key into the module-level ``extractions`` mapping.
    _EXTRACTION_PAGES = {
        _INSURER_BOC: '永诚审核页面',
        _INSURER_YONGCHENG: '永诚审核页面',
        _INSURER_RUITAI: '瑞泰审核页面',
    }

    # Head office -> XPath of the "view images" button on the audit page.
    _IMAGE_BUTTON_XPATHS = {
        _INSURER_BOC: '//*[@id="app"]/div/div/section/main/section[1]/div[1]/div[1]/div[1]/button[4]',
        _INSURER_YONGCHENG: '//*[@id="app"]/div/div/section/main/section[1]/div[1]/div[1]/div[1]/button[4]',
        _INSURER_RUITAI: '//*[@id="app"]/div/div/section/main/section[1]/div[1]/div[1]/div[1]/a[2]/button',
    }

    # XPath of the case number on the audit page (identical for all insurers).
    _CASE_NUMBER_XPATH = '//*[@id="app"]/div/div/section/main/section[1]/form/div[4]/div[1]/div/div/div'

    # Head office -> 1-based <td> positions inside the invoice table.
    # 'operations' is the column holding both the modify and confirm buttons.
    _INVOICE_COLUMNS = {
        _INSURER_BOC: {
            'identifier': 4, 'reasonable': 19, 'part_self': 20, 'all_self': 21,
            'unreasonable': 23, 'remark': 27, 'operations': 29,
        },
        _INSURER_YONGCHENG: {
            'identifier': 2, 'reasonable': 19, 'part_self': 20, 'all_self': 21,
            'unreasonable': 23, 'remark': 27, 'operations': 28,
        },
        _INSURER_RUITAI: {
            'identifier': 2, 'reasonable': 20, 'part_self': 21, 'all_self': 22,
            'unreasonable': 24, 'remark': 26, 'operations': 30,
        },
    }

    @staticmethod
    def _for_insurer(table, insurance):
        """Return ``table[insurance]`` or raise the 'head office undefined' error."""
        try:
            return table[insurance]
        except (KeyError, TypeError):
            raise Exception('保险总公司未定义') from None

    @classmethod
    def _head_office_of(cls, insurance_branch):
        """Return the head office whose name is contained in *insurance_branch*.

        Raises:
            Exception: if the branch name contains none of the known head offices.
        """
        for head_office in cls._LOGIN_ACCOUNTS:
            if head_office in insurance_branch:
                return head_office
        raise Exception('保险分公司未定义')

    @staticmethod
    def _invoice_table_xpaths(columns):
        """Build the invoice-table XPaths from the per-insurer column positions."""
        cell = '//*[@id="pane-first"]/div/div[3]/table/tbody/tr[index]/td[{}]'
        amount_input = '/div/div/div/input'
        return {
            # Field: invoice sequence number (unique row identifier)
            'field_invoice_identifier_xpath': cell.format(columns['identifier']),
            # Field: reasonable amount
            'field_reasonable_amounts_xpath': cell.format(columns['reasonable']),
            # Field: partially self-paid amount
            'field_part_self_amounts_xpath': cell.format(columns['part_self']),
            # Field: fully self-paid amount
            'field_all_self_amounts_xpath': cell.format(columns['all_self']),
            # Button: modify (per invoice row)
            'button_invoice_modify_xpath': cell.format(columns['operations']) + '/div/button',
            # Input: partially self-paid amount
            'input_invoice_part_self_xpath': cell.format(columns['part_self']) + amount_input,
            # Input: fully self-paid amount
            'input_invoice_all_self_xpath': cell.format(columns['all_self']) + amount_input,
            # Input: unreasonable amount
            'input_invoice_unreasonable_xpath': cell.format(columns['unreasonable']) + amount_input,
            # Input: invoice remark
            'input_invoice_remark_xpath': cell.format(columns['remark']) + '/div/div/textarea',
            # Button: confirm (per invoice row)
            'button_invoice_confirm_xpath': cell.format(columns['operations']) + '/div/button[1]',
        }

    @classmethod
    def _auto_audit_settings(cls, insurance):
        """Return the full settings dict merged into the ``auto_audit`` action.

        A fresh dict (with fresh lists) is built on every call so the action
        configuration never shares mutable objects with class-level tables.

        Raises:
            Exception: if *insurance* is not a known head office.
        """
        columns = cls._for_insurer(cls._INVOICE_COLUMNS, insurance)

        # Settings shared by every insurer.
        settings = {
            'insurance': insurance,
            # Field: case number
            'field_case_number_xpath': cls._CASE_NUMBER_XPATH,
            # Button: modify information
            'button_modify_xpath': '//*[@id="app"]/div/div/section/main/section[1]/div[5]/div[8]/p/span[2]/button',
            # Table: invoices
            'table_invoices_xpath': '//*[@id="pane-first"]/div/div[3]/table/tbody',
            # Button: save
            'button_save_xpath': '//*[@id="app"]/div/div/section/main/section[1]/div[5]/div[8]/p/span[1]/button',
            # Button: confirm modification
            'button_confirm_xpath': '//*[@id="app"]/div/div/section/main/section[1]/div[5]/div[8]/p/span[2]/button',
            # Checkbox: select policy
            'checkbox_select_xpath': '//*[@id="app"]/div/div/section/main/section[1]/div[4]/div[3]/table/tbody/tr[index]/td[1]/div/label',
            # Toast: policy selected / adjustment succeeded
            'toast_select': '/html/body/div[5]/p',
            # Text: stipulation dialog title
            'text_stipulation_xpath': '//*[@id="specialTemplate"]/div/div/div[2]/div/form/div[1]',
            # Button: close stipulation dialog
            'button_close_stipulation_xpath': '//*[@id="specialTemplate"]/div/div/div[3]/div/button',
            # Button: adjust
            'button_adjust_xpath': '//*[@id="app"]/div/div/section/main/section[1]/div[5]/div[6]/button[1]',
            # Table: adjustment
            'table_adjustment_xpath': '//*[@id="app"]/div/div/section/main/section[1]/div[5]/div[6]/div[3]/div[3]/table/tbody',
            # Text: "payer differs from insured" caution title
            'text_caution_xpath': '/html/body/div/div/div[1]/div/span',
            # Button: close the caution dialog
            'button_close_caution_xpath': '/html/body/div/div/div[3]/button[1]',
            # Drop-down: claim conclusion when paying
            'droplist_pay_xpaths': [
                '//*[@id="app"]/div/div/section/main/section[1]/div[5]/div[8]/form/div[1]/div/div/div/div',
                '/html/body/div[4]/div[1]/div[1]/ul/li[1]',
            ],
            # Drop-down: claim conclusion when refusing
            'droplist_refuse_xpaths': [
                '//*[@id="app"]/div/div/section/main/section[1]/div[5]/div[8]/form/div[1]/div/div/div/div',
                '/html/body/div[last()]/div[1]/div[1]/ul/li[2]',
            ],
            # Textarea: conclusion reason
            'textarea_refuse_remark_xpath': '//*[@id="app"]/div/div/section/main/section[1]/div[5]/div[8]/form/div[2]/div/div/div/div/textarea',
            # Text: "outside coverage period" confirmation dialog title
            'text_without_assurance_xpath': '//*[@id="app"]/div/div/section/main/div[1]/div/div[1]/span',
            # Button: confirm in the "outside coverage period" dialog
            # (the key's spelling is part of the runtime contract; do not "fix" it)
            'button_confrim_without_assurance_xpath': '//*[@id="app"]/div/div/section/main/div[1]/div/div[3]/button[2]',
            # Text: prompt dialog title
            'text_prompt_xpath': '//*[@id="errorMessage"]/div/div/div[1]/span',
            # Button: close prompt dialog
            'button_close_prompt_xpath': '//*[@id="errorMessage"]/div/div/div[3]/div/button[1]',
        }
        settings.update(cls._invoice_table_xpaths(columns))

        # Insurer-specific additions and overrides.
        if insurance == cls._INSURER_BOC:
            settings.update({
                # Toast: report succeeded
                'toast_report': '/html/body/div[8]/p',
                # Field: insured amount in the first adjustment row
                'field_insurance_amount_xpath': '//*[@id="app"]/div/div/section/main/section[1]/div[5]/div[6]/div[3]/div[3]/table/tbody/tr/td[4]/div',
                # Button: approve
                'button_audit_xpath': '//*[@id="app"]/div/div/section/main/section[1]/div[5]/div[8]/p/span[1]/button',
                # Text: "invoice linked images" dialog title
                'text_prompt_invoices_xpath': '/html/body/div[5]/div/div[1]/div/span',
            })
        elif insurance == cls._INSURER_YONGCHENG:
            settings.update({
                # Button: report
                'button_report_xpath': '//*[@id="app"]/div/div/section/main/section[1]/div[5]/div[6]/button[3]',
                # Button: confirm report
                'button_report_confirm_xpath': '//*[@id="app"]/div/div/section/main/section[14]/div/div/div[3]/div/button[1]',
                # Toast: report succeeded
                'toast_report': '/html/body/div[8]/p',
                # Field: insured amount in the first adjustment row
                'field_insurance_amount_xpath': '//*[@id="app"]/div/div/section/main/section[1]/div[5]/div[6]/div[3]/div[3]/table/tbody/tr/td[4]/div',
                # Button: approve
                'button_audit_xpath': '//*[@id="app"]/div/div/section/main/section[1]/div[5]/div[8]/p/span[1]/button[2]',
                # Toast: audit succeeded
                'toast_audit': '/html/body/div[6]/p',
            })
        else:
            # Only the third known insurer can reach this branch: unknown
            # names were rejected by the column lookup above.
            settings.update({
                # Table: policies
                'table_slips_xpath': '//*[@id="app"]/div/div/section/main/section[1]/div[4]/div[3]/table/tbody',
                # Drop-down: modification reason
                'droplist_modify_xpath': [
                    '//*[@id="app"]/div/div/section/main/section[5]/div/div/div[2]/div/form/div[1]/div/div/div/input',
                    '/html/body/div[5]/div[1]/div/div[1]/ul/li[1]',
                    '/html/body/div[5]/div[1]/div[2]/div[1]/ul/li[1]',
                ],
                # Textarea: modification reason
                'textarea_modify_xpath': '//*[@id="app"]/div/div/section/main/section[5]/div/div/div[2]/div/form/div[2]/div/div/textarea',
                # Button: confirm modification reason
                'button_modification_confirm_xpath': '//*[@id="app"]/div/div/section/main/section[5]/div/div/div[3]/div/button[1]',
                # Button: approve
                'button_audit_xpath': '//*[@id="app"]/div/div/section/main/section[1]/div[5]/div[8]/p/span[1]/button[2]',
                # Toast: audit succeeded
                'toast_audit': '/html/body/div[6]/p',
            })

        # Extraction rules for the audit page. Resolved here rather than
        # borrowed from a previously visited 'extract' action, so this also
        # works when that action was already configured by an earlier call.
        settings['extractions'] = extractions.get(cls._EXTRACTION_PAGES[insurance])

        return settings

    def _configure_audit_step(self, nested_action, insurance):
        """Patch one nested action of the audit repeat-group for *insurance*.

        Raises:
            Exception: if the step needs insurer-specific data and *insurance*
                is not a known head office.
        """
        kind = nested_action.get('action_type')
        name = nested_action.get('object_name')

        # Expected breadcrumb text on the audit page.
        if kind == 'wait_until' and name == '审核页面的面包屑':
            nested_action.update({'content': self._for_insurer(self._AUDIT_BREADCRUMBS, insurance)})

        # Locator of the case number that signals the page has loaded.
        if kind == 'wait_until' and name == '赔案号':
            self._for_insurer(self._LOGIN_ACCOUNTS, insurance)  # reject unknown insurers
            nested_action.update({'object': self._CASE_NUMBER_XPATH})

        # Locator of the "view images" button.
        if kind == 'click_and_switch' and name == ['查看影像件按钮', '影像件']:
            nested_action.update({'object': self._for_insurer(self._IMAGE_BUTTON_XPATHS, insurance)})

        # Extraction rules, only when the step does not define its own.
        if kind == 'extract' and nested_action.get('extractions') is None:
            page = self._for_insurer(self._EXTRACTION_PAGES, insurance)
            nested_action.update({'extractions': extractions.get(page)})

        # Locators and extraction rules consumed by the auto-audit step.
        if kind == 'cognize' and nested_action.get('cognized_condition') == 'auto_audit':
            nested_action.update(self._auto_audit_settings(insurance))

    # Log in to the operations-management platform
    @fetch_exception
    def login(self, insurance):
        """Log in with the account assigned to the head office *insurance*.

        Raises:
            Exception: if *insurance* is not a known head office.
            AssertionError: if the action translator does not report 'success'.
        """
        logger.info('正在登录运营管理平台')

        account = self._for_insurer(self._LOGIN_ACCOUNTS, insurance)

        login_actions = actions.get('login')

        # Inject the account into the "login account" input step.
        for action in login_actions:
            if action.get('action_type') == 'input' and action.get('object_name') == '登录账号':
                action.update({'content': account})

        # Explicit raise instead of `assert`: the translator call has side
        # effects and must still run (and be checked) under `python -O`.
        if self.translator(actions=login_actions) != 'success':
            raise AssertionError('登录时发生异常,程序终止')

        logger.info('登录成功')

    # Open the case query page and collect case data
    @fetch_exception
    def cases_query(self, expected_tasks, insurance_branch, expected_status):
        """Query cases of *insurance_branch* and extract their data.

        Args:
            expected_tasks: number of cases to process; an int >= 1.
            insurance_branch: branch name typed into the query form; it must
                contain the name of a known head office.
            expected_status: workflow step a case must be in to be processed.

        Raises:
            AssertionError: if *expected_tasks* is invalid or the translator
                returns a falsy result.
            Exception: if the branch matches no known head office.
        """
        if not (isinstance(expected_tasks, int) and expected_tasks >= 1):
            raise AssertionError('expected_tasks data type must be int and greater than 0')

        # The head office drives both the login account and the extraction
        # rules. It used to be read from an undefined name `insurance`.
        insurance = self._head_office_of(insurance_branch)
        page_extractions = extractions.get(self._EXTRACTION_PAGES[insurance])

        cases_query_actions = actions.get('cases_query')

        for action in cases_query_actions:
            kind = action.get('action_type')

            # Branch name typed into the query form.
            if kind == 'input' and action.get('object_name') == '保险分公司':
                action.update({'content': insurance_branch})

            # Only the paginated repeat-group (the one with a "next" button)
            # carries the per-case steps.
            next_button = action.get('button_next_xpath')
            if kind == 'repeat' and isinstance(next_button, str) and next_button != '':
                action.update({'expected_tasks': expected_tasks})

                for nested_action in action.get('actions'):
                    nested_kind = nested_action.get('action_type')

                    # Workflow step the case must currently be in.
                    if nested_kind == 'cognize' and nested_action.get('object_name') == '当前环节':
                        nested_action.update({'content': expected_status})

                    # Extraction rules, only when the step defines none.
                    if nested_kind == 'extract' and nested_action.get('extractions') is None:
                        nested_action.update({'extractions': page_extractions})

        self.login(insurance=insurance)

        logger.info('正在打开赔案查询获取赔案数据')

        # NOTE(review): login() compares the translator result with 'success'
        # while this check only requires a truthy value; kept as-is because
        # the translator's return contract is not visible here.
        if not self.translator(actions=cases_query_actions):
            raise AssertionError('获取赔案数据时发生异常')

        logger.info('获取赔案数据成功')

    # Automatically audit cases for a head office
    def cases_audit(self, expected_index, expected_tasks, insurance):
        """Run the automatic audit for the head office *insurance*.

        Args:
            expected_index: stored as 'expected_index' on the repeat-group.
            expected_tasks: stored as 'expected_tasks' on the repeat-group.
            insurance: head-office name; selects the action group, the
                locators and the extraction rules.

        Raises:
            Exception: if *insurance* is not a known head office.
            AssertionError: if the translator returns a falsy result.
        """
        # NOTE(review): unlike login() and cases_query(), this method is not
        # wrapped in @fetch_exception; left unchanged.
        if insurance == self._INSURER_RUITAI:
            cases_audit_actions = actions.get('cases_audit_oldmutual_chnenergy')
        else:
            cases_audit_actions = actions.get('cases_audit')

        for action in cases_audit_actions:
            if action.get('action_type') != 'repeat':
                continue

            action.update({'expected_index': expected_index})
            action.update({'expected_tasks': expected_tasks})

            for nested_action in action.get('actions'):
                self._configure_audit_step(nested_action, insurance)

        self.login(insurance=insurance)

        logger.info('正在打开赔案审核管理审核')

        # Explicit raise instead of `assert` so the run is not skipped under -O.
        if not self.translator(actions=cases_audit_actions):
            raise AssertionError('自动审核时发生异常')

        logger.info('自动审核成功')
input('请选择保险分公司(1中银保险有限公司广州分公司):'): + + case '1': + + expected_insurance_branch = '中银保险有限公司广州分公司' + + case default: + + logger.info('所输入保险分公司未定义,脚本终止') + + expected_tasks = int(input('请输入预期完成任务数:')) + + robot.cases_query(expected_tasks = expected_tasks, expected_insurance_branch = expected_insurance_branch, expected_status = '推送') + + #自动审核中银保赔案 + case '2': + + expected_index = int(input('请输入预期索引:')) + + robot.cases_audit(expected_index = expected_index, expected_tasks = 200, insurance = '中银保险有限公司') + + #获取永诚赔案数据 + case '3': + + match input('请选择保险分公司(1永诚财产保险股份有限公司黑龙江分公司):'): + + case '1': + + expected_insurance_branch = '永诚财产保险股份有限公司黑龙江分公司' + + case default: + + logger.info('所输入保险分公司未定义,脚本终止') + + expected_tasks = int(input('请输入预期完成任务数:')) + + robot.cases_query(expected_tasks = expected_tasks, expected_insurance_branch = expected_insurance_branch, expected_status = '推送') + + #自动审核永诚赔案 + case '4': + + expected_index = int(input('请输入预期索引:')) + + robot.cases_audit(expected_index = expected_index, expected_tasks = 200, insurance = '永诚财产保险股份有限公司') + + #自动审核瑞泰赔案 + case '6': + + expected_index = int(input('请输入预期索引:')) + + robot.cases_audit(expected_index = expected_index, expected_tasks = 200, insurance = '瑞泰人寿保险有限公司') + + case default: + + logger.info('所输入作业类型未定义,脚本终止') + + except Exception as e: + + print(e) + + logger.info('发生异常,脚本终止') + + + + +''' + + #点击查看影像件按钮并切换至影像件 + { + + 'action_type': 'click_and_switch', + + 'object_name': ['查看影像件按钮', '影像件'] + + }, + + { + + 'action_type': 'wait_until', + + 'expected_condition': 'element_text_is_not', + + 'object_name': '影像件页面中张数', + + 'object': '//*[@id="app"]/div/div/section/div/div[1]/div[1]/span[3]', + + 'content': '影像共计 0 张' + + }, + + { + + 'action_type': 'repeat', + + 'object_name': '点击顶部菜单-发票,显示每一张影像件以获取数据', + + 'table_xpath': '//*[@id="app"]/div/div/section/div/div[3]/div[3]/div[2]', + + 'row_identifier_xpath': '//*[@id="app"]/div/div/section/div/div[3]/div[3]/div[2]/div[index]', + + 'actions': [ + + { + + 
'action_type': 'click', + + 'object_name': '发票影像件', + + 'object': '//*[@id="app"]/div/div/section/div/div[3]/div[3]/div[2]/div[index]' + + }, + + { + + + 'action_type': 'extract', + + 'extractions': [ + + { + + 'field': '票据影像件序号', + + 'field_xpath': '//*[@id="app"]/div/div/section/div/div[1]/div[1]/span[4]' + + } + + ] + + } + + ] + + }, + + { + + + 'action_type': 'click', + + 'object_name': '申请书标签', + + 'object': '//*[@id="tab-3"]', + + }, + + { + + + 'action_type': 'repeat', + + 'object_name': '点击顶部菜单-申请书,显示每一张影像件以获取数据', + + 'table_xpath': '//*[@id="app"]/div/div/section/div/div[3]/div[3]/div[2]', + + 'row_identifier_xpath': '//*[@id="app"]/div/div/section/div/div[3]/div[3]/div[2]/div[index]', + + 'actions': [ + + { + + + 'action_type': 'click', + + 'object_name': '申请书影像件', + + 'object': '//*[@id="app"]/div/div/section/div/div[3]/div[3]/div[2]/div[index]', + + }, + + { + + 'action_type': 'extract', + + 'extractions': [ + + { + + 'field': '申请书影像件序号', + + 'field_xpath': '//*[@id="app"]/div/div/section/div/div[1]/div[1]/span[4]' + + } + + ] + + } + + ] + + }, + + { + + 'action_type': 'click', + + 'object_name': '身份证资料标签', + + 'object': '//*[@id="tab-4"]', + + }, + + { + + 'action_type': 'repeat', + + 'object_name': '点击顶部菜单-身份证资料,显示每一张影像件以获取数据', + + 'table_xpath': '//*[@id="app"]/div/div/section/div/div[3]/div[3]/div[2]', + + 'row_identifier_xpath': '//*[@id="app"]/div/div/section/div/div[3]/div[3]/div[2]/div[index]', + + 'actions': [ + + { + + 'action_type': 'click', + + 'object_name': '身份证影像件', + + 'object': '//*[@id="app"]/div/div/section/div/div[3]/div[3]/div[2]/div[index]', + + }, + + { + + 'action_type': 'extract', + + 'extractions': [ + + { + + 'field': '证件影像件类型', + + 'field_xpath': '//*[@id="app"]/div/div/section/div/div[1]/div[2]/div[1]/div/div[1]/input' + + } + + ] + + } + + ] + + }, + + { + + 'action_type': 'click', + + 'object_name': '未分类标签', + + 'object': '//*[@id="tab-6"]', + + }, + + { + + 'action_type': 'repeat', + + 'object_name': 
def transform(rules):
    """Return the comma-joined rule codes of every rule that did not succeed.

    ``rules`` is expected to be the text of a list of dicts, each with a
    ``success`` flag and a ``ruleCode``, written with lowercase ``true`` /
    ``false``. A value that parses to something other than a list yields an
    empty string. If the value cannot be processed (not a string, not a
    parsable literal, or an element without ``.get``), ``'failure'`` is
    appended to whatever codes were collected so far.
    """

    # Local import keeps this block self-contained.
    import ast

    result = []

    try:

        # Rewrite the lowercase booleans so the text becomes a Python literal.
        rules = rules.replace('true', 'True').replace('false', 'False')

        # literal_eval only accepts literals, so cell content can never run
        # arbitrary code the way eval() could.
        rules = ast.literal_eval(rules)

        # Only a list of rule results is inspected.
        if isinstance(rules, list):

            for rule in rules:

                # A rule that did not succeed contributes its rule code.
                if not rule.get('success'):

                    result.append(rule.get('ruleCode'))

    except Exception:

        result.append('failure')

    return ','.join(result)
= dataset.apply(func = lambda x: '是' if x['预期进入自动审核'] == '是' and x['审核员'] == '自动审核' else ('否' if x['预期进入自动审核'] == '是' and x['审核员'] != '自动审核' else ''), axis = 'columns') + +dataset = dataset[['赔案号', '质检完成时间', '预期进入自动审核', '自动审核通过复核', '清洗后命否规则']] + +#数据集透视 +pivot_table = dataset.groupby(by = '质检完成时间').agg(质检完成赔案数 = pandas.NamedAgg(column = '质检完成时间', aggfunc = lambda x: pandas.notna(x).sum()), 预期进入自动审核赔案数 = pandas.NamedAgg(column = '预期进入自动审核', aggfunc = lambda x: sum(x == '是')), 自动审核通过复核赔案数 = pandas.NamedAgg(column = '自动审核通过复核', aggfunc = lambda x: sum(x == '是'))) + +#按照质检完成时间倒序排序 +pivot_table.sort_values(by = '质检完成时间', ascending = False, inplace = True) + +#重置索引 +pivot_table.reset_index(inplace = True) + +#计算预期进入自动化审核率 +pivot_table['预期进入自动审核率'] = pivot_table.apply(lambda x: round(x['预期进入自动审核赔案数'] / x['质检完成赔案数'] * 100, 2) if x['质检完成赔案数'] != 0 else None, axis = 'columns') + +#计算预期进入自动化审核率 +pivot_table['自动审核复核通过率'] = pivot_table.apply(lambda x: round(x['自动审核通过复核赔案数'] / x['预期进入自动审核赔案数'] * 100, 2) if x['预期进入自动审核赔案数'] != 0 else None, axis = 'columns') + +#添加汇总行 +pivot_table.loc[pivot_table.shape[0]] = ['汇总', pivot_table['质检完成赔案数'].sum(), pivot_table['预期进入自动审核赔案数'].sum(), pivot_table['自动审核通过复核赔案数'].sum(), round(pivot_table['预期进入自动审核赔案数'].sum() / pivot_table['质检完成赔案数'].sum() * 100, 2), round(pivot_table['自动审核通过复核赔案数'].sum() / pivot_table['预期进入自动审核赔案数'].sum() * 100, 2)] + +print('已完成') +print() + +print('正在将数据保存为EXCEL...', end = '') + +SaveAsExcel(worksheets = [['按照质检完成时间汇总', pivot_table], ['明细数据', dataset]], file_path = '普康健康_自动审核统计数据.xlsx') + +print('已完成') +print() + +''' + +修改记录 + +1、241220 就数据转换类型进行调整,使用封装函数保存为EXCEL,另外处理步骤参考抽取、转换和加载 + +''' \ No newline at end of file diff --git a/普康健康影像处理/main.py b/普康健康影像处理/main.py new file mode 100644 index 0000000..c4f501f --- /dev/null +++ b/普康健康影像处理/main.py @@ -0,0 +1,76 @@ +# -*- coding: utf-8 -*- + +""" +普康健康_影像处理 +""" + +# 加载模块 + +from pathlib import Path + +import zipfile + +from utils.pandas_extension import open_csv, 
save_as_workbook + + +if __name__ == "__main__": + + print("正在本地加载待处理影像信息...", end="") + + dataframe = open_csv(file_name="待处理影像信息.csv") + + dataframe["处理结果"] = "待处理" + + # 统计行数 + rows = dataframe.shape[0] + + print(f"已完成,行数为 {rows}") + + print("正在处理影像件...", end="") + + for index, row in dataframe.iterrows(): + + # 推送月份 + push_month = row["推送月份"] + + # 影像件名称 + image_name = row["影像件名称"] + + # 推送山东国网影像件名称 + push_name = row["推送山东国网影像件名称"] + + # 压缩包名称 + zip_name = row["压缩包名称"] + + # 处理结果 + result = "发生其它异常" + + # 创建影像件PATH对象 + image = Path(f"images/{push_month}/{image_name}") + + # 若影像件PATH对象不存在则处理结果为“未找到影像件” + if not image.exists(): + + result = "未找到影像件" + + continue + + # ZIP压缩 + with zipfile.ZipFile(file=f"zips/{zip_name}", mode='w', compression=zipfile.ZIP_DEFLATED) as file: + + # 压缩包内影像件名称为推送山东国网影像件名称 + file.write(image, arcname=push_name) # 指定在压缩包内影像件名称 + + result = "处理成功" + + dataframe.loc[index, "处理结果"] = result + + print("已完成") + + print("正在保存为工作簿...", end="") + + save_as_workbook(worksheets=[("Sheet1", dataframe)], workbook_name="results.xlsx") + + print("已完成") + + diff --git a/普康健康自动化录入/main.py b/普康健康自动化录入/main.py new file mode 100644 index 0000000..c7fe829 --- /dev/null +++ b/普康健康自动化录入/main.py @@ -0,0 +1,1567 @@ +# -*- coding: utf-8 -*- + +""" +普康健康_自动化录入 + +--优先使用深圳快瞳,就增值税发票、医疗发票优先使用深圳快瞳票据查验、其次使用深圳快瞳票据识别,最后使用本地识别 +--优先考虑增值税发票 + +""" +import hashlib +import json +import re +import uuid +from base64 import b64encode +from datetime import datetime +from decimal import Decimal, ROUND_HALF_UP +from pathlib import Path + +import cv2 +import numpy +import pandas +from dateutil.parser import parse +from jinja2 import Environment, FileSystemLoader +from jionlp import parse_location +from zen import ZenDecision, ZenEngine + +from utils.client import Authenticator, HTTPClient +from utils.ocr import fuzzy_match + + +# ------------------------- +# 工具函数 +# ------------------------- + + +def images_compression(**kwargs) -> tuple[str | None, str | None]: + 
"""影像件压缩并BASE64编码""" + + # 影像件格式 + image_format = kwargs.get("image_format", globals()["image_format"]) + if image_format is None: + return None, None + + # 影像件 + image = kwargs.get("image", globals()["image"]) + if image is None: + return None, None + + # 编码为图像字节流 + success, image_bytes = cv2.imencode(ext=f".{image_format}", img=image) + # 若发生异常则返回NONE + if not success or image_bytes is None: + return None, None + + # 生成影像件唯一标识 + image_guid = hashlib.md5(image_bytes.tobytes()).hexdigest().upper() + + # BASE64编码 + image_base64 = b64encode(image_bytes.tobytes()).decode("utf-8") + + # 将指定影像件大小单位由MB转为B + image_size_specified = kwargs.get("image_size_specified", 2) * 1048576 + + # 若影像件BASE64编码后大小小于指定影像件大小则返回 + if len(image_base64) <= image_size_specified: + return image_guid, image_base64 + + # 双循环压缩影像件 + # 外循环压缩:通过降低影像件质量实现压缩影像件大小 + for quality in range(90, 0, -10): + + image_copy = image.copy() + + # 内循环压缩:通过等比例调整影像件尺寸实现压缩影像件大小 + for i in range(25): + + success, image_bytes = cv2.imencode( + ext=f".{image_format}", + img=image_copy, + params=( + [cv2.IMWRITE_PNG_COMPRESSION, 10 - quality // 10] + if image_format == "png" + else [cv2.IMWRITE_JPEG_QUALITY, quality] + ), + ) + if not success or image_bytes is None: + break + + image_base64 = b64encode(image_bytes.tobytes()).decode("utf-8") + if len(image_base64) <= image_size_specified: + return image_guid, image_base64 + + # 调整影像件尺寸 + image_copy = cv2.resize( + src=image_copy, + dsize=(int(image_copy.shape[0] * 0.9), int(image_copy.shape[1] * 0.9)), + interpolation=cv2.INTER_AREA, + ) + + # 若调整后影像件尺寸中长或宽小于200像素则停止调整影像件尺寸 + if min(image_copy.shape[:2]) < 200: + break + + # 若仍未压缩至指定影像件大小则返回NONE + return None, None + + +def images_classification(**kwargs) -> tuple[str | None, str | None]: + """影像件分类""" + + # 影像件全局唯一标识:优先使用关键词变量,其次使用全局变量,再次使用随机唯一标识 + image_guid = kwargs.get( + "image_guid", globals().get("image_guid", uuid.uuid4().hex.upper()) + ) + + # 影像件格式 + image_format = kwargs.get("image_format", 
globals()["image_format"]) + if image_format is None: + return None, None + + # 影像件BASE64编码 + image_base64 = kwargs.get("image_base64", globals()["image_base64"]) + if image_base64 is None: + return None, None + + # 请求深圳快瞳影像件分类接口 + response = globals()["http_client"].post( + url=(url := "https://ai.inspirvision.cn/s/api/ocr/genalClassify"), + # 用于和深圳快瞳联查定位 + headers={"X-RequestId-Header": image_guid}, + data={ + "token": globals()["authenticator"].get_token( + servicer="szkt" + ), # 使用全局变量 + "imgBase64": f"data:image/{image_format};base64,{image_base64}", # 深圳快瞳要求修饰影像件BASE64编码的DATAURI + }, + guid=hashlib.md5((url + image_guid).encode("utf-8")).hexdigest().upper(), + ) + + # 若响应非成功,则返回NONE + if not (response.get("status") == 200 and response.get("code") == 0): + return None, None + + # 根据票据类型和票据二级分类映射影像件类型 + match (response["data"]["flag"], response["data"]["type"]): + case (7, "idcard-front-back"): + image_type = "居民身份证(正背面)" + case (7, "idcard-front"): + image_type = "居民身份证(正面)" # 包含国徽一面 + case (7, "idcard-back"): + image_type = "居民身份证(背面)" # 包含头像一面 + case (8, _): + image_type = "银行卡" + case (4, _): + image_type = "增值税发票" + case (5, _): + image_type = "门诊收费票据" + case (3, _): + image_type = "住院收费票据" + case (18, _): + image_type = "理赔申请书" + case _: + return None, None + + # 影像件方向 + image_orientation = { + "0": "0度", + "90": "顺时针90度", + "180": "180度", + "270": "逆时针90度", + }.get(response["data"]["angle"], "0度") + + return image_type, image_orientation + + +def idcard_extraction(**kwargs) -> dict | None: + """居民身份证数据提取""" + + # 影像件全局唯一标识:优先使用关键词变量,其次使用全局变量,再次使用随机唯一标识 + image_guid = kwargs.get( + "image_guid", globals().get("image_guid", uuid.uuid4().hex.upper()) + ) + + # 影像件格式 + image_format = kwargs.get("image_format", globals()["image_format"]) + if image_format is None: + return None + + # 影像件BASE64编码 + image_base64 = kwargs.get("image_base64", globals()["image_base64"]) + if image_base64 is None: + return None + + # 请求深圳快瞳居民身份证识别接口 + response = 
globals()["http_client"].post( + url=(url := "https://ai.inspirvision.cn/s/api/ocr/identityCard"), + headers={"X-RequestId-Header": image_guid}, + data={ + "token": globals()["authenticator"].get_token( + servicer="szkt" + ), # 使用全局变量 + "imgBase64": f"data:image/{image_format};base64,{image_base64}", + }, # 支持同时识别居民身份证正反面 + guid=hashlib.md5((url + image_guid).encode("utf-8")).hexdigest().upper(), + ) + + # 若响应非成功,则返回NONE + if not (response.get("status") == 200 and response.get("code") == 0): + return None + + extraction = { + "姓名": response["data"]["name"], + "性别": response["data"]["sex"], + "民族": response["data"]["nation"], + "出生": response["data"][ + "birthday" + ], # 深圳快瞳居民身份证出生日期格式为%Y-%m-%d + "住址": response["data"]["address"], + "公民身份号码": response["data"]["idNo"], + "签发机关": response["data"]["issuedBy"], + "有效期起": parse( + (date := response["data"]["validDate"]).split("-")[0] + ).strftime( + "%Y-%m-%d" + ), # 深圳快瞳居民身份证识别中有效期日期格式为%Y.%m.%d,转为%Y-%m-%d + "有效期止": ( + date + if (date := date.split("-")[1]) == "长期" + else parse(date).strftime("%Y-%m-%d") + ), + } + + return extraction + + +def bankcard_extraction(**kwargs) -> dict | None: + """银行卡数据提取""" + + # 影像件全局唯一标识:优先使用关键词变量,其次使用全局变量,再次使用随机唯一标识 + image_guid = kwargs.get( + "image_guid", globals().get("image_guid", uuid.uuid4().hex.upper()) + ) + + # 影像件格式 + image_format = kwargs.get("image_format", globals()["image_format"]) + if image_format is None: + raise RuntimeError("请入参:image_format") + + # 影像件BASE64编码 + image_base64 = kwargs.get("image_base64", globals()["image_base64"]) + if image_base64 is None: + raise RuntimeError("请入参:image_base64") + + # 请求深圳快瞳银行卡识别接口 + response = globals()["http_client"].post( + url=(url := "https://ai.inspirvision.cn/s/api/ocr/bankCard"), + headers={"X-RequestId-Header": image_guid}, + data={ + "token": globals()["authenticator"].get_token(servicer="szkt"), + "imgBase64": f"data:image/{image_format};base64,{image_base64}", + }, + guid=hashlib.md5((url + 
image_guid).encode("utf-8")).hexdigest().upper(), + ) + + # 若响应非成功,则返回NONE + if not (response.get("status") == 200 and response.get("code") == 0): + return None + + extraction = { + "卡类型": {"1": "借记卡", "2": "贷记卡"}.get( + response["data"]["bankCardType"], "其它" + ), # 0不能识别、3准贷记卡、4预付卡合并为其它 + "银行名称": response["data"]["bankInfo"], + "卡号": response["data"]["cardNo"].replace(" ", ""), + } + + return extraction + + +def invoice_extraction(**kwargs) -> dict | None: + """增值税发票/收费票据数据提取""" + + # 影像件全局唯一标识:优先使用关键词变量,其次使用全局变量,再次使用随机唯一标识 + image_guid = kwargs.get( + "image_guid", globals().get("image_guid", uuid.uuid4().hex.upper()) + ) + + # 影像件格式 + image_format = kwargs.get("image_format", globals()["image_format"]) + if image_format is None: + return None + + # 影像件BASE64编码 + image_base64 = kwargs.get("image_base64", globals()["image_base64"]) + if image_base64 is None: + return None + + try: + # 请求深圳快瞳票据查验接口(兼容增值税发票、医疗门诊/住院收费票据) + response = globals()["http_client"].post( + url=(url := "https://ai.inspirvision.cn/s/api/ocr/invoiceCheckAll"), + headers={"X-RequestId-Header": image_guid}, + data={ + "token": globals()["authenticator"].get_token(servicer="szkt"), + "imgBase64": f"data:image/{image_format};base64,{image_base64}", + }, + guid=hashlib.md5((url + image_guid).encode("utf-8")).hexdigest().upper(), + ) + if not (response.get("status") == 200 and response.get("code") == 10000): + raise RuntimeError("深圳快瞳票据查验发生异常") + + match response["data"]["productCode"]: + case "003082": # 增值税发票 + extraction = { + "票据类型": { + "10108": "数电票", + "10101": "增值税普通发票", + "10100": "增值税专用发票", + "30100": "数电票", + "30101": "数电票", + "30104": "增值税专用发票", + "30105": "数电票", + "10106": "区块链电子发票", + "30109": "数电票", + "30121": "增值税普通发票", + "10102": "增值税普通发票", + "10103": "增值税普通发票", + "10107": "数电票", + }.get(response["data"]["type"], "其它增值税发票"), + "票据号码": response["data"]["details"]["number"], + "票据代码": ( + code if (code := response["data"]["details"]["code"]) else None + ), # 
深圳快瞳票据查验中数电票票据代码为空字符,转为NONE + "开票日期": datetime.strptime( + response["data"]["details"]["date"], "%Y年%m月%d日" + ).strftime( + "%Y-%m-%d" + ), # 深圳快瞳票据查验中就增值税发票开票日期格式为%Y年%m月%d日,转为%Y-%m-%d + "校验码": response["data"]["details"]["check_code"], + "收款方": response["data"]["details"]["seller"], + "付款方": response["data"]["details"]["buyer"], + "票据金额": format( + Decimal(response["data"]["details"]["total"]).quantize( + Decimal("0.00"), + rounding=ROUND_HALF_UP, + ), + ".2f", + ), + "查验状态": ( + "真票" + if response["data"]["details"]["invoiceTypeNo"] == "0" + else "红票" + ), + "备注": ( + remark + if (remark := response["data"]["details"]["remark"]) + else None + ), # 深圳快瞳票据查验中增值税发票备注可能为空字符,转为NONE + "项目": [ + { + "名称": item["name"], + "规格": ( + specification + if (specification := item["specification"]) + else None + ), + "单位": unit if (unit := item["unit"]) else None, + "数量": ( + format( + Decimal(quantity).quantize( + Decimal("0.00"), + rounding=ROUND_HALF_UP, + ), + ".2f", + ) + if (quantity := item["quantity"]) + else None + ), + "金额": format( + ( + Decimal(item["total"]) + Decimal(item["tax"]) + ).quantize( + Decimal("0.00"), + rounding=ROUND_HALF_UP, + ), + ".2f", + ), # 价税合计 + } + for item in response["data"]["details"].get("items", []) + ], + } # 深圳快瞳票据查验中就部分增值税发票仅可查,数据标准化抛出异常 + return extraction + + case "003081": # 门诊/住院收费票据 + extraction = { + "票据类型": ( + "门诊收费票据" + if "门诊" in response["data"]["billName"] + else "住院收费票据" + ), + "票据号码": response["data"]["billNumber"], + "票据代码": response["data"]["billCode"], + "开票日期": response["data"][ + "invoiceDate" + ], # 深圳快瞳票据查验中就收费票据开票日期格式为%Y-%m-%d + "校验码": response["data"]["checkCode"], + "收款方": response["data"]["payeeName"], + "付款方": response["data"]["payer"], + "票据金额": format( + Decimal(response["data"]["amount"]).quantize( + Decimal("0.00"), + rounding=ROUND_HALF_UP, + ), + ".2f", + ), + "查验状态": {"true": "真票", "false": "红票"}[ + response["data"]["flushedRed"] + ], + "备注": response["data"].get("remark"), + "医保支付": format( + 
Decimal(response["data"].get("medicarePay", "0.00")).quantize( + Decimal("0.00"), + rounding=ROUND_HALF_UP, + ), + ".2f", + ), + "其它支付": format( + Decimal(response["data"].get("otherPayment", "0.00")).quantize( + Decimal("0.00"), + rounding=ROUND_HALF_UP, + ), + ".2f", + ), + "个人自付": format( + Decimal(response["data"].get("personalPay", "0.00")).quantize( + Decimal("0.00"), + rounding=ROUND_HALF_UP, + ), + ".2f", + ), + "自付一": format( + Decimal(response["data"].get("self_pay_one", "0.00")).quantize( + Decimal("0.00"), + rounding=ROUND_HALF_UP, + ), + ".2f", + ), # 深圳快瞳票据查验中就部分地区无自付一 + "自付二": format( + Decimal( + response["data"].get("classificationPays", "0.00") + ).quantize( + Decimal("0.00"), + rounding=ROUND_HALF_UP, + ), + ".2f", + ), # 深圳快瞳票据查验中就部分地区无自付二 + "个人自费": format( + Decimal( + response["data"].get("personalExpense", "0.00") + ).quantize( + Decimal("0.00"), + rounding=ROUND_HALF_UP, + ), + ".2f", + ), + "住院日期": ( + parse(date.split("-")[0]).strftime("%Y-%m-%d") + if (date := response["data"].get("hospitalizationDate")) + else None + ), # 深圳快瞳票据查验中就收费票据住院日期格式为%Y%m%d-%Y%m%d,即住院日期-出院日期 + "出院日期": ( + parse(date.split("-")[1]).strftime("%Y-%m-%d") if date else None + ), + "医疗机构类型": response["data"]["institutionsType"], + "项目": [ + { + "名称": item["itemName"], + "规格": item[ + "medical_level" + ], # 甲类无自付、乙类有自付、丙类全自付 + "单位": item["unit"], + "数量": format( + Decimal(item["number"]).quantize( + Decimal("0.00"), + rounding=ROUND_HALF_UP, + ), + ".2f", + ), + "金额": format( + Decimal(item["totalAmount"]).quantize( + Decimal("0.00"), + rounding=ROUND_HALF_UP, + ), + ".2f", + ), + } + for item in response["data"]["feedetails"] + ], + } + return extraction + # 若请求深圳快瞳票据查验接口或解析发生异常,则根据影像件类型请求深圳快瞳增值税发票/收费票据识别接口 + except: + # 影像件类型 + image_type = kwargs.get("image_type", globals()["image_type"]) + if image_type is None: + return None + + match image_type: + case "增值税发票": + try: + # 请求深圳快瞳增值税发票识别接口 + response = globals()["http_client"].post( + url=(url := 
"https://ai.inspirvision.cn/s/api/ocr/vatInvoice"), + headers={"X-RequestId-Header": image_guid}, + data={ + "token": globals()["authenticator"].get_token( + servicer="szkt" + ), + "imgBase64": f"data:image/{image_format};base64,{image_base64}", + }, + guid=hashlib.md5((url + image_guid).encode("utf-8")) + .hexdigest() + .upper(), + ) + # 若深圳快瞳增值税发票识别响应非成功则返回NONE + if not ( + response.get("status") == 200 and response.get("code") == 0 + ): + return None + + extraction = { + "票据类型": ( + invoice_type := ( + data := { + item["desc"]: item["value"] + for item in response["data"] + } + ).get("发票类型") + ), + "票据号码": (number := data.get("发票号码")), + "票据代码": data.get("发票代码"), + "开票日期": ( + datetime.strptime(date, "%Y年%m月%d日").strftime("%Y-%m-%d") + if re.match( + r"\d{4}年\d{1,2}月\d{1,2}日", + (date := data.get("开票日期")), + ) + else date + ), + "校验码": ( + check_code if (check_code := data.get("校验码")) else number + ), # 若校验码为空则默认为票据号码 + "收款方": data.get("销售方名称"), + "付款方": data.get("购买方名称"), + "票据金额": format( + Decimal( + data.get("小写金额").replace("¥", "") + if invoice_type == "电子发票(普通发票)" + else data.get("合计金额(小写)") + ).quantize( + Decimal("0.00"), + rounding=ROUND_HALF_UP, + ), + ".2f", + ), + "备注": remark if (remark := data.get("备注")) else None, + "项目": ( + [ + { + "名称": name, + "规格": specification if specification else None, + "单位": unit if unit else None, + "数量": ( + format( + Decimal(quantity).quantize( + Decimal("0.00"), + rounding=ROUND_HALF_UP, + ), + ".2f", + ) + if quantity + else None + ), + "金额": format( + (Decimal(amount) + Decimal(tax)).quantize( + Decimal("0.00"), + rounding=ROUND_HALF_UP, + ), + ".2f", # 价税合计,保留两位小数 + ), + } + for name, specification, unit, quantity, amount, tax in zip( + [ + component["value"] + for component in response["data"] + if re.match( + r"^项目名称(\d+)?$", + component["desc"], + ) + ], + [ + component["value"] + for component in response["data"] + if re.match( + r"^规格型号(\d+)?$", + component["desc"], + ) + ], + [ + component["value"] + for 
component in response["data"] + if re.match( + r"^单位(\d+)?$", + component["desc"], + ) + ], + [ + component["value"] + for component in response["data"] + if re.match( + r"^数量(\d+)?$", + component["desc"], + ) + ], + [ + component["value"] + for component in response["data"] + if re.match( + r"^金额(\d+)?$", + component["desc"], + ) + ], + [ + component["value"] + for component in response["data"] + if re.match( + r"^税额(\d+)?$", + component["desc"], + ) + ], + ) + ] + if invoice_type == "电子发票(普通发票)" + else [ + { + "名称": name, + "数量": format( + Decimal(quantity).quantize( + Decimal("0.00"), + rounding=ROUND_HALF_UP, + ), + "0.2f", + ), + "金额": format( + Decimal(amount).quantize( + Decimal("0.00"), + rounding=ROUND_HALF_UP, + ), + ".2f", + ), + } + for name, quantity, amount in zip( + [ + component["value"] + for component in response["data"] + if re.match( + r"^项目名称明细(\d+)?$", + component["desc"], + ) + ], + [ + component["value"] + for component in response["data"] + if re.match( + r"^项目数量明细(\d+)?$", + component["desc"], + ) + ], + [ + component["value"] + for component in response["data"] + if re.match( + r"^项目金额明细(\d+)?$", + component["desc"], + ) + ], + ) + ] + ), + "查验状态": "无法查验", + } + return extraction + except: + return None + + case _: + try: + # 请求深圳快瞳收费票据识别接口 + response = globals()["http_client"].post( + url=(url := "https://ai.inspirvision.cn/s/api/ocr/medical"), + headers={"X-RequestId-Header": image_guid}, + data={ + "token": globals()["authenticator"].get_token( + servicer="szkt" + ), + "imgBase64": f"data:image/{image_format};base64,{image_base64}", + }, + guid=hashlib.md5((url + image_guid).encode("utf-8")) + .hexdigest() + .upper(), + ) + # 若深圳快瞳收费票据识别响应非成功则返回NONE + if not ( + response.get("status") == 200 and response.get("code") == 0 + ): + return None + + extraction = { + "票据类型": ( + "门诊收费票据" + if response["data"]["insured"]["receipt_outpatient"] + else "住院收费票据" + ), + "票据号码": ( + receipt := ( + response["data"]["insured"]["receipt_outpatient"] + 
or response["data"]["insured"][ + "receipt_hospitalization" + ] + )["receipts"][0] + )["receipt_no"][ + "value" + ], # 默认提取门诊/住院收费票据的第一张票据 + "票据代码": receipt["global_detail"]["invoice_code"]["value"], + "开票日期": receipt["global_detail"]["invoice_date"][ + "value" + ], # 深圳快瞳收费票据识别中就开票日期格式为%Y-%m-%d + "校验码": fuzzy_match( + target="校验码", + components=receipt["global_detail"]["region_specific"], + specify_key="name", + return_key="word.value", + ), + "收款方": receipt["hospital_name"]["value"], + "付款方": receipt["name"]["value"], + "票据金额": format( + Decimal(receipt["total_amount"]["value"]).quantize( + Decimal("0.00"), + rounding=ROUND_HALF_UP, + ), + ".2f", + ), + "医保支付": ( + format( + Decimal(field["value"]).quantize( + Decimal("0.00"), + rounding=ROUND_HALF_UP, + ), + ".2f", + ) + if isinstance((field := receipt.get("medicare_pay")), dict) + else None + ), + "其它支付": format( + ( + Decimal(value).quantize( + Decimal("0.00"), + rounding=ROUND_HALF_UP, + ) + if ( + value := fuzzy_match( + target="其它支付", + components=receipt.get("global_detail", {}).get( + "pay_list", [] + ), + specify_key="name", + return_key="word.value", + ) + ) + else None + ), + ".2f", + ), + "个人自付": ( + format( + Decimal(field["value"]).quantize( + Decimal("0.00"), + rounding=ROUND_HALF_UP, + ), + ".2f", + ) + if isinstance((field := receipt.get("self_pay")), dict) + else None + ), + "自付一": ( + format( + Decimal(field["value"]).quantize( + Decimal("0.00"), + rounding=ROUND_HALF_UP, + ), + ".2f", + ) + if isinstance(field := (receipt.get("self_pay_one")), dict) + else None + ), + "自付二": ( + format( + Decimal(field["value"]).quantize( + Decimal("0.00"), + rounding=ROUND_HALF_UP, + ), + ".2f", + ) + if isinstance(field := (receipt.get("self_pay_two")), dict) + else None + ), + "个人自费": ( + format( + Decimal(field["value"]).quantize( + Decimal("0.00"), + rounding=ROUND_HALF_UP, + ), + ".2f", + ) + if isinstance(field := (receipt.get("self_cost")), dict) + else None + ), + "住院日期": ( + 
datetime.strptime(field["value"], "%Y%m%d").strftime( + "%Y-%m-%d" + ) + if isinstance(field := (receipt.get("starttime")), dict) + else None + ), + "出院日期": ( + datetime.strptime(field["value"], "%Y%m%d").strftime( + "%Y-%m-%d" + ) + if isinstance(field := (receipt.get("endtime")), dict) + else None + ), + "医疗机构类型": receipt["others"]["medical_institution_type"][ + "value" + ], + "项目": [ + { + "名称": ( + field["value"] + if isinstance((field := item["item_name"]), dict) + else None + ), + "规格": ( + field["value"] + if isinstance( + (field := item["specifications"]), dict + ) + else None + ), + "单位": ( + field["value"] + if isinstance((field := item["unit"]), dict) + else None + ), + "数量": ( + format( + Decimal(field["value"]).quantize( + Decimal("0.00"), + rounding=ROUND_HALF_UP, + ), + ".2f", + ) + if isinstance((field := item["number"]), dict) + else None + ), + "金额": ( + format( + Decimal(field["value"]).quantize( + Decimal("0.00"), + rounding=ROUND_HALF_UP, + ), + ".2f", + ) + if isinstance((field := item["total_amount"]), dict) + else None + ), + } + for item in receipt["feeitems"] + ], + "查验状态": "无法查验", + } + return extraction + except: + return None + + +def common_extraction(**kwargs) -> dict | None: + """通用数据提取""" + + # 影像件全局唯一标识:优先使用关键词变量,其次使用全局变量,再次使用随机唯一标识 + image_guid = kwargs.get( + "image_guid", globals().get("image_guid", uuid.uuid4().hex.upper()) + ) + + # 影像件格式 + image_format = kwargs.get("image_format", globals()["image_format"]) + if image_format is None: + raise RuntimeError("请入参:image_format") + + # 影像件BASE64编码 + image_base64 = kwargs.get("image_base64", globals()["image_base64"]) + if image_base64 is None: + raise RuntimeError("请入参:image_base64") + + # 请求深圳快瞳通用文本识别接口 + response = globals()["http_client"].post( + url=(url := "https://ai.inspirvision.cn/s/api/ocr/general"), + headers={"X-RequestId-Header": image_guid}, + data={ + "token": globals()["authenticator"].get_token(servicer="szkt"), + "imgBase64": 
f"data:image/{image_format};base64,{image_base64}", + }, + guid=hashlib.md5((url + image_guid).encode("utf-8")).hexdigest().upper(), + ) + # 若响应非成功,则返回NONE + if not (response.get("status") == 200 and response.get("code") == 0): + return None + + # 基于空间坐标法就识别结果中文本框进行分行排序 + texts = [] + # 重构文本框列表 + for text in response["data"]: + texts.append( + [ + # 文本框左上角的X坐标 + numpy.float64(text["itemPolygon"]["x"]), + # 文本框左上角的Y坐标 + numpy.float64(text["itemPolygon"]["y"]), + # 文本框的高度 + numpy.float64( + text["itemPolygon"]["height"] + ), # 深圳快瞳基于文本框的Y坐标最大值和最小值的差值 + text["value"], + ] + ) + # 按照文本框Y坐标升序(使用空间坐标算法,从上到下,从左到右) + texts.sort(key=lambda x: x[1]) + rows = [] + # 分行 + for index, text in enumerate(texts[1:]): + # 若为第一行则初始化当前行 + if index == 0: + row = [texts[0]] + continue + # 若文本框的Y坐标与当前行中最后一个文本框的Y坐标差值小于阈值,则归为同一行 + # noinspection PyUnboundLocalVariable + # noinspection PyTypeChecker + if text[1] - row[-1][1] < numpy.mean([x[2] for x in row]) * 0.5: + row.append(text) + # 否则结束当前行、初始化当前行 + else: + rows.append(row) + row = [text] + # 添加最后一行 + rows.append(row) + extraction = [] + # 按照文本框X坐标升序 + for row in rows: + extraction.extend( + [x[3].replace(" ", "") for x in sorted(row, key=lambda x: x[0])] + ) + # 以空格拼接 + extraction = " ".join(extraction) + + # 根据理赔申请书匹配提示词 + match application_form := kwargs.get( + "application_form", globals().get("application_form") + ): + case "中行员工福利保障计划索赔申请书": + prompt = f""" + 指令:你是一个从OCR文本中智能提取信息并生成JSON的工具,请严格按照要求执行。 + + 输入:OCR文本(可能包含错漏): + {extraction} + + 输出要求: + 1、只输出可被Python中json.loads()解析的JSON格式字符串,不包含任何代码块标记、说明文字等其它非JSON格式内容 + 2、无法确定的值设置为`null`(不是"null"字符串) + + JSON结构: + {{ + "基础信息": {{ + "申请人": "字符串或null", + "性别": "字符串或null", + "年龄": "字符串或null", + "手机": "字符串或null", + "身份证号": "字符串或null", + "开户银行": "字符串或null", + "户名": "字符串或null", + "账号": "字符串或null", + }}, + "票据表格": [ + {{ + "就诊序号": "字符串或null", + "发票日期": "YYYY-MM-DD或null", + "发票上的就诊医院/药店": "字符串或null", + "票据张数": "字符串或null", + "票据金额": "字符串或null", + "诊断": "字符串或null" + }}, + ] + }} + + 开始输出: + 
""" + case _: + raise RuntimeError(f"理赔申请书{application_form}未设置处理方法") + + # 请求大语言模型创建对话接口 + response = globals()["http_client"].post( + url="https://api.siliconflow.cn/v1/chat/completions", + headers={ + "Authorization": "Bearer sk-xsnuwirjjphhfdbvznfdfjqlinfdlrnlxuhkbbqynfnbhiqz", # 基于硅基流动 + "Content-Type": "application/json; charset=utf-8", + }, + json={ + "model": "deepseek-ai/DeepSeek-R1-0528-Qwen3-8B", # 通过从DeepSeek-R1-0528模型蒸馏思维链接至Qwen3-8B-Base获得的模型 + "messages": [{"role": "user", "content": prompt}], + "max_tokens": 10240, # 生成文本最大令牌数 + "temperature": 0.2, + "top_p": 0.5, + "top_k": 20, + "frequency_penalty": 0.0, + "thinking_budget": 1, + }, + guid=hashlib.md5(prompt.encode("utf-8")).hexdigest().upper(), + ) + + extraction = ( + json.loads(match.group("json")) + if ( + match := re.search( + r"```json\s*(?P\{.*})\s*```", + response["choices"][0]["message"]["content"], + re.DOTALL, + ) + ) + else None + ) + + return extraction + + +# 规则模型初始化 +def decision(rules_path: Path) -> ZenDecision: + def loader(path): + with open(path, "r") as file: + return file.read() + + return ZenEngine({"loader": loader}).get_decision(rules_path.as_posix()) + + +def disease_diagnosis(**kwargs) -> str | None: + """疾病推定""" + + # 赔案档案:优先使用关键词变量,其次使用全局变量 + dossier = kwargs.get("dossier", globals().get("dossier")) + + prompt = f""" + 指令:你是一个医学疾病分类诊断的工具,请严格按照要求执行。 + + 患者信息: + 性别 {gender if (gender := dossier["赔案层"]["申请人信息"].get("性别")) is not None else "未知"}, + 年龄 {age if (age := dossier["赔案层"]["申请人信息"].get("年龄")) is not None else "未知"}, + 近期在药房/医院开具发票中内容 {dossier["赔案层"]["其它信息"]["小项合集"]} + + 输出要求: + 1、患者自述症状在 {dossier["赔案层"]["其它信息"]["自述症状"]} 其中之一 + 2、依据患者信息、自述症状和其提供的发票中内容 {kwargs["items"]} 综合诊断,只输出一个最可能的ICD-11中的疾病分类中亚类目代码对应的中文名称字符串,不包含任何代码块标记、说明文字等 + + 开始输出: + """ + + # 请求大语言模型创建对话接口 + response = globals()["http_client"].post( + url="https://ark.cn-beijing.volces.com/api/v3/chat/completions", + headers={ + "Authorization": "Bearer 2c28ab07-888c-45be-84a2-fc4b2cb5f3f2", # 火山引擎 + 
"Content-Type": "application/json; charset=utf-8", + }, + json={ + "model": "deepseek-r1-250528", + "messages": [ + {"role": "system", "content": "你是人工智能助手"}, + {"role": "user", "content": prompt}, + ], + "temperature": 0.2, + "top_p": 0.5, + "top_k": 20, + "frequency_penalty": 0.0, + "thinking_budget": 1, + }, + guid=hashlib.md5(prompt.encode("utf-8")).hexdigest().upper(), + ) + + recognition = ( + match.group("text") + if ( + match := re.match( + r"\s*(?P.*)", response["choices"][0]["message"]["content"] + ) + ) + else None + ) + + return recognition + + +# ------------------------- +# 主程序 +# ------------------------- + + +if __name__ == "__main__": + + # 初始化HTTP客户端 + http_client = HTTPClient(timeout=300, cache_enabled=True) + + # 初始化认证器 + authenticator = Authenticator() + + # 初始化工作目录地址对象 + directory_path = Path("directory") + # 若不存在则创建 + directory_path.mkdir(parents=True, exist_ok=True) + + # 初始化影像件识别规则引擎 + recognize_decision = decision(Path("rules/影像件是否需要数据提取.json")) + + # 初始化JINJA2环境 + environment = Environment(loader=FileSystemLoader(".")) + + # 添加DATE过滤器 + environment.filters["date"] = lambda date: ( + date.strftime("%Y-%m-%d") if date else "长期" + ) + # 加载赔案档案模版 + template = environment.get_template("template.html") + + # 遍历工作目录中赔案目录 + for case_path in [ + case_path for case_path in directory_path.iterdir() if case_path.is_dir() + ]: + + # 初始化赔案档案 + dossier = { + "影像件层": [], + "赔案层": { + "赔案编号": ( + case_number := case_path.stem + ), # 假设赔案已签收,系统已生成赔案编号并根据签收时填报保单编号可知保险总公司、保险分公司和投保公司 + "签收保单编号": "3291120243205000000002", + "保险总公司": "中银保险有限公司", + "保险分公司": None, # 实验阶段保险分公司、投保公司和申请时间为NONE + "投保公司": None, + "申请时间": None, + "申请人信息": {}, + "受益人信息": {}, + "被保人信息": {}, # 实验阶段被保人信息为空字典 + "其它信息": {}, + }, + "发票层": [], + "小项层": [], + } + + # 遍历赔案目录中影像件地址 + for image_index, image_path in enumerate( + sorted(case_path.glob(pattern="*"), key=lambda x: x.stat().st_ctime), 1 + ): + dossier["影像件层"].append( + { + "影像件序号": (image_index := f"{image_index:02d}"), + "影像件名称": 
(image_name := image_path.name), + } + ) + + # 若影像件格式非JPG/JPEG/PNG则跳过该影像件 + if (image_format := image_path.suffix.lower().lstrip(".")) not in [ + "jpg", + "jpeg", + "png", + ]: + dossier["影像件层"][-1]["已分类"] = "否,不支持的影像件" + continue + + # 影像件读取 + image = cv2.imread(image_path.as_posix(), cv2.IMREAD_GRAYSCALE) + # 若发生异常则跳过该影像件 + if image is None: + dossier["影像件层"][-1]["已分类"] = "否,读取异常" + continue + + # 影像件压缩(输出BASE64编码) + image_guid, image_base64 = images_compression() + # 若发生异常则跳过该影像件 + if image_guid is None or image_base64 is None: + dossier["影像件层"][-1]["已分类"] = "否,压缩异常" + continue + + # 通过请求深圳快瞳影像件分类接口获取影像件类型和方向 + image_type, image_orientation = images_classification() + # 若发生异常则跳过该影像件 + if image_type is None or image_orientation is None: + dossier["影像件层"][-1]["已分类"] = "否,影像件分类异常" + continue + + # 若影像件方向非0度,则影像件旋正并在此压缩 + if image_orientation != "0度": + # 影像件旋正 + image = cv2.rotate( + image, + { + "顺时针90度": cv2.ROTATE_90_COUNTERCLOCKWISE, # 逆时针旋转90度 + "180度": cv2.ROTATE_180, # 旋转180度 + "逆时针90度": cv2.ROTATE_90_CLOCKWISE, # 顺时针旋转90度 + }[image_orientation], + ) + # 影像件再次压缩 + image_guid, image_base64 = images_compression() + if image_guid is None or image_base64 is None: + dossier["影像件层"][-1]["已分类"] = "否,压缩异常" + continue + + dossier["影像件层"][-1].update({"已分类": "是", "影像件类型": image_type}) + + # 根据保险总公司和影像件类型评估影像件是否需要数据提取,若无需数据提取则跳过该影像件(例如,中银保险有限公司理赔申请书包含户名、开户银行和银行账号,无需识别银行卡) + if not recognize_decision.evaluate( + { + "insurer": (insurer := dossier["赔案层"]["保险总公司"]), + "image_type": image_type, + } + )["result"]["extract"]: + dossier["影像件层"][-1]["已识别"] = "否,无需识别" + continue + + # 根据影像件类型匹配影像件数据提取 + # noinspection PyUnreachableCode + match image_type: + case "居民身份证(正背面)" | "居民身份证(正面)" | "居民身份证(背面)": + extraction = idcard_extraction() + # 若发生异常则跳过该影像件 + if extraction is None: + dossier["影像件层"][-1]["已识别"] = "否,无法识别" + continue + + if image_type in ["居民身份证(正背面)", "居民身份证(正面)"]: + dossier["赔案层"]["申请人信息"].update( + { + "证件有效期起": datetime.strptime( + extraction["有效期起"], "%Y-%m-%d" 
+ ), + "证件有效期止": ( + date + if (date := extraction["有效期止"]) == "长期" + else datetime.strptime(date, "%Y-%m-%d") + ), # 若证件有效期止为NONE默认为“长期”, + } + ) # 原则上由影像件数据提取环节负责数据标准化,赔案档案数据填充环节负责数据机构化 + + if image_type in ["居民身份证(正背面)", "居民身份证(背面)"]: + dossier["赔案层"]["申请人信息"].update( + { + "姓名": extraction["姓名"], + "证件类型": "居民身份证", + "证件号码": extraction["公民身份号码"], + "性别": extraction["性别"], + "出生": datetime.strptime( + extraction["出生"], "%Y-%m-%d" + ), # 默认日期格式为%Y-%m-%d + "省": ( + address := parse_location(extraction["住址"]) + ).get("province"), + "地": address.get("city"), + "县": address.get("county"), + "详细地址": address.get("detail"), + } + ) + + case "银行卡": + extraction = bankcard_extraction() + # 若发生异常则跳过该影像件 + if extraction is None: + dossier["影像件层"][-1]["已识别"] = "否,无法识别" + continue + + dossier["赔案层"]["受益人信息"].update( + { + "开户银行": extraction["银行名称"], + "银行账号": extraction["卡号"], + } + ) + + case "增值税发票" | "门诊收费票据" | "住院收费票据": + extraction = invoice_extraction() + # 若发生异常则跳过该影像件 + if extraction is None: + dossier["影像件层"][-1]["已识别"] = "否,无法识别" + continue + + dossier["发票层"].append( + { + "关联影像件序号": image_index, + "票据类型": extraction["票据类型"], + "票据号码": extraction["票据号码"], + "票据代码": ( + extraction["票据代码"] + if extraction["票据代码"] + else "--" + ), # 数电票无票据代码,校验码同票据号码 + "开票日期": datetime.strptime( + extraction["开票日期"], "%Y-%m-%d" + ), + "校验码后六位": ( + check_code[-6:] + if (check_code := extraction["校验码"]) + else "--" + ), + "医药机构": extraction["收款方"], + "就诊人": ( + match.group("name") + if ( + match := re.search( + r"^(?P[^((]+)", extraction["付款方"] + ) + ) + else extraction["付款方"] + ), + "票据金额": Decimal(extraction["票据金额"]).quantize( + Decimal("0.00"), rounding=ROUND_HALF_UP + ), # 默认金额转为小数,保留两位小数 + "查验状态": extraction["查验状态"], + "项目": ( + pandas.DataFrame(extraction["项目"]) + .assign( + 数量=lambda dataframe: dataframe["数量"].apply( + lambda row: ( + Decimal(row).quantize( + Decimal("0.00"), + rounding=ROUND_HALF_UP, + ) + if row + else Decimal("0.00") + ) + ), + 金额=lambda dataframe: 
dataframe["金额"].apply( + lambda row: ( + Decimal(row).quantize( + Decimal("0.00"), + rounding=ROUND_HALF_UP, + ) + if row + else Decimal("0.00") + ) + ), + ) + .groupby(by="名称", as_index=False) + .agg(数量=("数量", "sum"), 金额=("金额", "sum")) + .assign( + 大项=lambda dataframe: dataframe["名称"].apply( + lambda row: ( + match.group("category") + if ( + match := re.match( + r"^\*(?P.+?)\*.*$", + row, + ) + ) + else row + ) + ), + 小项=lambda dataframe: dataframe["名称"].apply( + lambda row: ( + re.sub( + r"[^\u4e00-\u9fa5a-zA-Z0-9./%*]", + "", + match.group("name"), + ) + if ( + match := re.match( + r"^\*.+?\*(?:\[[^]]+])?(?P[^\s(]+)(?:\([^\s(]+\))?(?:.*?)?$", + row, + ) + ) + else "" + ) + ), + ) + .loc[ + lambda dataframe: dataframe["金额"] != 0, + ["名称", "大项", "小项", "数量", "金额"], + ] + .to_dict(orient="records") + ), + "就诊类型": ( + "药店购药" + if "增值税发票" in image_type + else ( + "门诊就诊" + if "门诊收费票据" in image_type + else "住院治疗" + ) + ), + } + ) + + case "理赔申请书": + # 根据保险总公司匹配理赔申请书 + # noinspection PyUnreachableCode + match insurer: + case "中银保险有限公司": + extraction = common_extraction( + application_form="中行员工福利保障计划索赔申请书" + ) + # 若识别异常则跳过该影像件 + if extraction is None: + dossier["影像件层"][-1]["已识别"] = "否,无法识别" + continue + + dossier["赔案层"]["申请人信息"].update( + { + "与被保险人关系": "本人", # 中银保险有限公司:默认申请人与被保险人关系为本人 + "年龄": ( + Decimal(age).quantize( + Decimal("0"), + rounding=ROUND_HALF_UP, + ) + if ( + age := extraction.get("基础信息", {}).get( + "年龄", "--" + ) + ).isdigit() + else age + ), # 若年龄仅数字则转为小数、取整,否则默认为“--” + "手机号": ( + phone_number + if re.match( + r"^1[3-9]\d{9}$", + phone_number := extraction.get( + "基础信息", {} + ).get("手机", "--"), + ) + else phone_number + ), # 若手机未正则匹配手机号格式则为“--” + } + ) + + dossier["赔案层"]["受益人信息"].update( + { + "与被保险人关系": "本人", # 中银保险有限公司:默认受益人与被保人关系为本人 + "户名": ( + account_name + if ( + account_name := extraction.get( + "基础信息", {} + ).get("户名") + ) + else "--" + ), # 若户名为NONE则为“--” + "开户银行": ( + account_name + if ( + account_name := extraction.get( + "基础信息", {} + 
).get("开户银行") + ) + else "--" + ), # 若开户银行为NONE则为“--” + "银行账号": ( + account_name + if ( + account_name := extraction.get( + "基础信息", {} + ).get("账号") + ) + is not None + else "--" + ), # 若银行账号为NONE则为“--” + } + ) + + dossier["赔案层"]["其它信息"]["自述症状"] = ( + ("、".join(diagnoses)) + if ( + diagnoses := sorted( + set( + "、".join( + [ + diagnosis + for invoice in extraction.get( + "票据表格", [] + ) + if ( + diagnosis := invoice.get("诊断") + ) + ] + ).split("、") + ) + ) + ) + else "--" + ) + + case _: + dossier["影像件层"][-1]["已识别"] = "否,无法识别" + continue + + case _: + dossier["影像件层"][-1]["已识别"] = "否,无法识别" + continue + + dossier["影像件层"][-1].update( + { + "已识别": "是", + "识别结果": extraction, + } + ) + + # 发票层根据开票日期顺序排序 + dossier["发票层"] = sorted( + dossier["发票层"], key=lambda x: (x["开票日期"], x["票据号码"]) + ) + + # 构建小项层 + # noinspection PyTypeChecker + dossier["小项层"] = ( + pandas.DataFrame( + [ + { + "小项": item["小项"], + "数量": item["数量"], + "金额": item["金额"], + } + for invoice in dossier["发票层"] + for item in invoice["项目"] + ] + ) + .groupby(by="小项", as_index=False) + .agg(数量=("数量", "sum"), 金额=("金额", "sum")) + .to_dict(orient="records") + ) + + for invoice in dossier["发票层"]: + # noinspection PyTypeChecker + invoice["推定疾病"] = disease_diagnosis( + items="、".join(sorted(set([item["小项"] for item in invoice["项目"]]))) + ) + + print(dossier) + exit() + + with open(f"dossiers/{case_number}.html", "w", encoding="utf-8") as file: + file.write( + template.render( + { + "dossier": dossier, + } + ) + )