788 lines
		
	
	
		
			31 KiB
		
	
	
	
		
			Python
		
	
	
	
			
		
		
	
	
			788 lines
		
	
	
		
			31 KiB
		
	
	
	
		
			Python
		
	
	
	
| # -*- coding: utf-8 -*-
 | ||
| 
 | ||
| """
 | ||
| 普康健康_发票查验
 | ||
| """
 | ||
| 
 | ||
| # 加载模块
 | ||
| 
 | ||
| import hashlib
 | ||
| import json
 | ||
| import shutil
 | ||
| import uuid
 | ||
| from base64 import b64decode, b64encode
 | ||
| from concurrent.futures import ThreadPoolExecutor, as_completed
 | ||
| from pathlib import Path
 | ||
| 
 | ||
| import cv2
 | ||
| import numpy
 | ||
| import pandas
 | ||
| 
 | ||
| from utils.client import Authenticator, HTTPClient, RequestException, restrict
 | ||
| from utils.pandas_extension import open_csv, save_as_workbook, traverse_directory
 | ||
| 
 | ||
| 
 | ||
| # 影像件压缩
 | ||
| def image_compression(
 | ||
|     image_path: str | None = None,
 | ||
|     image_format: str | None = None,
 | ||
|     image_data: bytes | None = None,  # 数据类型为包含图像文件的二进制数据的字节串
 | ||
|     image_size_specified: int = 2,  # 指定影像件大小
 | ||
|     raw: bool = False,  # 250804新增返回是否为完整URI数据格式
 | ||
| ) -> str | None:
 | ||
| 
 | ||
|     try:
 | ||
| 
 | ||
|         # 若影像件路径数据类型为STR则创建路径对象
 | ||
|         if isinstance(image_path, str):
 | ||
|             image_path = Path(image_path)
 | ||
|             # 影像件文件名称后缀
 | ||
|             image_format = image_path.suffix.strip().lstrip(".").lower()
 | ||
| 
 | ||
|             # 读取影像件数据
 | ||
|             with open(image_path, "rb") as image:
 | ||
|                 image_data = image.read()
 | ||
| 
 | ||
|         # 影像件数据BASE64编码
 | ||
|         image_data_base64 = b64encode(image_data).decode("utf-8")
 | ||
| 
 | ||
|         # 指定影像件大小的单位由MB转为KB
 | ||
|         image_size_specified = image_size_specified * 1024 * 1024
 | ||
| 
 | ||
|         # 若影像件大小小于指定影像件大小则返回BASE64编码后影像件数据
 | ||
|         if len(image_data_base64) < image_size_specified:
 | ||
|             if raw:
 | ||
|                 # 返回非完整URI数据格式
 | ||
|                 return image_data_base64
 | ||
|             else:
 | ||
|                 # 返回完整URI数据格式
 | ||
|                 return f"data:image/{image_format};base64,{image_data_base64}"
 | ||
| 
 | ||
|         # OPENCV解码(数据类型为NUMPY-UINT8)
 | ||
|         image_data_cv2 = cv2.imdecode(
 | ||
|             numpy.frombuffer(image_data, numpy.uint8), cv2.IMREAD_COLOR
 | ||
|         )
 | ||
| 
 | ||
|         # 若OPENCV解码失败则抛出异常
 | ||
|         if image_data_cv2 is None:
 | ||
|             raise RuntimeError(f"OPENCV解码发生异常")
 | ||
| 
 | ||
|         # 初始化近似BASE64编码后影像件数据
 | ||
|         proximate_image_data_base64 = None
 | ||
| 
 | ||
|         # 初始化最小压缩前后影像件大小差值
 | ||
|         min_image_size_difference = float("inf")
 | ||
| 
 | ||
|         # 基于双层压缩方法:先外层降低图像质量,再内层缩小图像尺寸
 | ||
|         for quality in range(90, 0, -10):
 | ||
| 
 | ||
|             image_data_cv2_ = image_data_cv2.copy()
 | ||
| 
 | ||
|             # 根据影像件格式匹配图片质量配置
 | ||
|             # noinspection PyUnreachableCode
 | ||
|             match image_format:
 | ||
|                 case "png":
 | ||
|                     encoding_params = [cv2.IMWRITE_PNG_COMPRESSION, 10 - quality // 10]
 | ||
|                 case _:
 | ||
|                     encoding_params = [cv2.IMWRITE_JPEG_QUALITY, quality]
 | ||
| 
 | ||
|             for i in range(25):
 | ||
|                 # 降低图像质量
 | ||
|                 # noinspection PyTypeChecker
 | ||
|                 success, image_data_encoded = cv2.imencode(
 | ||
|                     image_format, image_data_cv2_, encoding_params
 | ||
|                 )  # 图像编码
 | ||
|                 # 若图像编码失败则退出
 | ||
|                 if not success:
 | ||
|                     break
 | ||
|                 image_data_base64 = b64encode(image_data_encoded.tobytes()).decode(
 | ||
|                     "utf-8"
 | ||
|                 )
 | ||
| 
 | ||
|                 # 压缩前后影像件大小差值
 | ||
|                 image_size_difference = len(image_data_base64) - image_size_specified
 | ||
| 
 | ||
|                 if image_size_difference <= 0:
 | ||
|                     if raw:
 | ||
|                         return image_data_base64
 | ||
|                     else:
 | ||
|                         return f"data:image/{image_format};base64,{image_data_base64}"
 | ||
| 
 | ||
|                 if image_size_difference < min_image_size_difference:
 | ||
|                     min_image_size_difference = image_size_difference
 | ||
|                     proximate_image_data_base64 = image_data_base64
 | ||
| 
 | ||
|                 # 影像件高度和宽度
 | ||
|                 image_height, image_weight = image_data_cv2_.shape[:2]
 | ||
| 
 | ||
|                 # 若仍超过影像件指定大小则调整图像尺寸
 | ||
|                 image_data_cv2_ = cv2.resize(
 | ||
|                     image_data_cv2_,
 | ||
|                     dsize=(int(image_weight * 0.9), int(image_height * 0.9)),
 | ||
|                     interpolation=cv2.INTER_AREA,
 | ||
|                 )
 | ||
| 
 | ||
|         if proximate_image_data_base64:
 | ||
|             if raw:
 | ||
|                 return proximate_image_data_base64
 | ||
|             else:
 | ||
|                 return f"data:image/{image_format};base64,{image_data_base64}"
 | ||
|         else:
 | ||
|             raise RuntimeError("影像件压缩失败")
 | ||
| 
 | ||
|     except:
 | ||
|         return None
 | ||
| 
 | ||
| 
 | ||
| # 票据查验接口(需要)
 | ||
| @restrict(refill_rate=5, max_tokens=5)  # 限速至5QPS
 | ||
| def invoices_verification(
 | ||
|     image_index,
 | ||
|     image_path=None,
 | ||
|     invoice_number=None,
 | ||
|     invoice_code=None,
 | ||
|     invoice_check_code=None,
 | ||
|     invoice_date=None,
 | ||
|     invoice_amount=None,
 | ||
|     id_number=None,
 | ||
|     process_mode=None,
 | ||
|     supplier=None,
 | ||
| ):
 | ||
| 
 | ||
|     try:
 | ||
| 
 | ||
|         # 若影像件地址非空则imgBASE64请求,否则根据发票五要素请求
 | ||
|         if image_path:
 | ||
| 
 | ||
|             match process_mode:
 | ||
|                 case "通过影像件本地地址":
 | ||
| 
 | ||
|                     # 创建路径对象
 | ||
|                     image_path = Path(image_path)
 | ||
| 
 | ||
|                     # 影像件文件名称后缀
 | ||
|                     image_format = image_path.suffix.strip().lstrip(".").lower()
 | ||
| 
 | ||
|                     # 读取影像件数据
 | ||
|                     with open(image_path, "rb") as image:
 | ||
|                         image_data = image.read()
 | ||
| 
 | ||
|                 case "通过影像件对象服务器地址":
 | ||
|                     image_format, image_data = http_client.download(url=image_path)
 | ||
| 
 | ||
|             # 断定影像件格式为JGP、JPEG或者PNG
 | ||
|             # noinspection PyUnboundLocalVariable
 | ||
|             assert image_format in [
 | ||
|                 "jpg",
 | ||
|                 "jpeg",
 | ||
|                 "png",
 | ||
|             ], f"影像件格式({image_format})不支持"
 | ||
| 
 | ||
|             match supplier:
 | ||
|                 case "szkt":
 | ||
| 
 | ||
|                     image_data_base64 = image_compression(
 | ||
|                         image_format=image_format, image_data=image_data
 | ||
|                     )
 | ||
| 
 | ||
|                     # noinspection PyUnusedLocal
 | ||
|                     response = http_client.post(
 | ||
|                         # 深圳快瞳增值税发票、医疗发票查验兼容版
 | ||
|                         url="https://ai.inspirvision.cn/s/api/ocr/invoiceCheckAll",
 | ||
|                         # 用于和深圳快瞳联查时定位请求
 | ||
|                         headers={"X-RequestId-Header": image_index},
 | ||
|                         data={
 | ||
|                             "token": authenticator.get_token(servicer="szkt"),
 | ||
|                             "imgBase64": image_data_base64,
 | ||
|                         },
 | ||
|                     )
 | ||
|                 case "bjfd":
 | ||
| 
 | ||
|                     image_data_base64 = image_compression(
 | ||
|                         image_format=image_format, image_data=image_data, raw=True
 | ||
|                     )  # 北京分单不支持完整URI数据格式
 | ||
| 
 | ||
|                     # 业务入参,序列化并BASE64编码
 | ||
|                     data = b64encode(
 | ||
|                         json.dumps(
 | ||
|                             {
 | ||
|                                 "fileByte": image_data_base64,
 | ||
|                                 "fileType": (
 | ||
|                                     "png" if image_format == "png" else "jpg"
 | ||
|                                 ),  # 北京分单影像件格式支持JPG、PNG或PDF(本脚本暂不支持PDF)
 | ||
|                             }
 | ||
|                         ).encode("utf-8")
 | ||
|                     ).decode("utf-8")
 | ||
| 
 | ||
|                     # 应用账号
 | ||
|                     appid = "mbYr11Rc_42"
 | ||
| 
 | ||
|                     # 随机标识
 | ||
|                     noise = image_index
 | ||
| 
 | ||
|                     # 版本号
 | ||
|                     version = "1.0"
 | ||
| 
 | ||
|                     # 装配签名
 | ||
|                     sign = (
 | ||
|                         hashlib.md5(
 | ||
|                             f"appid={appid}&data={data}&noise={noise}&key=80357535c95333c3b133dfe5533f6334fe5e9321&version={version}".encode(
 | ||
|                                 "utf-8"
 | ||
|                             )
 | ||
|                         )
 | ||
|                         .hexdigest()
 | ||
|                         .upper()
 | ||
|                     )
 | ||
| 
 | ||
|                     # noinspection PyUnusedLocal
 | ||
|                     response = http_client.post(
 | ||
|                         # 北京分单增值税发票、医疗票据二维码查验接口
 | ||
|                         url="https://api.fendanyun.com/rsx/api/checkByQRCode",
 | ||
|                         headers={"Content-Type": "application/json; charset=utf-8"},
 | ||
|                         json={
 | ||
|                             "appid": appid,
 | ||
|                             "data": data,
 | ||
|                             "noise": noise,
 | ||
|                             "version": version,
 | ||
|                             "sign": sign,
 | ||
|                         },
 | ||
|                     )
 | ||
| 
 | ||
|         else:
 | ||
| 
 | ||
|             response = http_client.post(
 | ||
|                 # 深圳快瞳增值税发票、医疗发票查验兼容版
 | ||
|                 url="https://ai.inspirvision.cn/s/api/ocr/invoiceCheckAll",
 | ||
|                 data={
 | ||
|                     "token": authenticator.get_token(servicer="szkt"),
 | ||
|                     "invoiceNumber": invoice_number,
 | ||
|                     "invoiceCode": invoice_code,
 | ||
|                     "checkCode": invoice_check_code,
 | ||
|                     "invoicingDate": invoice_date,
 | ||
|                     "pretaxAmount": invoice_amount,
 | ||
|                     "idCardNo": id_number,
 | ||
|                 },
 | ||
|             )
 | ||
| 
 | ||
|     except RequestException as request_exception:
 | ||
|         response = {
 | ||
|             "status": request_exception.status,
 | ||
|             "code": request_exception.code,
 | ||
|             "message": request_exception.message,
 | ||
|         }
 | ||
| 
 | ||
|     except Exception as exception:
 | ||
|         response = {
 | ||
|             "code": "40000",
 | ||
|             "message": f"发生其它异常{exception}",
 | ||
|         }
 | ||
| 
 | ||
|     return image_index, response
 | ||
| 
 | ||
| 
 | ||
| if __name__ == "__main__":
 | ||
| 
 | ||
|     print("已启动批量票据查验")
 | ||
| 
 | ||
|     match input("请选择票据查验供应商(1:深圳快瞳,2:北京分单,其它任意字符:退出脚本):"):
 | ||
|         case "1":
 | ||
|             supplier = "szkt"
 | ||
|         case "2":
 | ||
|             supplier = "bjfd"
 | ||
|         case _:
 | ||
|             print("选择退出脚本!")
 | ||
|             exit(0)
 | ||
| 
 | ||
|     match input(
 | ||
|         "请选择处理流程(1:批量解析已归档响应报文,2:根据影像件地址或票据五要素批量查验,其它任意字符:退出脚本):"
 | ||
|     ):
 | ||
|         case "1":
 | ||
|             # 打开前置影像件索引CSV文件
 | ||
|             dataframe = open_csv(file_name="dataframe_indexed.csv")
 | ||
|         case "2":
 | ||
|             print("正在归档响应报文...", end="")
 | ||
| 
 | ||
|             # 创建响应报文目录路径对象
 | ||
|             responses_path = Path("temporary/responses")
 | ||
| 
 | ||
|             # 若响应报文目录路径不存在则创建
 | ||
|             if not responses_path.exists():
 | ||
|                 responses_path.mkdir(parents=True, exist_ok=True)
 | ||
| 
 | ||
|             # 创建归档响应报文目录路径对象
 | ||
|             archives_path = Path("temporary/archives")
 | ||
| 
 | ||
|             # 若归档响应报文目录路径不存在则创建
 | ||
|             if not archives_path.exists():
 | ||
|                 archives_path.mkdir(parents=True, exist_ok=True)
 | ||
| 
 | ||
|             # 遍历响应报文目录下所有文件名后缀为JSON的文件路径
 | ||
|             for file_path in Path(responses_path).glob("*.json"):
 | ||
|                 # 若文件路径为文件
 | ||
|                 if file_path.is_file():
 | ||
|                     # 移动响应报文由响应报文目录至归档响应报文目录
 | ||
|                     shutil.move(str(file_path), str(archives_path / file_path.name))
 | ||
| 
 | ||
|             print("已完成")
 | ||
| 
 | ||
|             match input(
 | ||
|                 "请选择批量查验方法(1:通过影像件本地地址,2:通过影像件对象服务器地址,3:通过增值税发票和医疗票据的五要素,其它任意字符:退出脚本):"
 | ||
|             ):
 | ||
|                 case "1":
 | ||
|                     print("正在读取影像件本地地址...", end="")
 | ||
| 
 | ||
|                     dataframe = traverse_directory(
 | ||
|                         directory_path="待查验发票", suffixes=[".jpg", ".jpeg", ".png"]
 | ||
|                     )
 | ||
| 
 | ||
|                     # 修改列名相对路径为影像件地址
 | ||
|                     dataframe.rename(columns={"相对路径": "影像件地址"}, inplace=True)
 | ||
| 
 | ||
|                     process_mode = "通过影像件本地地址"
 | ||
| 
 | ||
|                 case "2":
 | ||
|                     print("正在读取影像件对象服务器地址...", end="")
 | ||
| 
 | ||
|                     dataframe = open_csv(file_name="dataframe.csv")
 | ||
| 
 | ||
|                     # 断定列名包括赔案编号、发票编号和影像件地址
 | ||
|                     assert all(
 | ||
|                         [
 | ||
|                             column_name in dataframe.columns
 | ||
|                             for column_name in ["赔案编号", "发票编号", "影像件地址"]
 | ||
|                         ]
 | ||
|                     ), "CSV文件中列名必须包括赔案编号、发票编号和影像件地址"
 | ||
| 
 | ||
|                     # 根据赔案编号和发票编号去重
 | ||
|                     dataframe.drop_duplicates(
 | ||
|                         subset=["赔案编号", "发票编号"], keep="first", inplace=True
 | ||
|                     )
 | ||
| 
 | ||
|                     # 处理方式
 | ||
|                     process_mode = "通过影像件对象服务器地址"
 | ||
| 
 | ||
|                 case "3":
 | ||
|                     print("正在读取增值税发票和医疗票据的五要素...", end="")
 | ||
| 
 | ||
|                     dataframe = open_csv(file_name="dataframe.csv")
 | ||
| 
 | ||
|                     # 断定列名包括身份证号码后六位、发票编号、发票代码、校验号码后六位、开票日期和发票金额
 | ||
|                     assert all(
 | ||
|                         [
 | ||
|                             column_name in dataframe.columns
 | ||
|                             for column_name in [
 | ||
|                                 "身份证号码后六位",
 | ||
|                                 "发票编号",
 | ||
|                                 "发票代码",
 | ||
|                                 "校验号码后六位",
 | ||
|                                 "开票日期",
 | ||
|                                 "发票金额",
 | ||
|                             ]
 | ||
|                         ]
 | ||
|                     ), "CSV文件中列名必须包括身份证号码后六位、发票编号、发票代码、校验号码后六位、开票日期和发票金额"
 | ||
| 
 | ||
|                     # 根据身份证号码后六位、发票编号、发票代码、校验号码后六位、开票日期和发票金额去重
 | ||
|                     dataframe.drop_duplicates(
 | ||
|                         subset=[
 | ||
|                             "身份证号码后六位",
 | ||
|                             "发票编号",
 | ||
|                             "发票代码",
 | ||
|                             "校验号码后六位",
 | ||
|                             "开票日期",
 | ||
|                             "发票金额",
 | ||
|                         ],
 | ||
|                         keep="first",
 | ||
|                         inplace=True,
 | ||
|                     )
 | ||
| 
 | ||
|                     # 格式化开票日期
 | ||
|                     dataframe["开票日期"] = dataframe["开票日期"].str.replace(
 | ||
|                         "-", "", regex=False
 | ||
|                     )
 | ||
| 
 | ||
|                     # 处理方式
 | ||
|                     process_mode = "通过增值税发票和医疗票据的五要素"
 | ||
| 
 | ||
|                 case _:
 | ||
|                     print("选择退出脚本!")
 | ||
|                     exit(0)
 | ||
| 
 | ||
|             # 统计待查验发票张数
 | ||
|             rows = dataframe.shape[0]
 | ||
| 
 | ||
|             # 若待查验发票张数为0则退出脚本
 | ||
|             if rows == 0:
 | ||
|                 print("待查验发票张数为0,退出脚本")
 | ||
|                 exit(0)
 | ||
| 
 | ||
|             print(f"已完成,待查验发票张数为 {rows}")
 | ||
| 
 | ||
|             # 添加索引
 | ||
|             dataframe["索引"] = dataframe.apply(
 | ||
|                 lambda x: uuid.uuid4().hex, axis="columns"
 | ||
|             )
 | ||
| 
 | ||
|             dataframe.to_csv("dataframe_indexed.csv", index=False)
 | ||
| 
 | ||
|             # 创建深圳快瞳获取访问令牌方法
 | ||
|             authenticator = Authenticator()
 | ||
| 
 | ||
|             # 初始化请求客户端
 | ||
|             http_client = HTTPClient()
 | ||
| 
 | ||
|             # 用于记录已完成任务数
 | ||
|             completed_futures = 0
 | ||
| 
 | ||
|             # 创建线程池
 | ||
|             with ThreadPoolExecutor(max_workers=5) as executor:
 | ||
| 
 | ||
|                 # noinspection PyUnreachableCode
 | ||
|                 # noinspection PyUnboundLocalVariable
 | ||
|                 match process_mode:
 | ||
| 
 | ||
|                     case "通过影像件本地地址" | "通过影像件对象服务器地址":
 | ||
| 
 | ||
|                         futures = [
 | ||
|                             executor.submit(
 | ||
|                                 invoices_verification,
 | ||
|                                 image_index=row.索引,
 | ||
|                                 image_path=row.影像件地址,
 | ||
|                                 process_mode=process_mode,
 | ||
|                                 supplier=supplier,
 | ||
|                             )
 | ||
|                             for row in dataframe[["索引", "影像件地址"]].itertuples(
 | ||
|                                 index=False, name="row"
 | ||
|                             )
 | ||
|                         ]
 | ||
| 
 | ||
|                     case "通过增值税发票和医疗票据的五要素":
 | ||
| 
 | ||
|                         # 提交任务
 | ||
|                         futures = [
 | ||
|                             executor.submit(
 | ||
|                                 invoices_verification,
 | ||
|                                 image_index=row.索引,
 | ||
|                                 invoice_number=row.发票编号,
 | ||
|                                 invoice_code=row.发票代码,
 | ||
|                                 invoice_check_code=row.校验号码后六位,
 | ||
|                                 invoice_date=row.开票日期,
 | ||
|                                 invoice_amount=row.发票金额,
 | ||
|                                 id_number=row.身份证号码后六位,
 | ||
|                                 process_mode=process_mode,
 | ||
|                                 supplier=supplier,
 | ||
|                             )
 | ||
|                             for row in dataframe[
 | ||
|                                 [
 | ||
|                                     "索引",
 | ||
|                                     "发票编号",
 | ||
|                                     "发票代码",
 | ||
|                                     "校验号码后六位",
 | ||
|                                     "开票日期",
 | ||
|                                     "发票金额",
 | ||
|                                     "身份证号码后六位",
 | ||
|                                 ]
 | ||
|                             ].itertuples(index=False, name="row")
 | ||
|                         ]
 | ||
| 
 | ||
|                 for future in as_completed(futures):
 | ||
|                     index, response = future.result()
 | ||
| 
 | ||
|                     # 保存报文
 | ||
|                     with open(
 | ||
|                         "temporary/responses/{}.json".format(index),
 | ||
|                         "w",
 | ||
|                         encoding="utf-8",
 | ||
|                     ) as file:
 | ||
|                         json.dump(response, file, ensure_ascii=False)
 | ||
| 
 | ||
|                     completed_futures += 1
 | ||
| 
 | ||
|                     print(f"已完成 {completed_futures / rows * 100:.2f} %")
 | ||
| 
 | ||
|         case _:
 | ||
|             print("选择退出脚本!")
 | ||
|             exit(0)
 | ||
| 
 | ||
|     print("正在解析报文...", end="")
 | ||
| 
 | ||
|     # 解析后数据体
 | ||
|     dataframe_parsed = []
 | ||
| 
 | ||
|     # 遍历报文所在目录
 | ||
|     for path_object in list(Path("temporary/responses").glob("*.json")):
 | ||
| 
 | ||
|         # 解析报文结构
 | ||
|         parse = {
 | ||
|             "索引": "",
 | ||
|             "机打发票号码": "",
 | ||
|             "发票金额": "",
 | ||
|             "购买方": "",
 | ||
|             "销售方": "",
 | ||
|             "发票状态": "",
 | ||
|             "最大销售项目名称": "",
 | ||
|             "最大销售项目数量": "",
 | ||
|             "XML版式文件": "",
 | ||
|         }
 | ||
| 
 | ||
|         # 若路径对象包含下划线则在解析报文结构添加赔案编号和发票编号
 | ||
|         if "_" in path_object.stem:
 | ||
| 
 | ||
|             parse["赔案编号"] = path_object.stem.split("_")[0]
 | ||
| 
 | ||
|             parse["发票编号"] = path_object.stem.split("_")[1]
 | ||
| 
 | ||
|         # 打开报文并JSON逆序列化
 | ||
|         with open(path_object, "r", encoding="utf-8") as file:
 | ||
|             response = json.load(file)
 | ||
| 
 | ||
|         # 索引
 | ||
|         parse["索引"] = path_object.stem
 | ||
| 
 | ||
|         match supplier:
 | ||
|             case "szkt":
 | ||
|                 try:
 | ||
| 
 | ||
|                     # 响应状态码
 | ||
|                     status_code = response.get("status", "")
 | ||
| 
 | ||
|                     # 错误码
 | ||
|                     code = response.get("code", "")
 | ||
| 
 | ||
|                     # 流水号
 | ||
|                     serial = response.get("serialNo", "")
 | ||
| 
 | ||
|                     # 若响应状态码为200且错误码为10000,则定义为响应成功
 | ||
|                     if status_code == 200 and code == 10000:
 | ||
| 
 | ||
|                         # 查验类型,若查验类型为003081则为医疗票据查验,003082则为增值税发票查验,两者报文结构不一致
 | ||
|                         match response.get("data").get(
 | ||
|                             "productCode"
 | ||
|                         ):  # 若响应成功则必定存在键DATA和PRODUCTCODE
 | ||
|                             # 解析医疗票据查验结果
 | ||
|                             case "003081":
 | ||
| 
 | ||
|                                 parse["机打发票号码"] = response.get("data").get(
 | ||
|                                     "billNumber"
 | ||
|                                 )
 | ||
| 
 | ||
|                                 parse["校验码"] = response.get("data").get("checkCode")
 | ||
| 
 | ||
|                                 parse["发票金额"] = response.get("data").get("amount")
 | ||
| 
 | ||
|                                 parse["购买方"] = response.get("data").get("payer")
 | ||
| 
 | ||
|                                 parse["销售方"] = response.get("data").get("payeeName")
 | ||
| 
 | ||
|                                 # 发票状态
 | ||
|                                 match response.get("data").get("flushedRed"):
 | ||
|                                     case "true":
 | ||
|                                         parse["发票状态"] = "正常"
 | ||
|                                     case "false":
 | ||
|                                         parse["发票状态"] = "已红冲"
 | ||
| 
 | ||
|                                 # 最大销售项目数量
 | ||
|                                 max_item_quantity = 0
 | ||
| 
 | ||
|                                 # 遍历销售项目列表
 | ||
|                                 for item in response.get("data").get("feeitems", []):
 | ||
|                                     # 销售项目数量
 | ||
|                                     item_quantity = item.get("number")
 | ||
| 
 | ||
|                                     # 若销售项目数量非空,进一步判断是否包含斜杠,若包含斜杠则分割并取第一部分,最后转为浮点
 | ||
|                                     if item_quantity:
 | ||
|                                         if "/" in item_quantity:
 | ||
|                                             item_quantity = item_quantity.split("/")[0]
 | ||
| 
 | ||
|                                         item_quantity = float(item_quantity)
 | ||
|                                     else:
 | ||
|                                         item_quantity = 1
 | ||
| 
 | ||
|                                     if item_quantity > max_item_quantity:
 | ||
|                                         parse["最大销售项目名称"] = item.get(
 | ||
|                                             "itemName", ""
 | ||
|                                         )
 | ||
| 
 | ||
|                                         parse["最大销售项目数量"] = str(item_quantity)
 | ||
| 
 | ||
|                                 parse["XML版式文件"] = response.get("PDFInfo", {}).get(
 | ||
|                                     "fileUrl"
 | ||
|                                 )
 | ||
| 
 | ||
|                             # 解析增值税发票查验结果
 | ||
|                             case "003082":
 | ||
| 
 | ||
|                                 parse["机打发票号码"] = (
 | ||
|                                     response.get("data").get("details").get("number")
 | ||
|                                 )
 | ||
| 
 | ||
|                                 parse["校验码"] = (
 | ||
|                                     response.get("data")
 | ||
|                                     .get("details")
 | ||
|                                     .get("check_code")
 | ||
|                                 )
 | ||
| 
 | ||
|                                 parse["发票金额"] = (
 | ||
|                                     response.get("data").get("details").get("total")
 | ||
|                                 )
 | ||
| 
 | ||
|                                 parse["购买方"] = (
 | ||
|                                     response.get("data").get("details").get("buyer")
 | ||
|                                 )
 | ||
| 
 | ||
|                                 parse["销售方"] = (
 | ||
|                                     response.get("data").get("details").get("seller")
 | ||
|                                 )
 | ||
| 
 | ||
|                                 # 发票状态
 | ||
|                                 match response.get("data").get("details").get(
 | ||
|                                     "invoiceTypeNo"
 | ||
|                                 ):
 | ||
|                                     case "0":
 | ||
|                                         parse["发票状态"] = "正常"
 | ||
|                                     case "1":
 | ||
|                                         parse["发票状态"] = "无法查验"
 | ||
|                                     case "2" | "3" | "7" | "8":
 | ||
|                                         parse["发票状态"] = "已红冲"
 | ||
| 
 | ||
|                                 max_item_quantity = 0
 | ||
| 
 | ||
|                                 items = (
 | ||
|                                     response.get("data").get("details").get("items", [])
 | ||
|                                 )
 | ||
| 
 | ||
|                                 for item in items:
 | ||
|                                     item_quantity = (
 | ||
|                                         float(item.get("quantity", 1))
 | ||
|                                         if item.get("quantity")
 | ||
|                                         else 1
 | ||
|                                     )
 | ||
| 
 | ||
|                                     if item_quantity > max_item_quantity:
 | ||
|                                         parse["最大销售项目名称"] = item.get("name")
 | ||
| 
 | ||
|                                         parse["最大销售项目数量"] = str(item_quantity)
 | ||
| 
 | ||
|                                 # XML版式文件(25-06-11本接口不在提供版式文件,通过另一接口可获取数电增值税发票版式文件)
 | ||
|                                 parse["XML版式文件"] = "本接口不再提供版式文件"
 | ||
| 
 | ||
|                     # 若响应状态码为400且错误码为10001或10100,则定义为假票
 | ||
|                     elif status_code == 400 and (code == 10001 or code == 10100):
 | ||
|                         parse["发票状态"] = "假票"
 | ||
| 
 | ||
|                     else:
 | ||
|                         raise Exception("解析报文发生其它异常")
 | ||
| 
 | ||
|                 except Exception as exception:
 | ||
| 
 | ||
|                     parse["发票状态"] = "{}".format(response.get("message"))
 | ||
| 
 | ||
|             case "bjfd":
 | ||
|                 try:
 | ||
| 
 | ||
|                     # 不验签,业务出参BASE64解码并反序列化
 | ||
|                     response = json.loads(
 | ||
|                         b64decode(response.get("data")).decode("utf-8")
 | ||
|                     )
 | ||
| 
 | ||
|                     # 增值税发票、医疗票据查验结果BASE64解码并反序列化
 | ||
|                     response["message"] = json.loads(
 | ||
|                         b64decode(response.get("message")).decode("utf-8")
 | ||
|                     )
 | ||
| 
 | ||
|                     # 错误码
 | ||
|                     code = response.get("result")
 | ||
| 
 | ||
|                     # 流水号
 | ||
|                     serial = response.get("message").get("checkId")
 | ||
| 
 | ||
|                     # 核验结果代码
 | ||
|                     result_code = response.get("message").get("resultCode")
 | ||
| 
 | ||
|                     # 若错误码为S0000则定义为响应成功
 | ||
|                     if code == "S0000":
 | ||
|                         # noinspection PyUnreachableCode
 | ||
|                         match result_code:
 | ||
|                             # 若查验成功则根据增值税发票、医疗票据状态匹配发票状态
 | ||
|                             case "200":
 | ||
| 
 | ||
|                                 parse["机打发票号码"] = (
 | ||
|                                     response.get("message")
 | ||
|                                     .get("tickMainInfo")
 | ||
|                                     .get("invoiceNo")
 | ||
|                                 )
 | ||
| 
 | ||
|                                 parse["发票金额"] = (
 | ||
|                                     response.get("message")
 | ||
|                                     .get("tickMainInfo")
 | ||
|                                     .get("invoiceTotalPrice")
 | ||
|                                 )
 | ||
| 
 | ||
|                                 parse["购买方"] = (
 | ||
|                                     response.get("message")
 | ||
|                                     .get("tickMainInfo")
 | ||
|                                     .get("payerPartyName")
 | ||
|                                 )
 | ||
| 
 | ||
|                                 parse["销售方"] = (
 | ||
|                                     response.get("message")
 | ||
|                                     .get("tickMainInfo")
 | ||
|                                     .get("invoicingPartyName")
 | ||
|                                 )
 | ||
| 
 | ||
|                                 max_item_quantity = 0
 | ||
| 
 | ||
|                                 for item in (
 | ||
|                                     response.get("message")
 | ||
|                                     .get("tickMainInfo")
 | ||
|                                     .get("chargeItems", [])
 | ||
|                                 ):
 | ||
|                                     item_quantity = (
 | ||
|                                         float(item.get("num", 1))
 | ||
|                                         if item.get("num")
 | ||
|                                         else 1
 | ||
|                                     )
 | ||
| 
 | ||
|                                     if item_quantity > max_item_quantity:
 | ||
|                                         parse["最大销售项目名称"] = item.get(
 | ||
|                                             "chargeName"
 | ||
|                                         )
 | ||
|                                         parse["最大销售项目数量"] = str(item_quantity)
 | ||
| 
 | ||
|                                 match response.get("message").get("invoiceStatus"):
 | ||
|                                     case "0":
 | ||
|                                         parse["发票状态"] = "正常"
 | ||
|                                     case (
 | ||
|                                         "1" | "2"
 | ||
|                                     ):  # 沿用深圳快瞳解析规则,北京分单已开红票和已作废映射为已红冲
 | ||
|                                         parse["发票状态"] = "已红冲"
 | ||
| 
 | ||
|                             case "E20003" | "E20007	":
 | ||
|                                 parse["发票状态"] = "假票"
 | ||
| 
 | ||
|                             # 其它情况发票状态根据核验结果描述
 | ||
|                             case _:
 | ||
|                                 parse["发票状态"] = response.get("message").get(
 | ||
|                                     "resultMsg"
 | ||
|                                 )
 | ||
| 
 | ||
|                 except Exception as exception:
 | ||
|                     parse["发票状态"] = str(exception)
 | ||
| 
 | ||
|         dataframe_parsed.append(parse)
 | ||
| 
 | ||
|     dataframe_parsed = pandas.DataFrame(data=dataframe_parsed, dtype=str)
 | ||
| 
 | ||
|     # 将解析数据集拼接至数据集
 | ||
|     dataframe = dataframe.merge(right=dataframe_parsed, how="left", on=["索引"])
 | ||
| 
 | ||
|     # 填补缺失值
 | ||
|     dataframe = dataframe.fillna(value="")
 | ||
| 
 | ||
|     print("已完成")
 | ||
| 
 | ||
|     print("正在保存为工作簿...", end="")
 | ||
| 
 | ||
|     save_as_workbook(worksheets=[("Sheet1", dataframe)], workbook_name="results.xlsx")
 | ||
| 
 | ||
|     print("已完成")
 |