788 lines
31 KiB
Python
788 lines
31 KiB
Python
# -*- coding: utf-8 -*-
|
||
|
||
"""
|
||
普康健康_发票查验
|
||
"""
|
||
|
||
# 加载模块
|
||
|
||
import hashlib
|
||
import json
|
||
import shutil
|
||
import uuid
|
||
from base64 import b64decode, b64encode
|
||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||
from pathlib import Path
|
||
|
||
import cv2
|
||
import numpy
|
||
import pandas
|
||
|
||
from utils.client import Authenticator, HTTPClient, RequestException, restrict
|
||
from utils.pandas_extension import open_csv, save_as_workbook, traverse_directory
|
||
|
||
|
||
# 影像件压缩
|
||
def image_compression(
|
||
image_path: str | None = None,
|
||
image_format: str | None = None,
|
||
image_data: bytes | None = None, # 数据类型为包含图像文件的二进制数据的字节串
|
||
image_size_specified: int = 2, # 指定影像件大小
|
||
raw: bool = False, # 250804新增返回是否为完整URI数据格式
|
||
) -> str | None:
|
||
|
||
try:
|
||
|
||
# 若影像件路径数据类型为STR则创建路径对象
|
||
if isinstance(image_path, str):
|
||
image_path = Path(image_path)
|
||
# 影像件文件名称后缀
|
||
image_format = image_path.suffix.strip().lstrip(".").lower()
|
||
|
||
# 读取影像件数据
|
||
with open(image_path, "rb") as image:
|
||
image_data = image.read()
|
||
|
||
# 影像件数据BASE64编码
|
||
image_data_base64 = b64encode(image_data).decode("utf-8")
|
||
|
||
# 指定影像件大小的单位由MB转为KB
|
||
image_size_specified = image_size_specified * 1024 * 1024
|
||
|
||
# 若影像件大小小于指定影像件大小则返回BASE64编码后影像件数据
|
||
if len(image_data_base64) < image_size_specified:
|
||
if raw:
|
||
# 返回非完整URI数据格式
|
||
return image_data_base64
|
||
else:
|
||
# 返回完整URI数据格式
|
||
return f"data:image/{image_format};base64,{image_data_base64}"
|
||
|
||
# OPENCV解码(数据类型为NUMPY-UINT8)
|
||
image_data_cv2 = cv2.imdecode(
|
||
numpy.frombuffer(image_data, numpy.uint8), cv2.IMREAD_COLOR
|
||
)
|
||
|
||
# 若OPENCV解码失败则抛出异常
|
||
if image_data_cv2 is None:
|
||
raise RuntimeError(f"OPENCV解码发生异常")
|
||
|
||
# 初始化近似BASE64编码后影像件数据
|
||
proximate_image_data_base64 = None
|
||
|
||
# 初始化最小压缩前后影像件大小差值
|
||
min_image_size_difference = float("inf")
|
||
|
||
# 基于双层压缩方法:先外层降低图像质量,再内层缩小图像尺寸
|
||
for quality in range(90, 0, -10):
|
||
|
||
image_data_cv2_ = image_data_cv2.copy()
|
||
|
||
# 根据影像件格式匹配图片质量配置
|
||
# noinspection PyUnreachableCode
|
||
match image_format:
|
||
case "png":
|
||
encoding_params = [cv2.IMWRITE_PNG_COMPRESSION, 10 - quality // 10]
|
||
case _:
|
||
encoding_params = [cv2.IMWRITE_JPEG_QUALITY, quality]
|
||
|
||
for i in range(25):
|
||
# 降低图像质量
|
||
# noinspection PyTypeChecker
|
||
success, image_data_encoded = cv2.imencode(
|
||
image_format, image_data_cv2_, encoding_params
|
||
) # 图像编码
|
||
# 若图像编码失败则退出
|
||
if not success:
|
||
break
|
||
image_data_base64 = b64encode(image_data_encoded.tobytes()).decode(
|
||
"utf-8"
|
||
)
|
||
|
||
# 压缩前后影像件大小差值
|
||
image_size_difference = len(image_data_base64) - image_size_specified
|
||
|
||
if image_size_difference <= 0:
|
||
if raw:
|
||
return image_data_base64
|
||
else:
|
||
return f"data:image/{image_format};base64,{image_data_base64}"
|
||
|
||
if image_size_difference < min_image_size_difference:
|
||
min_image_size_difference = image_size_difference
|
||
proximate_image_data_base64 = image_data_base64
|
||
|
||
# 影像件高度和宽度
|
||
image_height, image_weight = image_data_cv2_.shape[:2]
|
||
|
||
# 若仍超过影像件指定大小则调整图像尺寸
|
||
image_data_cv2_ = cv2.resize(
|
||
image_data_cv2_,
|
||
dsize=(int(image_weight * 0.9), int(image_height * 0.9)),
|
||
interpolation=cv2.INTER_AREA,
|
||
)
|
||
|
||
if proximate_image_data_base64:
|
||
if raw:
|
||
return proximate_image_data_base64
|
||
else:
|
||
return f"data:image/{image_format};base64,{image_data_base64}"
|
||
else:
|
||
raise RuntimeError("影像件压缩失败")
|
||
|
||
except:
|
||
return None
|
||
|
||
|
||
# 票据查验接口(需要)
|
||
@restrict(refill_rate=5, max_tokens=5) # 限速至5QPS
|
||
def invoices_verification(
|
||
image_index,
|
||
image_path=None,
|
||
invoice_number=None,
|
||
invoice_code=None,
|
||
invoice_check_code=None,
|
||
invoice_date=None,
|
||
invoice_amount=None,
|
||
id_number=None,
|
||
process_mode=None,
|
||
supplier=None,
|
||
):
|
||
|
||
try:
|
||
|
||
# 若影像件地址非空则imgBASE64请求,否则根据发票五要素请求
|
||
if image_path:
|
||
|
||
match process_mode:
|
||
case "通过影像件本地地址":
|
||
|
||
# 创建路径对象
|
||
image_path = Path(image_path)
|
||
|
||
# 影像件文件名称后缀
|
||
image_format = image_path.suffix.strip().lstrip(".").lower()
|
||
|
||
# 读取影像件数据
|
||
with open(image_path, "rb") as image:
|
||
image_data = image.read()
|
||
|
||
case "通过影像件对象服务器地址":
|
||
image_format, image_data = http_client.download(url=image_path)
|
||
|
||
# 断定影像件格式为JGP、JPEG或者PNG
|
||
# noinspection PyUnboundLocalVariable
|
||
assert image_format in [
|
||
"jpg",
|
||
"jpeg",
|
||
"png",
|
||
], f"影像件格式({image_format})不支持"
|
||
|
||
match supplier:
|
||
case "szkt":
|
||
|
||
image_data_base64 = image_compression(
|
||
image_format=image_format, image_data=image_data
|
||
)
|
||
|
||
# noinspection PyUnusedLocal
|
||
response = http_client.post(
|
||
# 深圳快瞳增值税发票、医疗发票查验兼容版
|
||
url="https://ai.inspirvision.cn/s/api/ocr/invoiceCheckAll",
|
||
# 用于和深圳快瞳联查时定位请求
|
||
headers={"X-RequestId-Header": image_index},
|
||
data={
|
||
"token": authenticator.get_token(servicer="szkt"),
|
||
"imgBase64": image_data_base64,
|
||
},
|
||
)
|
||
case "bjfd":
|
||
|
||
image_data_base64 = image_compression(
|
||
image_format=image_format, image_data=image_data, raw=True
|
||
) # 北京分单不支持完整URI数据格式
|
||
|
||
# 业务入参,序列化并BASE64编码
|
||
data = b64encode(
|
||
json.dumps(
|
||
{
|
||
"fileByte": image_data_base64,
|
||
"fileType": (
|
||
"png" if image_format == "png" else "jpg"
|
||
), # 北京分单影像件格式支持JPG、PNG或PDF(本脚本暂不支持PDF)
|
||
}
|
||
).encode("utf-8")
|
||
).decode("utf-8")
|
||
|
||
# 应用账号
|
||
appid = "mbYr11Rc_42"
|
||
|
||
# 随机标识
|
||
noise = image_index
|
||
|
||
# 版本号
|
||
version = "1.0"
|
||
|
||
# 装配签名
|
||
sign = (
|
||
hashlib.md5(
|
||
f"appid={appid}&data={data}&noise={noise}&key=80357535c95333c3b133dfe5533f6334fe5e9321&version={version}".encode(
|
||
"utf-8"
|
||
)
|
||
)
|
||
.hexdigest()
|
||
.upper()
|
||
)
|
||
|
||
# noinspection PyUnusedLocal
|
||
response = http_client.post(
|
||
# 北京分单增值税发票、医疗票据二维码查验接口
|
||
url="https://api.fendanyun.com/rsx/api/checkByQRCode",
|
||
headers={"Content-Type": "application/json; charset=utf-8"},
|
||
json={
|
||
"appid": appid,
|
||
"data": data,
|
||
"noise": noise,
|
||
"version": version,
|
||
"sign": sign,
|
||
},
|
||
)
|
||
|
||
else:
|
||
|
||
response = http_client.post(
|
||
# 深圳快瞳增值税发票、医疗发票查验兼容版
|
||
url="https://ai.inspirvision.cn/s/api/ocr/invoiceCheckAll",
|
||
data={
|
||
"token": authenticator.get_token(servicer="szkt"),
|
||
"invoiceNumber": invoice_number,
|
||
"invoiceCode": invoice_code,
|
||
"checkCode": invoice_check_code,
|
||
"invoicingDate": invoice_date,
|
||
"pretaxAmount": invoice_amount,
|
||
"idCardNo": id_number,
|
||
},
|
||
)
|
||
|
||
except RequestException as request_exception:
|
||
response = {
|
||
"status": request_exception.status,
|
||
"code": request_exception.code,
|
||
"message": request_exception.message,
|
||
}
|
||
|
||
except Exception as exception:
|
||
response = {
|
||
"code": "40000",
|
||
"message": f"发生其它异常{exception}",
|
||
}
|
||
|
||
return image_index, response
|
||
|
||
|
||
if __name__ == "__main__":
|
||
|
||
print("已启动批量票据查验")
|
||
|
||
match input("请选择票据查验供应商(1:深圳快瞳,2:北京分单,其它任意字符:退出脚本):"):
|
||
case "1":
|
||
supplier = "szkt"
|
||
case "2":
|
||
supplier = "bjfd"
|
||
case _:
|
||
print("选择退出脚本!")
|
||
exit(0)
|
||
|
||
match input(
|
||
"请选择处理流程(1:批量解析已归档响应报文,2:根据影像件地址或票据五要素批量查验,其它任意字符:退出脚本):"
|
||
):
|
||
case "1":
|
||
# 打开前置影像件索引CSV文件
|
||
dataframe = open_csv(file_name="dataframe_indexed.csv")
|
||
case "2":
|
||
print("正在归档响应报文...", end="")
|
||
|
||
# 创建响应报文目录路径对象
|
||
responses_path = Path("temporary/responses")
|
||
|
||
# 若响应报文目录路径不存在则创建
|
||
if not responses_path.exists():
|
||
responses_path.mkdir(parents=True, exist_ok=True)
|
||
|
||
# 创建归档响应报文目录路径对象
|
||
archives_path = Path("temporary/archives")
|
||
|
||
# 若归档响应报文目录路径不存在则创建
|
||
if not archives_path.exists():
|
||
archives_path.mkdir(parents=True, exist_ok=True)
|
||
|
||
# 遍历响应报文目录下所有文件名后缀为JSON的文件路径
|
||
for file_path in Path(responses_path).glob("*.json"):
|
||
# 若文件路径为文件
|
||
if file_path.is_file():
|
||
# 移动响应报文由响应报文目录至归档响应报文目录
|
||
shutil.move(str(file_path), str(archives_path / file_path.name))
|
||
|
||
print("已完成")
|
||
|
||
match input(
|
||
"请选择批量查验方法(1:通过影像件本地地址,2:通过影像件对象服务器地址,3:通过增值税发票和医疗票据的五要素,其它任意字符:退出脚本):"
|
||
):
|
||
case "1":
|
||
print("正在读取影像件本地地址...", end="")
|
||
|
||
dataframe = traverse_directory(
|
||
directory_path="待查验发票", suffixes=[".jpg", ".jpeg", ".png"]
|
||
)
|
||
|
||
# 修改列名相对路径为影像件地址
|
||
dataframe.rename(columns={"相对路径": "影像件地址"}, inplace=True)
|
||
|
||
process_mode = "通过影像件本地地址"
|
||
|
||
case "2":
|
||
print("正在读取影像件对象服务器地址...", end="")
|
||
|
||
dataframe = open_csv(file_name="dataframe.csv")
|
||
|
||
# 断定列名包括赔案编号、发票编号和影像件地址
|
||
assert all(
|
||
[
|
||
column_name in dataframe.columns
|
||
for column_name in ["赔案编号", "发票编号", "影像件地址"]
|
||
]
|
||
), "CSV文件中列名必须包括赔案编号、发票编号和影像件地址"
|
||
|
||
# 根据赔案编号和发票编号去重
|
||
dataframe.drop_duplicates(
|
||
subset=["赔案编号", "发票编号"], keep="first", inplace=True
|
||
)
|
||
|
||
# 处理方式
|
||
process_mode = "通过影像件对象服务器地址"
|
||
|
||
case "3":
|
||
print("正在读取增值税发票和医疗票据的五要素...", end="")
|
||
|
||
dataframe = open_csv(file_name="dataframe.csv")
|
||
|
||
# 断定列名包括身份证号码后六位、发票编号、发票代码、校验号码后六位、开票日期和发票金额
|
||
assert all(
|
||
[
|
||
column_name in dataframe.columns
|
||
for column_name in [
|
||
"身份证号码后六位",
|
||
"发票编号",
|
||
"发票代码",
|
||
"校验号码后六位",
|
||
"开票日期",
|
||
"发票金额",
|
||
]
|
||
]
|
||
), "CSV文件中列名必须包括身份证号码后六位、发票编号、发票代码、校验号码后六位、开票日期和发票金额"
|
||
|
||
# 根据身份证号码后六位、发票编号、发票代码、校验号码后六位、开票日期和发票金额去重
|
||
dataframe.drop_duplicates(
|
||
subset=[
|
||
"身份证号码后六位",
|
||
"发票编号",
|
||
"发票代码",
|
||
"校验号码后六位",
|
||
"开票日期",
|
||
"发票金额",
|
||
],
|
||
keep="first",
|
||
inplace=True,
|
||
)
|
||
|
||
# 格式化开票日期
|
||
dataframe["开票日期"] = dataframe["开票日期"].str.replace(
|
||
"-", "", regex=False
|
||
)
|
||
|
||
# 处理方式
|
||
process_mode = "通过增值税发票和医疗票据的五要素"
|
||
|
||
case _:
|
||
print("选择退出脚本!")
|
||
exit(0)
|
||
|
||
# 统计待查验发票张数
|
||
rows = dataframe.shape[0]
|
||
|
||
# 若待查验发票张数为0则退出脚本
|
||
if rows == 0:
|
||
print("待查验发票张数为0,退出脚本")
|
||
exit(0)
|
||
|
||
print(f"已完成,待查验发票张数为 {rows}")
|
||
|
||
# 添加索引
|
||
dataframe["索引"] = dataframe.apply(
|
||
lambda x: uuid.uuid4().hex, axis="columns"
|
||
)
|
||
|
||
dataframe.to_csv("dataframe_indexed.csv", index=False)
|
||
|
||
# 创建深圳快瞳获取访问令牌方法
|
||
authenticator = Authenticator()
|
||
|
||
# 初始化请求客户端
|
||
http_client = HTTPClient()
|
||
|
||
# 用于记录已完成任务数
|
||
completed_futures = 0
|
||
|
||
# 创建线程池
|
||
with ThreadPoolExecutor(max_workers=5) as executor:
|
||
|
||
# noinspection PyUnreachableCode
|
||
# noinspection PyUnboundLocalVariable
|
||
match process_mode:
|
||
|
||
case "通过影像件本地地址" | "通过影像件对象服务器地址":
|
||
|
||
futures = [
|
||
executor.submit(
|
||
invoices_verification,
|
||
image_index=row.索引,
|
||
image_path=row.影像件地址,
|
||
process_mode=process_mode,
|
||
supplier=supplier,
|
||
)
|
||
for row in dataframe[["索引", "影像件地址"]].itertuples(
|
||
index=False, name="row"
|
||
)
|
||
]
|
||
|
||
case "通过增值税发票和医疗票据的五要素":
|
||
|
||
# 提交任务
|
||
futures = [
|
||
executor.submit(
|
||
invoices_verification,
|
||
image_index=row.索引,
|
||
invoice_number=row.发票编号,
|
||
invoice_code=row.发票代码,
|
||
invoice_check_code=row.校验号码后六位,
|
||
invoice_date=row.开票日期,
|
||
invoice_amount=row.发票金额,
|
||
id_number=row.身份证号码后六位,
|
||
process_mode=process_mode,
|
||
supplier=supplier,
|
||
)
|
||
for row in dataframe[
|
||
[
|
||
"索引",
|
||
"发票编号",
|
||
"发票代码",
|
||
"校验号码后六位",
|
||
"开票日期",
|
||
"发票金额",
|
||
"身份证号码后六位",
|
||
]
|
||
].itertuples(index=False, name="row")
|
||
]
|
||
|
||
for future in as_completed(futures):
|
||
index, response = future.result()
|
||
|
||
# 保存报文
|
||
with open(
|
||
"temporary/responses/{}.json".format(index),
|
||
"w",
|
||
encoding="utf-8",
|
||
) as file:
|
||
json.dump(response, file, ensure_ascii=False)
|
||
|
||
completed_futures += 1
|
||
|
||
print(f"已完成 {completed_futures / rows * 100:.2f} %")
|
||
|
||
case _:
|
||
print("选择退出脚本!")
|
||
exit(0)
|
||
|
||
print("正在解析报文...", end="")
|
||
|
||
# 解析后数据体
|
||
dataframe_parsed = []
|
||
|
||
# 遍历报文所在目录
|
||
for path_object in list(Path("temporary/responses").glob("*.json")):
|
||
|
||
# 解析报文结构
|
||
parse = {
|
||
"索引": "",
|
||
"机打发票号码": "",
|
||
"发票金额": "",
|
||
"购买方": "",
|
||
"销售方": "",
|
||
"发票状态": "",
|
||
"最大销售项目名称": "",
|
||
"最大销售项目数量": "",
|
||
"XML版式文件": "",
|
||
}
|
||
|
||
# 若路径对象包含下划线则在解析报文结构添加赔案编号和发票编号
|
||
if "_" in path_object.stem:
|
||
|
||
parse["赔案编号"] = path_object.stem.split("_")[0]
|
||
|
||
parse["发票编号"] = path_object.stem.split("_")[1]
|
||
|
||
# 打开报文并JSON逆序列化
|
||
with open(path_object, "r", encoding="utf-8") as file:
|
||
response = json.load(file)
|
||
|
||
# 索引
|
||
parse["索引"] = path_object.stem
|
||
|
||
match supplier:
|
||
case "szkt":
|
||
try:
|
||
|
||
# 响应状态码
|
||
status_code = response.get("status", "")
|
||
|
||
# 错误码
|
||
code = response.get("code", "")
|
||
|
||
# 流水号
|
||
serial = response.get("serialNo", "")
|
||
|
||
# 若响应状态码为200且错误码为10000,则定义为响应成功
|
||
if status_code == 200 and code == 10000:
|
||
|
||
# 查验类型,若查验类型为003081则为医疗票据查验,003082则为增值税发票查验,两者报文结构不一致
|
||
match response.get("data").get(
|
||
"productCode"
|
||
): # 若响应成功则必定存在键DATA和PRODUCTCODE
|
||
# 解析医疗票据查验结果
|
||
case "003081":
|
||
|
||
parse["机打发票号码"] = response.get("data").get(
|
||
"billNumber"
|
||
)
|
||
|
||
parse["校验码"] = response.get("data").get("checkCode")
|
||
|
||
parse["发票金额"] = response.get("data").get("amount")
|
||
|
||
parse["购买方"] = response.get("data").get("payer")
|
||
|
||
parse["销售方"] = response.get("data").get("payeeName")
|
||
|
||
# 发票状态
|
||
match response.get("data").get("flushedRed"):
|
||
case "true":
|
||
parse["发票状态"] = "正常"
|
||
case "false":
|
||
parse["发票状态"] = "已红冲"
|
||
|
||
# 最大销售项目数量
|
||
max_item_quantity = 0
|
||
|
||
# 遍历销售项目列表
|
||
for item in response.get("data").get("feeitems", []):
|
||
# 销售项目数量
|
||
item_quantity = item.get("number")
|
||
|
||
# 若销售项目数量非空,进一步判断是否包含斜杠,若包含斜杠则分割并取第一部分,最后转为浮点
|
||
if item_quantity:
|
||
if "/" in item_quantity:
|
||
item_quantity = item_quantity.split("/")[0]
|
||
|
||
item_quantity = float(item_quantity)
|
||
else:
|
||
item_quantity = 1
|
||
|
||
if item_quantity > max_item_quantity:
|
||
parse["最大销售项目名称"] = item.get(
|
||
"itemName", ""
|
||
)
|
||
|
||
parse["最大销售项目数量"] = str(item_quantity)
|
||
|
||
parse["XML版式文件"] = response.get("PDFInfo", {}).get(
|
||
"fileUrl"
|
||
)
|
||
|
||
# 解析增值税发票查验结果
|
||
case "003082":
|
||
|
||
parse["机打发票号码"] = (
|
||
response.get("data").get("details").get("number")
|
||
)
|
||
|
||
parse["校验码"] = (
|
||
response.get("data")
|
||
.get("details")
|
||
.get("check_code")
|
||
)
|
||
|
||
parse["发票金额"] = (
|
||
response.get("data").get("details").get("total")
|
||
)
|
||
|
||
parse["购买方"] = (
|
||
response.get("data").get("details").get("buyer")
|
||
)
|
||
|
||
parse["销售方"] = (
|
||
response.get("data").get("details").get("seller")
|
||
)
|
||
|
||
# 发票状态
|
||
match response.get("data").get("details").get(
|
||
"invoiceTypeNo"
|
||
):
|
||
case "0":
|
||
parse["发票状态"] = "正常"
|
||
case "1":
|
||
parse["发票状态"] = "无法查验"
|
||
case "2" | "3" | "7" | "8":
|
||
parse["发票状态"] = "已红冲"
|
||
|
||
max_item_quantity = 0
|
||
|
||
items = (
|
||
response.get("data").get("details").get("items", [])
|
||
)
|
||
|
||
for item in items:
|
||
item_quantity = (
|
||
float(item.get("quantity", 1))
|
||
if item.get("quantity")
|
||
else 1
|
||
)
|
||
|
||
if item_quantity > max_item_quantity:
|
||
parse["最大销售项目名称"] = item.get("name")
|
||
|
||
parse["最大销售项目数量"] = str(item_quantity)
|
||
|
||
# XML版式文件(25-06-11本接口不在提供版式文件,通过另一接口可获取数电增值税发票版式文件)
|
||
parse["XML版式文件"] = "本接口不再提供版式文件"
|
||
|
||
# 若响应状态码为400且错误码为10001或10100,则定义为假票
|
||
elif status_code == 400 and (code == 10001 or code == 10100):
|
||
parse["发票状态"] = "假票"
|
||
|
||
else:
|
||
raise Exception("解析报文发生其它异常")
|
||
|
||
except Exception as exception:
|
||
|
||
parse["发票状态"] = "{}".format(response.get("message"))
|
||
|
||
case "bjfd":
|
||
try:
|
||
|
||
# 不验签,业务出参BASE64解码并反序列化
|
||
response = json.loads(
|
||
b64decode(response.get("data")).decode("utf-8")
|
||
)
|
||
|
||
# 增值税发票、医疗票据查验结果BASE64解码并反序列化
|
||
response["message"] = json.loads(
|
||
b64decode(response.get("message")).decode("utf-8")
|
||
)
|
||
|
||
# 错误码
|
||
code = response.get("result")
|
||
|
||
# 流水号
|
||
serial = response.get("message").get("checkId")
|
||
|
||
# 核验结果代码
|
||
result_code = response.get("message").get("resultCode")
|
||
|
||
# 若错误码为S0000则定义为响应成功
|
||
if code == "S0000":
|
||
# noinspection PyUnreachableCode
|
||
match result_code:
|
||
# 若查验成功则根据增值税发票、医疗票据状态匹配发票状态
|
||
case "200":
|
||
|
||
parse["机打发票号码"] = (
|
||
response.get("message")
|
||
.get("tickMainInfo")
|
||
.get("invoiceNo")
|
||
)
|
||
|
||
parse["发票金额"] = (
|
||
response.get("message")
|
||
.get("tickMainInfo")
|
||
.get("invoiceTotalPrice")
|
||
)
|
||
|
||
parse["购买方"] = (
|
||
response.get("message")
|
||
.get("tickMainInfo")
|
||
.get("payerPartyName")
|
||
)
|
||
|
||
parse["销售方"] = (
|
||
response.get("message")
|
||
.get("tickMainInfo")
|
||
.get("invoicingPartyName")
|
||
)
|
||
|
||
max_item_quantity = 0
|
||
|
||
for item in (
|
||
response.get("message")
|
||
.get("tickMainInfo")
|
||
.get("chargeItems", [])
|
||
):
|
||
item_quantity = (
|
||
float(item.get("num", 1))
|
||
if item.get("num")
|
||
else 1
|
||
)
|
||
|
||
if item_quantity > max_item_quantity:
|
||
parse["最大销售项目名称"] = item.get(
|
||
"chargeName"
|
||
)
|
||
parse["最大销售项目数量"] = str(item_quantity)
|
||
|
||
match response.get("message").get("invoiceStatus"):
|
||
case "0":
|
||
parse["发票状态"] = "正常"
|
||
case (
|
||
"1" | "2"
|
||
): # 沿用深圳快瞳解析规则,北京分单已开红票和已作废映射为已红冲
|
||
parse["发票状态"] = "已红冲"
|
||
|
||
case "E20003" | "E20007 ":
|
||
parse["发票状态"] = "假票"
|
||
|
||
# 其它情况发票状态根据核验结果描述
|
||
case _:
|
||
parse["发票状态"] = response.get("message").get(
|
||
"resultMsg"
|
||
)
|
||
|
||
except Exception as exception:
|
||
parse["发票状态"] = str(exception)
|
||
|
||
dataframe_parsed.append(parse)
|
||
|
||
dataframe_parsed = pandas.DataFrame(data=dataframe_parsed, dtype=str)
|
||
|
||
# 将解析数据集拼接至数据集
|
||
dataframe = dataframe.merge(right=dataframe_parsed, how="left", on=["索引"])
|
||
|
||
# 填补缺失值
|
||
dataframe = dataframe.fillna(value="")
|
||
|
||
print("已完成")
|
||
|
||
print("正在保存为工作簿...", end="")
|
||
|
||
save_as_workbook(worksheets=[("Sheet1", dataframe)], workbook_name="results.xlsx")
|
||
|
||
print("已完成")
|