日常更新

from NUC
This commit is contained in:
liubiren 2025-12-23 19:47:39 +08:00
parent 15a703cfe9
commit b1cf62bf8f
5 changed files with 457 additions and 78 deletions

Binary file not shown.

View File

@ -120,3 +120,55 @@ def general_text_recognize(image) -> str:
CREATE VIRTUAL TABLE IF NOT EXISTS jiojio_tokenizer USING fts5tokenizer(jiojio_fts5_module)
"""
)
{
"code": 0,
"status": 200,
"message": "success",
"serialNo": "3a08935648632621760512",
"data": [
{"desc": "金额", "value": "175.22"},
{
"desc": "项目名称",
"value": "*化学药品制剂*[海露]玻璃酸钠滴眼液0.1%*10ml支/盒",
},
{"desc": "数量", "value": "2"},
{"desc": "规格型号", "value": ""},
{"desc": "税额", "value": "22.78"},
{"desc": "税率", "value": "13%"},
{"desc": "单位", "value": ""},
{"desc": "单价", "value": "87.61"},
{"desc": "金额1", "value": "-69.42"},
{
"desc": "项目名称1",
"value": "*化学药品制剂*[海露]玻璃酸钠滴眼液0.1%*10ml/支/盒",
},
{"desc": "数量1", "value": ""},
{"desc": "规格型号1", "value": ""},
{"desc": "税额1", "value": "-9.02"},
{"desc": "税率1", "value": "13%"},
{"desc": "单位1", "value": ""},
{"desc": "单价1", "value": ""},
{"desc": "发票名称", "value": "电子发票(普通发票)"},
{"desc": "全电票标签", "value": ""},
{"desc": "发票号码", "value": "25447200000045325946"},
{"desc": "开票日期", "value": "2025年01月20日"},
{"desc": "购买方名称", "value": "唐敏华"},
{"desc": "购买方识别号", "value": ""},
{"desc": "销售方名称", "value": "广州美团大药房有限公司"},
{"desc": "销售方识别号", "value": "91440100MAC1CAJH27"},
{"desc": "合计金额", "value": "¥105.80"},
{"desc": "合计税额", "value": "¥13.76"},
{"desc": "金额小计", "value": ""},
{"desc": "税额小计", "value": ""},
{"desc": "价税合计(大写)", "value": "壹佰壹拾玖圆伍角陆分"},
{"desc": "小写金额", "value": "¥119.56"},
{"desc": "备注", "value": ""},
{"desc": "开票人", "value": "张景景"},
{"desc": "发票类型", "value": "电子发票(普通发票)"},
{"desc": "监制章存在性判断", "value": "True"},
{"desc": "总页数", "value": ""},
{"desc": "当前页数", "value": ""},
],
}

Binary file not shown.

After

Width:  |  Height:  |  Size: 143 KiB

View File

@ -53,6 +53,10 @@ if __name__ == "__main__":
# 影像件识别使能
recognize_enabled = rule_engine(Path("rules/影像件识别使能.json"))
# 药店购药明细项不合理费用扣除
deduct_unreasonable_amount = rule_engine(
Path("rules/药店购药明细项不合理费用扣除.json")
)
class MasterData(SQLiteClient):
"""主数据"""
@ -784,10 +788,11 @@ if __name__ == "__main__":
boc_application_recognize(image)
# noinspection PyShadowingNames
def receipt_recognize(image) -> None:
def receipt_recognize(image, insurer_company) -> None:
"""
票据识别并整合至赔案档案
:param image: 影像件
:param insurer_company: 保险分公司
:return:
"""
@ -1082,79 +1087,163 @@ if __name__ == "__main__":
):
raise RuntimeError("请求深圳快瞳增值税发票识别接口发生异常")
# noinspection PyTypeChecker
receipt.update(
{
"票据号": query_value(response["data"], "发票号码"),
"票据代码": query_value(response["data"], "发票代码"),
"开票日期": datetime.strptime(
query_value(response["data"], "开票日期"),
"%Y年%m月%d",
),
"校验码": query_value(response["data"], "校验码"),
"开票金额": Decimal(
query_value(response["data"], "小写金额").replace(
"¥", ""
)
).quantize(
Decimal("0.00"),
rounding=ROUND_HALF_UP,
),
"姓名": query_value(response["data"], "购买方名称"),
"购药及就医机构": query_value(
response["data"], "销售方名称"
),
"明细项": [
match receipt_type := query_value(response["data"], "发票类型"):
case "电子发票(普通发票)":
# noinspection PyTypeChecker
receipt.update(
{
"名称": name,
"数量": Decimal(quantity).quantize(
"票据号": query_value(
response["data"], "发票号码"
),
"票据代码": query_value(
response["data"], "发票代码"
),
"开票日期": datetime.strptime(
query_value(response["data"], "开票日期"),
"%Y年%m月%d",
),
"校验码": query_value(
response["data"], "校验码"
),
"开票金额": Decimal(
query_value(
response["data"], "小写金额"
).replace("¥", "")
).quantize(
Decimal("0.00"),
rounding=ROUND_HALF_UP,
),
"金额": (
Decimal(amount) + Decimal(tax)
).quantize(
Decimal("0.00"), rounding=ROUND_HALF_UP
), # 深圳快瞳票据识别接口中明细的金额和税额由字符串转为Decimal保留两位小数并求和
"姓名": query_value(
response["data"], "购买方名称"
),
"购药及就医机构": query_value(
response["data"], "销售方名称"
),
"明细项": [
{
"名称": name,
"数量": Decimal(quantity).quantize(
Decimal("0.00"),
rounding=ROUND_HALF_UP,
),
"金额": (
Decimal(amount) + Decimal(tax)
).quantize(
Decimal("0.00"),
rounding=ROUND_HALF_UP,
), # 深圳快瞳票据识别接口中明细的金额和税额由字符串转为Decimal保留两位小数并求和
}
for name, quantity, amount, tax in zip(
[
x["value"]
for x in response["data"]
if re.match(
r"^项目名称(\d+)?$",
x["desc"],
)
],
[
x["value"]
for x in response["data"]
if re.match(
r"^数量(\d+)?$",
x["desc"],
)
],
[
x["value"]
for x in response["data"]
if re.match(
r"^金额(\d+)?$",
x["desc"],
)
],
[
x["value"]
for x in response["data"]
if re.match(
r"^税额(\d+)?$",
x["desc"],
)
],
)
],
"备注": query_value(response["data"], "备注"),
}
for name, quantity, amount, tax in zip(
[
x["value"]
for x in response["data"]
if re.match(
r"^项目名称(\d+)?$",
x["desc"],
)
case "增值税普通发票(卷票)":
# noinspection PyTypeChecker
receipt.update(
{
"票据号": query_value(
response["data"], "发票号码"
),
"票据代码": query_value(
response["data"], "发票代码"
),
"开票日期": datetime.strptime(
query_value(response["data"], "开票日期"),
"%Y-%m-%d",
),
"校验码": query_value(
response["data"], "校验码"
),
"开票金额": Decimal(
query_value(
response["data"], "合计金额(小写)"
).replace("¥", "")
).quantize(
Decimal("0.00"),
rounding=ROUND_HALF_UP,
),
"姓名": query_value(
response["data"], "购买方名称"
),
"购药及就医机构": query_value(
response["data"], "销售方名称"
),
"明细项": [
{
"名称": name,
"数量": Decimal(quantity).quantize(
Decimal("0.00"),
rounding=ROUND_HALF_UP,
),
"金额": Decimal(amount).quantize(
Decimal("0.00"),
rounding=ROUND_HALF_UP,
), # 深圳快瞳票据识别接口中明细的金额和税额由字符串转为Decimal保留两位小数并求和
}
for name, quantity, amount in zip(
[
x["value"]
for x in response["data"]
if re.match(
r"^项目名称明细(\d+)?$",
x["desc"],
)
],
[
x["value"]
for x in response["data"]
if re.match(
r"^项目数量明细(\d+)?$",
x["desc"],
)
],
[
x["value"]
for x in response["data"]
if re.match(
r"^项目金额明细(\d+)?$",
x["desc"],
)
],
)
],
[
x["value"]
for x in response["data"]
if re.match(
r"^数量(\d+)?$",
x["desc"],
)
],
[
x["value"]
for x in response["data"]
if re.match(
r"^金额(\d+)?$",
x["desc"],
)
],
[
x["value"]
for x in response["data"]
if re.match(
r"^税额(\d+)?$",
x["desc"],
)
],
)
],
"备注": query_value(response["data"], "备注"),
}
)
"备注": query_value(response["data"], "备注"),
}
)
case "医疗门诊收费票据" | "医疗住院收费票据":
# 请求深圳快瞳医疗收费票据识别接口
response = http_client.post(
@ -1295,15 +1384,15 @@ if __name__ == "__main__":
receipt["购药及就医机构"]
)
# 根据影像件类型和购药及就医机构类型匹配购药及就医类型,就增值税发票且药店扣除不合理费用、增值税发票且私立医院解析个人自费、个人自付和医保支付
# 根据影像件类型和购药及就医机构类型匹配购药及就医类型
match (image["影像件类型"], receipt["购药及就医机构类型"]):
# 就增值税发票且药店扣除不合理费用、增值税发票且私立医院解析个人自费、个人自付、医保支付、不合理金额和合理金额
case ("增值税发票", "药店"):
receipt["购药及就医类型"] = "药店购药"
# 就相同明细项合并其数量和金额
items = (
pandas.DataFrame(receipt["明细项"])
.groupby("名称")
.groupby("名称") # 就相同明细项名称合并数量和金额
.agg(数量=("数量", "sum"), 金额=("金额", "sum"))
.loc[
lambda dataframe: dataframe["金额"] != 0
@ -1321,20 +1410,60 @@ if __name__ == "__main__":
.rename(columns={0: "类别", 1: "药品/医疗服务"})
)
)
.assign(
合理金额=lambda dataframe: dataframe.apply(
lambda row: Decimal(
deduct_unreasonable_amount.evaluate(
{
"insurer_company": insurer_company,
"category": row["类别"],
"medicine": row["药品/医疗服务"],
"amount": format(row["金额"], ".2f"),
}
)["result"]["reasonable_amount"]
).quantize(
Decimal("0.00"),
rounding=ROUND_HALF_UP,
),
axis="columns",
)
) # 药店购药明细项不合理费用扣除
)
print(items)
print()
receipt.update(
{
"个人自费": Decimal("0.00"),
"个人自付": Decimal("0.00"),
"医保支付": Decimal("0.00"),
"不合理金额": Decimal(
receipt["开票金额"] - items["合理金额"].sum()
).quantize(
Decimal("0.00"),
rounding=ROUND_HALF_UP,
),
"合理金额": Decimal(items["合理金额"].sum()).quantize(
Decimal("0.00"),
rounding=ROUND_HALF_UP,
),
"明细项": items.to_dict("records"),
}
)
# TODO: 后续完善
case ("增值税发票", "私立医院"):
receipt["购药及就医类型"] = "门诊就医"
# TODO: 后续完善
case ("医疗门诊收费票据", "公立医院"):
receipt["购药及就医类型"] = "门诊就医"
# TODO: 后续完善
case ("医疗住院收费票据", "公立医院"):
receipt["购药及就医类型"] = "住院治疗"
# TODO: 若根据影像件类型和购药及就医机构类型匹配购药及就医类型发生异常则流转至人工处理
case _:
# TODO: 若匹配购药及就医类型发生异常则流转至人工处理
raise RuntimeError("匹配购药及就医类型发生异常")
raise RuntimeError(
"根据影像件类型和购药及就医机构类型匹配购药及就医类型发生异常"
)
dossier["票据层"].append(receipt)
# noinspection PyShadowingNames
def bank_card_recognize(image) -> None:
@ -1375,7 +1504,7 @@ if __name__ == "__main__":
}
)
# 影像件识别使能检查,若影像件不识别则跳过
# 检查影像件识别使能,若影像件不识别则跳过
if not recognize_enabled.evaluate(
{
"insurer_company": insurer_company,
@ -1402,7 +1531,7 @@ if __name__ == "__main__":
application_recognize(image, insurer_company)
case "增值税发票" | "医疗门诊收费票据" | "医疗住院收费票据":
# 票据识别并整合至赔案档案
receipt_recognize(image)
receipt_recognize(image, insurer_company)
case "银行卡":
# 银行卡识别并整合至赔案档案
bank_card_recognize(image)
@ -1425,7 +1554,7 @@ if __name__ == "__main__":
"出险人层": {},
"被保险人层": [],
"受益人层": {},
"费项": [],
"票据": [],
}
# 遍历赔案目录中影像件
@ -1493,6 +1622,11 @@ if __name__ == "__main__":
insurer_company,
)
for receipt in dossier["票据层"]:
print(receipt)
print(dossier["被保险人层"])
"""

View File

@ -0,0 +1,193 @@
# -*- coding: utf-8 -*-
"""
营销短视频生成自动化
功能清单
1打开并读取任务
2按照初始化剪映草稿分别添加视频轨音频轨字幕轨和图片轨等生成视频执行任务
"""
import asyncio
from pathlib import Path
import edge_tts
async def audio_gen(
    text: str, output_path: Path, voice_name: str = "zh-CN-XiaoxiaoNeural"
):
    """
    Synthesise ``text`` to speech with edge-tts and save it to ``output_path``.

    :param text: text to be spoken
    :param output_path: destination of the generated audio file
    :param voice_name: edge-tts voice identifier; the default is
        ``zh-CN-XiaoxiaoNeural``. Other Chinese voices noted by the original
        author include ``zh-CN-XiaochenNeural``, ``zh-CN-XiaomoNeural``,
        ``zh-CN-XiaoyouNeural``, ``zh-CN-XiaohanNeural``,
        ``zh-CN-XiaoruiNeural``, ``zh-CN-XiaoqianNeural`` and
        ``zh-CN-XiaodongNeural`` (availability not verified here --
        TODO confirm with ``edge-tts --list-voices``).
    :return: None
    """
    # Rate, volume and pitch are passed as zero offsets, i.e. the voice is
    # used without any adjustment.
    synthesizer = edge_tts.Communicate(
        text,
        voice_name,
        rate="+0%",
        volume="+0%",
        pitch="+0Hz",
    )
    await synthesizer.save(output_path)

    print(f"语音已保存到: {output_path}")
# ----
import pycapcut as capcut
from pycapcut import KeyframeProperty, SEC
from pycapcut import FontType, TextStyle, ClipSettings
from typing import List
import srt
async def video_merge_audio_and_caption(
    video_path: Path,
    audioPathList: List[Path],
    image_path: Path,
    subtitleList: List[srt.Subtitle],
    output_path: Path,
):
    """
    Build a JianYing/CapCut draft from a video, a logo image, narration clips
    and subtitles, then export it.

    Four tracks are created: "视频" (muted background video), "logo图片"
    (logo overlay), "音频" (narration) and "字幕" (subtitles). The video and
    the logo both fade out over the last second of the video.

    :param video_path: background video file
    :param audioPathList: narration audio files, in playback order
    :param image_path: logo image file
    :param subtitleList: subtitles whose start/end are ``timedelta`` offsets
        from the beginning of the timeline
    :param output_path: path passed to the export call
    :return: None
    """
    PROJECT_DIR = "C:\\Users\\z4938\\AppData\\Local\\JianyingPro\\User Data\\Projects\\com.lveditor.draft"
    draft_folder = capcut.DraftFolder(PROJECT_DIR)
    script = draft_folder.create_draft("新草稿", 1920, 1080)
    print("成功创建 draft剪映项目草稿")

    # Video track
    video_track_id = 1
    script.add_track(
        capcut.TrackType.video, track_name="视频", relative_index=video_track_id
    )
    video_mat = capcut.VideoMaterial(video_path)
    video_seg = capcut.VideoSegment(video_mat, capcut.Timerange(0, video_mat.duration))
    # Fade out: fully opaque one second before the end, transparent at the end.
    video_seg.add_keyframe(KeyframeProperty.alpha, video_seg.duration - SEC, 1.0)
    video_seg.add_keyframe(KeyframeProperty.alpha, video_seg.duration, 0.0)
    # Mute the original sound; narration is supplied by the audio track.
    video_seg.volume = 0
    script.add_segment(video_seg, "视频")
    print(
        f"视频素材导入成功素材ID{video_seg.material_id}, 片段id{video_seg.segment_id}"
    )

    # Logo image track, shown for the whole video with the same fade-out
    logo_track_id = 2
    script.add_track(
        capcut.TrackType.video, track_name="logo图片", relative_index=logo_track_id
    )
    logo_mat = capcut.VideoMaterial(image_path)
    logo_seg = capcut.VideoSegment(logo_mat, capcut.Timerange(0, video_mat.duration))
    logo_seg.add_keyframe(KeyframeProperty.alpha, video_seg.duration - SEC, 1.0)
    logo_seg.add_keyframe(KeyframeProperty.alpha, video_seg.duration, 0.0)
    script.add_segment(logo_seg, "logo图片")
    print(f"logo导入成功视频轨道id{logo_track_id}")

    # Audio track
    script.add_track(capcut.TrackType.audio, track_name="音频", relative_index=3)
    # Clips are laid end to end. Placing every clip at offset 0 would stack
    # them on top of each other on the same track, while the subtitles are
    # timed on the assumption that the clips play one after another.
    audio_offset = 0
    for audio_path in audioPathList:
        audio_mat = capcut.AudioMaterial(audio_path)
        audio_seg = capcut.AudioSegment(
            audio_mat,
            capcut.Timerange(audio_offset, audio_mat.duration),
            source_timerange=capcut.Timerange(0, audio_mat.duration),
        )
        script.add_segment(audio_seg, "音频")
        audio_offset += audio_mat.duration
        print(
            f"音频素材导入成功素材ID{audio_seg.material_id}, 片段id{audio_seg.segment_id}"
        )

    # Subtitle track
    script.add_track(capcut.TrackType.text, track_name="字幕", relative_index=4)
    for subtitle in subtitleList:
        # Timerange takes (start, duration) in the same unit as SEC, as used
        # for the video and audio segments above; convert from the
        # subtitle's second-based start/end accordingly.
        start = round(subtitle.start.total_seconds() * SEC)
        end = round(subtitle.end.total_seconds() * SEC)
        text_seg = capcut.TextSegment(
            subtitle.content,
            capcut.Timerange(start, end - start),
            font=FontType.下午茶,
            style=TextStyle(
                size=5.0,
                color=(0.7, 0.7, 1.0),
                auto_wrapping=True,
                underline=True,
                align=1,
            ),
            clip_settings=ClipSettings(transform_y=-0.8),
        )
        script.add_segment(text_seg, "字幕")
    print("字幕添加成功")

    # NOTE(review): confirm that the draft object returned by create_draft
    # exposes export() with these keyword arguments in the installed
    # pycapcut version.
    export_result = script.export(
        output_path=output_path, resolution="1080p", fps=30, quality="high"
    )
    print(f"视频导出完成result: {export_result}, 成品路径:{output_path}")
# ----
from datetime import timedelta
from moviepy import AudioFileClip
async def main():
    """
    Generate narration audio and subtitles from a text file, then assemble
    the final video.

    Each non-blank line of the text file becomes one audio clip and one
    subtitle. Subtitles are timed back-to-back using the measured duration
    of each clip. The subtitles are also written to an SRT file before the
    video is assembled.

    :return: None
    """
    TEXT_PATH_STR = "D:\\develop\\trkj\\resources\\tts_text.txt"
    AUDIO_PATH_STR_FMT = "D:\\develop\\trkj\\resources\\audio_tts_{}.aac"
    SRT_PATH_STR = "D:\\develop\\trkj\\resources\\tts_text_srt.srt"
    VIDEO_PATH_STR = "D:\\develop\\trkj\\resources\\test_video.mp4"
    IMAGE_PATH_STR = "D:\\develop\\trkj\\resources\\logo.png"
    OUTPUT_PATH_STR = "D:\\develop\\trkj\\resources\\video_output.mp4"

    videoPath = Path(VIDEO_PATH_STR)
    audioPathList = []
    subtitleList = []
    imagePath = Path(IMAGE_PATH_STR)
    outputPath = Path(OUTPUT_PATH_STR)

    next_start = timedelta(seconds=0.0)
    with open(TEXT_PATH_STR, "r", encoding="UTF-8") as text_f:
        for raw_line in text_f:
            text = raw_line.strip()
            # Blank lines carry nothing to speak and would only yield an
            # empty subtitle, so they are skipped.
            if not text:
                continue

            index = len(subtitleList)
            audioPath = AUDIO_PATH_STR_FMT.format(index)

            # Generate one audio clip per line, from the same stripped text
            # that is shown in the subtitle.
            await audio_gen(text, audioPath)
            audioPathList.append(audioPath)

            # Measure the clip so the subtitle spans exactly its duration.
            audio = AudioFileClip(audioPath)
            try:
                duration = audio.duration
            finally:
                audio.close()

            current_end = next_start + timedelta(seconds=duration)
            subtitleList.append(
                srt.Subtitle(
                    index=index + 1,
                    start=next_start,
                    end=current_end,
                    content=text,
                )
            )
            next_start = current_end

    srt_content = srt.compose(subtitleList)
    with open(SRT_PATH_STR, "w", encoding="UTF-8") as srt_file:
        srt_file.write(srt_content)
    print(f"字幕文件已生成: {SRT_PATH_STR}")

    await video_merge_audio_and_caption(
        videoPath, audioPathList, imagePath, subtitleList, outputPath
    )


if __name__ == "__main__":
    asyncio.run(main())