From 9df91dac94a5181ce672a6d327c6064254322986 Mon Sep 17 00:00:00 2001
From: liubiren <marslbr@qq.com>
Date: Thu, 11 Dec 2025 16:38:34 +0800
Subject: [PATCH] =?UTF-8?q?=E6=97=A5=E5=B8=B8=E6=9B=B4=E6=96=B0=20from=20m?=
 =?UTF-8?q?ac=20mini(m1)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 test.py                |  18 ++
 票据理赔自动化/main.py | 530 ++++++++++++++++++++---------------------
 票据理赔自动化/test.py | 221 -----------------
 3 files changed, 275 insertions(+), 494 deletions(-)
 create mode 100644 test.py
 delete mode 100644 票据理赔自动化/test.py

diff --git a/test.py b/test.py
new file mode 100644
index 0000000..ac43bc5
--- /dev/null
+++ b/test.py
@@ -0,0 +1,18 @@
+# -*- coding: utf-8 -*-
+
+"""
+Minimal implementation based on the current Pukang (普康) bill-claim automation.
+Feature list:
+https://liubiren.feishu.cn/docx/WFjTdBpzroUjQvxxrNIcKvGnneh?from=from_copylink
+"""
+
+from decimal import Decimal, ROUND_HALF_UP
+
+bill_amount = Decimal("1.2223").quantize(  # round the sample amount to two decimal places
+    Decimal("0.00"),  # exponent template: two digits after the decimal point
+    rounding=ROUND_HALF_UP,  # ties round away from zero
+)
+
+result = {"票据金额": bill_amount}  # key "票据金额" means "bill amount"; value remains a Decimal
+
+print(result)  # prints the dict repr, i.e. Decimal('1.22') for this input
diff --git a/票据理赔自动化/main.py b/票据理赔自动化/main.py
index 1c62903..ccd9216 100644
--- a/票据理赔自动化/main.py
+++ b/票据理赔自动化/main.py
@@ -26,6 +26,7 @@ from zen import ZenDecision, ZenEngine
 from utils.client import Authenticator, HTTPClient
 
 
+
 # from utils.ocr import fuzzy_match
 
 
@@ -439,9 +440,9 @@ if __name__ == "__main__":
             case (1, _):
                 image_type = "医疗费用清单"
             case (5, _):
-                image_type = "门诊收费票据"
+                image_type = "医疗门诊收费票据"
             case (3, _):
-                image_type = "住院收费票据"
+                image_type = "医疗住院收费票据"
             case (18, _):
                 image_type = "理赔申请书"
             case _:
@@ -632,221 +633,195 @@ if __name__ == "__main__":
             )
             # 若查验为真票或红票则直接整合至赔案档案
             if response.get("status") == 200 and response.get("code") == 10000:
-                pass
-
-            # 若查验为假票或无法查验
-            else:
-                if response.get("status") == 400 and (response.get("code") == 10100 or response.get("code") == 10001):
-                    receipt["查验结果"] = "假票"
-                else:
-                    receipt["查验结果"] = "无法查验"
-
-
-
-            match response["data"]["productCode"]:
-                case "003082":  # 增值税发票
-                    extraction = {
-                        "票据类型": {
-                            "10108": "数电票",
-                            "10101": "增值税普通发票",
-                            "10100": "增值税专用发票",
-                            "30100": "数电票",
-                            "30101": "数电票",
-                            "30104": "增值税专用发票",
-                            "30105": "数电票",
-                            "10106": "区块链电子发票",
-                            "30109": "数电票",
-                            "30121": "增值税普通发票",
-                            "10102": "增值税普通发票",
-                            "10103": "增值税普通发票",
-                            "10107": "数电票",
-                        }.get(response["data"]["type"], "其它增值税发票"),
-                        "票据号码": response["data"]["details"]["number"],
-                        "票据代码": (
-                            code
-                            if (code := response["data"]["details"]["code"])
-                            else None
-                        ),  # 深圳快瞳票据查验中数电票票据代码为空字符，转为NONE
-                        "开票日期": datetime.strptime(
-                            response["data"]["details"]["date"], "%Y年%m月%d日"
-                        ).strftime(
-                            "%Y-%m-%d"
-                        ),  # 深圳快瞳票据查验中就增值税发票开票日期格式为%Y年%m月%d日，转为%Y-%m-%d
-                        "校验码": response["data"]["details"]["check_code"],
-                        "收款方": response["data"]["details"]["seller"],
-                        "付款方": response["data"]["details"]["buyer"],
-                        "票据金额": format(
-                            Decimal(response["data"]["details"]["total"]).quantize(
-                                Decimal("0.00"),
-                                rounding=ROUND_HALF_UP,
+                # noinspection PyTypeChecker
+                match response["data"]["productCode"]:
+                    # 增值税发票，目前深圳快瞳支持全电和全电纸质增值税发票查验
+                    case "003082":
+                        # noinspection PyTypeChecker
+                        receipt.update({
+                            "查验状态": (
+                                "真票"
+                                if response["data"]["details"]["invoiceTypeNo"] == "0"
+                                else "红票"
                             ),
-                            ".2f",
-                        ),
-                        "查验状态": (
-                            "真票"
-                            if response["data"]["details"]["invoiceTypeNo"] == "0"
-                            else "红票"
-                        ),
-                        "备注": (
-                            remark
-                            if (remark := response["data"]["details"]["remark"])
-                            else None
-                        ),  # 深圳快瞳票据查验中增值税发票备注可能为空字符，转为NONE
-                        "项目": [
-                            {
-                                "名称": item["name"],
-                                "规格": (
-                                    specification
-                                    if (specification := item["specification"])
-                                    else None
+                            "票据号码": response["data"]["details"]["number"],
+                            "票据代码": (
+                                response["data"]["details"]["code"]
+                                if response["data"]["details"]["code"]
+                                else None
+                            ),  # 全电发票无发票代码，深圳快瞳票据查验接口中票据代码由空字符转为None
+                            "开票日期": datetime.strptime(
+                                response["data"]["details"]["date"], "%Y年%m月%d日"
+                            ).strftime(
+                                "%Y-%m-%d"
+                            ),  # Shenzhen Kuaitong verification API: issue date is reformatted from a "%Y年%m月%d日" string to a "%Y-%m-%d" string
+                            "校验码": response["data"]["details"]["check_code"],
+                            "开票金额": Decimal(response["data"]["details"]["total"]).quantize(
+                                    Decimal("0.00"),
+                                    rounding=ROUND_HALF_UP,
+                                ),  # 深圳快瞳票据查验接口中开票金额由字符串转为Decimal，保留两位小数
+                            "姓名": response["data"]["details"]["buyer"],
+                            "购药及就医机构": response["data"]["details"]["seller"],
+                            "备注": (
+                                response["data"]["details"]["remark"]
+                                if response["data"]["details"]["remark"]
+                                else None
+                            ),  # 深圳快瞳票据查验接口中备注由空字符转为None
+                            "明细层": [
+                                {
+                                    "名称": item["name"],
+                                    "规格": (
+                                        item["specification"]
+                                        if item["specification"]
+                                        else None
+                                    ),  # Shenzhen Kuaitong verification API: an empty item specification string becomes None
+                                    "单位": (
+                                        item["unit"]
+                                        if item["unit"]
+                                        else None
+                                    ),  # 深圳快瞳票据查验接口中明细单位由空字符转为None
+                                    "数量": (Decimal(item["quantity"]).quantize(
+                                                Decimal("0.00"),
+                                                rounding=ROUND_HALF_UP,
+                                            )
+                                        if item["quantity"]
+                                        else None
+                                    ),  # Shenzhen Kuaitong verification API: an empty item quantity string becomes None; otherwise it is converted to Decimal with two decimal places
+                                    "金额": (
+                                                Decimal(item["total"])
+                                                + Decimal(item["tax"])
+                                        ).quantize(
+                                            Decimal("0.00"),
+                                            rounding=ROUND_HALF_UP,
+                                        ),  # 深圳快瞳票据查验接口中明细金额税额由字符串转为Decimal，保留两位小数，求和
+                                }
+                                for item in response["data"]["details"].get("items", [])
+                            ],
+                        })
+                    # 门诊/住院收费票据
+                    case "003081":
+                        # noinspection PyTypeChecker
+                        receipt.update({
+                            "查验状态": (
+                                "真票"
+                                if response["data"]["flushedRed"] == "true"
+                                else "红票"
+                            ),
+                            "票据号码": response["data"]["billNumber"],
+                            "票据代码": (
+                                response["data"]["billCode"]
+                                if response["data"]["billCode"]
+                                else None
+                            ),  # 部分地区医疗收费票据无发票代码，深圳快瞳票据查验接口中票据代码由空字符转为None
+                            "开票日期": parse(response["data"][
+                                "invoiceDate"
+                            ]).strftime(
+                            "%Y-%m-%d"
+                        ),  # Shenzhen Kuaitong verification API: issue date (expected as a "%Y-%m-%d" string) is parsed and re-emitted as a "%Y-%m-%d" string
+                            "校验码": response["data"]["checkCode"],
+                            "票据金额": Decimal(response["data"]["amount"]).quantize(
+                                    Decimal("0.00"),
+                                    rounding=ROUND_HALF_UP,
                                 ),
-                                "单位": unit if (unit := item["unit"]) else None,
-                                "数量": (
-                                    format(
-                                        Decimal(quantity).quantize(
+                            "姓名": response["data"]["payer"],
+                            "购药及就医机构": response["data"]["receivablesInstitution"],
+
+
+
+                            "医保支付": format(
+                                Decimal(
+                                    response["data"].get("medicarePay", "0.00")
+                                ).quantize(
+                                    Decimal("0.00"),
+                                    rounding=ROUND_HALF_UP,
+                                ),
+                                ".2f",
+                            ),
+                            "其它支付": format(
+                                Decimal(
+                                    response["data"].get("otherPayment", "0.00")
+                                ).quantize(
+                                    Decimal("0.00"),
+                                    rounding=ROUND_HALF_UP,
+                                ),
+                                ".2f",
+                            ),
+                            "个人自付": format(
+                                Decimal(
+                                    response["data"].get("personalPay", "0.00")
+                                ).quantize(
+                                    Decimal("0.00"),
+                                    rounding=ROUND_HALF_UP,
+                                ),
+                                ".2f",
+                            ),
+                            "自付一": format(
+                                Decimal(
+                                    response["data"].get("self_pay_one", "0.00")
+                                ).quantize(
+                                    Decimal("0.00"),
+                                    rounding=ROUND_HALF_UP,
+                                ),
+                                ".2f",
+                            ),  # 深圳快瞳票据查验中就部分地区无自付一
+                            "自付二": format(
+                                Decimal(
+                                    response["data"].get("classificationPays", "0.00")
+                                ).quantize(
+                                    Decimal("0.00"),
+                                    rounding=ROUND_HALF_UP,
+                                ),
+                                ".2f",
+                            ),  # 深圳快瞳票据查验中就部分地区无自付二
+                            "个人自费": format(
+                                Decimal(
+                                    response["data"].get("personalExpense", "0.00")
+                                ).quantize(
+                                    Decimal("0.00"),
+                                    rounding=ROUND_HALF_UP,
+                                ),
+                                ".2f",
+                            ),
+                            "住院日期": (
+                                parse(date.split("-")[0]).strftime("%Y-%m-%d")
+                                if (date := response["data"].get("hospitalizationDate"))
+                                else None
+                            ),  # 深圳快瞳票据查验中就收费票据住院日期格式为%Y%m%d-%Y%m%d，即住院日期-出院日期
+                            "出院日期": (
+                                parse(date.split("-")[1]).strftime("%Y-%m-%d")
+                                if date
+                                else None
+                            ),
+                            "医疗机构类型": response["data"]["institutionsType"],
+                            "项目": [
+                                {
+                                    "名称": item["itemName"],
+                                    "规格": item[
+                                        "medical_level"
+                                    ],  # 甲类无自付、乙类有自付、丙类全自付
+                                    "单位": item["unit"],
+                                    "数量": format(
+                                        Decimal(item["number"]).quantize(
                                             Decimal("0.00"),
                                             rounding=ROUND_HALF_UP,
                                         ),
                                         ".2f",
-                                    )
-                                    if (quantity := item["quantity"])
-                                    else None
-                                ),
-                                "金额": format(
-                                    (
-                                        Decimal(item["total"])
-                                        + Decimal(item["tax"])
-                                    ).quantize(
-                                        Decimal("0.00"),
-                                        rounding=ROUND_HALF_UP,
                                     ),
-                                    ".2f",
-                                ),  # 价税合计
-                            }
-                            for item in response["data"]["details"].get("items", [])
-                        ],
-                    }  # 深圳快瞳票据查验中就部分增值税发票仅可查，数据标准化抛出异常
-                    return extraction
+                                    "金额": format(
+                                        Decimal(item["totalAmount"]).quantize(
+                                            Decimal("0.00"),
+                                            rounding=ROUND_HALF_UP,
+                                        ),
+                                        ".2f",
+                                    ),
+                                }
+                                for item in response["data"]["feedetails"]
+                            ],
+                        })
+
+
+                # 若请求深圳快瞳票据查验接口或解析发生异常，则根据影像件类型请求深圳快瞳增值税发票/收费票据识别接口
 
-                case "003081":  # 门诊/住院收费票据
-                    extraction = {
-                        "票据类型": (
-                            "门诊收费票据"
-                            if "门诊" in response["data"]["billName"]
-                            else "住院收费票据"
-                        ),
-                        "票据号码": response["data"]["billNumber"],
-                        "票据代码": response["data"]["billCode"],
-                        "开票日期": response["data"][
-                            "invoiceDate"
-                        ],  # 深圳快瞳票据查验中就收费票据开票日期格式为%Y-%m-%d
-                        "校验码": response["data"]["checkCode"],
-                        "收款方": response["data"]["payeeName"],
-                        "付款方": response["data"]["payer"],
-                        "票据金额": format(
-                            Decimal(response["data"]["amount"]).quantize(
-                                Decimal("0.00"),
-                                rounding=ROUND_HALF_UP,
-                            ),
-                            ".2f",
-                        ),
-                        "查验状态": {"true": "真票", "false": "红票"}[
-                            response["data"]["flushedRed"]
-                        ],
-                        "备注": response["data"].get("remark"),
-                        "医保支付": format(
-                            Decimal(
-                                response["data"].get("medicarePay", "0.00")
-                            ).quantize(
-                                Decimal("0.00"),
-                                rounding=ROUND_HALF_UP,
-                            ),
-                            ".2f",
-                        ),
-                        "其它支付": format(
-                            Decimal(
-                                response["data"].get("otherPayment", "0.00")
-                            ).quantize(
-                                Decimal("0.00"),
-                                rounding=ROUND_HALF_UP,
-                            ),
-                            ".2f",
-                        ),
-                        "个人自付": format(
-                            Decimal(
-                                response["data"].get("personalPay", "0.00")
-                            ).quantize(
-                                Decimal("0.00"),
-                                rounding=ROUND_HALF_UP,
-                            ),
-                            ".2f",
-                        ),
-                        "自付一": format(
-                            Decimal(
-                                response["data"].get("self_pay_one", "0.00")
-                            ).quantize(
-                                Decimal("0.00"),
-                                rounding=ROUND_HALF_UP,
-                            ),
-                            ".2f",
-                        ),  # 深圳快瞳票据查验中就部分地区无自付一
-                        "自付二": format(
-                            Decimal(
-                                response["data"].get("classificationPays", "0.00")
-                            ).quantize(
-                                Decimal("0.00"),
-                                rounding=ROUND_HALF_UP,
-                            ),
-                            ".2f",
-                        ),  # 深圳快瞳票据查验中就部分地区无自付二
-                        "个人自费": format(
-                            Decimal(
-                                response["data"].get("personalExpense", "0.00")
-                            ).quantize(
-                                Decimal("0.00"),
-                                rounding=ROUND_HALF_UP,
-                            ),
-                            ".2f",
-                        ),
-                        "住院日期": (
-                            parse(date.split("-")[0]).strftime("%Y-%m-%d")
-                            if (date := response["data"].get("hospitalizationDate"))
-                            else None
-                        ),  # 深圳快瞳票据查验中就收费票据住院日期格式为%Y%m%d-%Y%m%d，即住院日期-出院日期
-                        "出院日期": (
-                            parse(date.split("-")[1]).strftime("%Y-%m-%d")
-                            if date
-                            else None
-                        ),
-                        "医疗机构类型": response["data"]["institutionsType"],
-                        "项目": [
-                            {
-                                "名称": item["itemName"],
-                                "规格": item[
-                                    "medical_level"
-                                ],  # 甲类无自付、乙类有自付、丙类全自付
-                                "单位": item["unit"],
-                                "数量": format(
-                                    Decimal(item["number"]).quantize(
-                                        Decimal("0.00"),
-                                        rounding=ROUND_HALF_UP,
-                                    ),
-                                    ".2f",
-                                ),
-                                "金额": format(
-                                    Decimal(item["totalAmount"]).quantize(
-                                        Decimal("0.00"),
-                                        rounding=ROUND_HALF_UP,
-                                    ),
-                                    ".2f",
-                                ),
-                            }
-                            for item in response["data"]["feedetails"]
-                        ],
-                    }
-                    return extraction
-            # 若请求深圳快瞳票据查验接口或解析发生异常，则根据影像件类型请求深圳快瞳增值税发票/收费票据识别接口
-            except:
                 # 影像件类型
                 image_type = kwargs.get("image_type", globals()["image_type"])
                 if image_type is None:
@@ -870,11 +845,11 @@ if __name__ == "__main__":
                                 guid=hashlib.md5((url + image_guid).encode("utf-8"))
                                 .hexdigest()
                                 .upper(),
-                            )
-                            # 若深圳快瞳增值税发票识别响应非成功则返回NONE
+                                )
+                            # 若深圳快瞳增值税发票识别响应非成功则返回None
                             if not (
-                                response.get("status") == 200
-                                and response.get("code") == 0
+                                    response.get("status") == 200
+                                    and response.get("code") == 0
                             ):
                                 return None
 
@@ -941,7 +916,7 @@ if __name__ == "__main__":
                                             ),
                                             "金额": format(
                                                 (
-                                                    Decimal(amount) + Decimal(tax)
+                                                        Decimal(amount) + Decimal(tax)
                                                 ).quantize(
                                                     Decimal("0.00"),
                                                     rounding=ROUND_HALF_UP,
@@ -950,55 +925,55 @@ if __name__ == "__main__":
                                             ),
                                         }
                                         for name, specification, unit, quantity, amount, tax in zip(
-                                            [
-                                                component["value"]
-                                                for component in response["data"]
-                                                if re.match(
-                                                    r"^项目名称(\d+)?$",
-                                                    component["desc"],
-                                                )
-                                            ],
-                                            [
-                                                component["value"]
-                                                for component in response["data"]
-                                                if re.match(
-                                                    r"^规格型号(\d+)?$",
-                                                    component["desc"],
-                                                )
-                                            ],
-                                            [
-                                                component["value"]
-                                                for component in response["data"]
-                                                if re.match(
-                                                    r"^单位(\d+)?$",
-                                                    component["desc"],
-                                                )
-                                            ],
-                                            [
-                                                component["value"]
-                                                for component in response["data"]
-                                                if re.match(
-                                                    r"^数量(\d+)?$",
-                                                    component["desc"],
-                                                )
-                                            ],
-                                            [
-                                                component["value"]
-                                                for component in response["data"]
-                                                if re.match(
-                                                    r"^金额(\d+)?$",
-                                                    component["desc"],
-                                                )
-                                            ],
-                                            [
-                                                component["value"]
-                                                for component in response["data"]
-                                                if re.match(
-                                                    r"^税额(\d+)?$",
-                                                    component["desc"],
-                                                )
-                                            ],
+                                        [
+                                            component["value"]
+                                            for component in response["data"]
+                                            if re.match(
+                                            r"^项目名称(\d+)?$",
+                                            component["desc"],
                                         )
+                                        ],
+                                        [
+                                            component["value"]
+                                            for component in response["data"]
+                                            if re.match(
+                                            r"^规格型号(\d+)?$",
+                                            component["desc"],
+                                        )
+                                        ],
+                                        [
+                                            component["value"]
+                                            for component in response["data"]
+                                            if re.match(
+                                            r"^单位(\d+)?$",
+                                            component["desc"],
+                                        )
+                                        ],
+                                        [
+                                            component["value"]
+                                            for component in response["data"]
+                                            if re.match(
+                                            r"^数量(\d+)?$",
+                                            component["desc"],
+                                        )
+                                        ],
+                                        [
+                                            component["value"]
+                                            for component in response["data"]
+                                            if re.match(
+                                            r"^金额(\d+)?$",
+                                            component["desc"],
+                                        )
+                                        ],
+                                        [
+                                            component["value"]
+                                            for component in response["data"]
+                                            if re.match(
+                                            r"^税额(\d+)?$",
+                                            component["desc"],
+                                        )
+                                        ],
+                                    )
                                     ]
                                     if invoice_type == "电子发票(普通发票)"
                                     else [
@@ -1024,31 +999,41 @@ if __name__ == "__main__":
                                                 component["value"]
                                                 for component in response["data"]
                                                 if re.match(
-                                                    r"^项目名称明细(\d+)?$",
-                                                    component["desc"],
-                                                )
+                                                r"^项目名称明细(\d+)?$",
+                                                component["desc"],
+                                            )
                                             ],
                                             [
                                                 component["value"]
                                                 for component in response["data"]
                                                 if re.match(
-                                                    r"^项目数量明细(\d+)?$",
-                                                    component["desc"],
-                                                )
+                                                r"^项目数量明细(\d+)?$",
+                                                component["desc"],
+                                            )
                                             ],
                                             [
                                                 component["value"]
                                                 for component in response["data"]
                                                 if re.match(
-                                                    r"^项目金额明细(\d+)?$",
-                                                    component["desc"],
-                                                )
+                                                r"^项目金额明细(\d+)?$",
+                                                component["desc"],
+                                            )
                                             ],
                                         )
                                     ]
                                 ),
                                 "查验状态": "无法查验",
                             }
+
+            # 若查验为假票或无法查验
+            else:
+                if response.get("status") == 400 and (response.get("code") == 10100 or response.get("code") == 10001):
+                    receipt["查验结果"] = "假票"
+                else:
+                    receipt["查验结果"] = "无法查验"
+
+
+
                             return extraction
                         except:
                             return None
@@ -1307,7 +1292,7 @@ if __name__ == "__main__":
                 # 银行卡识别并整合至赔案档案
                 bankcard_recognize(image_guid, image_format, image_base64)
             # TODO: 暂仅支持增值税发票识别和购药及就医类型为药店购药整合至赔案档案，后续逐步添加
-            case "增值税发票" | "门诊收费票据" | "住院收费票据":
+            case "增值税发票" | "医疗门诊收费票据" | "医疗住院收费票据":
                 # 票据识别并整合至赔案档案
                 receipt_recognize(image_guid, image_format, image_base64, image_type)
 
@@ -1373,7 +1358,6 @@ if __name__ == "__main__":
                 image_index, image_guid, image_format, image_base64, insurance_branch, image_type
             )
 
-
 """
 
 
diff --git a/票据理赔自动化/test.py b/票据理赔自动化/test.py
deleted file mode 100644
index bf03dd2..0000000
--- a/票据理赔自动化/test.py
+++ /dev/null
@@ -1,221 +0,0 @@
-# -*- coding: utf-8 -*-
-
-import json
-import re
-from csv import DictReader, DictWriter
-from pathlib import Path
-from typing import List, Dict
-
-import torch
-from transformers import BertTokenizerFast, BertForTokenClassification
-
-
-# 命名实体识别
-class NER:
-    def __init__(self):
-        # 实体标签映射
-        self.label_map = {
-            0: "O",  # 非药品命名实体
-            1: "B-DRUG",  # 药品命名实体-开始
-            2: "I-DRUG",  # 药品命名实体-中间
-        }
-
-        # 加载预训练分词器
-        self.tokenizer = BertTokenizerFast.from_pretrained(
-            pretrained_model_name_or_path=Path("./models/bert-base-chinese").resolve()
-        )
-
-        # 加载预训练模型
-        self.model = BertForTokenClassification.from_pretrained(
-            pretrained_model_name_or_path=Path("./models/bert-base-chinese").resolve(),
-        )
-
-        # 设置模型为预测模式
-        self.model.eval()
-
-    def recognize_drugs(self, text: str) -> List[Dict]:
-        """识别药品命名实体"""
-
-        if not text.strip():
-            return []
-
-        # 分词编码
-        inputs = self.tokenizer(
-            text,
-            return_tensors="pt",
-            padding=True,
-            truncation=True,
-            return_offsets_mapping=True,
-        )
-
-        # TOKEN于文本中起止位置
-        offset_mapping = inputs.pop("offset_mapping")[0].cpu().numpy()
-
-        with torch.no_grad():
-            # 模型预测
-            outputs = self.model(**inputs)
-            # 获取TOKEN预测标签
-            predictions = torch.argmax(outputs.logits, dim=2)
-
-        entities = []
-        current_entity = None
-
-        # 遍历所有TOKEN、预测标签索引和起止索引
-        for token, offset, label_id in zip(
-            self.tokenizer.convert_ids_to_tokens(inputs["input_ids"][0]),
-            offset_mapping,
-            predictions[0].cpu().numpy(),
-        ):
-            print(label_id)
-            continue
-
-            # 映射TOKEN标签
-            label = self.label_map.get(label_id, "O")
-
-            # 若遇到特殊TOKEN则跳过
-            if (
-                token in ["[CLS]", "[SEP]", "[PAD]"]
-                or offset[0] == 0
-                and offset[1] == 0
-            ):
-                continue
-
-            if label == "B-DRUG":
-                if current_entity:
-                    self._combine_tokens(current_entity, text)
-                    entities.append(current_entity)
-
-                current_entity = {
-                    "start": offset[0],
-                    "end": offset[1],
-                    "tokens": [token],
-                    "offsets": [offset],
-                    "type": label,
-                }
-
-            elif label == "I-DRUG":
-                if current_entity:
-                    if offset[0] == current_entity["end"]:
-                        current_entity["end"] = offset[1]
-                        current_entity["tokens"].append(token)
-                        current_entity["offsets"].append(offset)
-                    else:
-                        self._combine_tokens(current_entity, text)
-                        entities.append(current_entity)
-                        current_entity = {
-                            "start": offset[0],
-                            "end": offset[1],
-                            "tokens": [token],
-                            "offsets": [offset],
-                            "type": label,
-                        }
-
-            else:
-                if current_entity:
-                    self._combine_tokens(current_entity, text)
-                    entities.append(current_entity)
-                    current_entity = None
-
-        if current_entity:
-            self._combine_tokens(current_entity, text)
-            entities.append(current_entity)
-
-        return entities
-
-    @staticmethod
-    def _combine_tokens(current_entity: Dict, text: str):
-        """合并TOKEN"""
-
-        # 从文本中提取命名实体文本
-        current_entity["text"] = text[current_entity["start"] : current_entity["end"]]
-
-
-"""
-
-# 使用示例（需要训练好的模型）
-dl_ner = NER()
-text = "患者需要硫酸吗啡缓释片治疗癌症疼痛"
-entities = dl_ner.recognize_drugs(text)
-print(entities)
-
-exit()
-
-"""
-
-
-def drug_extraction(text) -> tuple[str, str | None]:
-    """药品数据提取"""
-
-    # 正则匹配两个“*”之间内容作为药品类别，第二个“*”之后内容作为药品名称。
-    if match := re.match(
-        pattern=r"\*(?P<drug_type>.*?)\*(?P<drug_name>.*)",
-        string=(text := text.strip()),
-    ):
-        # 药品类别
-        drug_type = match.group("drug_type").strip()
-
-        # 药品名称
-        drug_name = (
-            match.group("drug_name")
-            .upper()  # 小写转大写
-            .replace("(", " ")
-            .replace(")", " ")
-            .replace("（", " ")
-            .replace("）", " ")
-            .replace("[", " ")
-            .replace("]", " ")
-            .replace("【", " ")
-            .replace("】", " ")
-            .replace(":", " ")
-            .replace("：", " ")
-            .replace(",", " ")
-            .replace("，", " ")
-            .replace("·", " ")
-            .replace("`", " ")
-            .replace("@", " ")
-            .replace("#", " ")
-            .replace("*", " ")
-            .replace("/", " ")  # 就指定符号替换为空格
-            .strip()
-        )
-
-        # 就药品名称中多个空格替换为一个空格
-        drug_name = re.sub(pattern=r"\s+", repl=" ", string=drug_name)
-
-        for section in drug_name.split(" "):
-            print(section)
-
-    # 若匹配失败则药品类型默认为文本、药品名称默认为None
-    else:
-        drug_type, drug_name = text, None
-
-    return drug_type, drug_name
-
-
-dataframe = []
-
-# 就票据查验结果和疾病对应关系进行数据清洗（暂仅考虑增值税发票且为真票）
-with open("票据查验结果和疾病对应关系.csv", "r", encoding="utf-8") as file:
-    for row in DictReader(file):
-        try:
-            disease = row["疾病"]
-
-            response = json.loads(row["票据查验结果"])
-
-            # 遍历项目
-            for item in response["data"]["details"]["items"]:
-
-                name = item["name"]
-
-                drug_extraction(name)
-
-                exit()
-
-        except Exception as e:
-            print(e)
-            exit()
-
-with open("1.csv", "w", newline="", encoding="utf-8") as file:
-    writer = DictWriter(file, fieldnames=dataframe[0].keys())
-    writer.writeheader()
-    writer.writerows(dataframe)