diff --git a/utils/caches.db b/utils/caches.db index d9b1143..bab1921 100644 Binary files a/utils/caches.db and b/utils/caches.db differ diff --git a/票据理赔自动化/SQLite.db b/票据理赔自动化/database.db similarity index 100% rename from 票据理赔自动化/SQLite.db rename to 票据理赔自动化/database.db diff --git a/票据理赔自动化/main.py b/票据理赔自动化/main.py index 278be05..6292d63 100644 --- a/票据理赔自动化/main.py +++ b/票据理赔自动化/main.py @@ -55,7 +55,7 @@ if __name__ == "__main__": return ZenEngine({"loader": loader}).get_decision(rule_path.as_posix()) # 影像件识别使能 - recognition_enable = rule_engine(Path("rules/影像件识别使能.json")) + recognize_enable = rule_engine(Path("rules/影像件识别使能.json")) class MasterData(SQLiteClient): """主数据""" @@ -65,7 +65,7 @@ if __name__ == "__main__": 初始化主数据 """ # 初始化SQLite客户端 - super().__init__(database="SQLite.db") + super().__init__(database="database.db") try: with self: @@ -465,45 +465,42 @@ if __name__ == "__main__": if not (response.get("status") == 200 and response.get("code") == 0): raise RuntimeError("请求深圳快瞳通用文本识别接口发生异常") - boxes = [] - for box in response["data"]: + blocks = [] + for block in response["data"]: # noinspection PyTypeChecker - boxes.append( + blocks.append( [ - numpy.float64( - box["itemPolygon"]["x"] - ), # 文本标注框左上角的X坐标 - numpy.float64( - box["itemPolygon"]["y"] - ), # 文本标注框左上角的Y坐标 - numpy.float64( - box["itemPolygon"]["height"] - ), # 文本标注框左上角的高度 - box["value"], # 文本标注框的文本 + int(block["itemPolygon"]["x"]), # 文本块左上角的X坐标 + int(block["itemPolygon"]["y"]), # 文本块左上角的Y坐标 + int(block["itemPolygon"]["height"]), # 文本块左上角的高度 + block["value"], # 文本块的文本内容 ] ) - # 按照文本标注框的Y坐标升序(先从上到下) - boxes.sort(key=lambda x: x[1]) + # 使用俄罗斯方块方法整理文本块,先按照文本块的Y坐标升序(从上到下) + blocks.sort(key=lambda x: x[1]) - rows = [] - for idx, box in enumerate(boxes[1:]): + lines = [] + for idx, block in enumerate(blocks[1:]): if idx == 0: - row = [boxes[0]] + line = [blocks[0]] continue - # 若文本标注框的Y坐标与当前行的最后一个文本标注框的Y坐标差值小于阈值则归为同一行,否则结束当前行(分行) - if box[1] - row[-1][1] < row[-1][2] * 0.5: - row.append(box) + # 若当前文本块的Y坐标和当前文本行的平均Y坐标差值小于阈值则归为同一文本行,否则另起一文本行(分行) + if ( + block[1] - numpy.array([e[1] for e in line]).mean() + < numpy.array([e[2] for e in line]).mean() + ): + line.append(block) else: - rows.append(row) - row = [box] - rows.append(row) + lines.append(line) + line = [block] + lines.append(line) - boxes = [] - for row in rows: - boxes.extend( - [re.sub(r"\s", "", x[3]) for x in sorted(row, key=lambda x: x[0])] - ) # 按照文本标注框的X坐标升序(再从左到右)并去除所有空字符 - return "\n".join(boxes) # 整合 + blocks = [] + for line in lines: + blocks.extend( + [re.sub(r"\s", "", x[3]) for x in sorted(line, key=lambda x: x[0])] + ) # 按照文本块的X坐标升序(从左到右)并去除文本块的文本内容中所有空字符 + return "\n".join(blocks) # TODO: 后续添加居民身份证(国徽面)和居民身份证(头像面)合并 # noinspection PyShadowingNames @@ -608,9 +605,8 @@ if __name__ == "__main__": :param image: 影像件 :return: 空 """ - # 方法1:先使用深圳快瞳通用文本识别再使用硅基流动中大语言模型结构化,可行但是需要请求二次 - # 方法2:使用硅基流动中支持OCR的大语言模型 # 请求硅基流动的大语言模型接口 + # noinspection PyTypeChecker response = http_client.post( url="https://api.siliconflow.cn/v1/chat/completions", headers={ @@ -618,18 +614,41 @@ if __name__ == "__main__": "Content-Type": "application/json; charset=utf-8", }, json={ - "model": "deepseek-ai/DeepSeek-R1-0528-Qwen3-8B", # 通过从DeepSeek-R1-0528模型蒸馏思维链接至Qwen3-8B-Base获得的模型 - "messages": [{"role": "user", "content": ""}], - "max_tokens": 10240, # 生成文本最大令牌数 - "temperature": 0.2, - "top_p": 0.5, - "top_k": 20, - "frequency_penalty": 0.0, - "thinking_budget": 1, + "model": (model := "THUDM/GLM-4.1V-9B-Thinking"), + "messages": [ + { + "role": "user", + "content": [ + { + "type": "text", + "text": ( + text := "请以JSON字符串的形式输出识别结果" + ), + }, + { + "type": "image_url", + "image_url": { + "url": f"data:image/{image["影像件格式"]};base64,{image["影像件BASE64编码"]}" + }, + }, + ], + } + ], + "max_tokens": 8192, + "temperature": 0.95, + "top_p": 0.7, + "top_k": 2, + "frequency_penalty": 1.1, + "thinking_budget": 8192, }, - guid=md5(prompt.encode("utf-8")).hexdigest().upper(), + guid=md5((model + text + image["影像件唯一标识"]).encode("utf-8")) + .hexdigest() + .upper(), ) + print(response) + exit() + recognition = ( json.loads(match.group("json")) if ( @@ -648,6 +667,7 @@ if __name__ == "__main__": # 根据保险分公司匹配结构化识别文本方法 match insurer_company: + # 中银保险有限公司 case _ if insurer_company.startswith("中银保险有限公司"): boc_application_recognize(image) @@ -1382,12 +1402,12 @@ if __name__ == "__main__": ) # 影像件识别使能检查,若影像件不识别则跳过 - if not recognition_enable.evaluate( + if not recognize_enable.evaluate( { "insurer_company": insurer_company, "image_type": image["影像件类型"], } - )["result"]["recognition_enable"]: + )["result"]["recognize_enable"]: return # 根据影像件类型匹配影像件识别方法 diff --git a/票据理赔自动化/rules/影像件识别使能.json b/票据理赔自动化/rules/影像件识别使能.json index 9b46e0a..0a3d028 100644 --- a/票据理赔自动化/rules/影像件识别使能.json +++ b/票据理赔自动化/rules/影像件识别使能.json @@ -139,7 +139,7 @@ { "id": "a2fc744f-930d-43e0-b5cf-824a5928c7f1", "name": "是否识别", - "field": "recognition_enable" + "field": "recognize_enable" } ], "hitPolicy": "first",