From 16d41d8fae5ce8d12376131242fd16584acf5d52 Mon Sep 17 00:00:00 2001 From: liubiren Date: Wed, 17 Dec 2025 02:14:56 +0800 Subject: [PATCH] =?UTF-8?q?=E6=97=A5=E5=B8=B8=E6=9B=B4=E6=96=B0=20from=20m?= =?UTF-8?q?ac=20mini(m1)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- utils/caches.db | Bin 372736 -> 372736 bytes 票据理赔自动化/{SQLite.db => database.db} | Bin 票据理赔自动化/main.py | 110 +++++++++++++--------- 票据理赔自动化/rules/影像件识别使能.json | 2 +- 4 files changed, 66 insertions(+), 46 deletions(-) rename 票据理赔自动化/{SQLite.db => database.db} (100%) diff --git a/utils/caches.db b/utils/caches.db index d9b11439e081b753930be25e000513b3862328a0..bab1921069f9173164dac0e0ea48411989086fae 100644 GIT binary patch delta 20565 zcmdUX3v68HbtWmxc4JGT3#U$EZ|p1D4k@i3GZZO_GGte>typO!rIzBfH5HmBM-m-s zMjZ~FW^MNlB|e7Fp)@2%;zJ}QJ|t6onKU0We6-yx7Qwc4ivkO*+b#@c{(EPD?0SK8 z7YPb1_B;Q%_s$)1D9bGxU?tv}d;jM-=Rg1X&+Cr=vnTwYJ<;~m=5J?YWW0&%-{E@k za_+tdzn!tGZrfB@Z`Lzg9?E!T^RF|q{wYh(`ZVjWvi>Y{Fzo&V^{-t@r3ne^}vKl|G0tr&^g4}J2< zP2LVRNYHnW6trLBCYAd#-4A=BWQG2Z6~S;W`<9Y%=qb zdK|Y;;d&C+@8H^nYd5Z=xQ^f=#AzX4!}Thz@8H^x>wCC!%qHK95Exo~4%A_&BoF{CFid{MVmk?)k$7_^i_vZilXkFIaroYR2 z?a{x3P)wP7 z<-cdHWU9|zY}Lc{IYn;egB|$Ty;~8rv$1HW);pkgkBhcuy{)dueY(h9|BD%ZviwmNzR*Wx50M#P5*5e0S@s+Du zPlMh*9iNyJSDN*)Wib}kyK1!Xkk-?uk2PrF*+h6IwlbhqPZ+rAt&^hXR>I#FoA+xS zGoX~#;KwMKF+NhQb=G4DU`t@(A+|8A_17nET&1Gy*!&nMB0^i@-&A>4)GMDa2K2$@ z*h+7FU`W)~Cc?Mk6OErQHWXz`+-mQL;5-J2Pt=)%fU; z(t08XoqG!R=Iz~6koV9&SseNw-mh;sSQPp%mDwuyG60yU#!~D3)d%VG)Lp(lVYhOK5$FxUzFI1ND$p(^AY08v2ZNfD&~eoU{VC9X;~$E z%ZB=$J6ONi{D)SV$di2!I(H}{c$pYpGQq)|JV##sZl`0nb8il+Ca17Cm^yLo#=)G> zAFDf6l=Aivi~X4PV2;x&;SG~3YA!#?U~j}5FY4xo zB)B5YDR41ZdFjD&?ta`!^R0rbDR2Az!{yku-?0WRt)@@ZU4e*@lxj!=(VjFA73|)( zJ8zGKXHyEH{%X(G`v%DA*t08dEkt^_UGE%ch_I#8K$O1@M9xBj==qeTLrHw)TB5#L zwDy64X!l-P!iETKD%kquOA1Ow_p;tlj~1YSF&|eSk{g9t?cnM;i?YEl1x48)L{YZ0 zkVG1hxO%F(S1VKCgw7?4F zqix>&0}3-Cn$u{xm`1W6o^upJSE2=hV-1WAaM#~Mjn%vUUY?sa#nlPX28q?D_cZD4 zw;+2V*JI1wsN#(U3M98o14Ag5=k)eI(SMOPVao);X3et-0jh(9gfxz=jB8^zA$AK7 z*w_l2L!aqSAw^dW28+$pT#(?{Q#gA70znTx`Z}Qhe6ib_4^50ppq8wRMcTQ)QF-Y9 zJf~u3&&%EcZTgm06Vz`)a-g7vKTJ5O4vlzUs%?tq)1uXG}ons3)N{Dm-1T$Nm zf&YE<7A@5&y-g~7L1*K50ena-ob}Qnty?0j+b+{-@vp*O+Z&=*|7G zbg}s+$yRR`x!;5~B2j`EF3Q$bxj6FP4!z?+qnEYpyryp$ke`z?`lHI^+IYt$Rk)*XaY} zyiVrqw=^{{M@2mEl8Ug^okZ{8j6M}*lYwlcIc#9kl*M3|!L=-*rA;BnfIS3qkSeb# z+SNKSHHD*)+Dc^>w&yd|+CZIleO63$V7*|0OvNIt`eG!ufTJK3oA1&iBg%=FUsuH3 zc%uE9)<2N=uo?Q8Xza$R)31#UYTBYDc2J zKG9tVGKjjC_|hG%5ne69n^sdFx+ z(b&OFd;S<&3xFyz9pv?~jx-9ml~1|g^_+24R+f9*B^9dT@_Ieq zou%hor9UcnpH+MwPer9tTH8d6T$ax}ip!i1Xvz_2SK^yhcJ z`bKk#`(&)a8kRg7!A4!Rb(77ywJQB z0EwB00dW#Ka0MIz&Pi{9eT`vWv7&|+baL319~6f!X&o)JQqF&VyoidWc?g`iH|tQm zwvafi?_h0B?M^BbFqfpe2@o;+F@*2iXMCR~P~W#>Z#=BR{e8yV0^XXNx!Ic7vd~cC zG|mW9%Zx4eYIgdEYL#|Oa%KLi%2m|kMg>+Qlb8h0 zUb9rXOPRHyw-`PN5Ig&dBDegKP*y{onz+|7Q~y?M@k1$*~89dNQa3-k8wE7)TWCM6C;4|`b2 z*(qJBhFap>rG#Ewcur+WgwsLP-V#&Sp?fh@MeaPf03aSCk=RlXR>m$#%qDwwDeybP zCWlsIxi?{C!(tb0m$4>#XBdJQrh(Zb-|V3^tY}TOBGmd9+u+$CIB+Y&@%W;!T!|dYnj*MEA^K52MxgY5HCCqxrwE}m`yx#_CVZS&rK|$AnQZ_Q#}158c1D;8WcF? z%#s-LXlEBa_q84K$FblXPV(l zLWSwMFdX?ElX|oU78pb;X+KNw@dRZzI06(&7*!$Ai_6Po2q}q7vF|4rTt-;Qf*nCI zHs|#28EjmwnZyPe8ZzodLz34A6gZ$5BiWCZ14>HVfSYfzoiAu$L=R+L@_6Z(L={~6 zwqib5WEcT;MjS!b+hYl+GJ*-PxD@iYF0Ul61jt16kHjZ0ksGUXK^q95XOSD?4pJ_z zHVr{{C2I`EXir0Yq%J;UsFXNrputmotM!)lag&&Y?bSh+XMH5TayhnWXdq}N^>1lw z!MckrH(5>?9Dz8qun4f=xQYz3`3M>9HNiyhb)m}Bf{iYyzz}BSVsXFq)?4pXdLSPy zetwSEh$4k=LO6=*6>PH8Mlg$_Xb5Z}h_=`T{*e2sMl|;0$i=aNIGH}u0=Iy*wqSxX zPifV8ms|CnD)&`j@#VnSe%R$`Nq(I%>?#(NFi>H|h|5>?;h@$CKXz}TtB3uLB;M~V zNy{D#>)i-XEySWXv6V<1H{1pv#is^H zvIR!Pcvozp0h%UO7Xun#oAVjpDq&FUhGEMSx~k%q6)#2Qbv_+uydD9LLIS-;PqOaEe=p zw*%A|urc&J;$-RQ(PrQ}iRh6JwVQsu`?8qm6O%#6WN0a|C5rUNCqv9LFDTo$mst?J z4&LO6j?zqPMu}IGF9`EtBN2BlT_NqVc3Pi~#wTVF_<=jD}BIRo)2bAoD@L$*-FINNe@Ag~ zu_NW5EJ*o417~ruN)7RCeHHUl>X#!=wy=H5=RNdE{~T%VWO=@a4eqrkkQ z?|VJT`n~qn_gk{2&pyKcp8p*C?pck>`)IL${V`Ooa@?cI{kB+t5@UqIG%eILW$PC? z(haeG^E5RMM4LZQl0uFTfb{JAZV9C05s&w%&vpLPk3C1-v=>fzA#dRaJZUihi8IG| z+q_X;S?=RagX&70HHv*Axp&siqtd=fndqF?g&p;8FxBdrj(DoPN7diROmO9{gOd8M zBj0MAPL@LueHq%R_qk4MCUU*&a(}t0($1u=Wr@{Oy;^?u98RDwgH_t8*qTVnaNM0n zxOKY%#gj~mo@%{g2wr@$FxZta8;H*=;OszHk~KLxSr7q$k}M+$@sS!4xWEFnzF5p&*?pcjG)RFR3_%bGZ^K2sDFNT)vX>ib_N4GgLch!e~y% zvuue7!67fYoORJ5{)(%z)LVXmI-m5>7N$<>0mMJychT>(vziJN;~&>ZYXYv1P7XfB z$Cn99Qp#7W‹%4+K*I%@5R0bJE?4Zya7`yIV3R{|d4N76zq@bPF3BX9$%IG0cF zphZBk#nsleQxiP$0FO_MiQteVvw`EG(o#>A+jru}ZkIQS5g|mAAV)=}2x9(wD6?u> zZI;$qrm%_Tg+y%&=?)aJ#JmWK(44P%Dj)_)`{9XyfH(^&PA4z8N=bQxwrzt5CJOWg z)|*l^rr5kXCc2lHKA4o1B?QJmLw01v7MftX5 zfB@7jZW$S&g5Cr}p9eBt?7i1Y++_&Tc@MeEPL_&yLoZDVt`-B*{Yfi@{wGc<}w&qPJ3gmT}vcXlBk=&~8k_ z$Txy;3`Ok_%wHm%WK0;SaDB+YatWtoKuS_<1%d2=n`mY@u{r42{$<)w;Q{*mFq3v8BdR-=z1f>%;ZDXR;$E zl%OHilHehL;hUCOzOngMq4q`e0Y5(jI6w_nW%nR#Y{M^$d|41zF}!MBG=jpmZ9F)k zVC6($NI_H#;pdse(g2)iG6(i{AFYo9E|HE@gThEZAVElL1*=8{3n$vgk&Z+mpv54q zgc(l(0I*lyq;wXz|3;e>%u6OX3V2Sw=Shun`08%=ku2tp7Ra`~GHG{*vO~&3Rr=t8@;%Y(iQM;sFp(?4P2QbDudgMmOuCiO&KT=7^HMF@B;qW zwMWLj4IU(auhD`#R-wBRzMdiPL-xQ6;l=nMsLgNttCN+1&NF;|~r;$_R?hH!{#xt}FM% z7UZZahkvYrDAfSTi{nHC;oD945i+JkcTPIR0>fF2>=XzpDrAofBy(hfq%_i;q&plrj4H`}nI8dM zEkPWcA52PZ`KkeJCMlg?z+P*`dC&xOED42OKoH*gP)B~&m1avifr=<6O`x_G9lhhS;O6Neu) zklTnHUu^+N1Y^gvUID>W43g|FTQcHGt_h7PmK6+yfCD-8tJ^94G%4}Zirt+rnmzoB zU;;XtQpEBLB#kkD3yEWhf}nzzH(szwV>Cz+#n;J`pHtw>K_hvFn9`Ulq+1k6MUEkq z?PnQfOF&2<8xPB8Lqn+2Z)X=@Lee@**A5wK_t!9`9XP|>!-SBOKD^AFZ)fE3;Q)`Oym4Y*d0O~^Q58Fb z3{goIK+B7dSQb`{4#lK1UTA0+eMfX$EM$3P<{i4Yk=IwglielE7J&bRaE979w{d54WEwu>utfAcP z99aF=ev!9`EbqVDY5PMjxk+SI=x%44Kjm(xdIcY=uYQynSGykouD;e{-;-QXW8r4j zl%&$NW9n|lN6KB5K2k0&H>y*=+Sg8#I%o>h+*g&F3g#fKD~^dJvK?ZPI(^)aoIWE8 zjJ!=5o0KO;6dnr_!<=on(v6c&p}?cU0a;Qg>g%yj68cS9ADhrT>N^(fwU8&)8suyTly zs`gY#1z9ad2p4(NY=*&2glrjEv({B7i`fYUBI)B{G2VbfhKqYIA!^Clkw!r>?*^)5 zE8fbOO{n1+`M%wPX-I$rqIlP!9x5SRF65uN-bRMATo1<9!jnZll1vF7BRE}@ZHA`d zo8rK<%nwRNgHvAY{GCSV>EscF!wG>~8G0_to^@>4_`COM`X>9AyDQ;M@X81s*=eS? z39;~`DSYjA?m91tZZk&$H1rB^1Sv-<7&Z!L6mJ{w3}U`O)mM~VRq66ltPr!kCreO9 zG5Hf`4w=Dd35X>|VF{8+f{{tiaw-K-a+{x(nKZiSAVs|8rD#9B&1pZx3^st9`{+eWjXi?HxuHoK8y$gPR8bbOYGSl7rbx@8>?*(*lFpn>t%X#@+o4X!c9C z{c?BkY?#xg`{v=()eyLA%abMdeMk74)1(fX?rC+~wa@ogl58c{pQlO|n$PJ$xL4_V zW2{?F{BOtqH+^s6pPcy1->t?jIReot^=Z}f+VxM@WmPB1eD`0Lz9pAh`17j%OV`$p zN2q4s7U{w7wY3{w0-nZqA4zv-oZ z=-aH$<1J)_6%D0+9MN8QKcub>y71Qm~xBd0syszRzKTzX=D9>3>- za=iM)o>#U?w2!OxTTM{mNZpkY5UmMGlQ*Fc!^I38Ho+DK+_&%#!Gyt&kpxCr+b8b_ zIril%?-;`7ALC3-^83oS(s*Rd&iXy~ARx_G&kXZHdL&sJHFQ;e`-+n?W!R{i6J=%m zLN#8dM(((F4Zn&(KUsoN4I}B|f`rL<6HC6#%E2KRF^OtQk4GnhN%mt)j3EP_@OSCa zdQd?cWzdhIC6di;M!%samw8l~bm==K^KGI07#gT-hX5=qk`%=@vocegqz-6%B2uy? z$y1tBul|PoTiw9yXnylB_xF@+YacZU$eKG&31Dc~c1Z%4yfa%ROV+ugK~ZIWq`fEa zZ`z7vxiO9$h^DeX-}{M|n)#^#^%&&9PJf4zDzbK675F8=2|4}Z~*ZoQXX zUIw_b@fOwFdHW0AE-BrYf5uf<;Bw^ebCi~q6%?Y-ndb;UQ=GXqv_)*%wYQ+qv43wt z0gyY7*MTdj_o>&u|H`r5-~R5go%y?*Z|p2Qyz|t#ayPxy8!j1n@F-q9kuOopmlNP# zA`b?B=xJ^!&Cw@M6!xr>^p-D9lesMYSz^kMpEJQDMOFq|v|j1_lnYmbSeF4DSiOEiGMiQacCyLt&b%qch^aNxL~WxOBZ9>*EYeh|6c3743QD1&P_(EhcynW+ zel(zMz@-!qA_gTQU28K?(dMAXnu;KpisT@8$;l6}Rm5om{skU>&ph+I9}Htvf>tGX zyOAapMd_8SlWZAlZ6iq~ely_o-^FH`#AsE)@Aw7FIFCh~#vD#y2Cv`+Okq!!`y)X% z#QNAVc9896TN!7on1?RWs%R*x<1|}s3JK3kScZuHKet^?6cV6WzRhy!$}^+3e+q_U ztzxMZ6oEa8$`t2|v!dLI=_FTikl8!#^urPnr#u+}$+vp9oOn0L~zRq^c= zJlAx2rDCJ00N_Rv`*UG9+E~R6={iXyvPD&fo1ByOR53r=!Pk$LBMqWRqKh$^8 zjQNB%8Ee=x^9Fuof7qb8-}ph_>t9hYZoV`28hv=kSirM5t_REzb?5;5z;0pG{HYH+ z=~O1Ibdel4ApW#n^?SfOsdy)y z^q5x+Ce)NxNVwE?q58>h9I;#l%MJc%sEm3<=8pUFga@>eqLt*2YNFJp?z}!j)MbRj IxdpZI7wns?4FCWD diff --git a/票据理赔自动化/SQLite.db b/票据理赔自动化/database.db similarity index 100% rename from 票据理赔自动化/SQLite.db rename to 票据理赔自动化/database.db diff --git a/票据理赔自动化/main.py b/票据理赔自动化/main.py index 278be05..6292d63 100644 --- a/票据理赔自动化/main.py +++ b/票据理赔自动化/main.py @@ -55,7 +55,7 @@ if __name__ == "__main__": return ZenEngine({"loader": loader}).get_decision(rule_path.as_posix()) # 影像件识别使能 - recognition_enable = rule_engine(Path("rules/影像件识别使能.json")) + recognize_enable = rule_engine(Path("rules/影像件识别使能.json")) class MasterData(SQLiteClient): """主数据""" @@ -65,7 +65,7 @@ if __name__ == "__main__": 初始化主数据 """ # 初始化SQLite客户端 - super().__init__(database="SQLite.db") + super().__init__(database="database.db") try: with self: @@ -465,45 +465,42 @@ if __name__ == "__main__": if not (response.get("status") == 200 and response.get("code") == 0): raise RuntimeError("请求深圳快瞳通用文本识别接口发生异常") - boxes = [] - for box in response["data"]: + blocks = [] + for block in response["data"]: # noinspection PyTypeChecker - boxes.append( + blocks.append( [ - numpy.float64( - box["itemPolygon"]["x"] - ), # 文本标注框左上角的X坐标 - numpy.float64( - box["itemPolygon"]["y"] - ), # 文本标注框左上角的Y坐标 - numpy.float64( - box["itemPolygon"]["height"] - ), # 文本标注框左上角的高度 - box["value"], # 文本标注框的文本 + int(block["itemPolygon"]["x"]), # 文本块左上角的X坐标 + int(block["itemPolygon"]["y"]), # 文本块左上角的Y坐标 + int(block["itemPolygon"]["height"]), # 文本块左上角的高度 + block["value"], # 文本块的文本内容 ] ) - # 按照文本标注框的Y坐标升序(先从上到下) - boxes.sort(key=lambda x: x[1]) + # 使用俄罗斯方块方法整理文本块,先按照文本块的Y坐标升序(从上到下) + blocks.sort(key=lambda x: x[1]) - rows = [] - for idx, box in enumerate(boxes[1:]): + lines = [] + for idx, block in enumerate(blocks[1:]): if idx == 0: - row = [boxes[0]] + line = [blocks[0]] continue - # 若文本标注框的Y坐标与当前行的最后一个文本标注框的Y坐标差值小于阈值则归为同一行,否则结束当前行(分行) - if box[1] - row[-1][1] < row[-1][2] * 0.5: - row.append(box) + # 若当前文本块的Y坐标和当前文本行的平均Y坐标差值小于阈值则归为同一文本行,否则另起一文本行(分行) + if ( + block[1] - numpy.array([e[1] for e in line]).mean() + < numpy.array([e[2] for e in line]).mean() + ): + line.append(block) else: - rows.append(row) - row = [box] - rows.append(row) + lines.append(line) + line = [block] + lines.append(line) - boxes = [] - for row in rows: - boxes.extend( - [re.sub(r"\s", "", x[3]) for x in sorted(row, key=lambda x: x[0])] - ) # 按照文本标注框的X坐标升序(再从左到右)并去除所有空字符 - return "\n".join(boxes) # 整合 + blocks = [] + for line in lines: + blocks.extend( + [re.sub(r"\s", "", x[3]) for x in sorted(line, key=lambda x: x[0])] + ) # 按照文本块的X坐标升序(从左到右)并去除文本块的文本内容中所有空字符 + return "\n".join(blocks) # TODO: 后续添加居民身份证(国徽面)和居民身份证(头像面)合并 # noinspection PyShadowingNames @@ -608,9 +605,8 @@ if __name__ == "__main__": :param image: 影像件 :return: 空 """ - # 方法1:先使用深圳快瞳通用文本识别再使用硅基流动中大语言模型结构化,可行但是需要请求二次 - # 方法2:使用硅基流动中支持OCR的大语言模型 # 请求硅基流动的大语言模型接口 + # noinspection PyTypeChecker response = http_client.post( url="https://api.siliconflow.cn/v1/chat/completions", headers={ @@ -618,18 +614,41 @@ if __name__ == "__main__": "Content-Type": "application/json; charset=utf-8", }, json={ - "model": "deepseek-ai/DeepSeek-R1-0528-Qwen3-8B", # 通过从DeepSeek-R1-0528模型蒸馏思维链接至Qwen3-8B-Base获得的模型 - "messages": [{"role": "user", "content": ""}], - "max_tokens": 10240, # 生成文本最大令牌数 - "temperature": 0.2, - "top_p": 0.5, - "top_k": 20, - "frequency_penalty": 0.0, - "thinking_budget": 1, + "model": (model := "THUDM/GLM-4.1V-9B-Thinking"), + "messages": [ + { + "role": "user", + "content": [ + { + "type": "text", + "text": ( + text := "请以JSON字符串的形式输出识别结果" + ), + }, + { + "type": "image_url", + "image_url": { + "url": f"data:image/{image["影像件格式"]};base64,{image["影像件BASE64编码"]}" + }, + }, + ], + } + ], + "max_tokens": 8192, + "temperature": 0.95, + "top_p": 0.7, + "top_k": 2, + "frequency_penalty": 1.1, + "thinking_budget": 8192, }, - guid=md5(prompt.encode("utf-8")).hexdigest().upper(), + guid=md5((model + text + image["影像件唯一标识"]).encode("utf-8")) + .hexdigest() + .upper(), ) + print(response) + exit() + recognition = ( json.loads(match.group("json")) if ( @@ -648,6 +667,7 @@ if __name__ == "__main__": # 根据保险分公司匹配结构化识别文本方法 match insurer_company: + # 中银保险有限公司 case _ if insurer_company.startswith("中银保险有限公司"): boc_application_recognize(image) @@ -1382,12 +1402,12 @@ if __name__ == "__main__": ) # 影像件识别使能检查,若影像件不识别则跳过 - if not recognition_enable.evaluate( + if not recognize_enable.evaluate( { "insurer_company": insurer_company, "image_type": image["影像件类型"], } - )["result"]["recognition_enable"]: + )["result"]["recognize_enable"]: return # 根据影像件类型匹配影像件识别方法 diff --git a/票据理赔自动化/rules/影像件识别使能.json b/票据理赔自动化/rules/影像件识别使能.json index 9b46e0a..0a3d028 100644 --- a/票据理赔自动化/rules/影像件识别使能.json +++ b/票据理赔自动化/rules/影像件识别使能.json @@ -139,7 +139,7 @@ { "id": "a2fc744f-930d-43e0-b5cf-824a5928c7f1", "name": "是否识别", - "field": "recognition_enable" + "field": "recognize_enable" } ], "hitPolicy": "first",