From ef3699973af3e6775955c645fac109355f2a40df Mon Sep 17 00:00:00 2001 From: liubiren Date: Tue, 16 Dec 2025 22:18:59 +0800 Subject: [PATCH] =?UTF-8?q?=E6=97=A5=E5=B8=B8=E6=9B=B4=E6=96=B0=20from=20N?= =?UTF-8?q?UC?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- utils/caches.db | Bin 372736 -> 372736 bytes utils/client.py | 35 ++- 票据理赔自动化/SQLite.db | Bin 85553152 -> 85553152 bytes 票据理赔自动化/main.py | 608 ++++++++++++++++++--------------------- 4 files changed, 318 insertions(+), 325 deletions(-) diff --git a/utils/caches.db b/utils/caches.db index d2ef0d028430d0198db2a754c4667e78ed222048..d9b11439e081b753930be25e000513b3862328a0 100644 GIT binary patch delta 446 zcmZp8AlC3eY=SheD+2>VG!V-GvGzm_b5>Udy)KcBDGT_SHF@7mW;Y08e6u-GU=lAc z4>LQXwe+gyye?m=E>#1U;;dw%YR8RPTu|-CcwLS`|rDq%!cO1)9VZwC74W%CvW^K!enSR zeQ!LY38V4kKY!)fAWquYc!qHbBWp9O!**5&My3K*EufDSxASaZ{2|Y*$t5zK{Q{#S z6PL(#j|+^aI8{}ceHon`%}v}K&0U?`oJKuN>&O=28NbNMrLW2 zMyaN0=4OeBrin>F$|x<_FwG*##K>T}d_1G#WWJ|79A>5#7N#af#?#~XGK)@MYrvR3 RdEV1^j4az{YcQp<0{~0bkYfM< delta 481 zcmZp8AlC3eY=She3j+f~G!V-GvCc#db5<7yy|T`YDGT_SHF$qcW)}$DoG8%EJ8^>8 z_Mdwh*_kFOun>|6(%iudgR6Nj@lNDz;VtLQ;*I4E;C1FT1$*+4jkcTdeW ztk;LiEMZpJ{7nzaS*puze$4_Hu5*CCTPl`1G1D9-!;_?T+ziSQUgGHQ?WzS-*AG(1 zz&h`#>E@rZ>`YA#+Z-6#3s|**QY_nfHZcB>XV&1doX&oM(Q&)i1;$gH6DO#*Ti#*> VVkRJF-fnq|<@C|*EDB72_yGoLjaL8w diff --git a/utils/client.py b/utils/client.py index 5e3baa7..803f11b 100644 --- a/utils/client.py +++ b/utils/client.py @@ -15,7 +15,7 @@ from email.utils import parsedate_to_datetime from functools import wraps from imaplib import IMAP4_SSL from pathlib import Path -from typing import Any, Callable, Dict, Generator, Literal, Optional, Tuple, Union +from typing import Any, Callable, Dict, Generator, List, Literal, Optional, Tuple, Union from urllib.parse import quote_plus from xml.etree import ElementTree @@ -188,6 +188,39 @@ class SQLiteClient: self.threads.connection.rollback() raise RuntimeError("为当前线程查询并获取单行数据发生异常") from exception + def _query_all( + self, sql: str, parameters: Tuple[Any, ...] = () + ) -> List[Dict[str, Any]]: + """ + 为当前线程查询并获取多行数据 + :param sql: 查询SQL语句 + :param parameters: SQL参数 + :return: 多行数据 + """ + # noinspection PyBroadException + try: + # 为当前线程创建数据库连接和游标 + self._connect() + # 检查当前线程无数据库游标,若无则抛出异常 + if not hasattr(self.threads, "cursor") or self.threads.cursor is None: + raise RuntimeError("为当前线程创建数据库游标发生异常") + + # 为当前线程执行SQL + self.threads.cursor.execute(sql, parameters) + result = [] + while batch := self.threads.cursor.fetchmany(1000): + result.extend([dict(row) for row in batch]) + return result + # 若发生异常则回滚事务并抛出异常 + except Exception as exception: + # 检查当前线程有数据库连接,若有则回滚 + if ( + hasattr(self.threads, "connection") + and self.threads.connection is not None + ): + self.threads.connection.rollback() + raise RuntimeError("为当前线程查询并获取单行数据发生异常") from exception + def _execute(self, sql: str, parameters: Tuple[Any, ...] = ()) -> bool: """ 为当前线程执行SQL diff --git a/票据理赔自动化/SQLite.db b/票据理赔自动化/SQLite.db index efad18e8a7deb5ce63860c8376d9102e2180e935..38c69df8e18dcb644b3ab3b729500511fa1f39f0 100644 GIT binary patch delta 8523 zcmeI$cT^SEx4?0@(geBK5Nq6a?0`K)WACx|hA6106e%XC31DO^7C^B#)YvQb0*V@A z?=hO#Vj>sO6gAPc4%?BgBe;WgAF*f-$s zTPxW#>(Zi0w-(Z^Nz?W%=PX75gZ!e9hYn@ z;`rKrzYyO+-hpukjD-tkwgh^Q^bGd#_X`^46A-u0=oB~2Xr1qs>j+KnB`ot;U(cXm z??8{7qf56Iy}F5?f3Wz4j2xMH;9!4WUvIy`-h2gnczFhUr$;SZXZzO zJnpXt8byW%`iBH$PHkv*bD@_m%BeXZMxO%t@_mTiJNYW}=h3yZO@6Z_JCZti;GU zf!X+~$nwe8X2}dc2__b5|HY)vKp1TbGi&C}q!{zwIjhjGS?1_Q7|?4e!vg zn{?>jzJ2Cx$ZmVXAAgP*w&<32B=mQWAIE+MH-wJmsGwcy-4A?EAlrAdHL7j=&VN z^VX7x+((dklV8U0uOmp=6`c~cD=~ZWO_5t~ZB4wJG}SaUGIh%O)P?H_&NO4{?RgVS zn-||rnshsInkiz(&G3j@lXj$TnnsgpL#%1V*1Jhj{}}YE5B1B4`3?IoqF!s7HjAnH z$01)``$GR5GslQo%gCRtfBma^^^{j8=^saZ^NRGg3{B7fH-2)Cnw=uKOwyM@yq2pu zENgRs|L2$>80zgbCGxCkzF!?L#pm)VpaTtduWf_uySzxgGU z+u$NgX4L|&>1%$Zku7(3LsZ7rYn4I&?4>^E%9_3VwPK*`TMjFmRR`p3{{J!{mu0}f z|Lrm$?xDTOJU;GgSvh&S z8_YV;ucv!@zjfZ&@7p@8WLMw9%Bfui&a6;0{UEBvkc|4z`LsR#n>$o6bC_cuv8a4< zZ1^J!3!C(c_kmSdL@#q=>8h^99Qd-ZXyDn`G&44JR*Y#{v}to>YQpriJ?l~u_wsJ9 z<>pe?#m%)rz1l9WwOn0X+!|!Q+FWvf!q8wxbD!?5!)m!TaCLQaajR3y&52I4KCUUd zHkcOdy&bnBZRynbb9cs=JI=Yc`@WR}n>4XMbCHxmtWsH}ib_?LYAV%LYN*syaZzzqaZ{7>$GrHe{em2N8CReGrOROzMCTcwXmUzL6;{Z$62c&H3i@l+Y4GFZh+#am^F z%21VIDn2U1RYs_cRPj~uQ}I^`P#L8Xs1l?StP-L!TIEBPk5occ#;A-{8K?5G$|ove zD&Z;-D&ti?Rhgg?siGl?5sbRTil%R*6?xqOw$FnaXD>%T-pWtW;U0vRY-0%376mD(h7?sBBc(q>`Yr zS!IjLR+Vik+f{a`>{QvMvRh@3N}@`V%3hUyD#sGLMU#VPFxuo*7$~P*PRj#O9Rk@~eUFBPq8!F$a ze6Mm-B}K)glB#k`B~9hF${m#-RDM+XN#$piyDIlo?yLNw@~g@Nm4_<7sXS77tnx(V zsme2z-&LNgyioaLvY1DUISj}H3s}Mm*04cd*uoC^U=Jf4kRJt55QR_}jwk{r6h$!< zM+uZfDZGQyNFTop%Ay>~qXH_T5}Z*PRZtbxP#rZ;6E1Lt8)~68>Yy&_p*|YmUAW^t zG(;o3kH%<%rf7!dXn~e!h1O_;wrGd;_y8Ty5uMN(UCcO{6TQ$Ieb5*E&>sWf zfr0SEAPj~VyfFkrF$_K!ju99MU--cv0T_iq1R)q97>y6{5kfHrV=)dN;}e7-91$3g zPcZ?JP)x)mL}4;u5~bH@J)|xQc7Ij&E@T-{E`QL<&qu#Vw@aHtygD z{D`0MGw$LZ?&BByiU)Xz-|z^J@dQut48P+!Uf>V&$ztH~hXHwD0ZUlH8aBuaTi78V z>|ulh@}mF>q7Vwh5k=sHq9}&qD1nkFg?CUI>Eo9{S(HP0R6s>kf-@?k3aX+Ss-p&K z!Ue8yLoL)s9n?iV)JFrn3wOMShG>NM(HKq86wS~aEzlCJ&>C&f7VXd;AD{y|q7yo! z3%a5kx}yhrq8ECj5Bj1X`eOh*Fc6*?gu(EFH-=y+hQSBJ4I5>IA$D=&aRv)ZbBA$e z4m-~U)EsU8N-_4t(lNqd?b(+?EbqT5h1_12Lb)%*qW6z5MDHI7U-%jJ?e{lWFUf1< z|D`8!JURcs07DfEtBOffj)vsMUIB(fc2?#Nt<4--Cr3~|7Ro3et6GOe)G^POSw8*` zi^n(AhHohi)5b5QB28Twb$ecH{58tNytyb7H--loB(H5oA^d+m`3V7r=!8)SL=b`z zg3U!V!V-_!JWm3B^QALKG%r3Z`Njq7j2wOvenwVJ2o_Hs)Y1 z=3zb-U?CP^G2*cVOR)@}VL4V{C01cI)?h8xVLdirBQ_xco3RC3u?^d?13R$`yRip} zNWxz1Lo)W`01o1F9Kshkj3YRTV>pfzIEhm@jWallb2yJLaRFc9A}--;e1prlf~&ZO z>-ZKo@EyL#O{BntRNO)uZsQJqz>oL|KjSX$;XZ!BuXuom_zjQn7*FsN&+t2*;|2aO zOisuXonQ_F^1uR?u!1#gkQcVFLq6EU2nXax0Te_b6ow;;zzIcB48>6bB~c3Rpfm(! zP!{D-9u-g#mEerZsDi4fhU%z+ns9+D+)xX(Q3rKV5B1Ri@4_ALp&=UKeKbZBG(|Hs zM+>w>E3`%%v_(6##|P+uj_8EW=z^~3hVJNrp6G?%=!3rKhyEA<4-AAS24OI~;Ef>| zied1jZE7@r^v;fTO^e2NK(gkmBlAqtZ* z1yeB%(TG7Treg-;FcY&d8*?xh^DrL^un>!|81YzwrC5f~upBF}605KpYp@pUupS$* z5u1>J&Desi*oN)cft}ce-PnUfBw;W1AsPE|00;3o4&e(N#t|IFF&xJUoWv=d#u=Q& zIh@CrxPY&45tr~azQJW&!Bt$tb$p8(_zvIWCQ@KRDsCYSw{ZtQ;79y~pK%xWa38ZpO5aDgk_Pz$wD2X#>o z_0a(D!X59SAsXR*G)5CNMKd%<3$#Qlv_>1WMLV>|2k3x~=!DMbg0AR>?&yJ@=!M?s zgTCm8{ulrc41^~JVKBVljUgC{Ver9ljKE0v!Vmrkz$gSF2*C)!Xncr|5Q;Gvi*fiE zpCAn3h`@M!iV28>Vj?CX3X?GfQ!x$Eh(Ro-V+P_d6SFWIb1)b4FdqxB5R0%F@mPYT zSccEA94oLAtFRhtuommE9viR`n~;Fb*n+LthV9sao!Eul*n>nQVK4R}8T)Yn2k|)$ z;R_tb5gf%a9LEWq#3`J{8JxvAoX3~AfUj^7m+&>d!DU>*Rb0b$e2W|S4&UP@QeZ+V zZXpe~aR)!(NBo4JaToV+AHU#NJitTzhDUgeCwPiy_#Myj0)JRcPO#+hhXHwD0ZUlH j8aBuaTi78V>|ulh@}mF>q7Vwh5k=r+xiO)r<^KNw?c!rZ delta 9141 zcma*rb&yo|y7loM!x=O<#+&#GK zUF*E{R^40opL?plp9OPfx_9rryU%b2-Mr@?geIg-a>C=OHO1rc#rAmoM?D@-EX$cM z`3X<1-lGzRU-SegeCtX0Hqn9bi{a7TUbF~G9n-92f}qrKHcd#prC3<9!QDe*rySe~ zYi;j_R6W$AX-Y{Qec%rnCAHox7Y?`-S=N>)V_3YKNU6;;1nzm}&v}dQT zemt$`R?4hdg9ZF_{yy?;Up!p15tn=(PyGG5vxUbK z_s8MUiE+Bq4;z%);|WPS`B1SLJ7wd@A`>3aAuR`9-CWN@0~EDn(U(RVk)Y zT&09cNtIG6rB%wPlvOFGQeLHkN=21QDwS2Ls8m&{rczy{hDuG9S}L_w>ZsIJsi*Rr zN_~|EDh*YBS81fuSfz=|A1X~%nyEBbX`#|mrIku+l{PAERsK|Ir_x@fgGxu0PAZ*M zx~Ozj>88?MrH4vSm0l{nRr;v>rP5cWpGtp~0V)Gk{#FU73{n}aGDKyl$}pATDylL< zWu(d|mC-6=RK}`|Q;AR+uQEZ!RGdnr%0!h(Dw9>Fs7zIvrZQb+hRRHpSt?N~(JHf5 z=BUh7nWr*eWr4~{Z#PvR~zZ%0ZPwDu-2$s2o)}rgB{6gvv>kQ!1xb z&ZwMKIj3@7<$}sZl}jrBs9aXLqHxubGd<(|rYl?N&hRUWB4 zR(YcGROMflXDZKCUZ}iOd8P7N<&DZ)m3J!dRX(VERQaUxS>=n$SCwxn-$zSOgan~r z6oNug7>b2rqc|uoiihH(1Sla2M~P5klmsP3$xw3i14@BXqEsj~N`um(bSOQ_fPO@L z{){LS%8at0tjLRe$d9t2>?jAyiGD)4P;Qh5Y%!)9{LT{M-5Oz^gC*V z8lxuY57ZPjL(NeO)DpEqtx+4)7X69Zq4uZ)>WDg_&ZrCOin^ihs0ZqadZFH^5BdxB zMg35JGyn}mf1>~zga)G_Xeb(nh9gBI&`2~2jYeb8STqhrpz&w|GUQMsnusQ$$!H3i zil(9IXa<^zW}zq)jb@`cXfB$E=A#8@AzFkMqa|o5T85UR6=)?|g;t|AXf0ZY)}sw* zBie*Eqb+DF+J?5H9cU+tLA%gyvK6-#2qDSa4dV-#!f6+7a9KAp< z(JS;Cy+Lo$JM|2{*-a{sUnTRpNhvV7-NfdYTyA1^Ob!>> zACuihR>EX+k*P2~=UQU2I+q)h**W~q<gYiqL@2VRd z3v<~G`VDi*4a$tUXy3bG&f9k!sPBw@Pk=dXfo~14r|g>_bHcvj?D>w^*Rq&H_BA=? zpnbtD;X7bo@?iGaXWSCLz4jT`+qc_3XTikSrvaFq_NgvryM4mx@@=zEL71)f@psH7 z`-tn~+h`y73>)l28O&Py5WqF`t+DrwFsto-4$LZh*Bi6K-c`UXvv(;mOYH5Rn8o&% zuW+Hg#V`E6`SylSIM?3rCyy^CeCvU2~ii??SFI!_mH+6ccGLnqtge{Dc{A&+%^nUxYocgc)nkQ(#8hv$mK~_UsqTNPEWfJi`91 zhZ%1F@;nc-r#$+h_LL`Rh&|;A8e~uUU;_4}24%6Y%)@+hcyUuRZ1q z?PHI4E_&J{-15He_7Jz1ubVwAjOl6*`GlSA0bh0pd%!1bZ})p*+Sz@c(zbR#38szR zJsW_Gsey`_Vr{$i z7pA7&;$zpaTWK-X>?ZG|s&$s+d3_yT)fKXxBn8`Ryv+alX8EH7h2MUFnR;WmkC7bJ~?)Ob)xu zJ3pIU=2Q6XpSBpU{gW4y)h_XZX0c0r>`Znk9Fx&5*1}}4i#+=Db^#v-zI1khw{z-% zUEuSkvhzIaDeOGYMKU|jH(nAu$Hxx0bEPl|>>RI8d^_6=6UWZ-evWNtc<0BmGrZcN zc80Go*iQ4JdF=GB_~+<%I~@=6%}(K+=KW%)a6@=M+eutB??(%q#F2VG*hzl%y`AWf zd21(dz}`1@f?s@P$8ke>U)piph~DRRJU!-_9qWa8YRCAUkL(x@#{1Zg;Szcu+EIM6 zc^}wOKjxkt>4v#$M{oyuZ`l!?L+=ecjN|oQv%>*C+EqJ*1NL6AL;Rl0c8CXj$qw>> z&)Pvg%qcs7tL8m!2k^<{J!S_IV2;{;ytlnaY`+(C*!J;257|E47v6)m4-8lK)owhqOW}EHe0dKKge5TE|ix0EGVt9n> zEe4muyUt>gW7gWvPM9^e6Zf2Vwe7@3^RBWTzhPF`4tyqfm)mxHpn8|tc3z4lwmlAJ zk!|C*EVON2y#KrlY%5>dJll%zNfu&TaYK0L+7_OnXxoCj%o}B!TVZC|=De60wrL<{ zx^1eAnP!_(csy~s*+#z5$+nS~BGNYCjpKE;ftS~80}p1rt;gHJ8)55tCCAu$yu$t46v$gn4@D8!H_#W?ITf-mx+t%>W2HG0F`hK<=H=?(%t#d)X>p`<}K6?;meBTgjv7YAbm~I@?MdsJEl7XoKlsD|og8?QJ>FP+MDG9@EB_ zr^B?iW&C1GTgET8uw_Xw&24E@OjBE$8`Id9@Y*%9CA^dkZ3z#wfi1=djklgH##_`| z+ZOW%u4Rj3VQSbS-YnH@5$~s}wvdNbDPRkEY!z)`IHrOvXpAXi3$kNM+XB9(k~W_= zVsV?#=PhdUV`GZgyjqw-Hjme*fX&6Hk2k-~EriKub9tHb*c@Kw+&0IH$z`*7XgO^* zPf&K7%@gFeiqX8;d=|}Dk;S6)T>t+HW*y8k{%(GPj1$5LoP1>!VGtd`PjqUV!W;WL)>EA!~Vf;5r1%yTa*_QaEm;c zzum%`n1OB~E{1=ATYz`Izn@!xcfP-`Tfo!xmz$6GjK7bY&!gz==Hp`cd%1Z$v>q-n zk7u@0#)pxTBVCuP9eCRrEW&n4Xzm}Vc_m{t> zn}K(dzlNJp5>wsHNQ9~4rZ>k_cGGc3`zyNXVVLr68g6rc88^naBiG3da+BO5x5*uHm)s-w$pi9` zJR*IlA5F;X-PVgo@5|D5+NB$ zCX$(CAz6u+_=ulmBiTs~l9T*Ia*^C556Me@CizHyQh*dBzmP(tFeyTcl3z(NQk;|^ zB}pk#nv@}BNjXxUR3H^eB~qDGAyr8=Qk~QwHAyW}o75q7Nj>r#sZSb^hU9nBh%_cm z$RDIBX-1lp7NjLN z{$v0dNd6`P@}K)}F#j}!3?;+JaH3=c8A(Qw(PRu6OU981GM-EzMjVMG6UihpnM@&5 z$uu&Z%pfz#ED}Yc$!s!*%q8>4e6oNnB#X#mvV<%p%gA!Ff~+K~$ZE2NtR?Hnda{9R zB%8=)vW09V+sJmZgX||~AK6b1kb~q9IZTd_qvRMlPEL@M1yClAO&@`yYpPsmg9FL_3ulNaPA zc|~56H{>mON8XbU674#3TtxN|KS}0aB3sLJE<>qzEZWekH|7aZ-YmB&A4cQihZz16lL2HP`I`jDf9}7*{L>IJ zlnf)oiINdyBpF3UlQCp08Al?>crt+)aU_yVB$LQwGKEYf)5vr(gUlqeNEC@Cv&kGX zm&_yc$pW&FEFz1^60(#mBg@GOvXZPKtH~O&maHS|$p*5KY$BV<7P6IWBiqRivXjJ+ zU1T@eL-vw=WIs7T4w6IUFgZewl4ImJIYCa6Q{*%`L(Y+A99&oAy>&Y za-G~DH_0t>o7^FH$vtwPJRlFrBl4I$Ay3J_ACKLn9r5<)^r7>Pw dict | None: - """通用数据提取""" - - # 影像件全局唯一标识:优先使用关键词变量,其次使用全局变量,再次使用随机唯一标识 - image_guid = kwargs.get( - "image_guid", globals().get("image_guid", uuid.uuid4().hex.upper()) - ) - - # 影像件格式 - image_format = kwargs.get("image_format", globals()["image_format"]) - if image_format is None: - raise RuntimeError("请入参:image_format") - - # 影像件BASE64编码 - image_base64 = kwargs.get("image_base64", globals()["image_base64"]) - if image_base64 is None: - raise RuntimeError("请入参:image_base64") - - # 请求深圳快瞳通用文本识别接口 - response = globals()["http_client"].post( - url=(url := "https://ai.inspirvision.cn/s/api/ocr/general"), - headers={"X-RequestId-Header": image_guid}, - data={ - "token": globals()["authenticator"].get_token(servicer="szkt"), - "imgBase64": f"data:image/{image_format};base64,{image_base64}", - }, - guid=hashlib.md5((url + image_guid).encode("utf-8")).hexdigest().upper(), - ) - # 若响应非成功,则返回NONE - if not (response.get("status") == 200 and response.get("code") == 0): - return None - - # 基于空间坐标法就识别结果中文本框进行分行排序 - texts = [] - # 重构文本框列表 - for text in response["data"]: - texts.append( - [ - # 文本框左上角的X坐标 - numpy.float64(text["itemPolygon"]["x"]), - # 文本框左上角的Y坐标 - numpy.float64(text["itemPolygon"]["y"]), - # 文本框的高度 - numpy.float64( - text["itemPolygon"]["height"] - ), # 深圳快瞳基于文本框的Y坐标最大值和最小值的差值 - text["value"], - ] - ) - # 按照文本框Y坐标升序(使用空间坐标算法,从上到下,从左到右) - texts.sort(key=lambda x: x[1]) - rows = [] - # 分行 - for index, text in enumerate(texts[1:]): - # 若为第一行则初始化当前行 - if index == 0: - row = [texts[0]] - continue - # 若文本框的Y坐标与当前行中最后一个文本框的Y坐标差值小于阈值,则归为同一行 - # noinspection PyUnboundLocalVariable - # noinspection PyTypeChecker - if text[1] - row[-1][1] < numpy.mean([x[2] for x in row]) * 0.5: - row.append(text) - # 否则结束当前行、初始化当前行 - else: - rows.append(row) - row = [text] - # 添加最后一行 - rows.append(row) - extraction = [] - # 按照文本框X坐标升序 - for row in rows: - extraction.extend( - [x[3].replace(" ", "") for x in sorted(row, key=lambda x: x[0])] - ) - # 以空格拼接 - extraction = " ".join(extraction) - - # 根据理赔申请书匹配提示词 - match application_form := kwargs.get( - "application_form", globals().get("application_form") - ): - case "中行员工福利保障计划索赔申请书": - prompt = f""" - 指令:你是一个从OCR文本中智能提取信息并生成JSON的工具,请严格按照要求执行。 - - 输入:OCR文本(可能包含错漏): - {extraction} - - 输出要求: - 1、只输出可被Python中json.loads()解析的JSON格式字符串,不包含任何代码块标记、说明文字等其它非JSON格式内容 - 2、无法确定的值设置为`null`(不是"null"字符串) - - JSON结构: - {{ - "基础信息": {{ - "申请人": "字符串或null", - "性别": "字符串或null", - "年龄": "字符串或null", - "手机": "字符串或null", - "身份证号": "字符串或null", - "开户银行": "字符串或null", - "户名": "字符串或null", - "账号": "字符串或null", - }}, - "票据表格": [ - {{ - "就诊序号": "字符串或null", - "发票日期": "YYYY-MM-DD或null", - "发票上的就诊医院/药店": "字符串或null", - "票据张数": "字符串或null", - "票据金额": "字符串或null", - "诊断": "字符串或null" - }}, - ] - }} - - 开始输出: - """ - case _: - raise RuntimeError(f"理赔申请书{application_form}未设置处理方法") - - # 请求大语言模型创建对话接口 - response = globals()["http_client"].post( - url="https://api.siliconflow.cn/v1/chat/completions", - headers={ - "Authorization": "Bearer sk-xsnuwirjjphhfdbvznfdfjqlinfdlrnlxuhkbbqynfnbhiqz", # 基于硅基流动 - "Content-Type": "application/json; charset=utf-8", - }, - json={ - "model": "deepseek-ai/DeepSeek-R1-0528-Qwen3-8B", # 通过从DeepSeek-R1-0528模型蒸馏思维链接至Qwen3-8B-Base获得的模型 - "messages": [{"role": "user", "content": prompt}], - "max_tokens": 10240, # 生成文本最大令牌数 - "temperature": 0.2, - "top_p": 0.5, - "top_k": 20, - "frequency_penalty": 0.0, - "thinking_budget": 1, - }, - guid=hashlib.md5(prompt.encode("utf-8")).hexdigest().upper(), - ) - - extraction = ( - json.loads(match.group("json")) - if ( - match := re.search( - r"```json\s*(?P\{.*})\s*```", - response["choices"][0]["message"]["content"], - re.DOTALL, - ) - ) - else None - ) - - return extraction - - -def disease_diagnosis(**kwargs) -> str | None: - """疾病推定""" - - # 赔案档案:优先使用关键词变量,其次使用全局变量 - dossier = kwargs.get("dossier", globals().get("dossier")) - - prompt = f""" - 指令:你是一个医学疾病分类诊断的工具,请严格按照要求执行。 - - 患者信息: - 性别 {gender if (gender := dossier["赔案层"]["申请人信息"].get("性别")) is not None else "未知"}, - 年龄 {age if (age := dossier["赔案层"]["申请人信息"].get("年龄")) is not None else "未知"}, - 近期在药房/医院开具发票中内容 {dossier["赔案层"]["其它信息"]["小项合集"]} - - 输出要求: - 1、患者自述症状在 {dossier["赔案层"]["其它信息"]["自述症状"]} 其中之一 - 2、依据患者信息、自述症状和其提供的发票中内容 {kwargs["items"]} 综合诊断,只输出一个最可能的ICD-11中的疾病分类中亚类目代码对应的中文名称字符串,不包含任何代码块标记、说明文字等 - - 开始输出: - """ - - # 请求大语言模型创建对话接口 - response = globals()["http_client"].post( - url="https://ark.cn-beijing.volces.com/api/v3/chat/completions", - headers={ - "Authorization": "Bearer 2c28ab07-888c-45be-84a2-fc4b2cb5f3f2", # 火山引擎 - "Content-Type": "application/json; charset=utf-8", - }, - json={ - "model": "deepseek-r1-250528", - "messages": [ - {"role": "system", "content": "你是人工智能助手"}, - {"role": "user", "content": prompt}, - ], - "temperature": 0.2, - "top_p": 0.5, - "top_k": 20, - "frequency_penalty": 0.0, - "thinking_budget": 1, - }, - guid=hashlib.md5(prompt.encode("utf-8")).hexdigest().upper(), - ) - - recognition = ( - match.group("text") - if ( - match := re.match( - r"\s*(?P.*)", response["choices"][0]["message"]["content"] - ) - ) - else None - ) - - return recognition - - # ------------------------- # 主逻辑 # ------------------------- @@ -298,57 +83,41 @@ if __name__ == "__main__": --所在市 city TEXT NOT NULL ) - """ + """ ) - # 初始化团单表 + # 初始化在保被保人表(TPA作业系统包括团单、个单和被保人表,此处直接整合为宽表) self._execute( sql=""" - CREATE TABLE IF NOT EXISTS group_policies + CREATE TABLE IF NOT EXISTS insured_person_policies ( - --团单号,一张团单包括多张个单 - group_policy TEXT NOT NULL, + --被保人 + insured_person TEXT NOT NULL, + --被保人的证件类型 + identity_type TEXT NOT NULL, + --被保人的证件号码 + identity_number TEXT NOT NULL, + --与主被保人关系,包括本人和附属(配偶、父母和子女) + relationship TEXT NOT NULL, + --个单号 + person_policy TEXT NOT NULL, + --主被保人 + master_insured_person TEXT NOT NULL, + --保险起期(取个单和团单起期最大值) + commencement_date REAL NOT NULL, + --保险止期(取个单和团单止期最小值) + termination_date REAL NOT NULL, + --团单号 + group_policy TEXT NOT NULL, --投保公司 - insurance_company TEXT NOT NULL, + insurance_company TEXT NOT NULL, --保险分公司 - insurer_company TEXT NOT NULL, - --团单有效起期 - from_date REAL NOT NULL, - --团单有效止期 - to_date REAL NOT NULL, - --联合主键:团单号+投保公司+保险分公司 - PRIMARY KEY (group_policy, insurance_company, insurer_company) + insurer_company TEXT NOT NULL, + --联合主键(投保公司+保险分公司+被保人+被保人的证件类型+被保人的证件号码) + PRIMARY KEY (insurance_company, insurer_company, insured_person, identity_type, + identity_number) ) - """ - ) - - # 初始化个单表 - self._execute( - sql=""" - CREATE TABLE IF NOT EXISTS person_policies - ( - group_policy TEXT NOT NULL, - person_policy TEXT NOT NULL, - from_date REAL NOT NULL, - to_date REAL NOT NULL, - PRIMARY KEY (person_policy, group_policy) - ) - """ - ) - - # 初始化被保人表 - self._execute( - sql=""" - CREATE TABLE IF NOT EXISTS insured_persons - ( - insured_person TEXT NOT NULL, - identity_type TEXT NOT NULL, - identity_number TEXT NOT NULL, - relationship TEXT NOT NULL, - person_policy TEXT NOT NULL, - PRIMARY KEY (person_policy, insured_person, identity_type, identity_number) - ) - """ + """ ) except Exception as exception: @@ -368,7 +137,11 @@ if __name__ == "__main__": with self: # noinspection SqlResolve result = self._query_one( - sql="SELECT institution_type FROM institutions WHERE institution = ?", + sql=""" + SELECT institution_type + FROM institutions + WHERE institution = ? + """, parameters=(institution,), ) return ( @@ -380,10 +153,70 @@ if __name__ == "__main__": ) from exception # noinspection PyShadowingNames - def query_individual_policy( - self, insurer_company: str, certificate_type: str, certificate_number: str + def query_insured_person_records( + self, + insurance_company: str, + insurer_company: str, + insured_person: str, + identity_type: str, + identity_number: str, ) -> Optional[List[Dict[str, Any]]]: - pass + """ + 查询并获取多条被保人记录(例如,若夫妻同在投保公司则互为附加被保人,一方被保人记录包括本人和配偶两条) + :param insurance_company: 投保公司 + :param insurer_company: 保险分公司 + :param insured_person: 被保人 + :param identity_type: 被保人的证件类型 + :param identity_number: 被保人的证件号码 + :return: 被保人记录 + """ + # noinspection PyBroadException + try: + with self: + # noinspection SqlResolve + result = self._query_all( + sql=""" + SELECT insured_person AS "被保人", + relationship AS "与主被保人关系", + person_policy AS "个单号", + master_insured_person AS "主被保人", + commencement_date AS "保险起期", + termination_date AS "保险止期" + FROM insured_person_policies + WHERE insurance_company = ? + AND insurer_company = ? + AND insured_person = ? + AND identity_type = ? + AND identity_number = ? + """, + parameters=( + insurance_company, + insurer_company, + insured_person, + identity_type, + identity_number, + ), + ) + return ( + None + if result == [] + else [ + { + k: ( + datetime.fromtimestamp(v) + if k in ["保险起期", "保险止期"] + else v + ) + for k, v in e.items() + } + for e in result + ] # 将保险起期和保险止期由时间戳转为datetime对象 + ) + + except Exception as exception: + raise RuntimeError( + "查询并获取多条个单和被保人记录发生异常" + ) from exception # 实例化主数据 master_data = MasterData() @@ -605,6 +438,73 @@ if __name__ == "__main__": :return: 空 """ + # noinspection PyShadowingNames + def general_text_recognize(image) -> str: + """ + 通用文本识别 + :param image: 影像件 + :return: 识别文本 + """ + # 请求深圳快瞳通用文本识别接口 + response = http_client.post( + url=(url := "https://ai.inspirvision.cn/s/api/ocr/general"), + headers={ + "X-RequestId-Header": image["影像件唯一标识"] + }, # 以影像件唯一标识作为请求唯一标识,用于双方联查 + data={ + "token": authenticator.get_token( + servicer="szkt" + ), # 获取深圳快瞳访问令牌 + "imgBase64": f"data:image/{image["影像件格式"].lstrip(".")};base64,{image["影像件BASE64编码"]}", + }, + guid=md5((url + image["影像件唯一标识"]).encode("utf-8")) + .hexdigest() + .upper(), + ) + # TODO: 若响应非成功则流转至人工处理 + if not (response.get("status") == 200 and response.get("code") == 0): + raise RuntimeError("请求深圳快瞳通用文本识别接口发生异常") + + boxes = [] + for box in response["data"]: + # noinspection PyTypeChecker + boxes.append( + [ + numpy.float64( + box["itemPolygon"]["x"] + ), # 文本标注框左上角的X坐标 + numpy.float64( + box["itemPolygon"]["y"] + ), # 文本标注框左上角的Y坐标 + numpy.float64( + box["itemPolygon"]["height"] + ), # 文本标注框左上角的高度 + box["value"], # 文本标注框的文本 + ] + ) + # 按照文本标注框的Y坐标升序(先从上到下) + boxes.sort(key=lambda x: x[1]) + + rows = [] + for idx, box in enumerate(boxes[1:]): + if idx == 0: + row = [boxes[0]] + continue + # 若文本标注框的Y坐标与当前行的最后一个文本标注框的Y坐标差值小于阈值则归为同一行,否则结束当前行(分行) + if box[1] - row[-1][1] < row[-1][2] * 0.5: + row.append(box) + else: + rows.append(row) + row = [box] + rows.append(row) + + boxes = [] + for row in rows: + boxes.extend( + [re.sub(r"\s", "", x[3]) for x in sorted(row, key=lambda x: x[0])] + ) # 按照文本标注框的X坐标升序(再从左到右)并去除所有空字符 + return "\n".join(boxes) # 整合 + # TODO: 后续添加居民身份证(国徽面)和居民身份证(头像面)合并 # noinspection PyShadowingNames def identity_card_recognize(image, insurance_company, insurer_company) -> None: @@ -631,7 +531,6 @@ if __name__ == "__main__": .hexdigest() .upper(), ) - # 若响应非成功则抛出异常 # TODO: 若响应非成功则流转至人工处理 if not (response.get("status") == 200 and response.get("code") == 0): raise RuntimeError("请求深圳快瞳居民身份证识别接口发生异常") @@ -643,15 +542,14 @@ if __name__ == "__main__": # noinspection PyTypeChecker dossier["出险人层"].update( { - "有效期起": parse( - (period := response["data"]["validDate"].split("-"))[0] - ).strftime( - "%Y-%m-%d" - ), # 就有效期限解析为有效期起和有效期止。其中,若有效期止为长期则默认为9999-12-31 - "有效期止": ( - datetime(9999, 12, 31).strftime("%Y-%m-%d") + "有效起期": datetime.strptime( + (period := response["data"]["validDate"].split("-"))[0], + "%Y.%m.%d", + ), # 就有效期限解析为有效起期和有效止期。其中,若有效止期为长期则默认为9999-12-31 + "有效止期": ( + datetime(9999, 12, 31) if period[1] == "长期" - else parse(period[1]).strftime("%Y-%m-%d") + else datetime.strptime(period[1], "%Y.%m.%d") ), } ) @@ -663,9 +561,9 @@ if __name__ == "__main__": # noinspection PyTypeChecker dossier["出险人层"].update( { - "姓名": response["data"]["name"], - "证件类型": "居民身份证", - "证件号码": response["data"]["idNo"], + "姓名": (insured_person := response["data"]["name"]), + "证件类型": (identity_type := "居民身份证"), + "证件号码": (indentity_number := response["data"]["idNo"]), "性别": response["data"]["sex"], "出生": datetime.strptime( response["data"]["birthday"], "%Y-%m-%d" @@ -681,63 +579,83 @@ if __name__ == "__main__": } ) - # 查询 - print(dossier["报案层"]) - print(1) - exit() + # 查询并获取多条被保人记录 + insured_person_records = master_data.query_insured_person_records( + insurance_company, + insurer_company, + insured_person, # 出险人和被保人为同一人,视角不同:出险人为理赔,被保人为承保/保全 + identity_type, + indentity_number, + ) + # TODO: 若查询并获取多条被保人记录发生异常则流转至项目运营岗人工处理 + if insured_person_records is None: + raise RuntimeError("查询并获取多条被保人记录发生异常") + dossier["被保人层"] = insured_person_records # noinspection PyShadowingNames - def bank_card_recognize(image_guid, image_format, image_base64) -> None: + def application_recognize(image, insurer_company) -> None: """ - 银行卡识别并整合至赔案档案 - :param image_guid: 影像件唯一标识 - :param image_format: 影像件格式 - :param image_base64: 影像件BASE64编码 + 理赔申请书识别并整合至赔案档案 + :param image: 影像件 + :param insurer_company: 保险分公司 :return: 空 """ - # 请求深圳快瞳居民身份证识别接口 - response = http_client.post( - url=(url := "https://ai.inspirvision.cn/s/api/ocr/bankCard"), - headers={"X-RequestId-Header": image_guid}, - data={ - "token": authenticator.get_token( - servicer="szkt" - ), # 获取深圳快瞳访问令牌 - "imgBase64": f"data:image/{image_format.lstrip(".")};base64,{image_base64}", - }, - guid=md5((url + image_guid).encode("utf-8")).hexdigest().upper(), - ) - # 若响应非成功或银行卡类型非借记卡则抛出异常 - # TODO: 若响应非成功则流转至人工处理 - if not ( - response.get("status") == 200 - and response.get("code") == 0 - and response.get("data", {}).get("bankCardType") == 1 - ): - raise RuntimeError( - "请求深圳快瞳居民身份证识别接口发生异常或已识别非借记卡" + + # noinspection PyShadowingNames + def boc_application_recognize(image: str) -> str: + """ + 中银保险有限公司-理赔申请书识别并整合至赔案档案 + :param image: 影像件 + :return: 空 + """ + # 方法1:先使用深圳快瞳通用文本识别再使用硅基流动中大语言模型结构化,可行但是需要请求二次 + # 方法2:使用硅基流动中支持OCR的大语言模型 + # 请求硅基流动的大语言模型接口 + response = http_client.post( + url="https://api.siliconflow.cn/v1/chat/completions", + headers={ + "Authorization": "Bearer sk-xsnuwirjjphhfdbvznfdfjqlinfdlrnlxuhkbbqynfnbhiqz", + "Content-Type": "application/json; charset=utf-8", + }, + json={ + "model": "deepseek-ai/DeepSeek-R1-0528-Qwen3-8B", # 通过从DeepSeek-R1-0528模型蒸馏思维链接至Qwen3-8B-Base获得的模型 + "messages": [{"role": "user", "content": ""}], + "max_tokens": 10240, # 生成文本最大令牌数 + "temperature": 0.2, + "top_p": 0.5, + "top_k": 20, + "frequency_penalty": 0.0, + "thinking_budget": 1, + }, + guid=md5(prompt.encode("utf-8")).hexdigest().upper(), ) - # noinspection PyTypeChecker - dossier["受益人层"].update( - { - "开户行": response["data"]["bankInfo"], - "户名": None, - "户号": response["data"]["cardNo"].replace(" ", ""), - } - ) + recognition = ( + json.loads(match.group("json")) + if ( + match := re.search( + r"```json\s*(?P\{.*})\s*```", + response["choices"][0]["message"]["content"], + re.DOTALL, + ) + ) + else None + ) + + print(recognition) + + exit() + + # 根据保险分公司匹配结构化识别文本方法 + match insurer_company: + case _ if insurer_company.startswith("中银保险有限公司"): + boc_application_recognize(image) # noinspection PyShadowingNames - def receipt_recognize( - image_index, image_guid, image_format, image_base64, image_type - ) -> None: + def receipt_recognize(image) -> None: """ 票据识别并整合至赔案档案 - :param image_index: 影像件编号 - :param image_guid: 影像件唯一标识 - :param image_format: 影像件格式 - :param image_base64: 影像件BASE64编码 - :param image_type: 影像件类型 + :param image: 影像件 :return: 空 """ # 初始化票据数据 @@ -1425,6 +1343,44 @@ if __name__ == "__main__": except: return None + # noinspection PyShadowingNames + def bank_card_recognize(image) -> None: + """ + 银行卡识别并整合至赔案档案 + :param image: 影像件 + :return: 空 + """ + # 请求深圳快瞳银行卡识别接口 + response = http_client.post( + url=(url := "https://ai.inspirvision.cn/s/api/ocr/bankCard"), + headers={"X-RequestId-Header": image["影像件唯一标识"]}, + data={ + "token": authenticator.get_token( + servicer="szkt" + ), # 获取深圳快瞳访问令牌 + "imgBase64": f"data:image/{image["影像件格式"].lstrip(".")};base64,{image["影像件BASE64编码"]}", + }, + guid=md5((url + image["影像件唯一标识"]).encode("utf-8")) + .hexdigest() + .upper(), + ) + # TODO: 若响应非成功则流转至人工处理 + if not ( + response.get("status") == 200 + and response.get("code") == 0 + and response.get("data", {}).get("bankCardType") + == 1 # # 实际作业亦仅支持借记卡 + ): + raise RuntimeError("请求深圳快瞳银行卡识别接口发生异常或非借记卡") + # noinspection PyTypeChecker + dossier["受益人层"].update( + { + "开户行": response["data"]["bankInfo"], + "户名": None, + "户号": response["data"]["cardNo"].replace(" ", ""), + } + ) + # 影像件识别使能检查,若影像件不识别则跳过 if not recognition_enable.evaluate( { @@ -1447,13 +1403,16 @@ if __name__ == "__main__": # TODO: 后续添加居民户口簿识别和整合方法 case "中国港澳台地区及境外护照": raise RuntimeError("暂不支持中国港澳台地区及境外护照") - case "银行卡": - # 银行卡识别并整合至赔案档案 - bank_card_recognize(image_guid, image_format, image_base64) # TODO: 暂仅支持增值税发票识别且购药及就医类型为药店购药整合至赔案档案,后续逐步添加 + case "理赔申请书": + application_recognize(image, insurer_company) case "增值税发票" | "医疗门诊收费票据" | "医疗住院收费票据": # 票据识别并整合至赔案档案 - receipt_recognize(image_guid, image_format, image_base64, image_type) + # receipt_recognize(image) + pass + case "银行卡": + # 银行卡识别并整合至赔案档案 + bank_card_recognize(image) # 遍历工作目录中赔案目录并创建赔案档案(模拟自动化域就待自动化任务创建理赔档案) for case_path in [x for x in directory_path.iterdir() if x.is_dir()]: @@ -1473,6 +1432,7 @@ if __name__ == "__main__": }, "影像件层": [], "出险人层": {}, + "被保人层": [], "受益人层": {}, }