From c62b5990f514ed1d67299191afd337aaa3fabb4d Mon Sep 17 00:00:00 2001 From: zxjyes <9093499+zxjyes@user.noreply.gitee.com> Date: Mon, 31 Mar 2025 10:07:03 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BC=98=E5=8C=96translate?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../__pycache__/local_encoder.cpython-310.pyc | Bin 0 -> 3336 bytes .../__pycache__/utils.cpython-310.pyc | Bin 0 -> 5496 bytes Retrieval_new/utils.py | 101 +++++++++++++++--- 3 files changed, 84 insertions(+), 17 deletions(-) create mode 100644 Retrieval_new/__pycache__/local_encoder.cpython-310.pyc create mode 100644 Retrieval_new/__pycache__/utils.cpython-310.pyc diff --git a/Retrieval_new/__pycache__/local_encoder.cpython-310.pyc b/Retrieval_new/__pycache__/local_encoder.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f6da96be6d09fe7f9cbd57e82e2ca78c9f897a80 GIT binary patch literal 3336 zcmaJ@?Q10srxly!h`5fua z?A0L5lG*RN9BKJ3HcXRRwf-P&%o=SfQ%DPE2K&bw8{#MVx3jAf~LND zL04a+V8BMC$ylJj;;|z=yMl&P2TC|XTj_?FeP7O()2~wb(6d0%A3?}p;#x<@_>fj1PB!n)KLf=G&*e2L2l#{~T zM4+zoBsak-#Zw1K-ioq&t49Rqj$2kc2?t?nD+g)*;?(>He~Zt*xiocd;m*Z{=47%nkbZ8>RBKw#{K*RMfVr*&eJ+LnMeVV8CKJ{-qotwkvbw5DFCE$lafM5mmr zlaXZ8Y{B8FG0OHZ;l@FtlOQ#|gOUcopq7SSi|awU&L&{i%M9l{0d@^`oE@XbNInrZ z@Ndu$nBwWn@h5M@r!Rir9UYraAwG9v{+-YIJ9V7k{+4rF{XpjXb4yny7yo`^;r7Hm zuU?v5{PG|1SCgG#VG90W{>I1g2iNCrpA9omTj}X=Eq1FDxz{{>9=sf`d!RYK`uXC` zYyG)_d`6;oqeR6kmjS)H;{wd^0HW?8=~eAfDFRt^T*0GMuncmErxF?J|07+D6)RC{ z?<*yt;2jx4N>4@_ZV4FRi^@=Y5h^|=jQxSd7 z?K^1i-@U8v_pqpg^V+UB)>rXLP!etlAR>E5$0O6W9oGqLy9XxoH$s+3GP;&#;IU{A z&C+$0QS()Hx3*z5l6#j&Mk!yo0n913?hpDpVS%=iwReF{-w7SNDqa2udl5rE6+t2~MDIL%|p;6+R+ll|;F_;;jmk{OJ zNO~kQ(pA^F#nU`_INY8b~2 zh5hdE-{=v9EvC6!GmBrn9iP0`rgWR$D|yhd>=b^80i83*ZB-KaN5IUJIn+!n4Gsb& z;xq5e-TG^M?pl1~{QUIAb|IdAcd>Q7Gd2EEr?U9J*n4&Xv}lE-Juv&Bh>Zq-7|etBds(V zmMA!EIU$S?QV+rFZ-xvGm<(EfVhuQakTljL2#>ef%Mf1_NAfgcfd|X7-d{!ZXhg?8 zrBQ;5F{dPyf!qMoMAi=Ol!B619o@MD=SJ#oCbjJkz`iU3TP$n3-@8Lm%HP?0f3(Ek z9azHgOIU=!Lq{WxJJs+}sDA&1=imJzo|;*@@Lqqe{ln`b%kdr84~oDli93}L$U!Fp zPnL$|&#(cfiZYIG7srZDrFf(wWH+`yrVO1xRQ*EI_3W}N@`$-^!5RWrx1(m1axnr{ zFwFi~*}GvC)F2Uf6+nz!gA=7OLlw*?B&wd=1X-c0Egl?XV>Ap3nYQwJVF0zry6+hI zBOHPfC4Y*Ua&dOG^R_^T-wT6uK-2|U!z+MoN0x2FFNPH?XKefBu&6EvP)pi2_e!?i_Ag4GG%&;KQDi7|%imyw z+Qj_~cOo%(po4r9GLz^ut3R<;wU(fvlaf&C#N3-u3CM_www$$CgVdf1v^uI5sMh;jVFof(D_b4wi%n5 zxjr{@cK*(Vg{w22C_6X%HMA?+g7w0IJ$v`tFYY?@eC_9}yGCDrM|z^#PPA@ZfAfJwy=SNB5jyV$Usv3Y^A)}2ZMJv2* zL|R3-ZK@&56b$ppc6{&d5DC#MDur&4gED}4S?1-hafo7M&k?xF4zJLp71UFxRn*g~ zaP=5~L4FRhgn=~E?=r(=x@OUh)YL5F37TbFFxY=5p9Ujj5stF${F*2OcNf>ki?fiv zy4I+xi~J=R%6_y?;>8F45>#Y-6uyc`ZAzhDN9iL)-zn|(+##nN>H7wrJrJe9HT%_d uN&Xh+QNBTHsr7$|c#*lIP3N6>_UgjYLrjX#I@$v}$ue`d`ItrZ@BRmYUi5qb literal 0 HcmV?d00001 diff --git a/Retrieval_new/__pycache__/utils.cpython-310.pyc b/Retrieval_new/__pycache__/utils.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..fe13fba42068191960432d20ccbcf84547d3d1ae GIT binary patch literal 5496 zcmaJ_>vI&xwV&5Mv|0&C1_6GtV4SemMgl_|Ec@D_a2!){L@KyK-9gIMvOSU(&Cbd* zvj#S+lnmmHkO&VO5SBy$V+Rm+Qbr&V<$p-!%Y3wUR;v%U_>d2`ib|aOJ3Xt_3hLgK zrlr(YRN5B`FrhUwn|%jS-2t{60tUMrvOT1zE17v>N(P9YgBY< z_N7f0v(w$0k<4l!1bWg|%#L@WTE7c@t(yITmP*BKbekYHZ0m~KzckaS9{WQr)uYF4 zEh;%RI}cd4mimQeS@BNI3BR97#f?%6Od(?0(W4n#x1$x@(PJmmMqJNsetBKoO4!Mi z7PTVlBQLKr;^nHYeXQvf?UW}wYGFDb?{xefX%j0`pRvujVPPqd8nZO><77fh02}2W z)dP68;j!LDkrPG*TO5^`$V6M-tBlEGVy|HPM*U1;a!weNOD%Wb1lXDlZ6-JNF9=!i( z>X`eN!|t6+kEV{7yJnByFWfmXd+UZfc#nvKU*)H-%$~pK4xBHJ5Ab;kC#N8<@Y%RK zbfa+l53^%;FbSG8{PJji;sNUU$S|R!^b@dbLTD= z$H8^kTmATed;gol;E+3bVIA2ql{7k|?(a`wTKD1A;`C?k>5+AM2?kuvQTn(+%yr%S z*8D_Ye&Qs}GJG7i1n?Q4az1Nn!;d|pS$%5ChG$6_Q~yZbj= z*Q@dM*$wO8XtUZM%Wc*x(STDmZ&}TeE!%XeQt@t$(;PqASf*3mlQ9x@hOf$zOd6Qk z^QGu0wzl8?QZQG*+>d|M+?7sg&DpNF(Yb$bGl1RwUQ*YZf3|&R?1ODvn|}qNN$umf z9y7Fk%^5qXTTML&oZ22!>oL;_Krf~lozD?`$Tu|xY9;zpMDtlRpTZh>D2}yXp<)AV zhX`FB-lP(qs3o(?ma?ejTh#KifXBy?7tH(`g1HX!6Stp6UP~D}UblM$SOe%v81&FO z)Avc?{9X6TJNRholSfk{;01Vqf$#c; zzw5gKI0MKteJ5u6PR{fV&GemF-=1BzyXn={@4VTvrtODq>)N7iyW84-+uLr|LY5gu z5tYq4sxPHt85NCGkiEQOC2CGhg$c4aCRt{Y703cI9&UwKQ7KtL zw8fkVYtYV@`h}eIF4;zA@~D&}AE4wE>};iVkuVZO0(av1$BQUBVbb>MHmMhaa?&UQ z&X{OQIq{72kpy;8m0DW?!H{LgP21X+w7a6RBQDgsl8K^Fvi z0GAXlGo?b|zEQbC;r>=fHsS`y#iB_nRc}6zNf19o!+e2ivbhQ`GlIfVI`pS>YOX;~ zD-C%GMTIJ}UH`W#6%I;A>9Mp5?yiAB**m>R^qxIOLQd-&C)1PSmX(Vp+<53&oBGG_fwGBNZeeS-_-1kX)J;d zL5HFehtFPAXd~Ld zQOGXgnUIo`DT^Ef5 zy#Umjt!mr7uIWGCiMF-R{TsgweG?)O(2}3{#>*t!se|ssL`wwGKa{kR27IWI&=x|4 z6X-}9461Xge!4%Q@l?|B>uESR)A4DhnKq+}r>sCv+_p8-a018@6J46+h}x4z2m^~| zJYmPkrFLOC7De*uB9%>3j1nbUVI}GMaf{krgPU(Zj891VG3M zW}s||Dy}!!fyuoR*@7uKu|s0MoYWz8LdU&uhkKQr!2APOEAw;eAGra*jFdAgFxxi(CEy?*&)`4GS^@pXE!ezm(qP&CEcPr-mticf3 zGq7fWteO2;&Nmv$DVQ~w3+}D%t+KvmpFUwX5rX9TKM2l{x6q3d9&w6+%pf|GdByvBgOIl{Nz1P2AphR;Ewy? zEICsCp|o0FL9Sjr^swxty-)Xc9;U)#!IGbxbWe=&)5N6^SHi}oCv{dh-iHj|V~0+P z(|3yxZ{f3`9C!S_``{eO;VG%tJAcG@tto3)sY$;@&w`IREGJ93^W`USG4jeNAC>%AFeYT>L!O=B5Z_do&!r7KXXk zE_h#z8QsI*<|i+BE^1!y#qoRjiR%zqICXsX@E7@s6ZEMFR9_z~9RH#?MyW7YQHHYA zl8$+n4NbKCHjy?c$o1ls!Vku(uZvE^NL~V^u51m{G$a{X_j&{fJG-LGwtK9W=H_@$ zvT44n3Hf05C98Y=CT;U;+K)DE*^}7x#^#M1<9oJj`0?u-H*R>HZP~l&G&7Fqq0Ld)Gn-LlD8=`1=ia>nJVzdj%{-bR zx1W0romsi5sfp)hnbl|(jt#jdM!e&l$2U897I8STuFN0x1`dA(!2Lf1vob}0MvHiz z1an|{Lfk7y-D_VL|9qX``{`)mPv?uL@8N7p$Jg`6DyPj*a^?|=(?@4-o&J*9?I@%=FhY3Q?@K&j>gP0i{_8@TxRIZccd!OG~i`6e1qL_2c{ zXb13EAE2o9DF|;$h(a9xz7P8(NfNz3|38&rgR})fFofQq9F$ied0mTl2+3@%Tq`{< z2BkVN{BH!se<^-0*$KvCsd&+ji`DKSM{gN_(|LG87{{l4s-S4^+j_?QXS( z%BK0NsD%@a8s3}K_QacTF-1AK-+P;{LXKL+dFaZ*q%6p6pn}pUwO)tipv$Du~xgvt?%t{s&J0_fM+8dHvEEspHKM-iYNbGKF1HglueX%N4H(E7)Oao38CZb!4-#H zVMKn~;OFAtTiEIuU3;5$zV#CnK_QHM1%Lh^OcX}O66SB5J!E7vFmOo0d9E(l5DW*a U8XFtygWDB;GHDRQlJfX}0q`DjhX4Qo literal 0 HcmV?d00001 diff --git a/Retrieval_new/utils.py b/Retrieval_new/utils.py index 4fb0213..2c6611e 100644 --- a/Retrieval_new/utils.py +++ b/Retrieval_new/utils.py @@ -13,6 +13,16 @@ import re from langchain.prompts import ChatPromptTemplate from langchain.schema import SystemMessage, HumanMessage from langchain_openai import ChatOpenAI +import logging +from typing import Optional +import time + +# 配置日志 +logging.basicConfig( + level=logging.INFO, + format='%(asctime)s - %(name)s - %(levelname)s - %(message)s' +) +logger = logging.getLogger('translation_service') def preprocess_eng(text): ''' @@ -69,7 +79,7 @@ def get_noun_zh(text): noun = ' '.join(nouns) return noun -def has_no_chinese(text): +def all_chinese(text): """ 判断一个文本是否不包含中文字符 @@ -85,28 +95,45 @@ def has_no_chinese(text): '\u2f00' <= char <= '\u2fdf' or \ '\u3100' <= char <= '\u312f' or \ '\u31a0' <= char <= '\u31bf': + flag = 1 + else: return False return True -def extract_List(text): - pattern = r'\[(.*?)\]' - matches = re.findall(pattern,text) +def extract_list(text: str) -> Optional[str]: + """从文本中提取方括号内的内容""" + if not isinstance(text, str): + return None + try: + pattern = r'\[(.*?)\]' + matches = re.findall(pattern, text) + + if not matches: + return None + return matches[-1] except Exception as e: - print("字符串处理异常!",e) + logger.error(f"字符串处理异常: {e}") return None -def translate(query): +def translate(query: str) -> Optional[str]: + """ + 将查询中的英文翻译为中文。 + 如果提取列表为空,最多重试三次。 + """ + if not query or not isinstance(query, str): + return None + sys_template = ''' 你是一个专注于化工、环境学科领域的翻译专家。 用户将提供一个生命周期评价领域数据库的查询,查询可能包含中英文字符。你的任务是: 1. 将查询中的所有英文表述转化为对应的中文表述; 2. 确保转化后的查询中不含任何非中文语言; - 3. 将完整的中文查询以“[]”格式返回; - 4. 不返回除“[]”格式外的任何其他内容。 + 3. 将完整的中文查询以"[]"格式返回; + 4. 不返回除"[]"格式外的任何其他内容。 请严格按照上述要求执行。 ''' human_template = "查询内容为:{context}" @@ -117,18 +144,58 @@ def translate(query): ]) messages = chat_prompt.format_messages(context=query) - # print(messages) + llm = ChatOpenAI( - model = "deepseek-chat", + model="deepseek-chat", base_url="https://api.deepseek.com", api_key="sk-3e42e538bc39411ab80761106d83dda9", temperature=0, ) - response = llm.invoke(messages) - content = response.content - result = extract_List(content) - return result -if __name__ == '__main__': - res = translate("HCOOH的定义是什么?") - print(res) + + # 最多尝试三次 + max_attempts = 3 + + for attempt in range(1, max_attempts + 1): + logger.info(f"翻译尝试 {attempt}/{max_attempts}:{query[:50]}{'...' if len(query) > 50 else ''}") + + try: + # 调用API获取翻译结果 + response = llm.invoke(messages) + content = response.content + + # 尝试提取结果 + result = extract_list(content) + + # 如果成功提取到结果,直接返回 + if result is not None: + logger.info(f"成功提取翻译结果 (尝试 {attempt}/{max_attempts})") + return result + + # 提取失败,记录信息 + logger.warning(f"未能提取翻译结果 (尝试 {attempt}/{max_attempts}): {content[:100]}") + + # 如果已经是最后一次尝试,则返回None + if attempt == max_attempts: + logger.error("所有尝试均失败,无法获取有效翻译结果") + return None + + # 短暂等待后继续下一次尝试 + time.sleep(1) + + except Exception as e: + logger.error(f"翻译过程中发生异常 (尝试 {attempt}/{max_attempts}): {e}") + + if attempt == max_attempts: + return None + + return None + +# 使用示例 +if __name__ == "__main__": + query = "HCOOH" + result = translate(query) + if result: + print(f"翻译结果: {result}") + else: + print("翻译失败") \ No newline at end of file