优化translate
This commit is contained in:
parent
426b755c33
commit
c62b5990f5
Binary file not shown.
Binary file not shown.
|
@ -13,6 +13,16 @@ import re
|
|||
from langchain.prompts import ChatPromptTemplate
|
||||
from langchain.schema import SystemMessage, HumanMessage
|
||||
from langchain_openai import ChatOpenAI
|
||||
import logging
|
||||
from typing import Optional
|
||||
import time
|
||||
|
||||
# Configure logging for this module.
# NOTE(review): basicConfig runs at import time and targets the root logger,
# so importing this module affects logging for the whole process — confirm
# that this is intended for library use.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
# Named logger used by the functions in this module.
logger = logging.getLogger('translation_service')
|
||||
|
||||
def preprocess_eng(text):
|
||||
'''
|
||||
|
@ -69,7 +79,7 @@ def get_noun_zh(text):
|
|||
noun = ' '.join(nouns)
|
||||
return noun
|
||||
|
||||
def has_no_chinese(text):
|
||||
def all_chinese(text):
|
||||
"""
|
||||
判断一个文本是否不包含中文字符
|
||||
|
||||
|
@ -85,28 +95,45 @@ def has_no_chinese(text):
|
|||
'\u2f00' <= char <= '\u2fdf' or \
|
||||
'\u3100' <= char <= '\u312f' or \
|
||||
'\u31a0' <= char <= '\u31bf':
|
||||
flag = 1
|
||||
else:
|
||||
return False
|
||||
return True
|
||||
|
||||
|
||||
def extract_List(text):
    """Return the content of the last ``[...]`` group found in *text*.

    Args:
        text: Text to search. Anything that is not a ``str`` is treated as
            "nothing to extract" instead of raising ``TypeError``.

    Returns:
        The characters between the last pair of square brackets (brackets
        excluded), or ``None`` when *text* is not a string or contains no
        bracketed group.
    """
    if not isinstance(text, str):
        return None

    # Non-greedy so every "[...]" pair is captured on its own; "." does not
    # match newlines, so a group must open and close on the same line.
    matches = re.findall(r'\[(.*?)\]', text)
    if not matches:
        # Keep the diagnostic the original emitted on the no-match path, but
        # reach it through an explicit check rather than a swallowed IndexError.
        print("字符串处理异常!", "no bracketed content found")
        return None
    return matches[-1]
|
||||
def extract_list(text: str) -> Optional[str]:
    """Extract the content of the last square-bracketed group in *text*.

    Args:
        text: Text to search; expected to be a string.

    Returns:
        The characters between the last ``[`` and ``]`` pair (brackets
        excluded, content otherwise untouched). ``None`` when *text* is not a
        string, contains no bracketed group, or the last group is empty or
        whitespace-only.
    """
    if not isinstance(text, str):
        return None

    # Non-greedy so every "[...]" pair is captured on its own; "." does not
    # match newlines, so a group must open and close on the same line.
    # With a constant pattern and a verified str input this call cannot
    # raise, so no catch-all handler is needed around it.
    matches = re.findall(r'\[(.*?)\]', text)
    if not matches:
        return None

    last = matches[-1]
    # A bare "[]" (or "[   ]") carries no result. Report it as a miss so that
    # callers checking `is not None` do not accept an empty string as success.
    return last if last.strip() else None
|
||||
|
||||
def translate(query: str) -> Optional[str]:
|
||||
"""
|
||||
将查询中的英文翻译为中文。
|
||||
如果提取列表为空,最多重试三次。
|
||||
"""
|
||||
if not query or not isinstance(query, str):
|
||||
return None
|
||||
|
||||
def translate(query):
|
||||
sys_template = '''
|
||||
你是一个专注于化工、环境学科领域的翻译专家。
|
||||
用户将提供一个生命周期评价领域数据库的查询,查询可能包含中英文字符。你的任务是:
|
||||
|
||||
1. 将查询中的所有英文表述转化为对应的中文表述;
|
||||
2. 确保转化后的查询中不含任何非中文语言;
|
||||
3. 将完整的中文查询以“[]”格式返回;
|
||||
4. 不返回除“[]”格式外的任何其他内容。
|
||||
3. 将完整的中文查询以"[]"格式返回;
|
||||
4. 不返回除"[]"格式外的任何其他内容。
|
||||
请严格按照上述要求执行。
|
||||
'''
|
||||
human_template = "查询内容为:{context}"
|
||||
|
@ -117,18 +144,58 @@ def translate(query):
|
|||
])
|
||||
|
||||
messages = chat_prompt.format_messages(context=query)
|
||||
# print(messages)
|
||||
|
||||
llm = ChatOpenAI(
|
||||
model = "deepseek-chat",
|
||||
model="deepseek-chat",
|
||||
base_url="https://api.deepseek.com",
|
||||
api_key="sk-3e42e538bc39411ab80761106d83dda9",
|
||||
temperature=0,
|
||||
)
|
||||
|
||||
# 最多尝试三次
|
||||
max_attempts = 3
|
||||
|
||||
for attempt in range(1, max_attempts + 1):
|
||||
logger.info(f"翻译尝试 {attempt}/{max_attempts}:{query[:50]}{'...' if len(query) > 50 else ''}")
|
||||
|
||||
try:
|
||||
# 调用API获取翻译结果
|
||||
response = llm.invoke(messages)
|
||||
content = response.content
|
||||
result = extract_List(content)
|
||||
return result
|
||||
if __name__ == '__main__':
|
||||
res = translate("HCOOH的定义是什么?")
|
||||
print(res)
|
||||
|
||||
# 尝试提取结果
|
||||
result = extract_list(content)
|
||||
|
||||
# 如果成功提取到结果,直接返回
|
||||
if result is not None:
|
||||
logger.info(f"成功提取翻译结果 (尝试 {attempt}/{max_attempts})")
|
||||
return result
|
||||
|
||||
# 提取失败,记录信息
|
||||
logger.warning(f"未能提取翻译结果 (尝试 {attempt}/{max_attempts}): {content[:100]}")
|
||||
|
||||
# 如果已经是最后一次尝试,则返回None
|
||||
if attempt == max_attempts:
|
||||
logger.error("所有尝试均失败,无法获取有效翻译结果")
|
||||
return None
|
||||
|
||||
# 短暂等待后继续下一次尝试
|
||||
time.sleep(1)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"翻译过程中发生异常 (尝试 {attempt}/{max_attempts}): {e}")
|
||||
|
||||
if attempt == max_attempts:
|
||||
return None
|
||||
|
||||
return None
|
||||
|
||||
# Usage example: translate one sample query and report the outcome.
if __name__ == "__main__":
    query = "HCOOH"
    result = translate(query)
    # Any falsy result (None or an empty string) is reported as a failure.
    print(f"翻译结果: {result}" if result else "翻译失败")
|
||||
|
Loading…
Reference in New Issue