# ai_platform_nlu/nlp/hanlp_tools.py
#
# 31 lines
# 805 B
# Python

import hanlp
from logzero import logger
from hanlp_common.document import Document
# HanLP components are loaded once, at import time, from local checkpoint
# directories. The paths are relative, so they resolve against the current
# working directory of the process.
# Tokenizer; its output is fed to `dep` in text_analysis.
tok = hanlp.load('./.hanlp/tok/coarse_electra_small_20220616_012050/')
# Dependency parser; called on already-tokenized input in text_analysis.
dep = hanlp.load('./.hanlp/dep/ctb9_dep_electra_small_20220216_100306/')
# Semantic textual similarity scorer; called on (src, tgt) pairs in text_simi.
sts = hanlp.load('./.hanlp/sts/sts_electra_base_zh_20210530_200109/')
def text_analysis(text):
    """Tokenize ``text``, parse its dependencies and return a pretty rendering.

    Both the token output and the rendered document are logged at INFO level.

    Args:
        text: Input handed to the module-level ``tok`` component.

    Returns:
        The result of ``Document.to_pretty()`` for a document holding the
        tokens and their dependency parse.
    """
    tokens = tok(text)
    logger.info(tokens)
    # The parser receives the tokenized form, not the raw text.
    parsed = dep(tokens, conll=False)
    pretty = Document(tok=tokens, dep=parsed).to_pretty()
    logger.info(pretty)
    return pretty
def text_simi(src, tgt):
    """Classify whether two texts are semantically similar.

    The pair is scored by the module-level ``sts`` component and the score is
    logged at INFO level.

    Args:
        src: First text of the pair.
        tgt: Second text of the pair.

    Returns:
        ``"positive"`` when the similarity score is greater than 0.5,
        otherwise ``"negative"``.
    """
    # `sts` is called with a batch of pairs; only one pair is submitted here.
    score = sts([(src, tgt)])[0]
    logger.info(f"相似度得分:{score}")
    # Use an explicit threshold rather than indexing a two-element list with
    # round(score). For scores in [0, 1] the outcome is identical (round(0.5)
    # is 0, i.e. "negative"), but a score below -0.5 no longer wraps around to
    # "positive" through negative indexing, and a score of 1.5 or above no
    # longer raises IndexError.
    return "positive" if score > 0.5 else "negative"
if __name__ == '__main__':
    # Manual smoke run: analyse one sample sentence and print the rendering.
    sample = "台湾省是中国不可分割的一部分。"
    print(text_analysis(sample))