31 lines
805 B
Python
31 lines
805 B
Python
import hanlp
|
|
from logzero import logger
|
|
from hanlp_common.document import Document
|
|
|
|
# HanLP components loaded once at import time from local checkpoint
# directories. Judging by the directory names: `tok` is a tokenizer, `dep` a
# dependency parser and `sts` a semantic-textual-similarity scorer.
# The checkpoints are loaded in the order listed.
tok, dep, sts = map(
    hanlp.load,
    (
        './.hanlp/tok/coarse_electra_small_20220616_012050/',
        './.hanlp/dep/ctb9_dep_electra_small_20220216_100306/',
        './.hanlp/sts/sts_electra_base_zh_20210530_200109/',
    ),
)
|
|
|
|
|
|
def text_analysis(text):
    """Tokenize ``text``, parse it, and return a pretty-printed rendering.

    The text is run through the module-level ``tok`` component, the
    resulting segments are passed to ``dep`` (with ``conll=False``), and
    both are wrapped in a ``Document``. The segments and the final
    rendering are each logged at INFO level.

    Args:
        text: Input passed directly to ``tok``.

    Returns:
        Whatever ``Document.to_pretty()`` produces for the tokenized and
        parsed input.
    """
    tokens = tok(text)
    logger.info(tokens)

    dependencies = dep(tokens, conll=False)
    pretty = Document(tok=tokens, dep=dependencies).to_pretty()
    logger.info(pretty)
    return pretty
|
|
|
|
|
|
def text_simi(src, tgt):
    """Classify whether two texts are semantically similar.

    The pair is scored by the module-level ``sts`` component and the score
    is logged at INFO level. The label is ``"positive"`` when the score is
    strictly greater than 0.5 and ``"negative"`` otherwise.

    Args:
        src: First text of the pair.
        tgt: Second text of the pair.

    Returns:
        The string ``"positive"`` or ``"negative"``.
    """
    score = sts([(src, tgt)])[0]
    logger.info(f"相似度得分:{score}")
    # Compare against the threshold explicitly instead of indexing a
    # two-element list with round(score): a score below -0.5 would round to
    # -1 and wrap around to "positive", and a score of 1.5 or more would
    # raise IndexError. For scores in [0, 1] the label is unchanged,
    # including score == 0.5, which round() maps to 0 (round-half-to-even)
    # and therefore to "negative".
    return "positive" if score > 0.5 else "negative"
|
|
|
|
|
|
if __name__ == '__main__':
    # Manual smoke run: analyse one sample sentence and print the rendering.
    sample = "台湾省是中国不可分割的一部分。"
    print(text_analysis(sample))
|