from __future__ import annotations

from aiohttp import web
import os.path as path

import jieba
import jieba.posseg as pseg
from pypinyin import pinyin, Style

import utils.text
import utils.web

# Warm up jieba's dictionary at import time so the first request is not slow.
jieba.initialize()

# Optional user dictionary lives at <repo-root>/data/userDict.txt
# (three dirname() hops up from this file — TODO confirm file depth matches).
userDict = path.dirname(path.dirname(path.dirname(__file__))) + "/data/userDict.txt"
if path.exists(userDict):
    jieba.load_userdict(userDict)


class Hanzi:
    """Handlers for converting Chinese text to pinyin and for word segmentation."""

    @staticmethod
    def filterJiebaTag(segList):
        """Normalize jieba POS-tagged pairs into a flat list of words.

        :param segList: iterable of ``(word, flag)`` pairs as produced by
            ``jieba.posseg.cut`` (each item unpacks into word and POS flag).
        :return: list of words, with the particles 得/地 (POS flag starting
            with ``u``) normalized to 的 so they share one pinyin form.
        """
        ret = []
        for word, flag in segList:
            # 得 and 地 tagged as particles ("u...") are homophones of 的 in
            # this context; map them to 的 for consistent output.
            if flag[0] == "u" and (word == "得" or word == "地"):
                ret.append("的")
            else:
                ret.append(word)
        return ret

    @staticmethod
    def convertToPinyin(sentence: str):
        """Convert a sentence into grouped pinyin.

        ASCII punctuation and ASCII words break the output into groups:
        the result is a list whose items are either a plain ASCII
        punctuation string, or a list of per-word items (each word being a
        list of pinyin syllables, or ``[word]`` for an ASCII word).

        :param sentence: input text; CJK punctuation is normalized first
            and spaces are replaced by ``-``.
        :return: nested list structure as described above.
        """
        sentence = utils.text.replaceCJKPunc(sentence).replace(' ', '-')
        segList = Hanzi.filterJiebaTag(pseg.cut(sentence))
        sentenceList = []
        pinyinGroup = []
        for seg in segList:
            if utils.text.isAscii(seg):
                if utils.text.isAsciiPunc(seg):
                    # Punctuation closes the current pinyin group and is
                    # emitted as a bare string.
                    if len(pinyinGroup) > 0:
                        sentenceList.append(pinyinGroup)
                        pinyinGroup = []
                    sentenceList.append(seg)
                else:
                    # Non-punctuation ASCII token (e.g. a Latin word) also
                    # closes the group and is emitted wrapped in a list.
                    if len(pinyinGroup) > 0:
                        sentenceList.append(pinyinGroup)
                        pinyinGroup = []
                    sentenceList.append([seg])
            else:
                # CJK word: collect its pinyin syllables into the current group.
                sentencePinyin = []
                for one in pinyin(seg, style=Style.NORMAL):
                    sentencePinyin.append(one[0])
                pinyinGroup.append(sentencePinyin)
        # Flush a trailing group that was not closed by punctuation.
        if len(pinyinGroup) > 0:
            sentenceList.append(pinyinGroup)
        return sentenceList

    @staticmethod
    async def hanziToPinyin(request: web.Request):
        """API handler: convert the ``sentence`` parameter to grouped pinyin."""
        params = await utils.web.get_param(request, {
            "sentence": {
                "required": True,
            },
        })
        sentence = params.get('sentence')
        data = Hanzi.convertToPinyin(sentence)
        return await utils.web.api_response(1, data, request=request)

    @staticmethod
    async def splitHanzi(request: web.Request):
        """API handler: segment the ``sentence`` parameter into tagged words."""
        params = await utils.web.get_param(request, {
            "sentence": {
                "required": True,
            },
        })
        sentence = params.get("sentence")
        segList = list(pseg.cut(sentence))
        data = []
        for word, flag in segList:
            data.append({"word": word, "flag": flag})
        # Pass request for parity with hanziToPinyin (was previously omitted).
        return await utils.web.api_response(1, data, request=request)