You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

82 lines
2.6 KiB
Python

from __future__ import annotations

import os.path as path
from collections.abc import Iterable

import jieba
import jieba.posseg as pseg
from aiohttp import web
from pypinyin import pinyin, Style

import utils.text
import utils.web
# Initialise jieba explicitly at import time (per jieba's docs this loads the
# dictionary now rather than lazily on first use).
jieba.initialize()

# Optional user dictionary, expected at <three directories above this file>/data/userDict.txt.
# path.join is used instead of string concatenation so that an empty or
# root ("/") base directory does not produce "/data/..." or "//data/...".
userDict = path.join(
    path.dirname(path.dirname(path.dirname(__file__))),
    "data",
    "userDict.txt",
)
if path.exists(userDict):
    jieba.load_userdict(userDict)
class Hanzi:
    """Static helpers and aiohttp handlers for segmenting Chinese text and
    converting it to pinyin."""

    @staticmethod
    def filterJiebaTag(segList: Iterable[tuple[str, str]]) -> list[str]:
        """Reduce ``(word, flag)`` pairs to a flat list of words.

        Args:
            segList: Any iterable of two-item ``(word, flag)`` pairs, such as
                the generator returned by ``jieba.posseg.cut``.

        Returns:
            The words in their original order, with the substitution below
            applied.
        """
        ret = []
        for word, flag in segList:
            # startswith() rather than flag[0] so an empty flag cannot raise
            # IndexError.
            # NOTE(review): the two compared literals and the replacement are
            # all empty strings as written, which makes this branch a no-op.
            # Presumably non-ASCII characters were lost from these literals;
            # confirm against the original file.
            if flag.startswith("u") and (word == "" or word == ""):
                ret.append("")
            else:
                ret.append(word)
        return ret

    @staticmethod
    def convertToPinyin(sentence: str) -> list[str | list[str] | list[list[str]]]:
        """Segment *sentence* and convert its non-ASCII words to pinyin.

        CJK punctuation is first normalised with ``utils.text.replaceCJKPunc``
        and every space is replaced by ``'-'``. The text is then segmented
        with ``jieba.posseg.cut``.

        Returns:
            A list whose items appear in sentence order and are one of:

            * ``str`` - an ASCII punctuation segment, kept verbatim;
            * ``[seg]`` - any other ASCII segment, wrapped in a one-item list;
            * ``list[list[str]]`` - a run of consecutive non-ASCII segments,
              one inner list of syllables per segment.
        """
        sentence = utils.text.replaceCJKPunc(sentence).replace(' ', '-')
        sentenceList = []
        pinyinGroup = []  # pending run of consecutive non-ASCII segments
        for seg in Hanzi.filterJiebaTag(pseg.cut(sentence)):
            if not utils.text.isAscii(seg):
                # pinyin() yields one list of candidate readings per
                # character; keep the first candidate of each.
                pinyinGroup.append(
                    [readings[0] for readings in pinyin(seg, style=Style.NORMAL)]
                )
                continue
            isPunc = utils.text.isAsciiPunc(seg)
            # Any ASCII segment closes the pending pinyin run.
            if pinyinGroup:
                sentenceList.append(pinyinGroup)
                pinyinGroup = []
            sentenceList.append(seg if isPunc else [seg])
        if pinyinGroup:
            sentenceList.append(pinyinGroup)
        return sentenceList

    @staticmethod
    async def _readSentence(request: web.Request):
        """Return the required ``sentence`` parameter of *request*, as
        extracted by ``utils.web.get_param``."""
        params = await utils.web.get_param(request, {
            "sentence": {
                "required": True,
            },
        })
        return params.get("sentence")

    @staticmethod
    async def hanziToPinyin(request: web.Request):
        """Handler: convert the ``sentence`` parameter to pinyin.

        Returns whatever ``utils.web.api_response`` produces for code ``1``
        (presumably "success" - confirm) and the ``convertToPinyin`` result.
        """
        sentence = await Hanzi._readSentence(request)
        data = Hanzi.convertToPinyin(sentence)
        return await utils.web.api_response(1, data, request=request)

    @staticmethod
    async def splitHanzi(request: web.Request):
        """Handler: segment the ``sentence`` parameter with jieba.

        The response data is a list of ``{"word": ..., "flag": ...}`` dicts,
        one per segment yielded by ``jieba.posseg.cut``.
        """
        sentence = await Hanzi._readSentence(request)
        data = [{"word": word, "flag": flag} for word, flag in pseg.cut(sentence)]
        # request is forwarded so this handler matches hanziToPinyin.
        return await utils.web.api_response(1, data, request=request)