You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
82 lines
2.6 KiB
Python
82 lines
2.6 KiB
Python
2 years ago
|
from __future__ import annotations

import os.path as path
from collections.abc import Iterable

import jieba
import jieba.posseg as pseg
from aiohttp import web
from pypinyin import pinyin, Style

import utils.text
import utils.web
|
||
|
|
||
|
# Load jieba's dictionary eagerly at import time instead of lazily on the
# first segmentation call.
jieba.initialize()

# Optional project-specific dictionary, located at <three levels above this
# file>/data/userDict.txt.  The path is anchored with abspath() so that a
# relative __file__ cannot collapse the dirname chain to "" and make the
# lookup resolve against the filesystem root.
userDict = path.join(
    path.dirname(path.dirname(path.dirname(path.abspath(__file__)))),
    "data",
    "userDict.txt",
)
if path.exists(userDict):
    jieba.load_userdict(userDict)
|
||
|
|
||
|
|
||
|
class Hanzi:
    """Chinese text helpers and the aiohttp handlers that expose them."""

    @staticmethod
    def filterJiebaTag(segList: Iterable[tuple[str, str]]) -> list[str]:
        """Return the words of a tagged segmentation, normalising 得/地 to 的.

        Args:
            segList: Iterable of ``(word, flag)`` pairs, e.g. the output of
                ``jieba.posseg.cut``.

        Returns:
            The words in order.  A word equal to "得" or "地" whose flag
            starts with "u" is replaced by "的"; every other word is kept
            unchanged.
        """
        # NOTE(review): flags starting with "u" look like jieba's auxiliary
        # particle tags; the substitution presumably exists so the later
        # pinyin lookup reads the particle as "de" -- confirm.
        # startswith() is used instead of flag[0] so an empty flag does not
        # raise IndexError.
        return [
            "的" if flag.startswith("u") and word in ("得", "地") else word
            for word, flag in segList
        ]

    @staticmethod
    def convertToPinyin(
        sentence: str,
    ) -> list[str | list[str] | list[list[str]]]:
        """Convert a sentence into a list of pinyin groups and ASCII tokens.

        Args:
            sentence: The text to convert.

        Returns:
            A list whose items appear in sentence order and are one of:

            * a ``list[list[str]]`` -- a run of consecutive non-ASCII
              segments, one inner list of pinyin syllables per segment;
            * a ``str`` -- an ASCII segment for which
              ``utils.text.isAsciiPunc`` is true;
            * a one-element ``list[str]`` -- any other ASCII segment.
        """
        # Spaces are turned into dashes before segmentation.
        sentence = utils.text.replaceCJKPunc(sentence).replace(' ', '-')
        segList = Hanzi.filterJiebaTag(pseg.cut(sentence))

        sentenceList = []
        pinyinGroup = []
        for seg in segList:
            if not utils.text.isAscii(seg):
                # Non-ASCII segment: keep the first reading of each character
                # and extend the current run.
                pinyinGroup.append(
                    [reading[0] for reading in pinyin(seg, style=Style.NORMAL)]
                )
                continue

            # Any ASCII segment terminates the current pinyin run.
            if pinyinGroup:
                sentenceList.append(pinyinGroup)
                pinyinGroup = []
            # Punctuation is emitted bare; other ASCII tokens are wrapped in
            # a list.
            sentenceList.append(seg if utils.text.isAsciiPunc(seg) else [seg])

        # Flush a run that reaches the end of the sentence.
        if pinyinGroup:
            sentenceList.append(pinyinGroup)

        return sentenceList

    @staticmethod
    async def hanziToPinyin(request: web.Request):
        """Handle a request that converts the ``sentence`` parameter to pinyin.

        Args:
            request: Incoming aiohttp request; ``sentence`` is required.

        Returns:
            The result of ``utils.web.api_response`` carrying the output of
            ``convertToPinyin``.
        """
        params = await utils.web.get_param(request, {
            "sentence": {
                "required": True,
            },
        })
        sentence = params.get('sentence')

        data = Hanzi.convertToPinyin(sentence)
        return await utils.web.api_response(1, data, request=request)

    @staticmethod
    async def splitHanzi(request: web.Request):
        """Handle a request that segments the ``sentence`` parameter.

        Args:
            request: Incoming aiohttp request; ``sentence`` is required.

        Returns:
            The result of ``utils.web.api_response`` carrying a list of
            ``{"word": ..., "flag": ...}`` dicts, one per segment.
        """
        params = await utils.web.get_param(request, {
            "sentence": {
                "required": True,
            },
        })
        sentence = params.get("sentence")

        data = [
            {"word": word, "flag": flag}
            for word, flag in pseg.cut(sentence)
        ]
        # The request is passed through exactly as hanziToPinyin does, so
        # both handlers build their response the same way.
        return await utils.web.api_response(1, data, request=request)
|