增加noawait,支持Azure API
parent
e21a28a85f
commit
2f68357c1d
@ -0,0 +1,4 @@
|
|||||||
|
#!/bin/sh
# Change to the directory that contains this script and activate the
# project's virtualenv located in ./.venv.
DIRNAME=$(dirname "$0")
# Quote the path so directories containing spaces still work.
cd "$DIRNAME"
# `activate` only modifies the environment of the shell that reads it, so it
# has to be sourced with `.`; running it as a program changes nothing here.
. ./.venv/bin/activate
|
@ -1,36 +1,36 @@
|
|||||||
from aiohttp import web
|
from aiohttp import web
|
||||||
import utils.web
|
import utils.web
|
||||||
import utils.text
|
import utils.text
|
||||||
from extend.hangul_romanize import Transliter
|
from extend.hangul_romanize import Transliter
|
||||||
from extend.hangul_romanize.rule import academic
|
from extend.hangul_romanize.rule import academic
|
||||||
|
|
||||||
class Hanja:
    """Romanization of Korean text plus the HTTP handler that exposes it."""

    @staticmethod
    def convertToRomaja(self, hanja: str = None):
        """Convert a sentence into a list of romanized segments.

        The sentence is split with ``utils.text.splitAscii``. Each segment
        contributes one entry to the result:

        * a single space becomes the string ``"-"``;
        * ASCII punctuation is appended unchanged as a string;
        * any other ASCII segment is appended as a one-element list;
        * a non-ASCII segment is transliterated with the ``academic`` rule
          and appended as the list obtained by splitting on spaces.

        :param self: legacy first positional parameter. Because this is a
            static method nothing is bound to it, so when ``hanja`` is
            omitted this argument is taken as the sentence.
        :param hanja: the sentence to convert.
        :return: list mixing strings and lists of strings, as described above.
        """
        # ``Hanja.convertToRomaja(sentence)`` used to fail with a missing
        # ``hanja`` argument; accept the one-argument form while still
        # supporting callers that pass two positional arguments.
        if hanja is None:
            hanja = self

        transliter = Transliter(academic)
        sentenceList = []
        for seg in utils.text.splitAscii(hanja):
            if seg == " ":
                sentenceList.append("-")
            elif utils.text.isAscii(seg):
                if utils.text.isAsciiPunc(seg):
                    sentenceList.append(seg)
                else:
                    sentenceList.append([seg])
            else:
                roma = transliter.translit(seg)
                sentenceList.append(roma.split(" "))
        return sentenceList

    @staticmethod
    async def hanja2roma(request: web.Request):
        """HTTP handler: romanize the ``sentence`` request parameter.

        Reads the parameter through ``utils.web.get_param`` (declared as
        required) and answers through ``utils.web.api_response`` with
        status ``1`` and the converted data.
        """
        params = await utils.web.get_param(request, {
            "sentence": {
                "required": True,
            },
        })
        sentence = params.get('sentence')

        data = Hanja.convertToRomaja(sentence)
        return await utils.web.api_response(1, data, request=request)
|
||||||
|
@ -1,81 +1,81 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
from aiohttp import web
|
from aiohttp import web
|
||||||
import os.path as path
|
import os.path as path
|
||||||
import jieba
|
import jieba
|
||||||
import jieba.posseg as pseg
|
import jieba.posseg as pseg
|
||||||
from pypinyin import pinyin, Style
|
from pypinyin import pinyin, Style
|
||||||
import utils.text
|
import utils.text
|
||||||
import utils.web
|
import utils.web
|
||||||
|
|
||||||
jieba.initialize()
|
jieba.initialize()
|
||||||
userDict = path.dirname(path.dirname(path.dirname(__file__))) + "/data/userDict.txt"
|
userDict = path.dirname(path.dirname(path.dirname(__file__))) + "/data/userDict.txt"
|
||||||
if path.exists(userDict):
|
if path.exists(userDict):
|
||||||
jieba.load_userdict(userDict)
|
jieba.load_userdict(userDict)
|
||||||
|
|
||||||
|
|
||||||
class Hanzi:
    """Chinese word segmentation / pinyin helpers and their HTTP handlers."""

    @staticmethod
    def filterJiebaTag(segList: list[str]):
        """Reduce tagged segments to plain words, normalizing two particles.

        :param segList: iterable whose items unpack into ``(word, flag)``
            pairs (the in-file caller passes the result of ``pseg.cut``),
            despite what the annotation suggests.
        :return: list of words; a word equal to "得" or "地" whose flag
            starts with ``"u"`` is replaced by "的".
        """
        ret = []
        for word, flag in segList:
            # startswith() is safe even if a flag is ever empty,
            # unlike indexing with flag[0].
            if flag.startswith("u") and word in ("得", "地"):
                ret.append("的")
            else:
                ret.append(word)
        return ret

    @staticmethod
    def convertToPinyin(sentence: str):
        """Convert a sentence into grouped pinyin.

        CJK punctuation is replaced via ``utils.text.replaceCJKPunc`` and
        spaces are turned into ``-`` before segmentation. Consecutive
        non-ASCII segments are collected into one group (a list of
        per-segment pinyin lists); every ASCII segment closes the current
        group and is appended itself, as a plain string when it is
        punctuation and as a one-element list otherwise.

        :param sentence: text to convert.
        :return: list of groups and ASCII entries in input order.
        """
        sentence = utils.text.replaceCJKPunc(sentence).replace(' ', '-')
        segList = Hanzi.filterJiebaTag(pseg.cut(sentence))
        sentenceList = []
        pinyinGroup = []
        for seg in segList:
            if not utils.text.isAscii(seg):
                pinyinGroup.append(
                    [one[0] for one in pinyin(seg, style=Style.NORMAL)]
                )
                continue

            # Any ASCII segment terminates the pinyin group built so far.
            if pinyinGroup:
                sentenceList.append(pinyinGroup)
                pinyinGroup = []

            if utils.text.isAsciiPunc(seg):
                sentenceList.append(seg)
            else:
                sentenceList.append([seg])

        # Flush the group that was still open when the input ended.
        if pinyinGroup:
            sentenceList.append(pinyinGroup)

        return sentenceList

    @staticmethod
    async def hanziToPinyin(request: web.Request):
        """HTTP handler: convert the required ``sentence`` parameter to pinyin."""
        params = await utils.web.get_param(request, {
            "sentence": {
                "required": True,
            },
        })
        sentence = params.get('sentence')

        data = Hanzi.convertToPinyin(sentence)
        return await utils.web.api_response(1, data, request=request)

    @staticmethod
    async def splitHanzi(request: web.Request):
        """HTTP handler: segment the required ``sentence`` parameter.

        Responds with a list of ``{"word": ..., "flag": ...}`` dicts, one
        per segment produced by ``pseg.cut``.
        """
        params = await utils.web.get_param(request, {
            "sentence": {
                "required": True,
            },
        })
        sentence = params.get("sentence")

        data = [
            {"word": word, "flag": flag}
            for word, flag in pseg.cut(sentence)
        ]
        # Forward the request like every other handler in this project does.
        return await utils.web.api_response(1, data, request=request)
|
||||||
|
@ -1,32 +1,32 @@
|
|||||||
from aiohttp import web
|
from aiohttp import web
|
||||||
import utils.web
|
import utils.web
|
||||||
import utils.text
|
import utils.text
|
||||||
from extend.kanji_to_romaji import kanji_to_romaji
|
from extend.kanji_to_romaji import kanji_to_romaji
|
||||||
|
|
||||||
class Kanji:
    """Romanization of Japanese text plus the HTTP handler that exposes it."""

    @staticmethod
    def convertToRomaji(self, kanji: str = None):
        """Convert a sentence into a list of romaji segments.

        The sentence is split with ``utils.text.splitAscii``. ASCII
        punctuation is appended unchanged as a string, any other ASCII
        segment as a one-element list, and a non-ASCII segment as the list
        obtained by splitting ``kanji_to_romaji(segment)`` on spaces.

        :param self: legacy first positional parameter. Because this is a
            static method nothing is bound to it, so when ``kanji`` is
            omitted this argument is taken as the sentence.
        :param kanji: the sentence to convert.
        :return: list mixing strings and lists of strings.
        """
        # ``Kanji.convertToRomaji(sentence)`` used to fail with a missing
        # ``kanji`` argument; accept the one-argument form while still
        # supporting callers that pass two positional arguments.
        if kanji is None:
            kanji = self

        sentenceList = []
        for seg in utils.text.splitAscii(kanji):
            if utils.text.isAscii(seg):
                if utils.text.isAsciiPunc(seg):
                    sentenceList.append(seg)
                else:
                    sentenceList.append([seg])
            else:
                romaji = kanji_to_romaji(seg)
                sentenceList.append(romaji.split(" "))
        return sentenceList

    @staticmethod
    async def kanji2romaji(request: web.Request):
        """HTTP handler: romanize the ``sentence`` request parameter.

        Reads the parameter through ``utils.web.get_param`` (declared as
        required) and answers through ``utils.web.api_response`` with
        status ``1`` and the converted data.
        """
        params = await utils.web.get_param(request, {
            "sentence": {
                "required": True,
            },
        })
        sentence = params.get('sentence')

        data = Kanji.convertToRomaji(sentence)
        return await utils.web.api_response(1, data, request=request)
|
@ -1,2 +1,2 @@
|
|||||||
|
|
||||||
from .core import Transliter # noqa
|
from .core import Transliter # noqa
|
||||||
|
@ -1,89 +1,89 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
try:
|
try:
|
||||||
unicode(0)
|
unicode(0)
|
||||||
except NameError:
|
except NameError:
|
||||||
# py3
|
# py3
|
||||||
unicode = str
|
unicode = str
|
||||||
unichr = chr
|
unichr = chr
|
||||||
|
|
||||||
|
|
||||||
class Syllable(object):
    """One precomposed Hangul syllable, identified by its code point.

    The syllable is decomposed arithmetically from its offset inside the
    ``MIN``..``MAX`` range: 28 finals per vowel and 21 vowels per initial
    (hence 588 code points per initial).
    """

    MIN = ord('가')
    MAX = ord('힣')

    def __init__(self, char=None, code=None):
        """Build a syllable from exactly one of ``char`` or ``code``.

        :param char: a single character.
        :param code: an integer code point.
        :raises TypeError: if neither or both arguments are given, or if
            the resulting code point lies outside ``MIN``..``MAX``.
        """
        has_char = char is not None
        has_code = code is not None
        if not (has_char or has_code):
            raise TypeError('__init__ takes char or code as a keyword argument (not given)')
        if has_char and has_code:
            raise TypeError('__init__ takes char or code as a keyword argument (both given)')

        if char:
            code = ord(char)

        inside = self.MIN <= code <= self.MAX
        if not inside:
            raise TypeError(
                '__init__ expected Hangul syllable but {0} not in [{1}..{2}]'.format(
                    code, self.MIN, self.MAX))
        self.code = code

    @property
    def index(self):
        """Offset of the code point from ``MIN``."""
        return self.code - self.MIN

    @property
    def initial(self):
        """Index of the initial consonant."""
        quotient, _ = divmod(self.index, 588)
        return quotient

    @property
    def vowel(self):
        """Index of the vowel."""
        vowel_block = self.index // 28
        return vowel_block % 21

    @property
    def final(self):
        """Index of the final consonant."""
        _, remainder = divmod(self.index, 28)
        return remainder

    @property
    def char(self):
        """The syllable as a one-character string."""
        return unichr(self.code)

    def __unicode__(self):
        return self.char

    def __repr__(self):
        template = '''<Syllable({}({}),{}({}),{}({}),{}({}))>'''
        fields = (
            self.code, self.char,
            self.initial, '',
            self.vowel, '',
            self.final, '',
        )
        return template.format(*fields)
|
||||||
|
|
||||||
|
|
||||||
class Transliter(object):
    """Apply a romanization rule to text, one character at a time."""

    def __init__(self, rule):
        """Store the rule.

        :param rule: callable invoked as ``rule(now, pre=pre, post=post)``
            where each argument is a ``(character, Syllable-or-None)``
            pair; it returns the text for ``now`` or ``None`` to emit
            nothing.
        """
        self.rule = rule

    def translit(self, text):
        """Translit text to romanized text

        The rule sees every character together with its predecessor and
        successor; ``(None, None)`` stands in for a missing neighbour.

        :param text: Unicode string or unicode character iterator
        :return: the concatenation of all non-``None`` rule results; an
            empty input yields an empty string without calling the rule.
        """
        result = []
        pre = None, None
        now = None, None
        for c in text:
            try:
                post = c, Syllable(c)
            except TypeError:
                # Not a Hangul syllable: pass the raw character through.
                post = c, None

            # ``now`` lags one step behind the loop so the rule can see
            # the following character as ``post``.
            if now[0] is not None:
                out = self.rule(now, pre=pre, post=post)
                if out is not None:
                    result.append(out)

            pre = now
            now = post

        # Emit the last character. ``now`` is always a tuple, so test its
        # first element; otherwise an empty input would call the rule with
        # the ``(None, None)`` placeholder.
        if now[0] is not None:
            out = self.rule(now, pre=pre, post=(None, None))
            if out is not None:
                result.append(out)

        return ''.join(result)
|
||||||
|
@ -1,47 +1,47 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
REVISED_INITIALS = 'g', 'kk', 'n', 'd', 'tt', 'l', 'm', 'b', 'pp', 's', 'ss', '', 'j', 'jj', 'ch', 'k', 't', 'p', 'h'
|
REVISED_INITIALS = 'g', 'kk', 'n', 'd', 'tt', 'l', 'm', 'b', 'pp', 's', 'ss', '', 'j', 'jj', 'ch', 'k', 't', 'p', 'h'
|
||||||
REVISED_VOWELS = 'a', 'ae', 'ya', 'yae', 'eo', 'e', 'yeo', 'ye', 'o', 'wa', 'wae', 'oe', 'yo', 'u', 'wo', 'we', 'wi', 'yu', 'eu', 'ui', 'i'
|
REVISED_VOWELS = 'a', 'ae', 'ya', 'yae', 'eo', 'e', 'yeo', 'ye', 'o', 'wa', 'wae', 'oe', 'yo', 'u', 'wo', 'we', 'wi', 'yu', 'eu', 'ui', 'i'
|
||||||
REVISED_FINALS = '', 'g', 'kk', 'gs', 'n', 'nj', 'nh', 'd', 'l', 'lg', 'lm', 'lb', 'ls', 'lt', 'lp', 'lh', 'm', 'b', 'bs', 's', 'ss', 'ng', 'j', 'ch', 'k', 't', 'p', 'h'
|
REVISED_FINALS = '', 'g', 'kk', 'gs', 'n', 'nj', 'nh', 'd', 'l', 'lg', 'lm', 'lb', 'ls', 'lt', 'lp', 'lh', 'm', 'b', 'bs', 's', 'ss', 'ng', 'j', 'ch', 'k', 't', 'p', 'h'
|
||||||
|
|
||||||
|
|
||||||
def academic_ambiguous_patterns():
    """Collect final+initial spellings that can be split in more than one way.

    For every pair from ``REVISED_FINALS`` x ``REVISED_INITIALS`` the two
    spellings are concatenated. The concatenation is ambiguous when at
    least two cut positions (the end of the string is not tried) divide it
    into a valid final followed by a valid initial.

    :return: set of the ambiguous concatenated strings.
    """
    import itertools

    def count_valid_cuts(text):
        # Number of positions that yield (valid final, valid initial).
        return sum(
            1
            for cut in range(len(text))
            if text[:cut] in REVISED_FINALS and text[cut:] in REVISED_INITIALS
        )

    return {
        final + initial
        for final, initial in itertools.product(REVISED_FINALS, REVISED_INITIALS)
        if count_valid_cuts(final + initial) >= 2
    }
|
||||||
|
|
||||||
|
|
||||||
ACADEMIC_AMBIGUOUS_PATTERNS = academic_ambiguous_patterns()
|
ACADEMIC_AMBIGUOUS_PATTERNS = academic_ambiguous_patterns()
|
||||||
|
|
||||||
|
|
||||||
def academic(now, pre, **options):
    """Romanize one character with the academic rule.

    :param now: ``(character, syllable)`` pair for the current character;
        when the syllable part is falsy the character is returned as is.
    :param pre: the same kind of pair for the preceding character (may be
        falsy).
    :param options: additional keyword arguments, ignored here.
    :return: initial + vowel + final spelling of the syllable, prefixed
        with ``-`` when a syllable precedes it and either the initial has
        index 11 or the previous final joined with this initial is listed
        in ``ACADEMIC_AMBIGUOUS_PATTERNS``.
    """
    char, syllable = now
    if not syllable:
        return char

    previous = pre[1] if pre else None

    # A separator is only ever considered directly after another syllable.
    needs_separator = bool(previous) and (
        syllable.initial == 11
        or (REVISED_FINALS[previous.final] + REVISED_INITIALS[syllable.initial])
        in ACADEMIC_AMBIGUOUS_PATTERNS
    )

    prefix = '-' if needs_separator else u''
    spelling = (
        REVISED_INITIALS[syllable.initial]
        + REVISED_VOWELS[syllable.vowel]
        + REVISED_FINALS[syllable.final]
    )
    return prefix + spelling
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
from .kanji_to_romaji_module import convert_hiragana_to_katakana, translate_to_romaji, translate_soukon, \
|
from .kanji_to_romaji_module import convert_hiragana_to_katakana, translate_to_romaji, translate_soukon, \
|
||||||
translate_long_vowel, translate_soukon_ch, kanji_to_romaji
|
translate_long_vowel, translate_soukon_ch, kanji_to_romaji
|
||||||
__all__ = ["load_mappings_dict", "convert_hiragana_to_katakana", "convert_katakana_to_hiragana",
|
__all__ = ["load_mappings_dict", "convert_hiragana_to_katakana", "convert_katakana_to_hiragana",
|
||||||
"translate_to_romaji", "translate_soukon",
|
"translate_to_romaji", "translate_soukon",
|
||||||
"translate_long_vowel", "translate_soukon_ch", "kanji_to_romaji"]
|
"translate_long_vowel", "translate_soukon_ch", "kanji_to_romaji"]
|
||||||
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -1,120 +1,120 @@
|
|||||||
{
|
{
|
||||||
"ぁ": "a",
|
"ぁ": "a",
|
||||||
"あ": "a",
|
"あ": "a",
|
||||||
"ぃ": "i",
|
"ぃ": "i",
|
||||||
"い": "i",
|
"い": "i",
|
||||||
"ぅ": "u",
|
"ぅ": "u",
|
||||||
"う": "u",
|
"う": "u",
|
||||||
"ぇ": "e",
|
"ぇ": "e",
|
||||||
"え": "e",
|
"え": "e",
|
||||||
"ぉ": "o",
|
"ぉ": "o",
|
||||||
"お": "o",
|
"お": "o",
|
||||||
"か": "ka",
|
"か": "ka",
|
||||||
"が": "ga",
|
"が": "ga",
|
||||||
"き": "ki",
|
"き": "ki",
|
||||||
"きゃ": "kya",
|
"きゃ": "kya",
|
||||||
"きゅ": "kyu",
|
"きゅ": "kyu",
|
||||||
"きょ": "kyo",
|
"きょ": "kyo",
|
||||||
"ぎ": "gi",
|
"ぎ": "gi",
|
||||||
"ぎゃ": "gya",
|
"ぎゃ": "gya",
|
||||||
"ぎゅ": "gyu",
|
"ぎゅ": "gyu",
|
||||||
"ぎょ": "gyo",
|
"ぎょ": "gyo",
|
||||||
"く": "ku",
|
"く": "ku",
|
||||||
"ぐ": "gu",
|
"ぐ": "gu",
|
||||||
"け": "ke",
|
"け": "ke",
|
||||||
"げ": "ge",
|
"げ": "ge",
|
||||||
"こ": "ko",
|
"こ": "ko",
|
||||||
"ご": "go",
|
"ご": "go",
|
||||||
"さ": "sa",
|
"さ": "sa",
|
||||||
"ざ": "za",
|
"ざ": "za",
|
||||||
"し": "shi",
|
"し": "shi",
|
||||||
"しゃ": "sha",
|
"しゃ": "sha",
|
||||||
"しゅ": "shu",
|
"しゅ": "shu",
|
||||||
"しょ": "sho",
|
"しょ": "sho",
|
||||||
"じ": "ji",
|
"じ": "ji",
|
||||||
"じゃ": "ja",
|
"じゃ": "ja",
|
||||||
"じゅ": "ju",
|
"じゅ": "ju",
|
||||||
"じょ": "jo",
|
"じょ": "jo",
|
||||||
"す": "su",
|
"す": "su",
|
||||||
"ず": "zu",
|
"ず": "zu",
|
||||||
"せ": "se",
|
"せ": "se",
|
||||||
"ぜ": "ze",
|
"ぜ": "ze",
|
||||||
"そ": "so",
|
"そ": "so",
|
||||||
"ぞ": "zo",
|
"ぞ": "zo",
|
||||||
"た": "ta",
|
"た": "ta",
|
||||||
"だ": "da",
|
"だ": "da",
|
||||||
"ち": "chi",
|
"ち": "chi",
|
||||||
"ちゃ": "cha",
|
"ちゃ": "cha",
|
||||||
"ちゅ": "chu",
|
"ちゅ": "chu",
|
||||||
"ちょ": "cho",
|
"ちょ": "cho",
|
||||||
"ぢ": "ji",
|
"ぢ": "ji",
|
||||||
"つ": "tsu",
|
"つ": "tsu",
|
||||||
"づ": "zu",
|
"づ": "zu",
|
||||||
"て": "te",
|
"て": "te",
|
||||||
"で": "de",
|
"で": "de",
|
||||||
"と": "to",
|
"と": "to",
|
||||||
"ど": "do",
|
"ど": "do",
|
||||||
"な": "na",
|
"な": "na",
|
||||||
"に": "ni",
|
"に": "ni",
|
||||||
"にゃ": "nya",
|
"にゃ": "nya",
|
||||||
"にゅ": "nyu",
|
"にゅ": "nyu",
|
||||||
"にょ": "nyo",
|
"にょ": "nyo",
|
||||||
"ぬ": "nu",
|
"ぬ": "nu",
|
||||||
"ね": "ne",
|
"ね": "ne",
|
||||||
"の": "no",
|
"の": "no",
|
||||||
"は": "ha",
|
"は": "ha",
|
||||||
"ば": "ba",
|
"ば": "ba",
|
||||||
"ぱ": "pa",
|
"ぱ": "pa",
|
||||||
"ひ": "hi",
|
"ひ": "hi",
|
||||||
"ひゃ": "hya",
|
"ひゃ": "hya",
|
||||||
"ひゅ": "hyu",
|
"ひゅ": "hyu",
|
||||||
"ひょ": "hyo",
|
"ひょ": "hyo",
|
||||||
"び": "bi",
|
"び": "bi",
|
||||||
"びゃ": "bya",
|
"びゃ": "bya",
|
||||||
"びゅ": "byu",
|
"びゅ": "byu",
|
||||||
"びょ": "byo",
|
"びょ": "byo",
|
||||||
"ぴ": "pi",
|
"ぴ": "pi",
|
||||||
"ぴゃ": "pya",
|
"ぴゃ": "pya",
|
||||||
"ぴゅ": "pyu",
|
"ぴゅ": "pyu",
|
||||||
"ぴょ": "pyo",
|
"ぴょ": "pyo",
|
||||||
"ふ": "fu",
|
"ふ": "fu",
|
||||||
"ぶ": "bu",
|
"ぶ": "bu",
|
||||||
"ぷ": "pu",
|
"ぷ": "pu",
|
||||||
"へ": "he",
|
"へ": "he",
|
||||||
"べ": "be",
|
"べ": "be",
|
||||||
"ぺ": "pe",
|
"ぺ": "pe",
|
||||||
"ほ": "ho",
|
"ほ": "ho",
|
||||||
"ぼ": "bo",
|
"ぼ": "bo",
|
||||||
"ぽ": "po",
|
"ぽ": "po",
|
||||||
"ま": "ma",
|
"ま": "ma",
|
||||||
"み": "mi",
|
"み": "mi",
|
||||||
"みゃ": "mya",
|
"みゃ": "mya",
|
||||||
"みゅ": "myu",
|
"みゅ": "myu",
|
||||||
"みょ": "myo",
|
"みょ": "myo",
|
||||||
"む": "mu",
|
"む": "mu",
|
||||||
"め": "me",
|
"め": "me",
|
||||||
"も": "mo",
|
"も": "mo",
|
||||||
"や": "ya",
|
"や": "ya",
|
||||||
"ゆ": "yu",
|
"ゆ": "yu",
|
||||||
"よ": "yo",
|
"よ": "yo",
|
||||||
"ら": "ra",
|
"ら": "ra",
|
||||||
"り": "ri",
|
"り": "ri",
|
||||||
"りゃ": "rya",
|
"りゃ": "rya",
|
||||||
"りゅ": "ryu",
|
"りゅ": "ryu",
|
||||||
"りょ": "ryo",
|
"りょ": "ryo",
|
||||||
"る": "ru",
|
"る": "ru",
|
||||||
"れ": "re",
|
"れ": "re",
|
||||||
"ろ": "ro",
|
"ろ": "ro",
|
||||||
"ゎ": "wa",
|
"ゎ": "wa",
|
||||||
"わ": "wa",
|
"わ": "wa",
|
||||||
"ゐ": "wi",
|
"ゐ": "wi",
|
||||||
"ゑ": "we",
|
"ゑ": "we",
|
||||||
"を": " wo ",
|
"を": " wo ",
|
||||||
"ん": "n",
|
"ん": "n",
|
||||||
"ゔ": "vu",
|
"ゔ": "vu",
|
||||||
"ゕ": "ka",
|
"ゕ": "ka",
|
||||||
"ゖ": "ke",
|
"ゖ": "ke",
|
||||||
"ゝ": "iteration_mark",
|
"ゝ": "iteration_mark",
|
||||||
"ゞ": "voiced_iteration_mark",
|
"ゞ": "voiced_iteration_mark",
|
||||||
"ゟ": "yori"
|
"ゟ": "yori"
|
||||||
}
|
}
|
File diff suppressed because it is too large
Load Diff
@ -1,18 +1,18 @@
|
|||||||
{
|
{
|
||||||
"今日": {
|
"今日": {
|
||||||
"w_type": "noun",
|
"w_type": "noun",
|
||||||
"romaji": "kyou"
|
"romaji": "kyou"
|
||||||
},
|
},
|
||||||
"明日": {
|
"明日": {
|
||||||
"w_type": "noun",
|
"w_type": "noun",
|
||||||
"romaji": "ashita"
|
"romaji": "ashita"
|
||||||
},
|
},
|
||||||
"本": {
|
"本": {
|
||||||
"w_type": "noun",
|
"w_type": "noun",
|
||||||
"romaji": "hon"
|
"romaji": "hon"
|
||||||
},
|
},
|
||||||
"中": {
|
"中": {
|
||||||
"w_type": "noun",
|
"w_type": "noun",
|
||||||
"romaji": "naka"
|
"romaji": "naka"
|
||||||
}
|
}
|
||||||
}
|
}
|
@ -1,78 +1,78 @@
|
|||||||
{
|
{
|
||||||
"朝日奈丸佳": {
|
"朝日奈丸佳": {
|
||||||
"w_type": "noun",
|
"w_type": "noun",
|
||||||
"romaji": "Asahina Madoka"
|
"romaji": "Asahina Madoka"
|
||||||
},
|
},
|
||||||
"高海千歌": {
|
"高海千歌": {
|
||||||
"w_type": "noun",
|
"w_type": "noun",
|
||||||
"romaji": "Takami Chika"
|
"romaji": "Takami Chika"
|
||||||
},
|
},
|
||||||
"鏡音レン": {
|
"鏡音レン": {
|
||||||
"w_type": "noun",
|
"w_type": "noun",
|
||||||
"romaji": "Kagamine Len"
|
"romaji": "Kagamine Len"
|
||||||
},
|
},
|
||||||
"鏡音リン": {
|
"鏡音リン": {
|
||||||
"w_type": "noun",
|
"w_type": "noun",
|
||||||
"romaji": "Kagamine Rin"
|
"romaji": "Kagamine Rin"
|
||||||
},
|
},
|
||||||
"逢坂大河": {
|
"逢坂大河": {
|
||||||
"w_type": "noun",
|
"w_type": "noun",
|
||||||
"romaji": "Aisaka Taiga"
|
"romaji": "Aisaka Taiga"
|
||||||
},
|
},
|
||||||
"水樹奈々": {
|
"水樹奈々": {
|
||||||
"w_type": "noun",
|
"w_type": "noun",
|
||||||
"romaji": "Mizuki Nana"
|
"romaji": "Mizuki Nana"
|
||||||
},
|
},
|
||||||
"桜内梨子": {
|
"桜内梨子": {
|
||||||
"w_type": "noun",
|
"w_type": "noun",
|
||||||
"romaji": "Sakurauchi Riko"
|
"romaji": "Sakurauchi Riko"
|
||||||
},
|
},
|
||||||
"山吹沙綾": {
|
"山吹沙綾": {
|
||||||
"w_type": "noun",
|
"w_type": "noun",
|
||||||
"romaji": "Yamabuki Saaya"
|
"romaji": "Yamabuki Saaya"
|
||||||
},
|
},
|
||||||
"初音ミク": {
|
"初音ミク": {
|
||||||
"w_type": "noun",
|
"w_type": "noun",
|
||||||
"romaji": "Hatsune Miku"
|
"romaji": "Hatsune Miku"
|
||||||
},
|
},
|
||||||
"渡辺曜": {
|
"渡辺曜": {
|
||||||
"w_type": "noun",
|
"w_type": "noun",
|
||||||
"romaji": "Watanabe You"
|
"romaji": "Watanabe You"
|
||||||
},
|
},
|
||||||
"原由実": {
|
"原由実": {
|
||||||
"w_type": "noun",
|
"w_type": "noun",
|
||||||
"romaji": "Hara Yumi"
|
"romaji": "Hara Yumi"
|
||||||
},
|
},
|
||||||
"北宇治": {
|
"北宇治": {
|
||||||
"w_type": "noun",
|
"w_type": "noun",
|
||||||
"romaji": "Kita Uji"
|
"romaji": "Kita Uji"
|
||||||
},
|
},
|
||||||
"六本木": {
|
"六本木": {
|
||||||
"w_type": "noun",
|
"w_type": "noun",
|
||||||
"romaji": "Roppongi"
|
"romaji": "Roppongi"
|
||||||
},
|
},
|
||||||
"久美子": {
|
"久美子": {
|
||||||
"w_type": "noun",
|
"w_type": "noun",
|
||||||
"romaji": "Kumiko"
|
"romaji": "Kumiko"
|
||||||
},
|
},
|
||||||
"政宗": {
|
"政宗": {
|
||||||
"w_type": "noun",
|
"w_type": "noun",
|
||||||
"romaji": "Masamune"
|
"romaji": "Masamune"
|
||||||
},
|
},
|
||||||
"小林": {
|
"小林": {
|
||||||
"w_type": "noun",
|
"w_type": "noun",
|
||||||
"romaji": "Kobayashi"
|
"romaji": "Kobayashi"
|
||||||
},
|
},
|
||||||
"奥寺": {
|
"奥寺": {
|
||||||
"w_type": "noun",
|
"w_type": "noun",
|
||||||
"romaji": "Okudera"
|
"romaji": "Okudera"
|
||||||
},
|
},
|
||||||
"佐藤": {
|
"佐藤": {
|
||||||
"w_type": "noun",
|
"w_type": "noun",
|
||||||
"romaji": "Satou"
|
"romaji": "Satou"
|
||||||
},
|
},
|
||||||
"玲子": {
|
"玲子": {
|
||||||
"w_type": "noun",
|
"w_type": "noun",
|
||||||
"romaji": "Reiko"
|
"romaji": "Reiko"
|
||||||
}
|
}
|
||||||
}
|
}
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -1,29 +1,29 @@
|
|||||||
class KanjiBlock(str):
    """Placeholder string ``"@"`` that carries a kanji word and its reading.

    The string value itself is always ``"@"``; the original text, its
    romaji and its word type(s) live in the ``kanji``, ``romaji`` and
    ``w_type`` attributes.
    """

    def __new__(cls, *args, **kwargs):
        """Create the block.

        :param args: ``args[0]`` is the kanji text, ``args[1]`` a dict with
            at least ``"romaji"`` and ``"w_type"`` and optionally
            ``"other_readings"`` (a dict keyed by word type).
        :raises IndexError: if fewer than two positional arguments are given.
        :raises KeyError: if a required dict entry is missing.
        """
        obj = str.__new__(cls, "@")
        kanji = args[0]
        kanji_dict = args[1]

        obj.kanji = kanji
        # Single characters and verb stems get no trailing space; every
        # other multi-character word is padded on both sides.
        if len(kanji) == 1 or "verb stem" in kanji_dict["w_type"]:
            obj.romaji = " " + kanji_dict["romaji"]
        else:
            obj.romaji = " " + kanji_dict["romaji"] + " "

        if "other_readings" in kanji_dict:
            # Primary type first, followed by the types of the alternatives.
            obj.w_type = [kanji_dict["w_type"]]
            obj.w_type.extend(kanji_dict["other_readings"])
        else:
            obj.w_type = kanji_dict["w_type"]
        return obj

    def __repr__(self):
        """Return the kanji with non-ASCII characters shown as escapes."""
        # __repr__ must return str on Python 3; returning the encoded bytes
        # raised TypeError. unicode_escape output is pure ASCII.
        return self.kanji.encode("unicode_escape").decode("ascii")

    def __str__(self):
        """Return the romaji reading, including its padding spaces."""
        # __str__ must return str on Python 3; returning UTF-8 bytes raised
        # TypeError.
        return self.romaji
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
class Particle(str):
    """A particle rendered with one space on each side.

    The string value is the padded form; the bare particle given to the
    constructor is kept in ``pname``.
    """

    def __new__(cls, *args, **kwargs):
        """Create the particle from ``args[0]``, the unpadded text."""
        name = args[0]
        padded = "".join((" ", name, " "))
        instance = str.__new__(cls, padded)
        instance.pname = name
        return instance
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
# noinspection PyClassHasNoInit
class UnicodeRomajiMapping:  # caching
    """Namespace for two class-level dicts used as caches.

    Both start out empty; what is stored in them is decided by code
    outside this class (presumably the loaded kana and kanji tables).
    """

    kana_mapping = dict()
    kanji_mapping = dict()
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
from .UnicodeRomajiMapping import UnicodeRomajiMapping
|
from .UnicodeRomajiMapping import UnicodeRomajiMapping
|
||||||
from .KanjiBlock import KanjiBlock
|
from .KanjiBlock import KanjiBlock
|
||||||
from .Particle import Particle
|
from .Particle import Particle
|
||||||
|
|
||||||
__all__ = ["UnicodeRomajiMapping", "KanjiBlock", "Particle"]
|
__all__ = ["UnicodeRomajiMapping", "KanjiBlock", "Particle"]
|
||||||
|
@ -0,0 +1,5 @@
|
|||||||
|
import asyncio
|
||||||
|
from noawait import NoAwaitPool
|
||||||
|
|
||||||
|
loop = asyncio.new_event_loop()
|
||||||
|
noawait = NoAwaitPool(loop)
|
@ -1,51 +1,65 @@
|
|||||||
import asyncio
|
from local import loop, noawait
|
||||||
from typing import TypedDict
|
|
||||||
from aiohttp import web
|
from aiohttp import web
|
||||||
import asyncpg
|
import config
|
||||||
import config
|
import api.route
|
||||||
import api.route
|
import utils.web
|
||||||
import utils.web
|
from service.database import DatabaseService
|
||||||
from service.database import DatabaseService
|
from service.mediawiki_api import MediaWikiApi
|
||||||
from service.mediawiki_api import MediaWikiApi
|
|
||||||
from sqlalchemy.ext.asyncio import create_async_engine, async_sessionmaker
|
# Auto create Table
|
||||||
|
from api.model.base import BaseModel
|
||||||
from service.tiktoken import TikTokenService
|
from api.model.toolkit_ui.conversation import ConversationModel as _
|
||||||
|
from api.model.chat_complete.conversation import ConversationChunkModel as _
|
||||||
async def index(request: web.Request):
|
from api.model.embedding_search.title_collection import TitleCollectionModel as _
|
||||||
return utils.web.api_response(1, data={"message": "Isekai toolkit API"}, request=request)
|
from api.model.embedding_search.title_index import TitleIndexModel as _
|
||||||
|
|
||||||
async def init_mw_api(app: web.Application):
|
from service.tiktoken import TikTokenService
|
||||||
mw_api = MediaWikiApi.create()
|
|
||||||
if config.MW_BOT_LOGIN_USERNAME and config.MW_BOT_LOGIN_PASSWORD:
|
async def index(request: web.Request):
|
||||||
await mw_api.robot_login(config.MW_BOT_LOGIN_USERNAME, config.MW_BOT_LOGIN_PASSWORD)
|
return utils.web.api_response(1, data={"message": "Isekai toolkit API"}, request=request)
|
||||||
|
|
||||||
site_meta = await mw_api.get_site_meta()
|
async def init_mw_api(app: web.Application):
|
||||||
|
mw_api = MediaWikiApi.create()
|
||||||
print("Connected to Wiki %s, Robot username: %s" % (site_meta["sitename"], site_meta["user"]))
|
if config.MW_BOT_LOGIN_USERNAME and config.MW_BOT_LOGIN_PASSWORD:
|
||||||
|
try:
|
||||||
async def init_database(app: web.Application):
|
await mw_api.robot_login(config.MW_BOT_LOGIN_USERNAME, config.MW_BOT_LOGIN_PASSWORD)
|
||||||
dbs = await DatabaseService.create(app)
|
except Exception as e:
|
||||||
print("Database connected.")
|
print("Cannot login to Robot account, please check config.")
|
||||||
|
|
||||||
async def init_tiktoken(app: web.Application):
|
site_meta = await mw_api.get_site_meta()
|
||||||
await TikTokenService.create()
|
|
||||||
print("Tiktoken model loaded.")
|
print("Connected to Wiki %s, Robot username: %s" % (site_meta["sitename"], site_meta["user"]))
|
||||||
|
|
||||||
if __name__ == '__main__':
|
async def init_database(app: web.Application):
|
||||||
loop = asyncio.get_event_loop()
|
dbs = await DatabaseService.create(app)
|
||||||
|
print("Database connected.")
|
||||||
app = web.Application()
|
|
||||||
|
async with dbs.engine.begin() as conn:
|
||||||
if config.DATABASE:
|
await conn.run_sync(BaseModel.metadata.create_all)
|
||||||
app.on_startup.append(init_database)
|
|
||||||
|
async def init_tiktoken(app: web.Application):
|
||||||
if config.MW_API:
|
await TikTokenService.create()
|
||||||
app.on_startup.append(init_mw_api)
|
print("Tiktoken model loaded.")
|
||||||
|
|
||||||
if config.OPENAI_TOKEN:
|
async def stop_noawait_pool(app: web.Application):
|
||||||
app.on_startup.append(init_tiktoken)
|
await noawait.end()
|
||||||
|
|
||||||
app.router.add_route('*', '/', index)
|
if __name__ == '__main__':
|
||||||
api.route.init(app)
|
app = web.Application()
|
||||||
web.run_app(app, host='0.0.0.0', port=config.PORT, loop=loop)
|
|
||||||
|
if config.DATABASE:
|
||||||
|
app.on_startup.append(init_database)
|
||||||
|
|
||||||
|
if config.MW_API:
|
||||||
|
app.on_startup.append(init_mw_api)
|
||||||
|
|
||||||
|
if config.OPENAI_TOKEN:
|
||||||
|
app.on_startup.append(init_tiktoken)
|
||||||
|
|
||||||
|
app.on_shutdown.append(stop_noawait_pool)
|
||||||
|
|
||||||
|
app.router.add_route('*', '/', index)
|
||||||
|
api.route.init(app)
|
||||||
|
|
||||||
|
web.run_app(app, host='0.0.0.0', port=config.PORT, loop=loop)
|
@ -0,0 +1,72 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
from asyncio import AbstractEventLoop, Task
|
||||||
|
import asyncio
|
||||||
|
from functools import wraps
|
||||||
|
|
||||||
|
import sys
|
||||||
|
import traceback
|
||||||
|
from typing import Callable, Coroutine
|
||||||
|
|
||||||
|
class NoAwaitPool:
    """Run coroutines as fire-and-forget tasks and reap them in the background.

    Tasks are created on the given event loop and kept in ``task_list``.
    A background task sweeps the list every 0.1 seconds, reports the
    exception of any task that failed and drops finished tasks.
    """

    def __init__(self, loop: AbstractEventLoop):
        """Create the pool and schedule its sweeper on ``loop``.

        :param loop: event loop on which all tasks are created.
        """
        self.task_list: list[Task] = []
        self.loop = loop
        self.running = True

        # Callables invoked with the exception of a failed task; they may
        # be plain functions or coroutine functions.
        self.on_error: list[Callable] = []

        self.gc_task = loop.create_task(self._run_gc())

    async def end(self):
        """Stop the pool: cancel unfinished tasks and wait for the sweeper."""
        print("Stopping NoAwait Tasks...")
        self.running = False
        # Iterate over a snapshot: the list can change while we await.
        for task in list(self.task_list):
            await self._finish_task(task)
        self.task_list[:] = [t for t in self.task_list if not t.done()]

        await self.gc_task

    def add_task(self, coroutine: Coroutine):
        """Schedule ``coroutine`` on the pool's loop and track its task."""
        task = self.loop.create_task(coroutine)
        self.task_list.append(task)

    def wrap(self, f):
        """Decorator: calling the wrapped coroutine function schedules it.

        The decorated function returns ``None`` immediately; the coroutine
        runs as a pool task.
        """
        @wraps(f)
        def decorated_function(*args, **kwargs):
            coroutine = f(*args, **kwargs)
            self.add_task(coroutine)

        return decorated_function

    async def _finish_task(self, task: Task):
        """Cancel ``task`` if it is still running, then collect its outcome.

        An exception raised by the task is passed to every ``on_error``
        handler; if there are none it is printed to stderr.
        """
        try:
            if not task.done():
                task.cancel()
            await task
        except asyncio.CancelledError:
            # CancelledError is not an ``Exception`` subclass on current
            # Python versions, so it needs its own clause. A cancelled
            # task is an expected outcome; anything else means the caller
            # itself is being cancelled and must see the error.
            if not task.cancelled():
                raise
        except Exception as e:
            handled = False
            for handler in self.on_error:
                try:
                    handler_ret = handler(e)
                    # Await coroutine handlers. The previous identity test
                    # against ``typing.Coroutine`` could never be true.
                    if asyncio.iscoroutine(handler_ret):
                        await handler_ret
                    handled = True
                except Exception as handler_err:
                    print("Exception on error handler: " + str(handler_err), file=sys.stderr)
                    traceback.print_exc()

            if not handled:
                print(e, file=sys.stderr)
                traceback.print_exc()

    async def _run_gc(self):
        """Sweep finished tasks every 0.1 seconds until the pool is stopped."""
        while self.running:
            # Snapshot first: add_task() and end() may touch the list
            # while a handler is being awaited.
            finished = [task for task in self.task_list if task.done()]
            for task in finished:
                await self._finish_task(task)
            if finished:
                reaped = set(finished)
                self.task_list[:] = [
                    t for t in self.task_list if t not in reaped
                ]

            await asyncio.sleep(0.1)
|
@ -1,16 +1,17 @@
|
|||||||
aiohttp==3.8.4
|
aiohttp==3.8.4
|
||||||
jieba==0.42.1
|
jieba==0.42.1
|
||||||
pypinyin==0.37.0
|
pypinyin==0.37.0
|
||||||
simplejson==3.17.0
|
simplejson==3.17.0
|
||||||
beautifulsoup4==4.11.2
|
beautifulsoup4==4.11.2
|
||||||
markdownify==0.11.6
|
markdownify==0.11.6
|
||||||
asyncpg==0.27.0
|
asyncpg==0.27.0
|
||||||
aiofiles==23.1.0
|
aiofiles==23.1.0
|
||||||
pgvector==0.1.6
|
pgvector==0.1.6
|
||||||
websockets==11.0
|
websockets==11.0
|
||||||
PyJWT==2.6.0
|
PyJWT==2.6.0
|
||||||
asyncpg-stubs==0.27.0
|
asyncpg-stubs==0.27.0
|
||||||
sqlalchemy==2.0.9
|
sqlalchemy==2.0.9
|
||||||
aiohttp-sse-client2==0.3.0
|
aiohttp-sse-client2==0.3.0
|
||||||
OpenCC==1.1.6
|
OpenCC==1.1.6
|
||||||
event-emitter-asyncio==1.0.4
|
event-emitter-asyncio==1.0.4
|
||||||
|
tiktoken-async==0.3.2
|
Loading…
Reference in New Issue