Add noawait, support Azure API

master
落雨楓 2 years ago
parent e21a28a85f
commit 2f68357c1d

@ -0,0 +1,4 @@
#!/bin/sh
DIRNAME=`dirname $0`
cd $DIRNAME
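# "activate" must be sourced, not executed, for it to affect this shell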
. ./.venv/bin/activate

@ -2,80 +2,29 @@ import asyncio
import json
import time
import traceback
from local import noawait
from typing import Optional
from aiohttp import WSMsgType, web
from sqlalchemy import select
from api.model.chat_complete.conversation import ConversationModel, ConversationChunkModel
from noawait import NoAwaitPool
from service.chat_complete import ChatCompleteService
from service.database import DatabaseService
from service.mediawiki_api import MediaWikiApi
from service.tiktoken import TikTokenService
import utils.web
class ChatCompleteTaskList:
def __init__(self, dbs: DatabaseService):
self.on_message = None
self.chunks: list[str] = []
class ChatCompleteWebSocketController:
def __init__(self, request: web.Request):
self.request = request
self.ws = None
self.db = None
self.chat_complete = None
self.closed = False
self.refreshed_time = 0
async def run(self):
self.ws = web.WebSocketResponse()
await self.ws.prepare(self.request)
self.refreshed_time = time.time()
self.db = await DatabaseService.create(self.request.app)
self.query = self.request.query
if self.request.get("caller") == "user":
user_id = self.request.get("user")
else:
user_id = self.query.get("user_id")
title = self.query.get("title")
# create heartbeat task
asyncio.ensure_future(self._timeout_task())
async for msg in self.ws:
if msg.type == WSMsgType.TEXT:
try:
data = json.loads(msg.data)
event = data.get('event')
self.refreshed_time = time.time()
if event == 'chatcomplete':
asyncio.ensure_future(self._chatcomplete(data))
if event == 'ping':
await self.ws.send_json({
'event': 'pong'
})
except Exception as e:
print(e)
traceback.print_exc()
await self.ws.send_json({
'event': 'error',
'error': str(e)
})
elif msg.type == WSMsgType.ERROR:
print('ws connection closed with exception %s' %
self.ws.exception())
async def _timeout_task(self):
while not self.closed:
if time.time() - self.refreshed_time > 30:
self.closed = True
await self.ws.close()
return
await asyncio.sleep(1)
async def _chatcomplete(self, params: dict):
question = params.get("question")
conversation_id = params.get("conversation_id")
async def run():
pass
@noawait.wrap
async def start(self):
await self.run()
class ChatComplete:
@staticmethod
@ -244,8 +193,11 @@ class ChatComplete:
tokens = await tiktoken.get_tokens(question)
transatcion_id = None
point_cost = 0
if request.get("caller") == "user":
transatcion_id = await mwapi.chat_complete_start_transaction(user_id, "chatcomplete", tokens, extra_limit)
usage_res = await mwapi.chat_complete_start_transaction(user_id, "chatcomplete", tokens, extra_limit)
transatcion_id = usage_res.get("transaction_id")
point_cost = usage_res.get("point_cost")
async def on_message(text: str):
# Send message to client, start with "+" to indicate it's a message
@ -261,8 +213,7 @@ class ChatComplete:
try:
chat_res = await chat_complete_service \
.chat_complete(question, on_message, on_extracted_doc,
conversation_id=conversation_id, user_id=user_id, embedding_search={
.prepare_chat_complete(question, conversation_id=conversation_id, user_id=user_id, embedding_search={
"limit": extra_limit,
"in_collection": in_collection,
})
@ -272,6 +223,8 @@ class ChatComplete:
**chat_res,
})
await chat_complete_service.set_latest_point_cost(point_cost)
if transatcion_id:
result = await mwapi.chat_complete_end_transaction(transatcion_id, chat_res["total_tokens"])
except Exception as e:

@ -31,7 +31,8 @@ class EmbeddingSearch:
if await embedding_search.should_update_page_index():
if request.get("caller") == "user":
user_id = request.get("user")
transatcion_id = await mwapi.chat_complete_start_transaction(user_id, "embeddingpage")
usage_res = await mwapi.chat_complete_start_transaction(user_id, "embeddingpage")
transatcion_id = usage_res.get("transaction_id")
await embedding_search.prepare_update_index()
@ -107,7 +108,8 @@ class EmbeddingSearch:
if await embedding_search.should_update_page_index():
if request.get("caller") == "user":
user_id = request.get("user")
transatcion_id = await mwapi.chat_complete_start_transaction(user_id, "embeddingpage")
usage_res = await mwapi.chat_complete_start_transaction(user_id, "embeddingpage")
transatcion_id = usage_res.get("transaction_id")
await embedding_search.prepare_update_index()

@ -1,36 +1,36 @@
from aiohttp import web
import utils.web
import utils.text
from extend.hangul_romanize import Transliter
from extend.hangul_romanize.rule import academic
class Hanja:
@staticmethod
def convertToRomaja(hanja: str):
transliter = Transliter(academic)
segList = utils.text.splitAscii(hanja)
sentenceList = []
for seg in segList:
if seg == " ":
sentenceList.append("-")
elif utils.text.isAscii(seg):
if utils.text.isAsciiPunc(seg):
sentenceList.append(seg)
else:
sentenceList.append([seg])
else:
roma = transliter.translit(seg)
sentenceList.append(roma.split(" "))
return sentenceList
@staticmethod
async def hanja2roma(request: web.Request):
params = await utils.web.get_param(request, {
"sentence": {
"required": True,
},
})
sentence = params.get('sentence')
data = Hanja.convertToRomaja(sentence)
return await utils.web.api_response(1, data, request=request)

@ -1,81 +1,81 @@
from __future__ import annotations
from aiohttp import web
import os.path as path
import jieba
import jieba.posseg as pseg
from pypinyin import pinyin, Style
import utils.text
import utils.web
jieba.initialize()
userDict = path.dirname(path.dirname(path.dirname(__file__))) + "/data/userDict.txt"
if path.exists(userDict):
jieba.load_userdict(userDict)
class Hanzi:
@staticmethod
def filterJiebaTag(segList: list[str]):
ret = []
for word, flag in segList:
if flag[0] == "u" and (word == "" or word == ""):
ret.append("")
else:
ret.append(word)
return ret
@staticmethod
def convertToPinyin(sentence: str):
sentence = utils.text.replaceCJKPunc(sentence).replace(' ', '-')
segList = Hanzi.filterJiebaTag(pseg.cut(sentence))
sentenceList = []
pinyinGroup = []
for seg in segList:
if utils.text.isAscii(seg):
if utils.text.isAsciiPunc(seg):
if len(pinyinGroup) > 0:
sentenceList.append(pinyinGroup)
pinyinGroup = []
sentenceList.append(seg)
else:
if len(pinyinGroup) > 0:
sentenceList.append(pinyinGroup)
pinyinGroup = []
sentenceList.append([seg])
else:
sentencePinyin = []
for one in pinyin(seg, style=Style.NORMAL):
sentencePinyin.append(one[0])
pinyinGroup.append(sentencePinyin)
if len(pinyinGroup) > 0:
sentenceList.append(pinyinGroup)
return sentenceList
@staticmethod
async def hanziToPinyin(request: web.Request):
params = await utils.web.get_param(request, {
"sentence": {
"required": True,
},
})
sentence = params.get('sentence')
data = Hanzi.convertToPinyin(sentence)
return await utils.web.api_response(1, data, request=request)
@staticmethod
async def splitHanzi(request: web.Request):
params = await utils.web.get_param(request, {
"sentence": {
"required": True,
},
})
sentence = params.get("sentence")
segList = list(pseg.cut(sentence))
data = []
for word, flag in segList:
data.append({"word": word, "flag": flag})
return await utils.web.api_response(1, data)

@ -1,32 +1,32 @@
from aiohttp import web
import utils.web
import utils.text
from extend.kanji_to_romaji import kanji_to_romaji
class Kanji:
@staticmethod
def convertToRomaji(kanji: str):
segList = utils.text.splitAscii(kanji)
sentenceList = []
for seg in segList:
if utils.text.isAscii(seg):
if utils.text.isAsciiPunc(seg):
sentenceList.append(seg)
else:
sentenceList.append([seg])
else:
romaji = kanji_to_romaji(seg)
sentenceList.append(romaji.split(" "))
return sentenceList
@staticmethod
async def kanji2romaji(request: web.Request):
params = await utils.web.get_param(request, {
"sentence": {
"required": True,
},
})
sentence = params.get('sentence')
data = Kanji.convertToRomaji(sentence)
return await utils.web.api_response(1, data, request=request)

@ -13,7 +13,7 @@ class ConversationChunkModel(BaseModel):
__tablename__ = "chat_complete_conversation_chunk"
id: Mapped[int] = mapped_column(sqlalchemy.Integer, primary_key=True, autoincrement=True)
conversation_id: Mapped[int] = mapped_column(sqlalchemy.ForeignKey("chat_complete_conversation.id"), index=True)
conversation_id: Mapped[int] = mapped_column(sqlalchemy.ForeignKey(ConversationModel.id), index=True)
message_data: Mapped[list] = mapped_column(sqlalchemy.JSON, nullable=True)
tokens: Mapped[int] = mapped_column(sqlalchemy.Integer, default=0)
updated_at: Mapped[int] = mapped_column(sqlalchemy.TIMESTAMP, index=True)

@ -1,12 +1,13 @@
from __future__ import annotations
import hashlib
from typing import Optional
from typing import Optional, Type
import asyncpg
from api.model.base import BaseModel
import config
import numpy as np
import sqlalchemy
from sqlalchemy import select, update, delete
from sqlalchemy import Index, select, update, delete, Select
from sqlalchemy.orm import mapped_column, Mapped
from sqlalchemy.ext.asyncio import AsyncSession
from pgvector.asyncpg import register_vector
@ -14,19 +15,38 @@ from pgvector.sqlalchemy import Vector
from service.database import DatabaseService
class PageIndexModel(BaseModel):
page_index_model_list: dict[int, Type[AbstractPageIndexModel]] = {}
class AbstractPageIndexModel(BaseModel):
__abstract__ = True
id: Mapped[int] = mapped_column(sqlalchemy.Integer, primary_key=True, autoincrement=True)
page_id: Mapped[int] = mapped_column(sqlalchemy.Integer, index=True)
sha1: Mapped[str] = mapped_column(sqlalchemy.String(40), index=True)
embedding: Mapped[np.ndarray] = mapped_column(Vector(config.EMBEDDING_VECTOR_SIZE))
text: Mapped[str] = mapped_column(sqlalchemy.Text)
text_len: Mapped[int] = mapped_column(sqlalchemy.Integer)
embedding: Mapped[np.ndarray] = mapped_column(Vector(config.EMBEDDING_VECTOR_SIZE))
markdown: Mapped[str] = mapped_column(sqlalchemy.Text, nullable=True)
markdown_len: Mapped[int] = mapped_column(sqlalchemy.Integer, nullable=True)
temp_doc_session_id: Mapped[int] = mapped_column(sqlalchemy.Integer, nullable=True)
def create_page_index_model(collection_id: int):
if collection_id in page_index_model_list:
return page_index_model_list[collection_id]
else:
class PageIndexModel(AbstractPageIndexModel):
__tablename__ = "embedding_search_page_index_%s" % str(collection_id)
embedding_index = sqlalchemy.Index(__tablename__ + "_embedding_idx", AbstractPageIndexModel.embedding,
postgresql_using='ivfflat',
postgresql_ops={'embedding': 'vector_cosine_ops'})
page_index_model_list[collection_id] = PageIndexModel
return PageIndexModel
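For reference, a minimal sketch of how this factory behaves (not part of the commit; the collection id is arbitrary):

PageIndex42 = create_page_index_model(42)
assert PageIndex42.__tablename__ == "embedding_search_page_index_42"
# repeated calls return the cached class, so SQLAlchemy never sees two
# mappings for the same table
assert create_page_index_model(42) is PageIndex42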
class PageIndexHelper:
columns = [
"id",
@ -60,12 +80,19 @@ class PageIndexHelper:
await register_vector(self.dbi)
self.create_session = self.dbs.create_session
self.session = self.dbs.create_session()
await self.session.__aenter__()
self.orm = create_page_index_model(self.collection_id)
self.initialized = True
return self
async def __aexit__(self, exc_type, exc, tb):
await self.dbpool.__aexit__(exc_type, exc, tb)
await self.session.__aexit__(exc_type, exc, tb)
async def table_exists(self):
exists = await self.dbi.fetchval("""SELECT EXISTS (
@ -83,27 +110,31 @@ class PageIndexHelper:
# create table if not exists
if not await self.table_exists():
await self.dbi.execute(("""CREATE TABLE IF NOT EXISTS /*_*/ (
id SERIAL PRIMARY KEY,
page_id INTEGER NOT NULL,
sha1 VARCHAR(40) NOT NULL,
text TEXT NOT NULL,
text_len INTEGER NOT NULL,
embedding VECTOR(%d) NOT NULL,
markdown TEXT NULL,
markdown_len INTEGER NULL,
temp_doc_session_id INTEGER NULL
);
CREATE INDEX /*_*/_page_id_idx ON /*_*/ (page_id);
CREATE INDEX /*_*/_sha1_idx ON /*_*/ (sha1);
CREATE INDEX /*_*/_temp_doc_session_id_idx ON /*_*/ (temp_doc_session_id);
""" % config.EMBEDDING_VECTOR_SIZE).replace("/*_*/", self.table_name))
self.table_initialized = False
async with self.dbs.engine.begin() as conn:
await conn.run_sync(self.orm.__table__.create)
# await self.dbi.execute(("""CREATE TABLE IF NOT EXISTS /*_*/ (
# id SERIAL PRIMARY KEY,
# page_id INTEGER NOT NULL,
# sha1 VARCHAR(40) NOT NULL,
# text TEXT NOT NULL,
# text_len INTEGER NOT NULL,
# embedding VECTOR(%d) NOT NULL,
# markdown TEXT NULL,
# markdown_len INTEGER NULL,
# temp_doc_session_id INTEGER NULL
# );
# CREATE INDEX /*_*/_page_id_idx ON /*_*/ (page_id);
# CREATE INDEX /*_*/_sha1_idx ON /*_*/ (sha1);
# CREATE INDEX /*_*/_temp_doc_session_id_idx ON /*_*/ (temp_doc_session_id);
# """ % config.EMBEDDING_VECTOR_SIZE).replace("/*_*/", self.table_name))
self.table_initialized = True
async def create_embedding_index(self):
await self.dbi.execute("CREATE INDEX IF NOT EXISTS /*_*/_embedding_idx ON /*_*/ USING ivfflat (embedding vector_cosine_ops);"
.replace("/*_*/", self.table_name))
pass
# await self.dbi.execute("CREATE INDEX IF NOT EXISTS /*_*/_embedding_idx ON /*_*/ USING ivfflat (embedding vector_cosine_ops);"
# .replace("/*_*/", self.table_name))
def sha1_doc(self, doc: list):
for item in doc:
@ -113,25 +144,20 @@ class PageIndexHelper:
async def get_indexed_sha1(self, with_temporary: bool = True, in_collection: bool = False):
indexed_sha1_list = []
sql = "SELECT sha1 FROM %s" % (self.table_name)
where = []
params = []
stmt = select(self.orm).column(self.orm.sha1)
if not with_temporary:
where.append("temp_doc_session_id IS NULL")
stmt = stmt.where(self.orm.temp_doc_session_id == None)
if not in_collection:
params.append(self.page_id)
where.append("page_id = $%d" % len(params))
stmt = stmt.where(self.orm.page_id == self.page_id)
if len(where) > 0:
sql += " WHERE " + (" AND ".join(where))
ret = await self.dbi.fetch(sql, *params)
ret: list[AbstractPageIndexModel] = await self.session.scalars(stmt)
for row in ret:
indexed_sha1_list.append(row[0])
indexed_sha1_list.append(row.sha1)
return indexed_sha1_list
async def get_unindexed_doc(self, doc: list, with_temporary: bool = True):
@ -202,11 +228,11 @@ class PageIndexHelper:
if len(should_index) > 0:
await self.dbi.executemany("""INSERT INTO %s (sha1, page_id, text, text_len, markdown, markdown_len, embedding, temp_doc_session_id)
VALUES ($1, $2, $3, $4, $5, $6, $7, NULL);""" % (self.table_name),
[(item["sha1"], self.page_id, item["text"], len(item["text"]), item["markdown"], len(item["markdown"]), item["embedding"]) for item in should_index])
[(item["sha1"], self.page_id, item["text"], len(item["text"]), item["markdown"], len(item["markdown"]), item["embedding"]) for item in should_index])
if len(should_persist) > 0:
await self.dbi.executemany("UPDATE %s SET temp_doc_session_id = NULL WHERE page_id = $1 AND sha1 = $2" % (self.table_name),
[(self.page_id, sha1) for sha1 in should_persist])
[(self.page_id, sha1) for sha1 in should_persist])
if need_create_index:
await self.create_embedding_index()

@ -11,7 +11,7 @@ class TitleCollectionModel(BaseModel):
id: Mapped[int] = mapped_column(sqlalchemy.Integer, primary_key=True, autoincrement=True)
title: Mapped[str] = mapped_column(sqlalchemy.String(255), index=True)
page_id: Mapped[int] = mapped_column(sqlalchemy.Integer, index=True)
page_id: Mapped[int] = mapped_column(sqlalchemy.Integer, index=True, nullable=True)
class TitleCollectionHelper:
def __init__(self, dbs: DatabaseService):
@ -29,7 +29,6 @@ class TitleCollectionHelper:
async def __aexit__(self, exc_type, exc, tb):
await self.session.__aexit__(exc_type, exc, tb)
pass
async def add(self, title: str, page_id: Optional[int] = None) -> Union[int, bool]:
stmt = select(TitleCollectionModel.id).where(TitleCollectionModel.title == title)

@ -20,7 +20,11 @@ class TitleIndexModel(BaseModel):
page_id: Mapped[int] = mapped_column(sqlalchemy.Integer, index=True)
collection_id: Mapped[int] = mapped_column(sqlalchemy.Integer, index=True)
rev_id: Mapped[int] = mapped_column(sqlalchemy.Integer, index=True)
embedding: Mapped[np.ndarray] = mapped_column(Vector(config.EMBEDDING_VECTOR_SIZE), index=True)
embedding: Mapped[np.ndarray] = mapped_column(Vector(config.EMBEDDING_VECTOR_SIZE))
embedding_index = sqlalchemy.Index("embedding_search_title_index_embedding_idx", embedding,
postgresql_using='ivfflat',
postgresql_ops={'embedding': 'vector_cosine_ops'})
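Since the ivfflat index is now declared on the ORM model, similarity queries can stay in SQLAlchemy as well. A hedged sketch (assuming pgvector's cosine_distance comparator; the session wiring is not shown in this hunk):

from sqlalchemy import select

async def nearest_titles(session, query_embedding, limit=5):
    # ordering by cosine distance lets PostgreSQL use the ivfflat index above
    stmt = (
        select(TitleIndexModel)
        .order_by(TitleIndexModel.embedding.cosine_distance(query_embedding))
        .limit(limit)
    )
    return (await session.scalars(stmt)).all()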
class TitleIndexHelper:
__tablename__ = "embedding_search_title_index"

@ -1,66 +1,73 @@
PORT = 8144
HOST = "www.isekai.cn"
MW_API = "http://dev.isekai.cn/api.php"
DEBUG = True
DATABASE = {
"host": "127.0.0.1",
"database": "isekai_toolkit",
"user": "",
"password": "",
"port": "5432",
}
EMBEDDING_VECTOR_SIZE = 1536
OPENAI_API_TYPE = "openai" # openai or azure
OPENAI_API = "https://api.openai.com"
OPENAI_TOKEN = "sk-"
OPENAI_API = None
OPENAI_TOKEN = ""
AZURE_OPENAI_ENDPOINT = "https://your-instance.openai.azure.com"
AZURE_OPENAI_KEY = ""
AZURE_OPENAI_CHATCOMPLETE_DEPLOYMENT_NAME = ""
AZURE_OPENAI_EMBEDDING_DEPLOYMENT_NAME = ""
CHATCOMPLETE_MAX_MEMORY_TOKENS = 1024
CHATCOMPLETE_MAX_INPUT_TOKENS = 768
CHATCOMPLETE_OUTPUT_REPLACE = {
"OpenAI": "オーペンエーアイ",
"ChatGPT": "チャットジーピーティー",
"GPT": "ジーピーティー",
"上下文": "消息",
"AI": "虛擬人物程序",
"语言模型": "虛擬人物程序",
"人工智能程序": "虛擬人物程序",
"語言模型": "虛擬人物程序",
"人工智能程式": "虛擬人物程序",
}
CHATCOMPLETE_DEFAULT_CONVERSATION_TITLE = "无标题"
CHATCOMPLETE_BOT_NAME = "寫作助手"
PROMPTS = {
"chat": {
"system_prompt": "You are a writer. You are the writing assistant of the '異世界百科'. Your name is '{bot_name}'. You need to help users complete the characters and settings in their novel.",
},
"title": {
"system_prompt": "You are a writing assistant, you only need to assist in writing, do not express your opinion.",
"prompt": "Write a short title in Chinese for the following conversation, don't use quotes:\n\n{content}"
},
"suggestions": {
"prompt": "根據下面的對話,提出幾個問題:\n\n{content}"
},
"summary": {
"system_prompt": "You are a writing assistant, you only need to assist in writing, do not express your opinion. Output in Chinese.",
"prompt": "為“{bot_name}”概括下面的聊天記錄排除不重要的對話不要表明自己的意見儘量簡潔。使用中文輸出“User”是同一個人。\n\n{content}"
},
"extracted_doc": {
"prompt": "Here are some relevant informations:\n\n{content}"
}
}
REQUEST_PROXY = "http://127.0.0.1:7890"
AUTH_TOKENS = {
"isekaiwiki": "sk-123456"
}
MW_BOT_LOGIN_USERNAME = "Hyperzlib@ChatComplete"
MW_BOT_LOGIN_PASSWORD = ""
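For reference, a minimal sketch of how a client might branch on OPENAI_API_TYPE; the Azure URL layout, api-version value, and header name below are assumptions about Azure's REST API, not taken from this commit:

import config

def chat_complete_endpoint() -> str:
    if config.OPENAI_API_TYPE == "azure":
        # assumed Azure OpenAI layout; the api-version may need updating
        return "%s/openai/deployments/%s/chat/completions?api-version=2023-05-15" % (
            config.AZURE_OPENAI_ENDPOINT,
            config.AZURE_OPENAI_CHATCOMPLETE_DEPLOYMENT_NAME,
        )
    return (config.OPENAI_API or "https://api.openai.com") + "/v1/chat/completions"

def auth_headers() -> dict:
    if config.OPENAI_API_TYPE == "azure":
        return {"api-key": config.AZURE_OPENAI_KEY}
    return {"Authorization": "Bearer " + config.OPENAI_TOKEN}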

@ -1,2 +1,2 @@
from .core import Transliter # noqa

@ -1,89 +1,89 @@
# -*- coding: utf-8 -*-
try:
unicode(0)
except NameError:
# py3
unicode = str
unichr = chr
class Syllable(object):
"""Hangul syllable interface"""
MIN = ord('가')
MAX = ord('힣')
def __init__(self, char=None, code=None):
if char is None and code is None:
raise TypeError('__init__ takes char or code as a keyword argument (not given)')
if char is not None and code is not None:
raise TypeError('__init__ takes char or code as a keyword argument (both given)')
if char:
code = ord(char)
if not self.MIN <= code <= self.MAX:
raise TypeError('__init__ expected Hangul syllable but {0} not in [{1}..{2}]'.format(code, self.MIN, self.MAX))
self.code = code
@property
def index(self):
return self.code - self.MIN
@property
def initial(self):
return self.index // 588
@property
def vowel(self):
return (self.index // 28) % 21
@property
def final(self):
return self.index % 28
@property
def char(self):
return unichr(self.code)
def __unicode__(self):
return self.char
def __repr__(self):
return '''<Syllable({}({}),{}({}),{}({}),{}({}))>'''.format(
self.code, self.char, self.initial, '', self.vowel, '', self.final, '')
class Transliter(object):
"""General transliting interface"""
def __init__(self, rule):
self.rule = rule
def translit(self, text):
"""Translit text to romanized text
:param text: Unicode string or unicode character iterator
"""
result = []
pre = None, None
now = None, None
for c in text:
try:
post = c, Syllable(c)
except TypeError:
post = c, None
if now[0] is not None:
out = self.rule(now, pre=pre, post=post)
if out is not None:
result.append(out)
pre = now
now = post
if now is not None:
out = self.rule(now, pre=pre, post=(None, None))
if out is not None:
result.append(out)
return ''.join(result)

@ -1,47 +1,47 @@
# -*- coding: utf-8 -*-
REVISED_INITIALS = 'g', 'kk', 'n', 'd', 'tt', 'l', 'm', 'b', 'pp', 's', 'ss', '', 'j', 'jj', 'ch', 'k', 't', 'p', 'h'
REVISED_VOWELS = 'a', 'ae', 'ya', 'yae', 'eo', 'e', 'yeo', 'ye', 'o', 'wa', 'wae', 'oe', 'yo', 'u', 'wo', 'we', 'wi', 'yu', 'eu', 'ui', 'i'
REVISED_FINALS = '', 'g', 'kk', 'gs', 'n', 'nj', 'nh', 'd', 'l', 'lg', 'lm', 'lb', 'ls', 'lt', 'lp', 'lh', 'm', 'b', 'bs', 's', 'ss', 'ng', 'j', 'ch', 'k', 't', 'p', 'h'
def academic_ambiguous_patterns():
import itertools
result = set()
for final, initial in itertools.product(REVISED_FINALS, REVISED_INITIALS):
check = False
combined = final + initial
for i in range(len(combined)):
head, tail = combined[:i], combined[i:]
if head in REVISED_FINALS and tail in REVISED_INITIALS:
if not check:
check = True
else:
result.add(combined)
break
return result
ACADEMIC_AMBIGUOUS_PATTERNS = academic_ambiguous_patterns()
def academic(now, pre, **options):
"""Rule for academic translition."""
c, s = now
if not s:
return c
ps = pre[1] if pre else None
marker = False
if ps:
if s.initial == 11:
marker = True
elif ps and (REVISED_FINALS[ps.final] + REVISED_INITIALS[s.initial]) in ACADEMIC_AMBIGUOUS_PATTERNS:
marker = True
r = u''
if marker:
r += '-'
r += REVISED_INITIALS[s.initial] + REVISED_VOWELS[s.vowel] + REVISED_FINALS[s.final]
return r

@ -1,5 +1,5 @@
from .kanji_to_romaji_module import convert_hiragana_to_katakana, translate_to_romaji, translate_soukon, \
translate_long_vowel, translate_soukon_ch, kanji_to_romaji
__all__ = ["load_mappings_dict", "convert_hiragana_to_katakana", "convert_katakana_to_hiragana",
"translate_to_romaji", "translate_soukon",
"translate_long_vowel", "translate_soukon_ch", "kanji_to_romaji"]

File diff suppressed because it is too large.

@ -1,154 +1,154 @@
{
"": "!",
"": "\"",
"": "#",
"": "$",
"": "%",
"": "&",
"": "'",
"": "*",
"": "+",
"": ",",
"": "-",
"": ".",
"": "\/",
"": "0",
"": "1",
"": "2",
"": "3",
"": "4",
"": "5",
"": "6",
"": "7",
"": "8",
"": "9",
"": ":",
"": ";",
"": "<",
"": "=",
"": ">",
"": "?",
"": "@",
"": "A",
"": "B",
"": "C",
"": "D",
"": "E",
"": "F",
"": "G",
"": "H",
"": "I",
"": "J",
"": "K",
"": "L",
"": "M",
"": "N",
"И": "N",
"": "O",
"": "P",
"": "Q",
"": "R",
"": "S",
"": "T",
"": "U",
"": "V",
"": "W",
"": "X",
"": "Y",
"": "Z",
"": "[",
"": "\\",
"": "]",
"": "^",
"_": "_",
"": "'",
"": "a",
"": "b",
"": "c",
"": "d",
"": "e",
"": "f",
"": "g",
"": "h",
"": "i",
"": "j",
"": "k",
"": "l",
"": "m",
"": "n",
"": "o",
"": "p",
"": "q",
"": "r",
"": "s",
"": "t",
"": "u",
"": "v",
"": "w",
"": "x",
"": "y",
"": "z",
"": "{",
"": "|",
"": "}",
"": "~",
"Ā": "A",
"Ă": "A",
"Ą": "A",
"â": "a",
"ā": "a",
"ă": "a",
"ą": "a",
"Ē": "E",
"Ĕ": "E",
"Ė": "E",
"Ę": "E",
"Ě": "E",
"ē": "e",
"ĕ": "e",
"ė": "e",
"ę": "e",
"ě": "e",
"Ī": "I",
"Ĭ": "I",
"Į": "I",
"ī": "i",
"ĭ": "i",
"į": "i",
"Ō": "O",
"Ŏ": "O",
"Ő": "O",
"ō": "o",
"ŏ": "o",
"ő": "o",
"Ũ": "U",
"Ū": "U",
"Ŭ": "U",
"Ů": "U",
"Ű": "U",
"Ų": "U",
"ũ": "u",
"ū": "u",
"ŭ": "u",
"ů": "u",
"ű": "u",
"ų": "u",
"Ӓ": "A",
"Ӑ": "A",
"Ѐ": "E",
"Ё": "E",
"Ӧ": "O",
"ӓ": "a",
"ӑ": "a",
"ѐ": "e",
"ё": "e",
"ӧ": "o",
"ω": "w",
"€": "E",
"∃": "E",
"ϛ": "c"
}

@ -1,120 +1,120 @@
{
"ぁ": "a",
"あ": "a",
"ぃ": "i",
"い": "i",
"ぅ": "u",
"う": "u",
"ぇ": "e",
"え": "e",
"ぉ": "o",
"お": "o",
"か": "ka",
"が": "ga",
"き": "ki",
"きゃ": "kya",
"きゅ": "kyu",
"きょ": "kyo",
"ぎ": "gi",
"ぎゃ": "gya",
"ぎゅ": "gyu",
"ぎょ": "gyo",
"く": "ku",
"ぐ": "gu",
"け": "ke",
"げ": "ge",
"こ": "ko",
"ご": "go",
"さ": "sa",
"ざ": "za",
"し": "shi",
"しゃ": "sha",
"しゅ": "shu",
"しょ": "sho",
"じ": "ji",
"じゃ": "ja",
"じゅ": "ju",
"じょ": "jo",
"す": "su",
"ず": "zu",
"せ": "se",
"ぜ": "ze",
"そ": "so",
"ぞ": "zo",
"た": "ta",
"だ": "da",
"ち": "chi",
"ちゃ": "cha",
"ちゅ": "chu",
"ちょ": "cho",
"ぢ": "ji",
"つ": "tsu",
"づ": "zu",
"て": "te",
"で": "de",
"と": "to",
"ど": "do",
"な": "na",
"に": "ni",
"にゃ": "nya",
"にゅ": "nyu",
"にょ": "nyo",
"ぬ": "nu",
"ね": "ne",
"の": "no",
"は": "ha",
"ば": "ba",
"ぱ": "pa",
"ひ": "hi",
"ひゃ": "hya",
"ひゅ": "hyu",
"ひょ": "hyo",
"び": "bi",
"びゃ": "bya",
"びゅ": "byu",
"びょ": "byo",
"ぴ": "pi",
"ぴゃ": "pya",
"ぴゅ": "pyu",
"ぴょ": "pyo",
"ふ": "fu",
"ぶ": "bu",
"ぷ": "pu",
"へ": "he",
"べ": "be",
"ぺ": "pe",
"ほ": "ho",
"ぼ": "bo",
"ぽ": "po",
"ま": "ma",
"み": "mi",
"みゃ": "mya",
"みゅ": "myu",
"みょ": "myo",
"む": "mu",
"め": "me",
"も": "mo",
"や": "ya",
"ゆ": "yu",
"よ": "yo",
"ら": "ra",
"り": "ri",
"りゃ": "rya",
"りゅ": "ryu",
"りょ": "ryo",
"る": "ru",
"れ": "re",
"ろ": "ro",
"ゎ": "wa",
"わ": "wa",
"ゐ": "wi",
"ゑ": "we",
"を": " wo ",
"ん": "n",
"ゔ": "vu",
"ゕ": "ka",
"ゖ": "ke",
"ゝ": "iteration_mark",
"ゞ": "voiced_iteration_mark",
"ゟ": "yori"
}

File diff suppressed because it is too large.

@ -1,18 +1,18 @@
{
"今日": {
"w_type": "noun",
"romaji": "kyou"
},
"明日": {
"w_type": "noun",
"romaji": "ashita"
},
"本": {
"w_type": "noun",
"romaji": "hon"
},
"中": {
"w_type": "noun",
"romaji": "naka"
}
}

@ -1,78 +1,78 @@
{
"朝日奈丸佳": {
"w_type": "noun",
"romaji": "Asahina Madoka"
},
"高海千歌": {
"w_type": "noun",
"romaji": "Takami Chika"
},
"鏡音レン": {
"w_type": "noun",
"romaji": "Kagamine Len"
},
"鏡音リン": {
"w_type": "noun",
"romaji": "Kagamine Rin"
},
"逢坂大河": {
"w_type": "noun",
"romaji": "Aisaka Taiga"
},
"水樹奈々": {
"w_type": "noun",
"romaji": "Mizuki Nana"
},
"桜内梨子": {
"w_type": "noun",
"romaji": "Sakurauchi Riko"
},
"山吹沙綾": {
"w_type": "noun",
"romaji": "Yamabuki Saaya"
},
"初音ミク": {
"w_type": "noun",
"romaji": "Hatsune Miku"
},
"渡辺曜": {
"w_type": "noun",
"romaji": "Watanabe You"
},
"原由実": {
"w_type": "noun",
"romaji": "Hara Yumi"
},
"北宇治": {
"w_type": "noun",
"romaji": "Kita Uji"
},
"六本木": {
"w_type": "noun",
"romaji": "Roppongi"
},
"久美子": {
"w_type": "noun",
"romaji": "Kumiko"
},
"政宗": {
"w_type": "noun",
"romaji": "Masamune"
},
"小林": {
"w_type": "noun",
"romaji": "Kobayashi"
},
"奥寺": {
"w_type": "noun",
"romaji": "Okudera"
},
"佐藤": {
"w_type": "noun",
"romaji": "Satou"
},
"玲子": {
"w_type": "noun",
"romaji": "Reiko"
}
}

@ -1,159 +1,159 @@
{
"ァ": "a",
"ア": "a",
"ィ": "i",
"イ": "i",
"イィ": "yi",
"イェ": "ye",
"ゥ": "u",
"ウ": "u",
"ウィ": "wi",
"ウェ": "we",
"ウォ": "wo",
"ェ": "e",
"エ": "e",
"ォ": "o",
"オ": "o",
"カ": "ka",
"ガ": "ga",
"キ": "ki",
"キェ": "kye",
"キャ": "kya",
"キュ": "kyu",
"キョ": "kyo",
"ギ": "gi",
"ギェ": "gye",
"ギャ": "gya",
"ギュ": "gyu",
"ギョ": "gyo",
"ク": "ku",
"クァ": "kwa",
"クィ": "kwi",
"クェ": "kwe",
"クォ": "kwo",
"グ": "gu",
"グァ": "gwa",
"グィ": "gwi",
"グェ": "gwe",
"グォ": "gwo",
"ケ": "ke",
"ゲ": "ge",
"コ": "ko",
"ゴ": "go",
"サ": "sa",
"ザ": "za",
"シ": "shi",
"シェ": "she",
"シャ": "sha",
"シュ": "shu",
"ショ": "sho",
"ジ": "ji",
"ジェ": "je",
"ジャ": "ja",
"ジュ": "ju",
"ジョ": "jo",
"ス": "su",
"スィ": "si",
"ズ": "zu",
"ズィ": "zi",
"セ": "se",
"ゼ": "ze",
"ソ": "so",
"ゾ": "zo",
"タ": "ta",
"ダ": "da",
"チ": "chi",
"チェ": "che",
"チャ": "cha",
"チュ": "chu",
"チョ": "cho",
"ヂ": "ji",
"ツ": "tsu",
"ツァ": "tsa",
"ツィ": "tsi",
"ツェ": "tse",
"ツォ": "tso",
"ヅ": "zu",
"テ": "te",
"ティ": "ti",
"デ": "de",
"ディ": "di",
"ト": "to",
"トゥ": "tu",
"ド": "do",
"ドゥ": "du",
"ナ": "na",
"ニ": "ni",
"ニャ": "nya",
"ニュ": "nyu",
"ニョ": "nyo",
"ヌ": "nu",
"ネ": "ne",
"": "no",
"ハ": "ha",
"バ": "ba",
"パ": "pa",
"ヒ": "hi",
"ヒャ": "hya",
"ヒュ": "hyu",
"ヒョ": "hyo",
"ビ": "bi",
"ビャ": "bya",
"ビュ": "byu",
"ビョ": "byo",
"ピ": "pi",
"ピャ": "pya",
"ピュ": "pyu",
"ピョ": "pyo",
"フ": "fu",
"ファ": "fa",
"フィ": "fi",
"フェ": "fe",
"フォ": "fo",
"ブ": "bu",
"プ": "pu",
"ヘ": "he",
"ベ": "be",
"ペ": "pe",
"ホ": "ho",
"ホゥ": "hu",
"ボ": "bo",
"ポ": "po",
"マ": "ma",
"ミ": "mi",
"ミャ": "mya",
"ミュ": "myu",
"ミョ": "myo",
"ム": "mu",
"メ": "me",
"モ": "mo",
"ヤ": "ya",
"ユ": "yu",
"ヨ": "yo",
"ラ": "ra",
"リ": "ri",
"リャ": "rya",
"リュ": "ryu",
"リョ": "ryo",
"ル": "ru",
"レ": "re",
"ロ": "ro",
"ヮ": "wa",
"ワ": "wa",
"ヰ": "wi",
"ヱ": "we",
"ヲ": "wo",
"ン": "n",
"ヴ": "vu",
"ヴァ": "va",
"ヴィ": "vi",
"ヴェ": "ve",
"ヴォ": "vo",
"ヵ": "ka",
"ヶ": "ke",
"ヺ": "vo",
"・": " ",
"ヽ": "iteration_mark",
"ヾ": "voiced_iteration_mark",
"ヿ": "koto"
}

File diff suppressed because it is too large.

@ -1,103 +1,103 @@
{
"\u200b": "",
"「": "[",
"」": "]",
"『": "[",
"』": "]",
"": "(",
"": ")",
"": "[",
"": "]",
"": "{",
"": "}",
"〈": "(",
"〉": ")",
"【": "[",
"】": "]",
"": "[",
"": "]",
"〖": "[",
"〗": "]",
"〘": "[",
"〙": "]",
"〚": "[",
"〛": "]",
"": "--",
"〓": "-",
"": "=",
"〜": "~",
"…": "_",
"※": "",
"♪": "",
"♫": "",
"♬": "",
"♩": "",
"": "!",
"": "?",
"、": ",",
"♥": " ",
"«": "(",
"»": ")",
"≪": "(",
"≫": ")",
"": "-",
"”": "",
"“": "",
"゙": "",
"": "'",
"": "",
"→": "",
"⇒": "",
"∞": " ",
"☆": " ",
"♠": " ",
"ᷨ": " ",
"ꯑ": " ",
"ᤙ": " ",
"": " ",
"△": "" ,
"★": " ",
"♡": " ",
"。": "",
"゚": "",
"(": "(",
")": ")",
"∀": "a",
"ά": "a",
"ɪ": "I",
"˥": "l",
"゚": "",
"—": "-",
"Я": "",
"Ψ": "",
"┐": "",
"ə": "",
"ˈ": "",
"×": " x ",
"†": "",
"≡": " ",
"": "",
"": "-",
"⇔": " ",
"≒": " ",
"〆": "shime",
"\u3000": " "
}

File diff suppressed because it is too large.

@ -1,29 +1,29 @@
class KanjiBlock(str):
def __new__(cls, *args, **kwargs):
obj = str.__new__(cls, "@")
kanji = args[0]
kanji_dict = args[1]
obj.kanji = kanji
if len(kanji) == 1:
obj.romaji = " " + kanji_dict["romaji"]
else:
if "verb stem" in kanji_dict["w_type"]:
obj.romaji = " " + kanji_dict["romaji"]
else:
obj.romaji = " " + kanji_dict["romaji"] + " "
if "other_readings" in kanji_dict:
obj.w_type = [kanji_dict["w_type"]]
obj.w_type.extend(
[k for k in kanji_dict["other_readings"].keys()]
)
else:
obj.w_type = kanji_dict["w_type"]
return obj
def __repr__(self):
return self.kanji.encode("unicode_escape").decode("ascii")
def __str__(self):
return self.romaji

@ -1,6 +1,6 @@
class Particle(str):
def __new__(cls, *args, **kwargs):
particle_str = args[0]
obj = str.__new__(cls, " " + particle_str + " ")
obj.pname = particle_str
return obj

@ -1,4 +1,4 @@
# noinspection PyClassHasNoInit
class UnicodeRomajiMapping: # caching
kana_mapping = {}
kanji_mapping = {}

@ -1,5 +1,5 @@
from .UnicodeRomajiMapping import UnicodeRomajiMapping
from .KanjiBlock import KanjiBlock
from .Particle import Particle
__all__ = ["UnicodeRomajiMapping", "KanjiBlock", "Particle"]

@ -0,0 +1,5 @@
import asyncio
from noawait import NoAwaitPool
loop = asyncio.new_event_loop()
noawait = NoAwaitPool(loop)

@ -1,51 +1,65 @@
import asyncio
from typing import TypedDict
from aiohttp import web
import asyncpg
import config
import api.route
import utils.web
from service.database import DatabaseService
from service.mediawiki_api import MediaWikiApi
from sqlalchemy.ext.asyncio import create_async_engine, async_sessionmaker
from service.tiktoken import TikTokenService
async def index(request: web.Request):
return utils.web.api_response(1, data={"message": "Isekai toolkit API"}, request=request)
async def init_mw_api(app: web.Application):
mw_api = MediaWikiApi.create()
if config.MW_BOT_LOGIN_USERNAME and config.MW_BOT_LOGIN_PASSWORD:
await mw_api.robot_login(config.MW_BOT_LOGIN_USERNAME, config.MW_BOT_LOGIN_PASSWORD)
site_meta = await mw_api.get_site_meta()
print("Connected to Wiki %s, Robot username: %s" % (site_meta["sitename"], site_meta["user"]))
async def init_database(app: web.Application):
dbs = await DatabaseService.create(app)
print("Database connected.")
async def init_tiktoken(app: web.Application):
await TikTokenService.create()
print("Tiktoken model loaded.")
if __name__ == '__main__':
loop = asyncio.get_event_loop()
app = web.Application()
if config.DATABASE:
app.on_startup.append(init_database)
if config.MW_API:
app.on_startup.append(init_mw_api)
if config.OPENAI_TOKEN:
app.on_startup.append(init_tiktoken)
app.router.add_route('*', '/', index)
api.route.init(app)
web.run_app(app, host='0.0.0.0', port=config.PORT, loop=loop)
from local import loop, noawait
from aiohttp import web
import config
import api.route
import utils.web
from service.database import DatabaseService
from service.mediawiki_api import MediaWikiApi
# Auto create Table
from api.model.base import BaseModel
from api.model.toolkit_ui.conversation import ConversationModel as _
from api.model.chat_complete.conversation import ConversationChunkModel as _
from api.model.embedding_search.title_collection import TitleCollectionModel as _
from api.model.embedding_search.title_index import TitleIndexModel as _
from service.tiktoken import TikTokenService
async def index(request: web.Request):
return utils.web.api_response(1, data={"message": "Isekai toolkit API"}, request=request)
async def init_mw_api(app: web.Application):
mw_api = MediaWikiApi.create()
if config.MW_BOT_LOGIN_USERNAME and config.MW_BOT_LOGIN_PASSWORD:
try:
await mw_api.robot_login(config.MW_BOT_LOGIN_USERNAME, config.MW_BOT_LOGIN_PASSWORD)
except Exception as e:
print("Cannot login to Robot account, please check config.")
site_meta = await mw_api.get_site_meta()
print("Connected to Wiki %s, Robot username: %s" % (site_meta["sitename"], site_meta["user"]))
async def init_database(app: web.Application):
dbs = await DatabaseService.create(app)
print("Database connected.")
async with dbs.engine.begin() as conn:
await conn.run_sync(BaseModel.metadata.create_all)
async def init_tiktoken(app: web.Application):
await TikTokenService.create()
print("Tiktoken model loaded.")
async def stop_noawait_pool(app: web.Application):
await noawait.end()
if __name__ == '__main__':
app = web.Application()
if config.DATABASE:
app.on_startup.append(init_database)
if config.MW_API:
app.on_startup.append(init_mw_api)
if config.OPENAI_TOKEN:
app.on_startup.append(init_tiktoken)
app.on_shutdown.append(stop_noawait_pool)
app.router.add_route('*', '/', index)
api.route.init(app)
web.run_app(app, host='0.0.0.0', port=config.PORT, loop=loop)
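The "as _" imports above exist only for their side effect: importing each model module registers its table on BaseModel.metadata before create_all runs. A hypothetical new model would need the same treatment:

# main.py (hypothetical example; the module name is made up)
from api.model.example import ExampleModel as _  # noqa: F401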

@ -0,0 +1,72 @@
from __future__ import annotations
from asyncio import AbstractEventLoop, Task
import asyncio
from functools import wraps
import sys
import traceback
from typing import Callable, Coroutine
class NoAwaitPool:
def __init__(self, loop: AbstractEventLoop):
self.task_list: list[Task] = []
self.loop = loop
self.running = True
self.on_error: list[Callable] = []
self.gc_task = loop.create_task(self._run_gc())
async def end(self):
print("Stopping NoAwait Tasks...")
self.running = False
for task in self.task_list:
await self._finish_task(task)
await self.gc_task
def add_task(self, coroutine: Coroutine):
task = self.loop.create_task(coroutine)
self.task_list.append(task)
def wrap(self, f):
@wraps(f)
def decorated_function(*args, **kwargs):
coroutine = f(*args, **kwargs)
self.add_task(coroutine)
return decorated_function
async def _finish_task(self, task: Task):
try:
if not task.done():
task.cancel()
await task
except Exception as e:
handled = False
for handler in self.on_error:
try:
handler_ret = handler(e)
if handler_ret is Coroutine:
await handler_ret
handled = True
except Exception as handler_err:
print("Exception on error handler: " + str(handler_err), file=sys.stderr)
traceback.print_exc()
if not handled:
print(e, file=sys.stderr)
traceback.print_exc()
async def _run_gc(self):
while self.running:
should_remove = []
for task in self.task_list:
if task.done():
await self._finish_task(task)
should_remove.append(task)
for task in should_remove:
self.task_list.remove(task)
await asyncio.sleep(0.1)
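For reference, a minimal usage sketch of the pool above (not part of the commit):

import asyncio
from noawait import NoAwaitPool

loop = asyncio.new_event_loop()
pool = NoAwaitPool(loop)

@pool.wrap
async def background_job(n: int):
    await asyncio.sleep(0.1)
    print("job", n, "done")

async def main():
    background_job(1)  # returns immediately; the task is tracked by the pool
    background_job(2)
    await asyncio.sleep(0.5)
    await pool.end()  # finish or cancel remaining tasks and stop the gc loop

loop.run_until_complete(main())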

@ -1,16 +1,17 @@
aiohttp==3.8.4
jieba==0.42.1
pypinyin==0.37.0
simplejson==3.17.0
beautifulsoup4==4.11.2
markdownify==0.11.6
asyncpg==0.27.0
aiofiles==23.1.0
pgvector==0.1.6
websockets==11.0
PyJWT==2.6.0
asyncpg-stubs==0.27.0
sqlalchemy==2.0.9
aiohttp-sse-client2==0.3.0
OpenCC==1.1.6
event-emitter-asyncio==1.0.4
tiktoken-async==0.3.2

@ -19,6 +19,11 @@ from service.openai_api import OpenAIApi
from service.tiktoken import TikTokenService
class ChatCompleteServicePrepareResponse(TypedDict):
extract_doc: list
question_tokens: int
class ChatCompleteServiceResponse(TypedDict):
message: str
message_tokens: int
@ -44,9 +49,18 @@ class ChatCompleteService:
self.tiktoken: TikTokenService = None
self.extract_doc: list = None
self.mwapi = MediaWikiApi.create()
self.openai_api = OpenAIApi.create()
self.user_id = 0
self.question = ""
self.question_tokens: Optional[int] = None
self.conversation_id: Optional[int] = None
self.delta_data = {}
async def __aenter__(self):
self.tiktoken = await TikTokenService.create()
@ -67,26 +81,55 @@ class ChatCompleteService:
async def get_question_tokens(self, question: str):
return await self.tiktoken.get_tokens(question)
async def chat_complete(self, question: str, on_message: Optional[callable] = None, on_extracted_doc: Optional[callable] = None,
conversation_id: Optional[str] = None, user_id: Optional[int] = None, question_tokens: Optional[int] = None,
embedding_search: Optional[EmbeddingSearchArgs] = None) -> ChatCompleteServiceResponse:
async def prepare_chat_complete(self, question: str, conversation_id: Optional[str] = None, user_id: Optional[int] = None,
question_tokens: Optional[int] = None,
embedding_search: Optional[EmbeddingSearchArgs] = None) -> ChatCompleteServicePrepareResponse:
if user_id is not None:
user_id = int(user_id)
self.user_id = user_id
self.question = question
self.conversation_info = None
if conversation_id is not None:
conversation_id = int(conversation_id)
self.conversation_info = await self.conversation_helper.get_conversation(conversation_id)
self.conversation_id = int(conversation_id)
self.conversation_info = await self.conversation_helper.find_by_id(self.conversation_id)
else:
self.conversation_id = None
if self.conversation_info is not None:
if self.conversation_info.user_id != user_id:
raise web.HTTPUnauthorized()
if question_tokens is None:
self.question_tokens = await self.get_question_tokens(question)
else:
self.question_tokens = question_tokens
if (len(question) * 4 > config.CHATCOMPLETE_MAX_INPUT_TOKENS and
self.question_tokens > config.CHATCOMPLETE_MAX_INPUT_TOKENS):
# The question is too long: reject the request
raise web.HTTPRequestEntityTooLarge()
# Extract document from wiki page index
self.extract_doc = None
if embedding_search is not None:
self.extract_doc, token_usage = await self.embedding_search.search(question, **embedding_search)
if self.extract_doc is not None:
self.question_tokens += token_usage
return ChatCompleteServicePrepareResponse(
extract_doc=self.extract_doc,
question_tokens=self.question_tokens
)
async def finish_chat_complete(self, on_message: Optional[callable] = None) -> ChatCompleteServiceResponse:
delta_data = {}
self.conversation_chunk = None
message_log = []
if self.conversation_info is not None:
if self.conversation_info.user_id != user_id:
raise web.HTTPUnauthorized()
self.conversation_chunk = await self.conversation_chunk_helper.get_newest_chunk(conversation_id)
self.conversation_chunk = await self.conversation_chunk_helper.get_newest_chunk(self.conversation_id)
# If the conversation is too long, we need to make a summary
if self.conversation_chunk.tokens > config.CHATCOMPLETE_MAX_MEMORY_TOKENS:
@ -95,9 +138,9 @@ class ChatCompleteService:
{"role": "summary", "content": summary, "tokens": tokens}
]
self.conversation_chunk = await self.conversation_chunk_helper.add(conversation_id, new_message_log, tokens)
self.conversation_chunk = await self.conversation_chunk_helper.add(self.conversation_id, new_message_log, tokens)
delta_data["conversation_chunk_id"] = self.conversation_chunk.id
self.delta_data["conversation_chunk_id"] = self.conversation_chunk.id
message_log = []
for message in self.conversation_chunk.message_data:
@ -106,40 +149,26 @@ class ChatCompleteService:
"content": message["content"],
})
if question_tokens is None:
question_tokens = await self.get_question_tokens(question)
if (len(question) * 4 > config.CHATCOMPLETE_MAX_INPUT_TOKENS and
question_tokens > config.CHATCOMPLETE_MAX_INPUT_TOKENS):
# If the question is too long, we need to truncate it
raise web.HTTPRequestEntityTooLarge()
extract_doc = None
if embedding_search is not None:
extract_doc, token_usage = await self.embedding_search.search(question, **embedding_search)
if extract_doc is not None:
if on_extracted_doc is not None:
await on_extracted_doc(extract_doc)
question_tokens = token_usage
doc_prompt_content = "\n".join(["%d. %s" % (
i + 1, doc["markdown"] or doc["text"]) for i, doc in enumerate(extract_doc)])
if self.extract_doc is not None:
doc_prompt_content = "\n".join(["%d. %s" % (
i + 1, doc["markdown"] or doc["text"]) for i, doc in enumerate(self.extract_doc)])
doc_prompt = utils.config.get_prompt("extracted_doc", "prompt", {
"content": doc_prompt_content})
message_log.append({"role": "user", "content": doc_prompt})
doc_prompt = utils.config.get_prompt("extracted_doc", "prompt", {
"content": doc_prompt_content})
message_log.append({"role": "user", "content": doc_prompt})
system_prompt = utils.config.get_prompt("chat", "system_prompt")
# Start chat complete
if on_message is not None:
response = await self.openai_api.chat_complete_stream(question, system_prompt, message_log, on_message)
response = await self.openai_api.chat_complete_stream(self.question, system_prompt, message_log, on_message)
else:
response = await self.openai_api.chat_complete(question, system_prompt, message_log)
response = await self.openai_api.chat_complete(self.question, system_prompt, message_log)
if self.conversation_info is None:
# Create a new conversation
message_log_list = [
{"role": "user", "content": question, "tokens": question_tokens},
{"role": "user", "content": self.question, "tokens": self.question_tokens},
{"role": "assistant",
"content": response["message"], "tokens": response["message_tokens"]},
]
@ -152,21 +181,21 @@ class ChatCompleteService:
print(str(e), file=sys.stderr)
traceback.print_exc(file=sys.stderr)
total_token_usage = question_tokens + response["message_tokens"]
total_token_usage = self.question_tokens + response["message_tokens"]
title_info = self.embedding_search.title_info
self.conversation_info = await self.conversation_helper.add(user_id, "chatcomplete", page_id=title_info["page_id"], rev_id=title_info["rev_id"], title=title)
self.conversation_info = await self.conversation_helper.add(self.user_id, "chatcomplete", page_id=title_info["page_id"], rev_id=title_info["rev_id"], title=title)
self.conversation_chunk = await self.conversation_chunk_helper.add(self.conversation_info.id, message_log_list, total_token_usage)
else:
# Update the conversation chunk
await self.conversation_helper.refresh_updated_at(conversation_id)
await self.conversation_helper.refresh_updated_at(self.conversation_id)
self.conversation_chunk.message_data.append(
{"role": "user", "content": question, "tokens": question_tokens})
{"role": "user", "content": self.question, "tokens": self.question_tokens})
self.conversation_chunk.message_data.append(
{"role": "assistant", "content": response["message"], "tokens": response["message_tokens"]})
flag_modified(self.conversation_chunk, "message_data")
self.conversation_chunk.tokens += question_tokens + \
self.conversation_chunk.tokens += self.question_tokens + \
response["message_tokens"]
await self.conversation_chunk_helper.update(self.conversation_chunk)

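Taken together, the hunks above split the old single-shot flow into prepare_chat_complete (token counting, authorization, embedding search) and finish_chat_complete (the model call and persistence), so billing can happen in between. A minimal caller-side sketch, assuming the names and response shape visible in this diff; the service construction and dict-style access to the prepare response are assumptions, not shown here:

# Hedged sketch of driving the two-phase flow; ChatCompleteService
# comes from this codebase, everything else is illustrative.
from service.chat_complete import ChatCompleteService

async def ask(service: ChatCompleteService, question: str, user_id: int):
    prepared = await service.prepare_chat_complete(
        question,
        conversation_id=None,           # start a fresh conversation
        user_id=user_id,
        embedding_search={"limit": 5},  # same keyword shape as the web handler
    )
    # A usage transaction can be opened here, before any model call,
    # using the token count computed in phase one.
    print("prompt tokens:", prepared["question_tokens"])

    async def on_message(text: str):
        print(text, end="", flush=True)

    # Phase two performs the completion and updates the conversation.
    return await service.finish_chat_complete(on_message)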
@ -1,5 +1,5 @@
from __future__ import annotations
import asyncio
import local
from urllib.parse import quote_plus
from aiohttp import web
import asyncpg
@ -38,7 +38,7 @@ class DatabaseService:
self.create_session: async_sessionmaker[AsyncSession] = None
async def init(self):
loop = asyncio.get_event_loop()
loop = local.loop
self.pool = asyncpg.create_pool(**config.DATABASE, loop=loop)
await self.pool.__aenter__()

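The local module that replaces asyncio.get_event_loop() here is not included in this diff; a plausible minimal sketch, assuming it only needs to expose the shared loop and the NoAwaitPool that other files in this commit import (the NoAwaitPool constructor signature is an assumption):

# local.py - hypothetical sketch; only the two attributes the diff
# actually touches (local.loop and local.noawait) are defined.
import asyncio
from noawait import NoAwaitPool

loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
# Pool for fire-and-forget coroutines that must still be flushed
# (via noawait.end()) before the shared loop shuts down.
noawait = NoAwaitPool(loop)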
@ -129,10 +129,24 @@ class EmbeddingSearchService:
if self.unindexed_docs is None:
return False
chunk_limit = 500
chunk_len = 0
doc_chunk = []
total_token_usage = 0
processed_len = 0
async def on_embedding_progress(current, length):
nonlocal processed_len
indexed_docs = processed_len + current
if on_progress is not None:
await on_progress(indexed_docs, len(self.unindexed_docs))
async def embedding_doc(doc_chunk):
(doc_chunk, token_usage) = await self.openai_api.get_embeddings(doc_chunk)
(doc_chunk, token_usage) = await self.openai_api.get_embeddings(doc_chunk, on_embedding_progress)
await self.page_index.index_doc(doc_chunk)
return token_usage
@ -140,12 +154,7 @@ class EmbeddingSearchService:
if len(self.unindexed_docs) > 0:
if on_progress is not None:
await on_progress(0, len(self.unindexed_docs))
chunk_limit = 500
chunk_len = 0
processed_len = 0
doc_chunk = []
for doc in self.unindexed_docs:
chunk_len += len(doc)

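Hoisting chunk_limit, doc_chunk, and processed_len out of the loop lets the nested on_embedding_progress callback translate per-chunk progress into progress over all unindexed docs. A toy illustration of that arithmetic, with invented sizes:

# Invented numbers, just to show the offset calculation above.
unindexed_total = 1200   # len(self.unindexed_docs)
processed_len = 500      # docs embedded in chunks already finished
current = 37             # position reported inside the running chunk
indexed_docs = processed_len + current
print("%.1f%%" % (indexed_docs / unindexed_total * 100))  # -> 44.8%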
@ -1,7 +1,7 @@
import json
import sys
import time
from typing import Optional
from typing import Optional, TypedDict
import aiohttp
import config
@ -18,6 +18,10 @@ class MediaWikiApiException(Exception):
class MediaWikiPageNotFoundException(MediaWikiApiException):
pass
class ChatCompleteReportUsageResponse(TypedDict):
point_cost: int
transaction_id: str
class MediaWikiApi:
cookie_jar = aiohttp.CookieJar(unsafe=True)
@ -27,7 +31,7 @@ class MediaWikiApi:
def __init__(self, api_url: str):
self.api_url = api_url
self.login_time = 0
self.login_time = 0.0
self.login_identity = None
async def get_page_info(self, title: str):
@ -142,7 +146,7 @@ class MediaWikiApi:
async def refresh_login(self):
if self.login_identity is None:
return False
if time.time() - self.login_time > 10:
if time.time() - self.login_time > 30:
return await self.robot_login(self.login_identity["username"], self.login_identity["password"])
async def chat_complete_user_info(self, user_id: int):
@ -166,7 +170,7 @@ class MediaWikiApi:
return data["chatcompletebot"]["userinfo"]
async def chat_complete_start_transaction(self, user_id: int, user_action: str, tokens: Optional[int] = None, extractlines: Optional[int] = None) -> str:
async def chat_complete_start_transaction(self, user_id: int, user_action: str, tokens: Optional[int] = None, extractlines: Optional[int] = None) -> ChatCompleteReportUsageResponse:
await self.refresh_login()
async with aiohttp.ClientSession(cookie_jar=self.cookie_jar) as session:
@ -189,7 +193,8 @@ class MediaWikiApi:
print(data)
raise MediaWikiApiException(data["error"]["info"], data["error"]["code"])
return data["chatcompletebot"]["reportusage"]["transactionid"]
return ChatCompleteReportUsageResponse(point_cost=int(data["chatcompletebot"]["reportusage"]["pointcost"]),
transaction_id=data["chatcompletebot"]["reportusage"]["transactionid"])
async def chat_complete_end_transaction(self, transaction_id: str, tokens: Optional[int] = None):
await self.refresh_login()

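Because chat_complete_start_transaction now returns a ChatCompleteReportUsageResponse instead of a bare transaction id, callers unpack two fields before settling the transaction. A small consuming sketch; the argument values are placeholders, only the field and method names come from the diff:

from service.mediawiki_api import MediaWikiApi

async def bill_and_settle(mwapi: MediaWikiApi) -> None:
    usage_res = await mwapi.chat_complete_start_transaction(
        user_id=1, user_action="chatcomplete", tokens=128, extractlines=5)
    transaction_id = usage_res["transaction_id"]
    point_cost = usage_res["point_cost"]  # surfaced to the client as cost
    # ... run the completion, then report the real token usage:
    await mwapi.chat_complete_end_transaction(transaction_id, tokens=256)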
@ -1,6 +1,6 @@
from __future__ import annotations
import json
from typing import TypedDict
from typing import Callable, Optional, TypedDict
import aiohttp
import config
@ -23,38 +23,117 @@ class ChatCompleteResponse(TypedDict):
class OpenAIApi:
@staticmethod
def create():
return OpenAIApi(config.OPENAI_API or "https://api.openai.com", config.OPENAI_TOKEN)
def __init__(self, api_url: str, token: str):
self.api_url = api_url
self.token = token
return OpenAIApi()
def __init__(self):
if config.OPENAI_API_TYPE == "azure":
self.api_url = config.AZURE_OPENAI_ENDPOINT
self.api_key = config.AZURE_OPENAI_KEY
else:
self.api_url = config.OPENAI_API or "https://api.openai.com"
self.api_key = config.OPENAI_TOKEN
def build_header(self):
if config.OPENAI_API_TYPE == "azure":
return {
"Content-Type": "application/json",
"Accept": "application/json",
"api-key": self.api_key
}
else:
return {
"Authorization": f"Bearer {self.api_key}",
"Content-Type": "application/json",
"Accept": "application/json",
}
def get_url(self, method: str):
if config.OPENAI_API_TYPE == "azure":
if method == "chat/completions":
return self.api_url + "/openai/deployments/" + config.AZURE_OPENAI_CHATCOMPLETE_DEPLOYMENT_NAME + "/" + method
elif method == "embeddings":
return self.api_url + "/openai/deployments/" + config.AZURE_OPENAI_EMBEDDING_DEPLOYMENT_NAME + "/" + method
else:
return self.api_url + "/v1/" + method
async def get_embeddings(self, doc_list: list, on_index_progress: Optional[Callable[[int, int], None]] = None):
text_list = []
regex = r"[=,.?!@#$%^&*()_+:\"<>/\[\]\\`~——,。、《》?;’:“【】、{}|·!¥…()-]"
for doc in doc_list:
text: str = doc["text"]
text = text.replace("\r\n", "\n").replace("\r", "\n")
if "\n" in text:
lines = text.split("\n")
new_lines = []
for line in lines:
line = line.strip()
# Add a dot at the end of the line if it doesn't end with a punctuation mark
if line != "" and regex.find(line[-1]) == -1:
line += "."
new_lines.append(line)
text = " ".join(new_lines)
text_list.append(text)
async def get_embeddings(self, doc_list: list):
token_usage = 0
async with aiohttp.ClientSession() as session:
text_list = [doc["text"] for doc in doc_list]
params = {
"model": "text-embedding-ada-002",
url = self.get_url("embeddings")
params = {}
post_data = {
"input": text_list,
}
async with session.post(self.api_url + "/v1/embeddings",
headers={"Authorization": f"Bearer {self.token}"},
json=params,
timeout=30,
proxy=config.REQUEST_PROXY) as resp:
data = await resp.json()
for one_data in data["data"]:
embedding = one_data["embedding"]
index = one_data["index"]
if index < len(doc_list):
if embedding is not None:
embedding = np.array(embedding)
doc_list[index]["embedding"] = embedding
token_usage = int(data["usage"]["total_tokens"])
if config.OPENAI_API_TYPE == "azure":
params["api-version"] = "2023-05-15"
else:
post_data["model"] = "text-embedding-ada-002"
if config.OPENAI_API_TYPE == "azure":
# The Azure API does not support batch input
for index, text in enumerate(text_list):
async with session.post(url,
headers=self.build_header(),
params=params,
json={"input": text},
timeout=30,
proxy=config.REQUEST_PROXY) as resp:
data = await resp.json()
one_data = data["data"]
if len(one_data) > 0:
embedding = one_data[0]["embedding"]
if embedding is not None:
embedding = np.array(embedding)
doc_list[index]["embedding"] = embedding
token_usage += int(data["usage"]["total_tokens"])
if on_index_progress is not None:
await on_index_progress(index, len(text_list))
else:
async with session.post(url,
headers=self.build_header(),
params=params,
json=post_data,
timeout=30,
proxy=config.REQUEST_PROXY) as resp:
data = await resp.json()
for one_data in data["data"]:
embedding = one_data["embedding"]
index = one_data["index"]
if index < len(doc_list):
if embedding is not None:
embedding = np.array(embedding)
doc_list[index]["embedding"] = embedding
token_usage = int(data["usage"]["total_tokens"])
if on_index_progress is not None:
    await on_index_progress(index, len(text_list))
return (doc_list, token_usage)
@ -79,17 +158,26 @@ class OpenAIApi:
async def chat_complete(self, question: str, system_prompt: str, conversation: list[ChatCompleteMessageLog] = [], user = None):
messageList = await self.make_message_list(question, system_prompt, conversation)
params = {
"model": "gpt-3.5-turbo",
url = self.get_url("chat/completions")
params = {}
post_data = {
"messages": messageList,
"user": user,
}
params = {k: v for k, v in params.items() if v is not None}
if config.OPENAI_API_TYPE == "azure":
params["api-version"] = "2023-05-15"
else:
post_data["model"] = "gpt-3.5-turbo"
post_data = {k: v for k, v in post_data.items() if v is not None}
async with aiohttp.ClientSession() as session:
async with session.post(self.api_url + "/v1/chat/completions",
headers={"Authorization": f"Bearer {self.token}"},
json=params,
async with session.post(url,
headers=self.build_header(),
params=params,
json=post_data,
timeout=30,
proxy=config.REQUEST_PROXY) as resp:
@ -138,7 +226,7 @@ class OpenAIApi:
option={
"method": "POST"
},
headers={"Authorization": f"Bearer {self.token}"},
headers={"Authorization": f"Bearer {self.api_key}"},
json=params,
proxy=config.REQUEST_PROXY
) as session:

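The Azure branch is driven entirely by configuration; a hedged sketch of the config.py entries this file now reads, with placeholder values (the variable names are taken from the code above, everything else is illustrative):

# Placeholder values; any OPENAI_API_TYPE other than "azure"
# falls back to the stock OpenAI endpoints.
OPENAI_API_TYPE = "azure"
AZURE_OPENAI_ENDPOINT = "https://my-resource.openai.azure.com"
AZURE_OPENAI_KEY = "<azure-api-key>"
AZURE_OPENAI_CHATCOMPLETE_DEPLOYMENT_NAME = "gpt-35-turbo"
AZURE_OPENAI_EMBEDDING_DEPLOYMENT_NAME = "text-embedding-ada-002"
# With these set, get_url("embeddings") resolves to
# .../openai/deployments/text-embedding-ada-002/embeddings, and
# build_header() authenticates with an "api-key" header rather than
# an Authorization bearer token; requests also carry the
# api-version=2023-05-15 query parameter.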
@ -1,33 +1,37 @@
import asyncio
import config
import asyncpg
from service.database import DatabaseService
from service.embedding_search import EmbeddingSearchService
async def main():
dbs = await DatabaseService.create()
async with EmbeddingSearchService(dbs, "代号:曙光的世界/黄昏的阿瓦隆") as embedding_search:
async def on_index_progress(current, length):
print("索引进度:%.1f%%" % (current / length * 100))
await embedding_search.update_page_index(on_index_progress)
while True:
query = input("请输入要搜索的问题 (.exit 退出)")
if query == ".exit":
break
res = await embedding_search.search(query, 5)
total_length = 0
if res:
for one in res:
total_length += len(one["markdown"])
print("%s, distance=%.4f" % (one["markdown"], one["distance"]))
else:
print("未搜索到相关内容")
print("总长度:%d" % total_length)
if __name__ == '__main__':
asyncio.run(main())
import local
from service.database import DatabaseService
from service.embedding_search import EmbeddingSearchService
async def main():
dbs = await DatabaseService.create()
async with EmbeddingSearchService(dbs, "代号:曙光的世界/黄昏的阿瓦隆") as embedding_search:
await embedding_search.prepare_update_index()
async def on_index_progress(current, length):
print("\r索引进度:%.1f%%" % (current / length * 100), end="", flush=True)
print("")
await embedding_search.update_page_index(on_index_progress)
print("")
while True:
query = input("请输入要搜索的问题 (.exit 退出)")
if query == ".exit":
break
res, token_usage = await embedding_search.search(query, 5)
total_length = 0
if res:
for one in res:
total_length += len(one["markdown"])
print("%s, distance=%.4f" % (one["markdown"], one["distance"]))
else:
print("未搜索到相关内容")
print("总长度:%d" % total_length)
await local.noawait.end()
if __name__ == '__main__':
local.loop.run_until_complete(main())