You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
isekai-toolkit/test/text2vec_embedding_queue.py

38 lines
1.1 KiB
Python

import asyncio
import time
import base as _
from utils.local import loop, noawait
from service.text_embedding import Text2VecEmbeddingQueue
async def main():
    """Benchmark Text2VecEmbeddingQueue on the non-blank lines of test/test.md.

    Every non-blank line (after stripping surrounding whitespace) is submitted
    through ``get_embeddings`` and all submissions are awaited together with
    ``asyncio.gather``. The function then prints the number of submitted
    lines, the elapsed wall-clock time, the throughput in lines per second and
    the length of the first result, and finally awaits ``noawait.end()``.
    """
    queue = []
    text2vec_queue = Text2VecEmbeddingQueue("shibing624/text2vec-base-chinese")

    start_time = time.time()

    # NOTE(review): this callback is not referenced anywhere in this function;
    # presumably kept for manual progress debugging -- confirm before removing.
    async def on_progress(current, total):
        print(f"{current}/{total}")

    total_lines = 0
    with open("test/test.md", "r", encoding="utf-8") as fp:
        for raw_line in fp:
            line = raw_line.strip()
            if not line:
                continue

            queue.append(text2vec_queue.get_embeddings(line))
            # Count each submitted line; the statistics below depend on it.
            total_lines += 1

    embedding_list = await asyncio.gather(*queue)
    end_time = time.time()
    elapsed = end_time - start_time

    print("total lines: %d" % total_lines)
    print("time cost: %.4f" % elapsed)
    # A coarse clock can report a zero interval; avoid dividing by it.
    speed = total_lines / elapsed if elapsed > 0 else 0.0
    print("speed: %.4f it/s" % speed)
    # With no non-blank lines there is no first result to measure.
    if embedding_list:
        print("dimensions: %d" % len(embedding_list[0]))

    await noawait.end()
if __name__ == "__main__":
    # Script entry point: build the main() coroutine and run it to completion
    # on the `loop` object imported from utils.local.
    entry_coroutine = main()
    loop.run_until_complete(entry_coroutine)