You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
38 lines
1.1 KiB
Python
38 lines
1.1 KiB
Python
2 years ago
|
import asyncio
|
||
|
import time
|
||
9 months ago
|
import base as _
|
||
|
from utils.local import loop, noawait
|
||
|
from service.text_embedding import Text2VecEmbeddingQueue
|
||
2 years ago
|
|
||
|
async def main():
    """Benchmark embedding throughput over the non-blank lines of ``test/test.md``.

    Reads the file, strips each line, skips blank ones, submits every
    remaining line to a ``Text2VecEmbeddingQueue`` and awaits all results
    together. Afterwards it prints the number of lines processed, the elapsed
    time, the throughput (lines per second) and the length of the first
    returned embedding.

    The timer starts after the queue object has been constructed, so the
    constructor's cost is excluded from the reported figures.
    """
    embedding_list = []
    queue = []
    text2vec_queue = Text2VecEmbeddingQueue("shibing624/text2vec-base-chinese")

    # perf_counter is monotonic and high-resolution, which suits interval
    # measurement better than wall-clock time.
    start_time = time.perf_counter()

    async def on_progress(current, total):
        # Progress printer; nothing in this function currently passes it on.
        print(f"{current}/{total}")

    total_lines = 0

    with open("test/test.md", "r", encoding="utf-8") as fp:
        text = fp.read()

    for line in text.split("\n"):
        line = line.strip()
        if line == "":
            continue

        # get_embeddings() is presumed to return an awaitable (it is handed
        # to asyncio.gather below) -- confirm against the service module.
        queue.append(text2vec_queue.get_embeddings(line))
        # Count every submitted line; the original added 0 here, so the
        # reported line count and speed were always zero.
        total_lines += 1

    embedding_list = await asyncio.gather(*queue)
    end_time = time.perf_counter()
    elapsed = end_time - start_time

    print("total lines: %d" % total_lines)
    print("time cost: %.4f" % elapsed)
    # Guard against a zero interval so the report never raises.
    speed = total_lines / elapsed if elapsed > 0 else 0.0
    print("speed: %.4f it/s" % speed)
    if embedding_list:
        print("dimensions: %d" % len(embedding_list[0]))
    else:
        # An input with no non-blank lines yields no embeddings to measure.
        print("dimensions: n/a (no non-empty lines in input)")

    await noawait.end()
|
||
|
|
||
|
if __name__ == "__main__":
    # Script entry point: drive the benchmark coroutine to completion on the
    # project-provided event loop.
    benchmark = main()
    loop.run_until_complete(benchmark)
|