@@ -484,7 +484,7 @@ async def predict(
     stop_words_ids = [tokenizer.encode(s)
                       for s in stop_words] if stop_words else None
-    delay_token_num = max([len(x) for x in stop_words])
+    delay_token_num = max([len(x) for x in stop_words]) if stop_words_ids else 0
     response_generator = model.chat_stream(tokenizer,
                                            query,
                                            history=history,
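
The first hunk guards the delay computation. When no stop words are configured, `stop_words_ids` is `None`, and the old line would call `max()` on an empty sequence, which raises `ValueError`. A minimal sketch of the failure mode and of the fallback the patched line provides, with the tokenizer stubbed out since only the control flow matters here:

    stop_words = []        # no stop words configured
    stop_words_ids = None  # what the comprehension above yields in that case
    # old line: max([len(x) for x in stop_words])
    #   -> ValueError: max() arg is an empty sequence
    # patched line: fall back to 0 when there is nothing to delay for
    delay_token_num = max([len(x) for x in stop_words]) if stop_words_ids else 0
    assert delay_token_num == 0
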
@@ -494,7 +494,7 @@ async def predict(
     for _new_response in response_generator:
         if len(_new_response) <= delay_token_num:
             continue
-        new_response = _new_response[:-delay_token_num]
+        new_response = _new_response[:-delay_token_num] if delay_token_num else _new_response
         if len(new_response) == current_length:
             continue
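
The second hunk fixes the slicing pitfall that the first change exposes: once `delay_token_num` can be 0, the unguarded `_new_response[:-delay_token_num]` would empty every chunk, because `-0` is just `0` and `s[:0]` is the empty string. A quick demonstration of why the conditional is needed:

    s = "hello world"
    assert s[:-0] == ""                  # -0 == 0, so this slices to nothing
    assert s[:-2] == "hello wor"         # a real delay trims the tail as intended
    delay = 0
    assert (s[:-delay] if delay else s) == s  # the patched expression keeps the text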