@@ -322,6 +322,8 @@ Using the API is also simple. See the example below:

import openai

openai.api_base = "http://localhost:8000/v1"
openai.api_key = "none"

# create a request activating streaming response
for chunk in openai.ChatCompletion.create(
    model="Qwen-7B",
    messages=[
@@ -331,6 +333,16 @@ for chunk in openai.ChatCompletion.create(
):
    if hasattr(chunk.choices[0].delta, "content"):
        print(chunk.choices[0].delta.content, end="", flush=True)

# create a request not activating streaming response
response = openai.ChatCompletion.create(
    model="Qwen-7B",
    messages=[
        {"role": "user", "content": "你好"}
    ],
    stream=False
)
print(response.choices[0].message.content)
```