-
This is working with OpenAI and LM Studio:

```python
from openai import OpenAI

client = OpenAI(base_url='http://localhost:12345/v1', api_key='na')

# Use the following call to list the available models:
# model_list = client.models.list()
# print(model_list)

chat_completion = client.chat.completions.create(
    model="C:\\AI LLMS\\gemma-3-4B-it-QAT-Q4_0.gguf",
    messages=[
        {
            "role": "user",
            "content": "Tell me something about large language models."
        }
    ],
    stream=True,
)

thinking_buf = ""
generation_buf = ""
in_think = False

for chunk in chat_completion:
    data = chunk.choices[0].delta.content or ""

    # Detect whether a think block starts in this chunk
    if "<think>" in data:
        in_think = True
        data = data.split("<think>", 1)[1]
        print("(🧠 thinking started...) ", end="", flush=True)

    # Detect whether the think block ends in this chunk
    if "</think>" in data:
        before, data = data.split("</think>", 1)
        thinking_buf += before
        print(before, end="", flush=True)
        in_think = False
        print(" (🧠 thinking finished...) ", end="", flush=True)

    # Route the remaining text to the matching buffer and echo it once
    if in_think:
        thinking_buf += data
    else:
        generation_buf += data
    print(data, end="", flush=True)
```
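
Note that `split("<think>")` assumes each tag arrives whole inside a single streamed delta; with some servers a tag can be cut across two chunks. Below is a minimal sketch of a buffered variant that tolerates split tags, reusing the `chat_completion` stream from above. The helper `route_delta` is hypothetical, not part of the OpenAI SDK:

```python
OPEN, CLOSE = "<think>", "</think>"

pending = ""           # carry-over tail that might contain a partial tag
in_think = False
thinking_buf, generation_buf = "", ""

def route_delta(text: str) -> None:
    """Append text to the active buffer and echo it to the console."""
    global thinking_buf, generation_buf
    if not text:
        return
    if in_think:
        thinking_buf += text
    else:
        generation_buf += text
    print(text, end="", flush=True)

for chunk in chat_completion:
    pending += chunk.choices[0].delta.content or ""
    while True:
        tag = CLOSE if in_think else OPEN
        idx = pending.find(tag)
        if idx == -1:
            # Emit all but the last len(tag)-1 chars, which could be
            # the start of a tag cut off by the chunk boundary.
            safe = max(0, len(pending) - (len(tag) - 1))
            route_delta(pending[:safe])
            pending = pending[safe:]
            break
        route_delta(pending[:idx])            # text before the tag
        pending = pending[idx + len(tag):]    # drop the tag itself
        in_think = not in_think

route_delta(pending)  # flush whatever is left when the stream ends
```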
-
How do I get this from e.g. Qwen3? A prompt containing /think adds a paragraph with reasoning.
How do I activate --jinja --chat-template-file "xxx"?
https://qwen.readthedocs.io/en/latest/
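
For what it's worth: if the server is llama.cpp's llama-server, `--jinja` enables Jinja chat-template rendering and `--chat-template-file` points it at a custom template; both are launch flags, e.g. `llama-server -m model.gguf --jinja --chat-template-file template.jinja` (the file names here are placeholders). The Qwen3 docs describe `/think` and `/no_think` as soft switches appended to the user message. A minimal sketch against the same OpenAI-compatible endpoint; the port and model id are assumptions:

```python
from openai import OpenAI

# Assumed local OpenAI-compatible server (e.g. llama-server started
# with --jinja); base_url and model id are placeholders.
client = OpenAI(base_url="http://localhost:12345/v1", api_key="na")

resp = client.chat.completions.create(
    model="qwen3-4b",  # placeholder model id
    messages=[
        {
            "role": "user",
            # Qwen3 soft switch: append /think to request reasoning,
            # /no_think to suppress it.
            "content": "Tell me something about large language models. /think",
        }
    ],
)
print(resp.choices[0].message.content)
```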