scripts/playground/frontend_reasoning.ipynb
Launch the server with a reasoning model (Qwen3-4B) and the matching reasoning parser.
from sglang import separate_reasoning, assistant_begin, assistant_end
from sglang import assistant, function, gen, system, user
from sglang import image
from sglang import RuntimeEndpoint, set_default_backend
from sglang.srt.utils import load_image
from sglang.test.test_utils import is_in_ci
from sglang.utils import print_highlight, terminate_process, wait_for_server
# In CI the launcher comes from a local `patch` module; otherwise use the one
# shipped in sglang.utils.
if not is_in_ci():
    from sglang.utils import launch_server_cmd
else:
    from patch import launch_server_cmd

# Launch the server with the qwen3 reasoning parser, then wait for it via
# wait_for_server before continuing. The port is returned by the launcher.
launch_command = (
    "python3 -m sglang.launch_server --model-path Qwen/Qwen3-4B"
    " --reasoning-parser qwen3 --host 0.0.0.0"
)
server_process, port = launch_server_cmd(launch_command)
server_url = f"http://localhost:{port}"
wait_for_server(server_url, process=server_process)
print(f"Server started on {server_url}")
Set the default backend. Note: you can set `chat_template_name` in `RuntimeEndpoint`.
# Register the locally launched server as the default backend, using the
# "qwen" chat template.
backend = RuntimeEndpoint(f"http://localhost:{port}", chat_template_name="qwen")
set_default_backend(backend)
Let's start with a basic question-answering task and see how the reasoning content is generated.
@function
def basic_qa(s, question: str):
    """Ask a single question and store the model's reply under ``answer``.

    Args:
        s: Program state; callers pass only ``question``, the ``@function``
            wrapper supplies this argument.
        question: User message to send to the model.
    """
    # Plain literal: the prompt has no placeholders, so no f-prefix is needed.
    s += system("You are a helpful assistant that can answer questions.")
    s += user(question)
    # Open and close the assistant turn explicitly around the generation.
    s += assistant_begin()
    s += gen("answer", max_tokens=512)
    s += assistant_end()
# Run the basic program once and display what was generated for "answer".
question = "List 3 countries and their capitals."
state = basic_qa(question)
answer_text = state["answer"]
print_highlight(answer_text)
With separate_reasoning, you can move the reasoning content to {param_name}_reasoning_content in the state.
@function
def basic_qa_separate_reasoning(s, question: str):
    """Ask a single question, wrapping the generation in ``separate_reasoning``.

    The generation is stored under ``answer``; callers in this notebook read
    the split-off reasoning from ``answer_reasoning_content``.

    Args:
        s: Program state; callers pass only ``question``, the ``@function``
            wrapper supplies this argument.
        question: User message to send to the model.
    """
    # Plain literal: the prompt has no placeholders, so no f-prefix is needed.
    s += system("You are a helpful assistant that can answer questions.")
    s += user(question)
    s += assistant_begin()
    # model_type selects the qwen3 reasoning format for the split.
    s += separate_reasoning(gen("answer", max_tokens=512), model_type="qwen3")
    s += assistant_end()
# Run the separated-reasoning program and inspect what it recorded.
question = "List 3 countries and their capitals."
reasoning_state = basic_qa_separate_reasoning(question)

# Names of all variables captured in the state.
captured_names = reasoning_state.stream_executor.variable_event.keys()
print_highlight(captured_names)

separated_reasoning = reasoning_state["answer_reasoning_content"]
print_highlight(f"\nSeparated Reasoning Content:\n{separated_reasoning}")

final_content = reasoning_state["answer"]
print_highlight(f"\n\nContent:\n{final_content}")

last_message = reasoning_state.messages()[-1]
print_highlight(f"\n\nMessages:\n{last_message}")
separate_reasoning can also be used in multi-turn conversations.
@function
def multi_turn_qa(s):
    """Run a two-turn conversation, separating reasoning on each turn.

    The two generations are stored under ``first_answer`` and
    ``second_answer``.

    Args:
        s: Program state; the caller passes no arguments, the ``@function``
            wrapper supplies this one.

    Returns:
        The state ``s`` after both turns have been appended.
    """
    # Plain literal: the prompt has no placeholders, so no f-prefix is needed.
    s += system("You are a helpful assistant that can answer questions.")
    s += user("Please give me a list of 3 countries and their capitals.")
    s += assistant(
        separate_reasoning(gen("first_answer", max_tokens=512), model_type="qwen3")
    )
    s += user("Please give me another list of 3 countries and their capitals.")
    s += assistant(
        separate_reasoning(gen("second_answer", max_tokens=512), model_type="qwen3")
    )
    return s
# Run the two-turn program, then show each answer followed by its separated
# reasoning. Every label is identical to the state key it displays.
reasoning_state = multi_turn_qa()
for var_name in (
    "first_answer",
    "first_answer_reasoning_content",
    "second_answer",
    "second_answer_reasoning_content",
):
    print_highlight(f"\n\n{var_name}:\n{reasoning_state[var_name]}")
SGLang's `separate_reasoning` is particularly useful when combined with Qwen3's thinking-mode soft switch: appending `/no_think` to the prompt asks the model to skip the thinking step.
# Same question as before, with the "/no_think" suffix appended to the prompt.
no_think_question = "List 3 countries and their capitals. /no_think"
reasoning_state = basic_qa_separate_reasoning(no_think_question)
for label, var_name in (
    ("Reasoning Content", "answer_reasoning_content"),
    ("Content", "answer"),
):
    print_highlight(f"{label}:\n{reasoning_state[var_name]}")
`separate_reasoning` can also be used with regular-expression-constrained generation.
@function
def regular_expression_gen(s):
    """Ask for an IP address and constrain the reply to an IPv4 pattern.

    The generation is stored under ``answer`` and wrapped in
    ``separate_reasoning``.

    Args:
        s: Program state; the caller passes no arguments, the ``@function``
            wrapper supplies this one.
    """
    # Four octets in 0-255 joined by literal dots. The dot must be escaped:
    # a bare "." matches any character, which would admit strings that are
    # not dotted-quad addresses.
    ipv4_pattern = (
        r"((25[0-5]|2[0-4]\d|[01]?\d\d?)\.){3}(25[0-5]|2[0-4]\d|[01]?\d\d?)"
    )
    s += user(
        "What is the IP address of the Google DNS servers? just provide the answer"
    )
    s += assistant(
        separate_reasoning(
            gen(
                "answer",
                temperature=0,
                regex=ipv4_pattern,
                max_tokens=512,
            ),
            model_type="qwen3",
        ),
    )
# Run the regex-constrained program and show the answer, then the reasoning.
reasoning_state = regular_expression_gen()

constrained_answer = reasoning_state["answer"]
print_highlight(f"Answer:\n{constrained_answer}")

reasoning_text = reasoning_state["answer_reasoning_content"]
print_highlight(f"\n\nReasoning Content:\n{reasoning_text}")