
Commit 5d43093

Add example for streaming from custom action.
1 parent 8bb50af commit 5d43093

File tree

1 file changed (+69 -0 lines changed)

examples/scripts/demo_streaming.py (+69)
@@ -16,8 +16,14 @@
 """Demo script."""
 import asyncio
 import logging
+from typing import Optional
+
+from langchain_core.language_models import BaseLLM
+from langchain_core.runnables import RunnableConfig
 
 from nemoguardrails import LLMRails, RailsConfig
+from nemoguardrails.actions import action
+from nemoguardrails.context import streaming_handler_var
 from nemoguardrails.streaming import StreamingHandler
 
 logging.basicConfig(level=logging.INFO)
@@ -78,7 +84,70 @@ async def demo_hf_pipeline():
             # Or do something else with the token
 
 
+async def demo_streaming_from_custom_action():
+    """Demo of streaming chunks from a custom action."""
+    config = RailsConfig.from_content(
+        yaml_content="""
+            models:
+              - type: main
+                engine: openai
+                model: gpt-4
+
+            # We're not interested in the user message canonical forms, since we
+            # are only using a generic flow with `user ...`. So, we compute it purely
+            # based on the embedding, without any additional LLM call.
+            rails:
+              dialog:
+                user_messages:
+                  embeddings_only: True
+
+            streaming: True
+        """,
+        colang_content="""
+            # We need to have at least one canonical form to enable dialog rails.
+            define user ask question
+              "..."
+
+            define flow
+              user ...
+              # Here we call the custom action, which will stream its response.
+              $result = execute call_llm(user_query=$user_message)
+
+              # In this case, we also return the result as the final message.
+              # This is optional.
+              bot $result
+        """,
+    )
+    app = LLMRails(config, verbose=True)
+
+    @action(is_system_action=True)
+    async def call_llm(user_query: str, llm: Optional[BaseLLM]) -> str:
+        call_config = RunnableConfig(callbacks=[streaming_handler_var.get()])
+        response = await llm.ainvoke(user_query, config=call_config)
+        return response.content
+
+    app.register_action(call_llm)
+
+    history = [{"role": "user", "content": "Write a short paragraph about France."}]
+
+    streaming_handler = StreamingHandler()
+    streaming_handler_var.set(streaming_handler)
+
+    async def process_tokens():
+        async for chunk in streaming_handler:
+            print(f"CHUNK: {chunk}")
+            # Or do something else with the token
+
+    asyncio.create_task(process_tokens())
+
+    result = await app.generate_async(
+        messages=history, streaming_handler=streaming_handler
+    )
+    print(result)
+
+
 if __name__ == "__main__":
     asyncio.run(demo_1())
     asyncio.run(demo_2())
     # asyncio.run(demo_hf_pipeline())
+    asyncio.run(demo_streaming_from_custom_action())
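Usage note: the streamed chunks can also be consumed without wiring a StreamingHandler by hand. The sketch below is a minimal, untested variant of the example above; it assumes the LLMRails.stream_async helper described in the NeMo Guardrails streaming documentation, and the openai / gpt-4 settings are placeholders carried over from the demo config.

import asyncio

from nemoguardrails import LLMRails, RailsConfig

# Same model settings as the demo above; swap in whatever main model you use.
YAML_CONFIG = """
models:
  - type: main
    engine: openai
    model: gpt-4

streaming: True
"""


async def main() -> None:
    config = RailsConfig.from_content(yaml_content=YAML_CONFIG)
    app = LLMRails(config)

    history = [{"role": "user", "content": "Write a short paragraph about France."}]

    # stream_async is assumed to yield text chunks as the response is generated.
    async for chunk in app.stream_async(messages=history):
        print(f"CHUNK: {chunk}")


if __name__ == "__main__":
    asyncio.run(main())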
