 """Demo script."""
 import asyncio
 import logging
+from typing import Optional
+
+from langchain_core.language_models import BaseLLM
+from langchain_core.runnables import RunnableConfig
 
 from nemoguardrails import LLMRails, RailsConfig
+from nemoguardrails.actions import action
+from nemoguardrails.context import streaming_handler_var
 from nemoguardrails.streaming import StreamingHandler
 
 logging.basicConfig(level=logging.INFO)
@@ -78,7 +84,70 @@ async def demo_hf_pipeline():
             # Or do something else with the token
 
 
+async def demo_streaming_from_custom_action():
+    """Demo of streaming chunks from a custom action."""
+    config = RailsConfig.from_content(
+        yaml_content="""
+        models:
+          - type: main
+            engine: openai
+            model: gpt-4
+
+        # We're not interested in the user message canonical forms, since we
+        # are only using a generic flow with `user ...`. So, we compute it purely
+        # based on the embedding, without any additional LLM call.
+        rails:
+          dialog:
+            user_messages:
+              embeddings_only: True
+
+        streaming: True
+        """,
+        colang_content="""
+        # We need to have at least one canonical form to enable dialog rails.
+        define user ask question
+          "..."
+
+        define flow
+          user ...
+          # Here we call the custom action, which streams the LLM tokens back
+          # through the registered streaming handler.
+          $result = execute call_llm(user_query=$user_message)
+
+          # In this case, we also return the result as the final message.
+          # This is optional.
+          bot $result
+        """,
+    )
+    app = LLMRails(config, verbose=True)
+
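+    # A custom action that calls the LLM directly. The current streaming
+    # handler (taken from the `streaming_handler_var` context variable, set
+    # further below) is attached as a callback so tokens are streamed as
+    # they are generated.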
+    @action(is_system_action=True)
+    async def call_llm(user_query: str, llm: Optional[BaseLLM]) -> str:
+        call_config = RunnableConfig(callbacks=[streaming_handler_var.get()])
+        response = await llm.ainvoke(user_query, config=call_config)
+        return response.content
+
+    app.register_action(call_llm)
+
+    history = [{"role": "user", "content": "Write a short paragraph about France."}]
+
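+    # Create the streaming handler and store it in the context variable so
+    # that `call_llm` above can pick it up via `streaming_handler_var.get()`.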
+    streaming_handler = StreamingHandler()
+    streaming_handler_var.set(streaming_handler)
+
+    async def process_tokens():
+        async for chunk in streaming_handler:
+            print(f"CHUNK: {chunk}")
+            # Or do something else with the token
+
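+    # Consume the streamed chunks concurrently with the generation call below.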
+    asyncio.create_task(process_tokens())
+
+    result = await app.generate_async(
+        messages=history, streaming_handler=streaming_handler
+    )
+    print(result)
+
+
 if __name__ == "__main__":
     asyncio.run(demo_1())
     asyncio.run(demo_2())
     # asyncio.run(demo_hf_pipeline())
+    asyncio.run(demo_streaming_from_custom_action())