huggingface · Oct 21, 2024
diff --git a/‎.github/README.md
+6-1 b/‎.github/README.md
+6-1
diff --git a/‎.gitignore
+1 b/‎.gitignore
+1
diff --git a/‎assets/gradio_chatbot_demo.png
324 KB b/‎assets/gradio_chatbot_demo.png
324 KB
diff --git a/‎gradio_demos/chatbot_demo.ipynb
+1,316 b/‎gradio_demos/chatbot_demo.ipynb
+1,316
diff --git a/‎gradio_demos/chatbot_demo.py
+89 b/‎gradio_demos/chatbot_demo.py
+89
@@ -157,4 +157,9 @@ Seeking an entry-level RAG pipeline? This notebook guides you through building a
 ## Text Generation Inference (TGI) & API Inference with Llama Models
 Text Generation Inference (TGI) framework enables efficient and  scalable deployment of Llama models. In this notebook we'll learn how to integrate TGI for fast text generation and to consume already deployed Llama models via Inference API:
 
-* [Text Generation Inference (TGI) with Llama Models](../llama_tgi_api_inference/tgi_api_inference_recipe.ipynb) 
+* [Text Generation Inference (TGI) with Llama Models](../llama_tgi_api_inference/tgi_api_inference_recipe.ipynb) 
+
+## Chatbot Demo with Llama Models 
+Would you like to build a chatbot with Llama models? Here's a simple example to get you started.
+
+* [Chatbot with Llama Models](../gradio_demos/chatbot_demo.ipynb)
@@ -1 +1,2 @@
 .DS_Store
+.venv
@@ -0,0 +1,89 @@
+from huggingface_hub import login
+import gradio as gr
+import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
+
+# Log in to Hugging Face Hub
+login()
+
+# Determine the device to use (GPU if available, otherwise CPU)
+device = 0 if torch.cuda.is_available() else -1
+
+# Dictionary mapping model names to their Hugging Face Hub identifiers
+llama_models = {
+    "Llama 3 70B Instruct": "meta-llama/Meta-Llama-3-70B-Instruct",
+    "Llama 3 8B Instruct": "meta-llama/Meta-Llama-3-8B-Instruct",
+    "Llama 3.1 70B Instruct": "meta-llama/Llama-3.1-70B-Instruct",
+    "Llama 3.1 8B Instruct": "meta-llama/Llama-3.1-8B-Instruct",
+    "Llama 3.2 3B Instruct": "meta-llama/Llama-3.2-3B-Instruct",
+    "Llama 3.2 1B Instruct": "meta-llama/Llama-3.2-1B-Instruct",
+}
+
+# Function to load the model and tokenizer
+def load_model(model_name):
+    tokenizer = AutoTokenizer.from_pretrained(model_name)
+    model = AutoModelForCausalLM.from_pretrained(model_name)
+    generator = pipeline('text-generation', model=model, tokenizer=tokenizer, device=device)
+    return generator
+
+# Cache to store loaded models
+model_cache = {}
+
+# Function to generate chat responses
+def generate_chat(user_input, history, model_choice):
+    # Load the model if not already cached
+    if model_choice not in model_cache:
+        model_cache[model_choice] = load_model(llama_models[model_choice])
+    generator = model_cache[model_choice]
+
+    # Initial system prompt
+    system_prompt = {"role": "system", "content": "You are a helpful assistant"}
+
+    # Initialize history if it's None
+    if history is None:
+        history = [system_prompt]
+    
+    # Append user input to history
+    history.append({"role": "user", "content": user_input})
+
+    # Generate response using the model
+    response = generator(
+        history,
+        max_length=512,
+        pad_token_id=generator.tokenizer.eos_token_id,
+        do_sample=True,
+        temperature=0.7,
+        top_p=0.9
+    )[-1]["generated_text"][-1]["content"]
+
+    # Append model response to history
+    history.append({"role": "assistant", "content": response})
+    
+    return history
+
+# Create Gradio interface
+with gr.Blocks() as demo:
+    gr.Markdown("<h1><center>Chat with Llama Models</center></h1>")
+
+    # Dropdown to select model
+    model_choice = gr.Dropdown(list(llama_models.keys()), label="Select Llama Model")
+    # Chatbot interface
+    chatbot = gr.Chatbot(label="Chatbot Interface", type="messages")
+    # Textbox for user input
+    txt_input = gr.Textbox(show_label=False, placeholder="Type your message here...")
+
+    # Function to handle user input and generate response
+    def respond(user_input, chat_history, model_choice):
+        if model_choice is None:
+            model_choice = list(llama_models.keys())[0]
+        updated_history = generate_chat(user_input, chat_history, model_choice)
+        return "", updated_history
+
+    # Submit user input on pressing Enter
+    txt_input.submit(respond, [txt_input, chatbot, model_choice], [txt_input, chatbot])
+    # Button to submit user input
+    submit_btn = gr.Button("Submit")
+    submit_btn.click(respond, [txt_input, chatbot, model_choice], [txt_input, chatbot])
+
+# Launch the Gradio demo
+demo.launch()