Commit ad06946 — Update README.md
1 parent 134b008

15 files changed: +895 −41 lines

.DS_Store — 6 KB binary file (not shown)

README.md

+18 −41

```diff
@@ -1,57 +1,34 @@
-# Project Name
+# Unlocking Generative AI with Phi-3-mini: A Guide to Inference and Deployment
 
-(short, 1-3 sentenced, description of the project)
+Discover how Phi-3-mini, a new series of models from Microsoft, enables deployment of Large Language Models (LLMs) on edge and IoT devices. Learn how to use Semantic Kernel, Ollama/LlamaEdge, and ONNX Runtime to access and run inference with Phi-3-mini models, and explore the possibilities of generative AI in various application scenarios.
 
 ## Features
 
-This project framework provides the following features:
+Inference of the phi3-mini model with:
 
-* Feature 1
-* Feature 2
-* ...
+* Semantic Kernel
+* Ollama
+* LlamaEdge
+* ONNX Runtime
 
 ## Getting Started
 
 ### Prerequisites
 
-(ideally very short, if any)
+- macOS / Windows / Linux
+- Python 3.10+
 
-- OS
-- Library version
-- ...
+### Guidelines
 
-### Installation
+Please read my blog at https://aka.ms/phi3gettingstarted to run the demo.
 
-(ideally very short)
-
-- npm install [package name]
-- mvn install
-- ...
-
-### Quickstart
-(Add steps to get up and running quickly)
-
-1. git clone [repository clone url]
-2. cd [repository name]
-3. ...
-
-## Demo
-
-A demo app is included to show how to use the project.
-
-To run the demo, follow these steps:
-
-(Add steps to start up the demo)
-
-1.
-2.
-3.
 
 ## Resources
 
-(Any additional resources or related projects)
-
-- Link to supporting information
-- Link to similar sample
-- ...
+- Phi-3 Azure blog: https://aka.ms/phi3blog-april
+- Phi-3 technical report: https://aka.ms/phi3-tech-report
+- Learn ONNX Runtime Generative AI: https://github.com/microsoft/onnxruntime-genai
+- Learn about Semantic Kernel: https://aka.ms/SemanticKernel
+- Read the Semantic Kernel Cookbook: https://aka.ms/SemanticKernelCookBook
+- Learn about LlamaEdge: https://github.com/LlamaEdge/LlamaEdge
+- Learn about Ollama: https://ollama.com/
```
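Of the four inference routes listed in the README, this commit ships files for Ollama (ollama/Modelfile), ONNX Runtime (onnx/run-onnx.ipynb), and Semantic Kernel (semantickernel/); LlamaEdge is covered only by the linked blog. For orientation, LlamaEdge's llama-api-server is OpenAI-compatible, so a client call could look like the minimal sketch below — the port, the model name, and the use of the openai package as a plain HTTP client are all assumptions, not part of this commit:

```python
from openai import OpenAI  # pip install openai; used here only as a generic client

# Assumption: LlamaEdge's llama-api-server is running a Phi-3-mini GGUF
# model and listening on port 8080 (check the LlamaEdge docs for your setup).
client = OpenAI(base_url="http://localhost:8080/v1", api_key="not-needed")

response = client.chat.completions.create(
    model="phi-3-mini",  # hypothetical; use the model name your server registers
    messages=[{"role": "user", "content": "Can you introduce yourself?"}],
)
print(response.choices[0].message.content)
```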

ollama/Modelfile

+6

@@ -0,0 +1,6 @@

```
FROM Your phi3-mini gguf location
TEMPLATE """<|user|>
{{.Prompt}}<|end|>
<|assistant|>"""
PARAMETER stop <|end|>
PARAMETER num_ctx 4096
```

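To try this Modelfile, register it with Ollama (for example `ollama create phi3-mini -f Modelfile`, after editing the FROM line to point at a real Phi-3-mini GGUF file) and then chat via `ollama run phi3-mini` or the local REST API. A minimal sketch against that API — the model name `phi3-mini` is whatever you chose at `ollama create`:

```python
import json
import urllib.request

# Assumptions: the model was registered with `ollama create phi3-mini -f Modelfile`
# and the Ollama server is listening on its default port, 11434.
payload = {
    "model": "phi3-mini",  # the name chosen at `ollama create`
    "prompt": "Can you introduce yourself?",
    "stream": False,  # return one JSON object instead of a token stream
}
req = urllib.request.Request(
    "http://localhost:11434/api/generate",
    data=json.dumps(payload).encode("utf-8"),
    headers={"Content-Type": "application/json"},
)
with urllib.request.urlopen(req) as resp:
    print(json.loads(resp.read())["response"])
```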
onnx/run-onnx.ipynb

+135

@@ -0,0 +1,135 @@

The new notebook's code cells, in order (Python 3.10 kernel "llamaenv"):

```python
import onnxruntime_genai as og

# Load the Phi-3-mini ONNX model from disk
model = og.Model('Your phi3-mini ONNX model path')

tokenizer = og.Tokenizer(model)
tokenizer_stream = tokenizer.create_stream()

search_options = {"max_length": 1024, "temperature": 0.6}

params = og.GeneratorParams(model)
params.try_use_cuda_graph_with_max_batch_size(0)
params.set_search_options(**search_options)

# Prompt using the Phi-3 chat template
prompt = "<|system|>You are a helpful AI assistant.<|end|><|user|>Can you introduce yourself?<|end|><|assistant|>"
input_tokens = tokenizer.encode(prompt)
params.input_ids = input_tokens

generator = og.Generator(model, params)

# Generate token by token, streaming the decoded text as it arrives
while not generator.is_done():
    generator.compute_logits()
    generator.generate_next_token()

    new_token = generator.get_next_tokens()[0]
    print(tokenizer_stream.decode(new_token), end='', flush=True)
```

Recorded output of the final cell:

```
 Hello! I'm an advanced AI developed to assist and provide information to users like you. My purpose is to help answer your questions, provide guidance, and offer insights on a wide range of topics, from general knowledge to specific subjects. I'm designed to continuously learn and improve, ensuring that I can assist you effectively. I don't have personal experiences or emotions, but I'm here to help you find the information you need. How can I assist you today?
```

semantickernel/.DS_Store — 6 KB binary file (not shown)

semantickernel/server/.DS_Store — 6 KB binary file (not shown)

semantickernel/server/.env

+2

@@ -0,0 +1,2 @@

```
CHAT_COMPLETION_URL = 'Your phi3-mini location'
EMBEDDING_URL = ''
```
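The server script that consumes this file (local_llm_service.py) is not part of this diff, but a .env like this is typically loaded with python-dotenv. A minimal sketch under that assumption:

```python
import os

from dotenv import load_dotenv  # assumption: python-dotenv is among the pip requirements

load_dotenv()  # copy key=value pairs from .env into the process environment

# The same keys the committed .env defines:
chat_completion_url = os.environ.get("CHAT_COMPLETION_URL", "")
embedding_url = os.environ.get("EMBEDDING_URL", "")

print(f"chat model location: {chat_completion_url!r}")
print(f"embedding model location: {embedding_url!r}")
```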

semantickernel/server/README.md

+86

@@ -0,0 +1,86 @@
# Phi-3-Mini for Semantic Kernel .NET Developer

*Supports Semantic Kernel 1.7.1*

At this stage it is adapted for macOS and Linux environments, and the implementations of ChatCompletion and Embedding are complete.

**ChatCompletion** is adapted to the LLM *phi3-mini*.

**Samples**

1. Download your LLM first, then use pip to install the Python dependencies:

```bash
pip install -r requirement.txt
```

2. Configure the location of your ChatCompletion model in .env:

```txt
CHAT_COMPLETION_URL = 'Your chat completion model location'
```

3. Start your local LLM HTTP server (a quick way to verify it is running is sketched after these samples):

```bash
python local_llm_service.py
```

4. Add the Microsoft.SemanticKernel, Microsoft.SemanticKernel.Connectors.AI.HuggingFace, and Microsoft.SemanticKernel.Connectors.Memory.Qdrant packages (you can choose a different vector database).

5. Initialize the endpoint for ChatCompletion:

```csharp
string chat_endpoint = "http://localhost:5002/v1/chat/completions";
```

6. Sample 1 - ChatCompletion:

```csharp
using Microsoft.SemanticKernel;
using Microsoft.SemanticKernel.Connectors.Memory.Qdrant;
using Microsoft.SemanticKernel.Plugins.Memory;
using Microsoft.SemanticKernel.Connectors.AI.HuggingFace.TextEmbedding;

#pragma warning disable SKEXP0020

Kernel kernel = new KernelBuilder()
    .AddHuggingFaceTextGeneration(
        model: "phi-3-mini",
        endpoint: chat_endpoint)
    .Build();

var questionAnswerFunction = kernel.CreateFunctionFromPrompt(@"{{$input}}");

var result = await kernel.InvokeAsync(questionAnswerFunction, new() { ["input"] = "Can you introduce yourself?" });

Console.WriteLine(result.GetValue<string>());
```
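As referenced in step 3, the local server can be smoke-tested before wiring up Semantic Kernel. A minimal sketch, assuming local_llm_service.py exposes an OpenAI-style chat completions route (which its /v1/chat/completions path suggests; the exact request and response shapes are assumptions, since the server code is not part of this diff):

```python
import requests  # pip install requests

# Assumption: local_llm_service.py (step 3) serves an OpenAI-style
# chat completions API at the endpoint used in step 5.
resp = requests.post(
    "http://localhost:5002/v1/chat/completions",
    json={
        "model": "phi-3-mini",
        "messages": [{"role": "user", "content": "Can you introduce yourself?"}],
    },
    timeout=60,
)
resp.raise_for_status()

# OpenAI-style responses normally carry the text here; adjust if the
# local server returns a different shape.
print(resp.json()["choices"][0]["message"]["content"])
```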
