Commit ad06946 — Update README.md
1 parent 134b008

15 files changed: +895 −41 lines

.DS_Store — 6 KB binary file (not shown)

README.md

+18 −41

```diff
@@ -1,57 +1,34 @@
-# Project Name
+# Unlocking Generative AI with Phi-3-mini: A Guide to Inference and Deployment
 
-(short, 1-3 sentenced, description of the project)
+Discover how Phi-3-mini, a new series of models from Microsoft, enables deployment of Large Language Models (LLMs) on edge and IoT devices. Learn how to use Semantic Kernel, Ollama/LlamaEdge, and ONNX Runtime to access and run inference with Phi-3-mini models, and explore the possibilities of generative AI in various application scenarios.
 
 ## Features
 
-This project framework provides the following features:
+Inference of the phi3-mini model with:
 
-* Feature 1
-* Feature 2
-* ...
+* Semantic Kernel
+* Ollama
+* LlamaEdge
+* ONNX Runtime
 
 ## Getting Started
 
 ### Prerequisites
 
-(ideally very short, if any)
+- macOS / Windows / Linux
+- Python 3.10+
 
-- OS
-- Library version
-- ...
+### Guidelines
 
-### Installation
+Please read my blog at https://aka.ms/phi3gettingstarted to run the demo.
 
-(ideally very short)
-
-- npm install [package name]
-- mvn install
-- ...
-
-### Quickstart
-(Add steps to get up and running quickly)
-
-1. git clone [repository clone url]
-2. cd [repository name]
-3. ...
-
-## Demo
-
-A demo app is included to show how to use the project.
-
-To run the demo, follow these steps:
-
-(Add steps to start up the demo)
-
-1.
-2.
-3.
 
 ## Resources
 
-(Any additional resources or related projects)
-
-- Link to supporting information
-- Link to similar sample
-- ...
+- Phi-3 Azure blog: https://aka.ms/phi3blog-april
+- Phi-3 technical report: https://aka.ms/phi3-tech-report
+- Learn ONNX Runtime Generative AI: https://github.com/microsoft/onnxruntime-genai
+- Learn about Semantic Kernel: https://aka.ms/SemanticKernel
+- Read the Semantic Kernel Cookbook: https://aka.ms/SemanticKernelCookBook
+- Learn about LlamaEdge: https://github.com/LlamaEdge/LlamaEdge
+- Learn about Ollama: https://ollama.com/
```
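Of the four inference routes listed in the README, this commit ships files for Ollama (ollama/Modelfile), ONNX Runtime (onnx/run-onnx.ipynb), and Semantic Kernel (semantickernel/); LlamaEdge is covered only by the linked blog. For orientation, LlamaEdge's llama-api-server is OpenAI-compatible, so a client call could look like the minimal sketch below — the port, the model name, and the use of the openai package as a plain HTTP client are all assumptions, not part of this commit:

```python
from openai import OpenAI  # pip install openai; used here only as a generic client

# Assumption: LlamaEdge's llama-api-server is running a Phi-3-mini GGUF
# model and listening on port 8080 (check the LlamaEdge docs for your setup).
client = OpenAI(base_url="http://localhost:8080/v1", api_key="not-needed")

response = client.chat.completions.create(
    model="phi-3-mini",  # hypothetical; use the model name your server registers
    messages=[{"role": "user", "content": "Can you introduce yourself?"}],
)
print(response.choices[0].message.content)
```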

ollama/Modelfile

+6

@@ -0,0 +1,6 @@

```
FROM Your phi3-mini gguf location
TEMPLATE """<|user|>
{{.Prompt}}<|end|>
<|assistant|>"""
PARAMETER stop <|end|>
PARAMETER num_ctx 4096
```

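To try this Modelfile, register it with Ollama (for example `ollama create phi3-mini -f Modelfile`, after editing the FROM line to point at a real Phi-3-mini GGUF file) and then chat via `ollama run phi3-mini` or the local REST API. A minimal sketch against that API — the model name `phi3-mini` is whatever you chose at `ollama create`:

```python
import json
import urllib.request

# Assumptions: the model was registered with `ollama create phi3-mini -f Modelfile`
# and the Ollama server is listening on its default port, 11434.
payload = {
    "model": "phi3-mini",  # the name chosen at `ollama create`
    "prompt": "Can you introduce yourself?",
    "stream": False,  # return one JSON object instead of a token stream
}
req = urllib.request.Request(
    "http://localhost:11434/api/generate",
    data=json.dumps(payload).encode("utf-8"),
    headers={"Content-Type": "application/json"},
)
with urllib.request.urlopen(req) as resp:
    print(json.loads(resp.read())["response"])
```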
onnx/run-onnx.ipynb

+135

@@ -0,0 +1,135 @@

The new notebook's code cells, in order (Python 3.10 kernel "llamaenv"):

```python
import onnxruntime_genai as og

# Load the Phi-3-mini ONNX model from disk
model = og.Model('Your phi3-mini ONNX model path')

tokenizer = og.Tokenizer(model)
tokenizer_stream = tokenizer.create_stream()

search_options = {"max_length": 1024, "temperature": 0.6}

params = og.GeneratorParams(model)
params.try_use_cuda_graph_with_max_batch_size(0)
params.set_search_options(**search_options)

# Prompt using the Phi-3 chat template
prompt = "<|system|>You are a helpful AI assistant.<|end|><|user|>Can you introduce yourself?<|end|><|assistant|>"
input_tokens = tokenizer.encode(prompt)
params.input_ids = input_tokens

generator = og.Generator(model, params)

# Generate token by token, streaming the decoded text as it arrives
while not generator.is_done():
    generator.compute_logits()
    generator.generate_next_token()

    new_token = generator.get_next_tokens()[0]
    print(tokenizer_stream.decode(new_token), end='', flush=True)
```

Recorded output of the final cell:

```
 Hello! I'm an advanced AI developed to assist and provide information to users like you. My purpose is to help answer your questions, provide guidance, and offer insights on a wide range of topics, from general knowledge to specific subjects. I'm designed to continuously learn and improve, ensuring that I can assist you effectively. I don't have personal experiences or emotions, but I'm here to help you find the information you need. How can I assist you today?
```

semantickernel/.DS_Store — 6 KB binary file (not shown)

semantickernel/server/.DS_Store — 6 KB binary file (not shown)

semantickernel/server/.env

+2

@@ -0,0 +1,2 @@

```
CHAT_COMPLETION_URL = 'Your phi3-mini location'
EMBEDDING_URL = ''
```
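The server script that consumes this file (local_llm_service.py) is not part of this diff, but a .env like this is typically loaded with python-dotenv. A minimal sketch under that assumption:

```python
import os

from dotenv import load_dotenv  # assumption: python-dotenv is among the pip requirements

load_dotenv()  # copy key=value pairs from .env into the process environment

# The same keys the committed .env defines:
chat_completion_url = os.environ.get("CHAT_COMPLETION_URL", "")
embedding_url = os.environ.get("EMBEDDING_URL", "")

print(f"chat model location: {chat_completion_url!r}")
print(f"embedding model location: {embedding_url!r}")
```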

semantickernel/server/README.md

+86

@@ -0,0 +1,86 @@
# Phi-3-Mini for Semantic Kernel .NET Developer

*Supports Semantic Kernel 1.7.1*

At this stage it is adapted for macOS and Linux environments, and the implementations of ChatCompletion and Embedding are complete.

**ChatCompletion** is adapted to the LLM *phi3-mini*.

**Samples**

1. Download your LLM first, then use pip to install the Python dependencies:

```bash
pip install -r requirement.txt
```

2. Configure the location of your ChatCompletion model in .env:

```txt
CHAT_COMPLETION_URL = 'Your chat completion model location'
```

3. Start your local LLM HTTP server (a quick way to verify it is running is sketched after these samples):

```bash
python local_llm_service.py
```

4. Add the Microsoft.SemanticKernel, Microsoft.SemanticKernel.Connectors.AI.HuggingFace, and Microsoft.SemanticKernel.Connectors.Memory.Qdrant packages (you can choose a different vector database).

5. Initialize the endpoint for ChatCompletion:

```csharp
string chat_endpoint = "http://localhost:5002/v1/chat/completions";
```

6. Sample 1 - ChatCompletion:

```csharp
using Microsoft.SemanticKernel;
using Microsoft.SemanticKernel.Connectors.Memory.Qdrant;
using Microsoft.SemanticKernel.Plugins.Memory;
using Microsoft.SemanticKernel.Connectors.AI.HuggingFace.TextEmbedding;

#pragma warning disable SKEXP0020

Kernel kernel = new KernelBuilder()
    .AddHuggingFaceTextGeneration(
        model: "phi-3-mini",
        endpoint: chat_endpoint)
    .Build();

var questionAnswerFunction = kernel.CreateFunctionFromPrompt(@"{{$input}}");

var result = await kernel.InvokeAsync(questionAnswerFunction, new() { ["input"] = "Can you introduce yourself?" });

Console.WriteLine(result.GetValue<string>());
```
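As referenced in step 3, the local server can be smoke-tested before wiring up Semantic Kernel. A minimal sketch, assuming local_llm_service.py exposes an OpenAI-style chat completions route (which its /v1/chat/completions path suggests; the exact request and response shapes are assumptions, since the server code is not part of this diff):

```python
import requests  # pip install requests

# Assumption: local_llm_service.py (step 3) serves an OpenAI-style
# chat completions API at the endpoint used in step 5.
resp = requests.post(
    "http://localhost:5002/v1/chat/completions",
    json={
        "model": "phi-3-mini",
        "messages": [{"role": "user", "content": "Can you introduce yourself?"}],
    },
    timeout=60,
)
resp.raise_for_status()

# OpenAI-style responses normally carry the text here; adjust if the
# local server returns a different shape.
print(resp.json()["choices"][0]["message"]["content"])
```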
