
Commit 0b0ed11

integration: Add Hugging Face local models; ST for embeddings (#402)
Co-authored-by: Marcus Schiesser <[email protected]>
1 parent 1fe21f8 commit 0b0ed11

File tree

7 files changed: +132 -35 lines

.changeset/plenty-pumpkins-fold.md (+5)

@@ -0,0 +1,5 @@
+---
+"create-llama": patch
+---
+
+Add local models via Hugging Face; use Sentence Transformers w. ONNX instead of FastEmbed (support for more models, etc)
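In practice this swaps the default embedding path from FastEmbedEmbedding to HuggingFaceEmbedding, which wraps Sentence Transformers and exposes its backend selection. A minimal sketch of the new default setup (not verbatim from this commit; it assumes `llama-index-embeddings-huggingface` plus an ONNX-capable `sentence-transformers` install):

    # Sketch: default local embedding setup after this change (assumed usage)
    from llama_index.core import Settings
    from llama_index.embeddings.huggingface import HuggingFaceEmbedding

    Settings.embed_model = HuggingFaceEmbedding(
        model_name="all-MiniLM-L6-v2",  # default model from this commit
        backend="onnx",  # "torch" and "openvino" are also supported
    )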

helpers/env-variables.ts (+14)

@@ -336,6 +336,20 @@ const getModelEnvs = (modelConfig: ModelConfig): EnvVar[] => {
           },
         ]
       : []),
+    ...(modelConfig.provider === "huggingface"
+      ? [
+          {
+            name: "EMBEDDING_BACKEND",
+            description:
+              "The backend to use for the Sentence Transformers embedding model, either 'torch', 'onnx', or 'openvino'. Defaults to 'onnx'.",
+          },
+          {
+            name: "EMBEDDING_TRUST_REMOTE_CODE",
+            description:
+              "Whether to trust remote code for the embedding model, required for some models with custom code.",
+          },
+        ]
+      : []),
     ...(modelConfig.provider === "t-systems"
       ? [
           {
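The generated Python backend reads these two variables with the defaults the descriptions state; roughly (a sketch mirroring `init_huggingface_embedding` in settings.py below):

    import os

    # "onnx" unless overridden; only trust remote code when explicitly opted in
    backend = os.getenv("EMBEDDING_BACKEND", "onnx")
    trust_remote_code = os.getenv("EMBEDDING_TRUST_REMOTE_CODE", "false").lower() == "true"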

helpers/providers/huggingface.ts (+61)

@@ -0,0 +1,61 @@
+import prompts from "prompts";
+import { ModelConfigParams } from ".";
+import { questionHandlers, toChoice } from "../../questions/utils";
+
+const MODELS = ["HuggingFaceH4/zephyr-7b-alpha"];
+type ModelData = {
+  dimensions: number;
+};
+const EMBEDDING_MODELS: Record<string, ModelData> = {
+  "all-MiniLM-L6-v2": { dimensions: 384 },
+};
+
+const DEFAULT_MODEL = MODELS[0];
+const DEFAULT_EMBEDDING_MODEL = Object.keys(EMBEDDING_MODELS)[0];
+const DEFAULT_DIMENSIONS = Object.values(EMBEDDING_MODELS)[0].dimensions;
+
+type HuggingfaceQuestionsParams = {
+  askModels: boolean;
+};
+
+export async function askHuggingfaceQuestions({
+  askModels,
+}: HuggingfaceQuestionsParams): Promise<ModelConfigParams> {
+  const config: ModelConfigParams = {
+    model: DEFAULT_MODEL,
+    embeddingModel: DEFAULT_EMBEDDING_MODEL,
+    dimensions: DEFAULT_DIMENSIONS,
+    isConfigured(): boolean {
+      return true;
+    },
+  };
+
+  if (askModels) {
+    const { model } = await prompts(
+      {
+        type: "select",
+        name: "model",
+        message: "Which Hugging Face model would you like to use?",
+        choices: MODELS.map(toChoice),
+        initial: 0,
+      },
+      questionHandlers,
+    );
+    config.model = model;
+
+    const { embeddingModel } = await prompts(
+      {
+        type: "select",
+        name: "embeddingModel",
+        message: "Which embedding model would you like to use?",
+        choices: Object.keys(EMBEDDING_MODELS).map(toChoice),
+        initial: 0,
+      },
+      questionHandlers,
+    );
+    config.embeddingModel = embeddingModel;
+    config.dimensions = EMBEDDING_MODELS[embeddingModel].dimensions;
+  }
+
+  return config;
+}

helpers/providers/index.ts (+5)

@@ -5,6 +5,7 @@ import { askAnthropicQuestions } from "./anthropic";
 import { askAzureQuestions } from "./azure";
 import { askGeminiQuestions } from "./gemini";
 import { askGroqQuestions } from "./groq";
+import { askHuggingfaceQuestions } from "./huggingface";
 import { askLLMHubQuestions } from "./llmhub";
 import { askMistralQuestions } from "./mistral";
 import { askOllamaQuestions } from "./ollama";

@@ -39,6 +40,7 @@ export async function askModelConfig({
 
   if (framework === "fastapi") {
     choices.push({ title: "T-Systems", value: "t-systems" });
+    choices.push({ title: "Huggingface", value: "huggingface" });
   }
   const { provider } = await prompts(
     {

@@ -76,6 +78,9 @@
     case "t-systems":
       modelConfig = await askLLMHubQuestions({ askModels });
       break;
+    case "huggingface":
+      modelConfig = await askHuggingfaceQuestions({ askModels });
+      break;
     default:
       modelConfig = await askOpenAIQuestions({
         openAiKey,

helpers/python.ts (+14 -16)

@@ -173,35 +173,23 @@ const getAdditionalDependencies = (
       }
       break;
     case "groq":
-      // Fastembed==0.2.0 does not support python3.13 at the moment
-      // Fixed the python version less than 3.13
-      dependencies.push({
-        name: "python",
-        version: "^3.11,<3.13",
-      });
       dependencies.push({
         name: "llama-index-llms-groq",
         version: "0.2.0",
       });
       dependencies.push({
-        name: "llama-index-embeddings-fastembed",
-        version: "^0.2.0",
+        name: "llama-index-embeddings-huggingface",
+        version: "^0.3.1",
       });
       break;
     case "anthropic":
-      // Fastembed==0.2.0 does not support python3.13 at the moment
-      // Fixed the python version less than 3.13
-      dependencies.push({
-        name: "python",
-        version: "^3.11,<3.13",
-      });
       dependencies.push({
         name: "llama-index-llms-anthropic",
         version: "0.3.0",
       });
       dependencies.push({
-        name: "llama-index-embeddings-fastembed",
-        version: "^0.2.0",
+        name: "llama-index-embeddings-huggingface",
+        version: "^0.3.1",
       });
       break;
     case "gemini":

@@ -234,6 +222,16 @@ const getAdditionalDependencies = (
         version: "0.2.4",
       });
       break;
+    case "huggingface":
+      dependencies.push({
+        name: "llama-index-llms-huggingface",
+        version: "^0.3.5",
+      });
+      dependencies.push({
+        name: "llama-index-embeddings-huggingface",
+        version: "^0.3.1",
+      });
+      break;
     case "t-systems":
       dependencies.push({
         name: "llama-index-agent-openai",

helpers/types.ts (+1)

@@ -9,6 +9,7 @@ export type ModelProvider =
   | "gemini"
   | "mistral"
   | "azure-openai"
+  | "huggingface"
   | "t-systems";
 export type ModelConfig = {
   provider: ModelProvider;

templates/components/settings/python/settings.py (+32 -19)

@@ -21,6 +21,8 @@ def init_settings():
             init_mistral()
         case "azure-openai":
             init_azure_openai()
+        case "huggingface":
+            init_huggingface()
         case "t-systems":
             from .llmhub import init_llmhub
 

@@ -113,29 +115,40 @@ def init_azure_openai():
     )
 
 
-def init_fastembed():
+def init_huggingface_embedding():
     try:
-        from llama_index.embeddings.fastembed import FastEmbedEmbedding
+        from llama_index.embeddings.huggingface import HuggingFaceEmbedding
     except ImportError:
         raise ImportError(
-            "FastEmbed support is not installed. Please install it with `poetry add llama-index-embeddings-fastembed`"
+            "Hugging Face support is not installed. Please install it with `poetry add llama-index-embeddings-huggingface`"
         )
 
-    embed_model_map: Dict[str, str] = {
-        # Small and multilingual
-        "all-MiniLM-L6-v2": "sentence-transformers/all-MiniLM-L6-v2",
-        # Large and multilingual
-        "paraphrase-multilingual-mpnet-base-v2": "sentence-transformers/paraphrase-multilingual-mpnet-base-v2",
-    }
+    embedding_model = os.getenv("EMBEDDING_MODEL", "all-MiniLM-L6-v2")
+    backend = os.getenv("EMBEDDING_BACKEND", "onnx")  # "torch", "onnx", or "openvino"
+    trust_remote_code = (
+        os.getenv("EMBEDDING_TRUST_REMOTE_CODE", "false").lower() == "true"
+    )
+
+    Settings.embed_model = HuggingFaceEmbedding(
+        model_name=embedding_model,
+        trust_remote_code=trust_remote_code,
+        backend=backend,
+    )
+
 
-    embedding_model = os.getenv("EMBEDDING_MODEL")
-    if embedding_model is None:
-        raise ValueError("EMBEDDING_MODEL environment variable is not set")
+def init_huggingface():
+    try:
+        from llama_index.llms.huggingface import HuggingFaceLLM
+    except ImportError:
+        raise ImportError(
+            "Hugging Face support is not installed. Please install it with `poetry add llama-index-llms-huggingface` and `poetry add llama-index-embeddings-huggingface`"
+        )
 
-    # This will download the model automatically if it is not already downloaded
-    Settings.embed_model = FastEmbedEmbedding(
-        model_name=embed_model_map[embedding_model]
+    Settings.llm = HuggingFaceLLM(
+        model_name=os.getenv("MODEL"),
+        tokenizer_name=os.getenv("MODEL"),
     )
+    init_huggingface_embedding()
 
 
 def init_groq():

@@ -147,8 +160,8 @@ def init_groq():
         )
 
     Settings.llm = Groq(model=os.getenv("MODEL"))
-    # Groq does not provide embeddings, so we use FastEmbed instead
-    init_fastembed()
+    # Groq does not provide embeddings, so we use open Sentence Transformer models instead
+    init_huggingface_embedding()
 
 
 def init_anthropic():

@@ -168,8 +181,8 @@ def init_anthropic():
     }
 
     Settings.llm = Anthropic(model=model_map[os.getenv("MODEL")])
-    # Anthropic does not provide embeddings, so we use FastEmbed instead
-    init_fastembed()
+    # Anthropic does not provide embeddings, so we use open Sentence Transformer models instead
+    init_huggingface_embedding()
 
 
 def init_gemini():
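Taken together, a generated FastAPI project can now run fully locally. A hypothetical smoke test (the `MODEL_PROVIDER` variable name and the `app.settings` import path are assumptions based on the template layout, not shown in this diff):

    import os

    os.environ["MODEL_PROVIDER"] = "huggingface"  # assumed dispatch variable
    os.environ["MODEL"] = "HuggingFaceH4/zephyr-7b-alpha"
    os.environ["EMBEDDING_MODEL"] = "all-MiniLM-L6-v2"
    os.environ["EMBEDDING_BACKEND"] = "onnx"

    from app.settings import init_settings  # assumed module path

    init_settings()  # sets Settings.llm (HuggingFaceLLM) and Settings.embed_model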
