
Commit 04419f1

Merge 'origin/master' into hipblas
2 parents bb16eff + d3494bb commit 04419f1

21 files changed: +934, -248 lines

.gitignore

+2-1
```
@@ -1,5 +1,6 @@
 *.o
 *.a
+*.so
 .DS_Store
 .build/
 .cache/
@@ -39,8 +40,8 @@ models/*
 /vdot
 /server
 /Pipfile
+/embd-input-test
 /libllama.so
-
 build-info.h
 arm_neon.h
 compile_commands.json
```

CMakeLists.txt

+1-1
```
@@ -333,9 +333,9 @@ if (LLAMA_HIPBLAS)
     add_library(ggml-rocm OBJECT ggml-cuda.cu ggml-cuda.h)
     target_compile_definitions(ggml-rocm PRIVATE GGML_CUDA_DMMV_X=${LLAMA_CUDA_DMMV_X})
     target_compile_definitions(ggml-rocm PRIVATE GGML_CUDA_DMMV_Y=${LLAMA_CUDA_DMMV_Y})
+    target_compile_definitions(ggml-rocm PRIVATE K_QUANTS_PER_ITERATION=${LLAMA_CUDA_KQUANTS_ITER})
     set_source_files_properties(ggml-cuda.cu PROPERTIES LANGUAGE CXX)
     target_link_libraries(ggml-rocm PRIVATE hip::device PUBLIC hip::host roc::hipblas)
-    add_compile_definitions(K_QUANTS_PER_ITERATION=${LLAMA_CUDA_KQUANTS_ITER})

     if (LLAMA_STATIC)
         message(FATAL_ERROR "Static linking not supported for HIP/ROCm")
```

Makefile

+9-2
```
@@ -1,5 +1,5 @@
 # Define the default target now so that it is always the first target
-BUILD_TARGETS = main quantize quantize-stats perplexity embedding vdot train-text-from-scratch simple
+BUILD_TARGETS = main quantize quantize-stats perplexity embedding vdot train-text-from-scratch simple libembdinput.so embd-input-test

 ifdef LLAMA_BUILD_SERVER
 BUILD_TARGETS += server
@@ -295,7 +295,7 @@ libllama.so: llama.o ggml.o $(OBJS)
 	$(CXX) $(CXXFLAGS) -shared -fPIC -o $@ $^ $(LDFLAGS)

 clean:
-	rm -vf *.o *.so main quantize quantize-stats perplexity embedding benchmark-matmult save-load-state server vdot train-text-from-scratch build-info.h
+	rm -vf *.o *.so main quantize quantize-stats perplexity embedding benchmark-matmult save-load-state server vdot train-text-from-scratch embd-input-test build-info.h

 #
 # Examples
@@ -328,6 +328,13 @@ save-load-state: examples/save-load-state/save-load-state.cpp build-info.h ggml.
 server: examples/server/server.cpp examples/server/httplib.h examples/server/json.hpp build-info.h ggml.o llama.o common.o $(OBJS)
 	$(CXX) $(CXXFLAGS) -Iexamples/server $(filter-out %.h,$(filter-out %.hpp,$^)) -o $@ $(LDFLAGS)

+libembdinput.so: examples/embd-input/embd-input.h examples/embd-input/embd-input-lib.cpp build-info.h ggml.o llama.o common.o $(OBJS)
+	$(CXX) --shared $(CXXFLAGS) $(filter-out %.h,$(filter-out %.hpp,$^)) -o $@ $(LDFLAGS)
+
+
+embd-input-test: libembdinput.so examples/embd-input/embd-input-test.cpp build-info.h ggml.o llama.o common.o $(OBJS)
+	$(CXX) $(CXXFLAGS) $(filter-out %.so,$(filter-out %.h,$(filter-out %.hpp,$^))) -o $@ $(LDFLAGS) -L. -lembdinput
+
 train-text-from-scratch: examples/train-text-from-scratch/train-text-from-scratch.cpp build-info.h ggml.o llama.o $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)

```

convert-lora-to-ggml.py

+5-1
```
@@ -113,14 +113,18 @@ def write_tensor_header(

 write_file_header(fout, params)
 for k, v in model.items():
+    if k.endswith(".default.weight"):
+        k = k.replace(".default.weight", ".weight")
+    if k in ["llama_proj.weight", "llama_proj.bias"]:
+        continue
     if k.endswith("lora_A.weight"):
         if v.dtype != torch.float16 and v.dtype != torch.float32:
             v = v.float()
         v = v.T
     else:
         v = v.float()

-    t = v.numpy()
+    t = v.detach().numpy()
     tname = translate_tensor_name(k)
     print(f"{k} => {tname} {t.shape} {t.dtype} {t.nbytes/1024/1024:.2f}MB")
     write_tensor_header(fout, tname, t.shape, t.dtype)
```
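A note on the `v.detach().numpy()` change above: PyTorch refuses to convert a tensor that still tracks gradients, which is common for LoRA weights loaded straight from a training checkpoint. A minimal, self-contained sketch of the failure and the fix (the 4x4 parameter is only a stand-in for a real LoRA weight):

```
import torch

# Stand-in for a LoRA weight that still has requires_grad=True.
v = torch.nn.Parameter(torch.randn(4, 4))

try:
    t = v.numpy()  # RuntimeError: can't call numpy() on a tensor that requires grad
except RuntimeError as err:
    print("direct .numpy() failed:", err)

t = v.detach().numpy()  # detach first, as the patched script now does
print(t.shape, t.dtype)
```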

examples/CMakeLists.txt

+1
```
@@ -39,6 +39,7 @@ else()
     add_subdirectory(baby-llama)
     add_subdirectory(train-text-from-scratch)
     add_subdirectory(simple)
+    add_subdirectory(embd-input)
     if (LLAMA_METAL)
         add_subdirectory(metal)
     endif()
```

examples/common.cpp

-7
```
@@ -416,13 +416,6 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
         exit(1);
     }

-#ifdef GGML_USE_CUBLAS
-    if (!params.lora_adapter.empty() && params.n_gpu_layers > 0) {
-        fprintf(stderr, "%s: error: the simultaneous use of LoRAs and GPU acceleration is not supported", __func__);
-        exit(1);
-    }
-#endif // GGML_USE_CUBLAS
-
     if (escape_prompt) {
         process_escapes(params.prompt);
     }
```

examples/embd-input/.gitignore

+4
```
@@ -0,0 +1,4 @@
+PandaGPT
+MiniGPT-4
+*.pth
+
```
examples/embd-input/CMakeLists.txt

+15
```
@@ -0,0 +1,15 @@
+set(TARGET embdinput)
+add_library(${TARGET} embd-input-lib.cpp embd-input.h)
+target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
+target_compile_features(${TARGET} PRIVATE cxx_std_11)
+if(TARGET BUILD_INFO)
+  add_dependencies(${TARGET} BUILD_INFO)
+endif()
+
+set(TARGET embd-input-test)
+add_executable(${TARGET} embd-input-test.cpp)
+target_link_libraries(${TARGET} PRIVATE common llama embdinput ${CMAKE_THREAD_LIBS_INIT})
+target_compile_features(${TARGET} PRIVATE cxx_std_11)
+if(TARGET BUILD_INFO)
+  add_dependencies(${TARGET} BUILD_INFO)
+endif()
```

examples/embd-input/README.md

+63
### Examples for directly providing input embeddings

## Requirements

Build `libembdinput.so` by running the following command in the main directory (../../):

```
make
```
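The Python examples below presumably consume this shared library. As a minimal sketch of loading it from Python (only the load step is shown; the exported entry points are declared in `examples/embd-input/embd-input.h` and are not called here to avoid assuming their signatures):

```
import ctypes

# Hedged sketch: load the shared library produced by `make` above.
# Run from the repository root so ./libembdinput.so resolves.
lib = ctypes.CDLL("./libembdinput.so")
print("libembdinput loaded:", lib)
```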
## [LLaVA](https://github.com/haotian-liu/LLaVA/) example (llava.py)

1. Obtain the LLaVA model (following https://github.com/haotian-liu/LLaVA/ , use https://huggingface.co/liuhaotian/LLaVA-13b-delta-v1-1/).
2. Convert it to ggml format.
3. Extract `llava_projection.pth` from [pytorch_model-00003-of-00003.bin](https://huggingface.co/liuhaotian/LLaVA-13b-delta-v1-1/blob/main/pytorch_model-00003-of-00003.bin):

```
import torch

bin_path = "../LLaVA-13b-delta-v1-1/pytorch_model-00003-of-00003.bin"
pth_path = "./examples/embd_input/llava_projection.pth"

# Keep only the multimodal projector weights that llava.py needs.
dic = torch.load(bin_path)
used_key = ["model.mm_projector.weight","model.mm_projector.bias"]
torch.save({k: dic[k] for k in used_key}, pth_path)
```
4. Check the paths of the LLaVA model and `llava_projection.pth` in `llava.py`.

## [PandaGPT](https://github.com/yxuansu/PandaGPT) example (panda_gpt.py)

1. Obtain the PandaGPT LoRA model from https://github.com/yxuansu/PandaGPT. Rename the file to `adapter_model.bin`. Use [convert-lora-to-ggml.py](../../convert-lora-to-ggml.py) to convert it to ggml format (a sketch for creating `adapter_config.json` follows this list).
The `adapter_config.json` is
```
{
  "peft_type": "LORA",
  "fan_in_fan_out": false,
  "bias": null,
  "modules_to_save": null,
  "r": 32,
  "lora_alpha": 32,
  "lora_dropout": 0.1,
  "target_modules": ["q_proj", "k_proj", "v_proj", "o_proj"]
}
```
2. Prepare the `vicuna` v0 model.
3. Obtain the [ImageBind](https://dl.fbaipublicfiles.com/imagebind/imagebind_huge.pth) model.
4. Clone the PandaGPT source.
```
git clone https://github.com/yxuansu/PandaGPT
```
5. Install the requirements of PandaGPT.
6. Check the paths of the PandaGPT source, ImageBind model, LoRA model and vicuna model in panda_gpt.py.

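If the downloaded checkpoint does not ship with the `adapter_config.json` shown above, a small helper like the following can write it next to `adapter_model.bin` before conversion. This is a hedged sketch: the directory name is hypothetical, and the converter invocation in the comment is an assumption based on the steps above, so check the script's usage first.

```
import json
from pathlib import Path

# Hypothetical directory holding the renamed PandaGPT checkpoint (see step 1).
lora_dir = Path("./PandaGPT-lora")
lora_dir.mkdir(parents=True, exist_ok=True)

adapter_config = {
    "peft_type": "LORA",
    "fan_in_fan_out": False,
    "bias": None,
    "modules_to_save": None,
    "r": 32,
    "lora_alpha": 32,
    "lora_dropout": 0.1,
    "target_modules": ["q_proj", "k_proj", "v_proj", "o_proj"],
}

# Write the config shown above next to adapter_model.bin, then convert with
# something like: python ../../convert-lora-to-ggml.py <lora_dir>  (assumed invocation)
(lora_dir / "adapter_config.json").write_text(json.dumps(adapter_config, indent=2))
```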
## [MiniGPT-4](https://github.com/Vision-CAIR/MiniGPT-4/) example (minigpt4.py)

1. Obtain the MiniGPT-4 model from https://github.com/Vision-CAIR/MiniGPT-4/ and put it in `embd-input`.
2. Clone the MiniGPT-4 source.
```
git clone https://github.com/Vision-CAIR/MiniGPT-4/
```
3. Install the requirements of MiniGPT-4.
4. Prepare the `vicuna` v0 model.
5. Check the paths of the MiniGPT-4 source, MiniGPT-4 model and vicuna model in `minigpt4.py`.
