Skip to content

Commit a9c7d2e

Browse files
authored
Basic flashinfer 0.2 support (#2862)
* Basic flashinfer 0.2 support
  This change does not use any of the new features yet, but makes some small compatibility changes.
* Update to flashinfer 0.2.0.post1
* flashinfer: remove `contiguous` calls
* Fix flashinfer install
* flashinfer: fixup kv cache dtype
* Fix some annoying perturbations
* More output changes
1 parent afb6c72 commit a9c7d2e

File tree

13 files changed

+154
-184
lines changed

13 files changed

+154
-184
lines changed

flake.lock

Lines changed: 4 additions & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

flake.nix

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
inputs.nixpkgs.follows = "tgi-nix/nixpkgs";
66
};
77
nix-filter.url = "github:numtide/nix-filter";
8-
tgi-nix.url = "github:huggingface/text-generation-inference-nix";
8+
tgi-nix.url = "github:huggingface/text-generation-inference-nix/flashinfer-v0.2";
99
nixpkgs.follows = "tgi-nix/nixpkgs";
1010
flake-utils.url = "github:numtide/flake-utils";
1111
rust-overlay = {

integration-tests/models/__snapshots__/test_compressed_tensors_w8a8_int/test_compressed_tensors_w8a8_int_all_params.json

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@
3232
},
3333
{
3434
"id": 1101,
35-
"logprob": -1.0947266,
35+
"logprob": -1.0136719,
3636
"special": false,
3737
"text": " also"
3838
},
@@ -56,13 +56,13 @@
5656
},
5757
{
5858
"id": 4009,
59-
"logprob": -0.15563965,
59+
"logprob": -0.21923828,
6060
"special": false,
6161
"text": " network"
6262
},
6363
{
6464
"id": 477,
65-
"logprob": -1.4003906,
65+
"logprob": -1.4824219,
6666
"special": false,
6767
"text": " or"
6868
}

integration-tests/models/__snapshots__/test_compressed_tensors_w8a8_int_dynamic_weight/test_compressed_tensors_w8a8_int_dynamic_weight_all_params.json

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
"tokens": [
99
{
1010
"id": 1939,
11-
"logprob": -2.2675781,
11+
"logprob": -2.2460938,
1212
"special": false,
1313
"text": "?\n\n"
1414
},
@@ -20,13 +20,13 @@
2020
},
2121
{
2222
"id": 20909,
23-
"logprob": -0.37695312,
23+
"logprob": -0.48608398,
2424
"special": false,
2525
"text": " Learning"
2626
},
2727
{
2828
"id": 4102,
29-
"logprob": -1.9316406,
29+
"logprob": -2.265625,
3030
"special": false,
3131
"text": " "
3232
},
@@ -38,36 +38,36 @@
3838
},
3939
{
4040
"id": 458,
41-
"logprob": -0.80859375,
41+
"logprob": -0.6328125,
4242
"special": false,
4343
"text": " an"
4444
},
4545
{
46-
"id": 3082,
47-
"logprob": -1.4541016,
46+
"id": 20443,
47+
"logprob": -0.1796875,
4848
"special": false,
49-
"text": " area"
49+
"text": " artificial"
5050
},
5151
{
52-
"id": 315,
52+
"id": 11229,
5353
"logprob": 0.0,
5454
"special": false,
55-
"text": " of"
55+
"text": " intelligence"
5656
},
5757
{
58-
"id": 20443,
59-
"logprob": -0.5136719,
58+
"id": 320,
59+
"logprob": -0.37695312,
6060
"special": false,
61-
"text": " artificial"
61+
"text": " ("
6262
},
6363
{
64-
"id": 11229,
64+
"id": 15469,
6565
"logprob": 0.0,
6666
"special": false,
67-
"text": " intelligence"
67+
"text": "AI"
6868
}
6969
],
7070
"top_tokens": null
7171
},
72-
"generated_text": "What is deep learning?\n\nDeep Learning is an area of artificial intelligence"
72+
"generated_text": "What is deep learning?\n\nDeep Learning is an artificial intelligence (AI"
7373
}

0 commit comments

Comments
 (0)