Skip to content

Commit 70e5ab1

Browse files
[GenAI] Add LLaMA support (#7220)
* add llama * add test for tokenizer * make llama 3.1 working * update * add shape test for 70b and 405b * clean up * add tests * update * fix error * calculate rotary embedding in model layer * remove rotary_emb from attention * update feed * update .csproj * Update NuGet.config * fix test * pass device * fix test * update constructor * disable 405b test * update * disable 70b test * use windows only fact * revert change * rename test to LLaMA3_1
1 parent fa8c822 commit 70e5ab1

File tree

57 files changed

+3932
-211
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

57 files changed

+3932
-211
lines changed

Microsoft.ML.sln

+23-1
Original file line numberDiff line numberDiff line change
@@ -184,7 +184,11 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.ML.GenAI.Phi.Test
184184
EndProject
185185
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.ML.GenAI.Samples", "docs\samples\Microsoft.ML.GenAI.Samples\Microsoft.ML.GenAI.Samples.csproj", "{1D4AD9A3-19AF-432B-889D-A63FE6D7BD47}"
186186
EndProject
187-
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Microsoft.ML.GenAI.Core.Tests", "test\Microsoft.ML.GenAI.Core.Tests\Microsoft.ML.GenAI.Core.Tests.csproj", "{14AB0804-D4CE-4634-B544-5A8587620783}"
187+
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.ML.GenAI.Core.Tests", "test\Microsoft.ML.GenAI.Core.Tests\Microsoft.ML.GenAI.Core.Tests.csproj", "{14AB0804-D4CE-4634-B544-5A8587620783}"
188+
EndProject
189+
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.ML.GenAI.LLaMA", "src\Microsoft.ML.GenAI.LLaMA\Microsoft.ML.GenAI.LLaMA.csproj", "{0AA6D5CB-195F-457A-8792-4221E76E6C44}"
190+
EndProject
191+
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Microsoft.ML.GenAI.LLaMA.Tests", "test\Microsoft.ML.GenAI.LLaMA.Tests\Microsoft.ML.GenAI.LLaMA.Tests.csproj", "{D202353D-6FAF-4263-9A01-BDCFBC92391F}"
188192
EndProject
189193
Global
190194
GlobalSection(SolutionConfigurationPlatforms) = preSolution
@@ -878,6 +882,22 @@ Global
878882
{14AB0804-D4CE-4634-B544-5A8587620783}.Release|Any CPU.Build.0 = Release|Any CPU
879883
{14AB0804-D4CE-4634-B544-5A8587620783}.Release|x64.ActiveCfg = Release|Any CPU
880884
{14AB0804-D4CE-4634-B544-5A8587620783}.Release|x64.Build.0 = Release|Any CPU
885+
{0AA6D5CB-195F-457A-8792-4221E76E6C44}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
886+
{0AA6D5CB-195F-457A-8792-4221E76E6C44}.Debug|Any CPU.Build.0 = Debug|Any CPU
887+
{0AA6D5CB-195F-457A-8792-4221E76E6C44}.Debug|x64.ActiveCfg = Debug|Any CPU
888+
{0AA6D5CB-195F-457A-8792-4221E76E6C44}.Debug|x64.Build.0 = Debug|Any CPU
889+
{0AA6D5CB-195F-457A-8792-4221E76E6C44}.Release|Any CPU.ActiveCfg = Release|Any CPU
890+
{0AA6D5CB-195F-457A-8792-4221E76E6C44}.Release|Any CPU.Build.0 = Release|Any CPU
891+
{0AA6D5CB-195F-457A-8792-4221E76E6C44}.Release|x64.ActiveCfg = Release|Any CPU
892+
{0AA6D5CB-195F-457A-8792-4221E76E6C44}.Release|x64.Build.0 = Release|Any CPU
893+
{D202353D-6FAF-4263-9A01-BDCFBC92391F}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
894+
{D202353D-6FAF-4263-9A01-BDCFBC92391F}.Debug|Any CPU.Build.0 = Debug|Any CPU
895+
{D202353D-6FAF-4263-9A01-BDCFBC92391F}.Debug|x64.ActiveCfg = Debug|Any CPU
896+
{D202353D-6FAF-4263-9A01-BDCFBC92391F}.Debug|x64.Build.0 = Debug|Any CPU
897+
{D202353D-6FAF-4263-9A01-BDCFBC92391F}.Release|Any CPU.ActiveCfg = Release|Any CPU
898+
{D202353D-6FAF-4263-9A01-BDCFBC92391F}.Release|Any CPU.Build.0 = Release|Any CPU
899+
{D202353D-6FAF-4263-9A01-BDCFBC92391F}.Release|x64.ActiveCfg = Release|Any CPU
900+
{D202353D-6FAF-4263-9A01-BDCFBC92391F}.Release|x64.Build.0 = Release|Any CPU
881901
EndGlobalSection
882902
GlobalSection(SolutionProperties) = preSolution
883903
HideSolutionNode = FALSE
@@ -969,6 +989,8 @@ Global
969989
{867FFC34-DFA7-400F-B9BB-85158326CE08} = {AED9C836-31E3-4F3F-8ABC-929555D3F3C4}
970990
{1D4AD9A3-19AF-432B-889D-A63FE6D7BD47} = {DA452A53-2E94-4433-B08C-041EDEC729E6}
971991
{14AB0804-D4CE-4634-B544-5A8587620783} = {AED9C836-31E3-4F3F-8ABC-929555D3F3C4}
992+
{0AA6D5CB-195F-457A-8792-4221E76E6C44} = {09EADF06-BE25-4228-AB53-95AE3E15B530}
993+
{D202353D-6FAF-4263-9A01-BDCFBC92391F} = {AED9C836-31E3-4F3F-8ABC-929555D3F3C4}
972994
EndGlobalSection
973995
GlobalSection(ExtensibilityGlobals) = postSolution
974996
SolutionGuid = {41165AF1-35BB-4832-A189-73060F82B01D}

NuGet.config

+4
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
<add key="dotnet5-roslyn" value="https://pkgs.dev.azure.com/dnceng/public/_packaging/dotnet5/nuget/v3/index.json" />
1414
<add key="mlnet-daily" value="https://pkgs.dev.azure.com/dnceng/public/_packaging/MachineLearning/nuget/v3/index.json" />
1515
<add key="mlnet-assets" value="https://pkgs.dev.azure.com/dnceng/public/_packaging/machinelearning-assets/nuget/v3/index.json" />
16+
<add key="dotnet-libraries-transport" value="https://pkgs.dev.azure.com/dnceng/public/_packaging/dotnet-libraries-transport/nuget/v3/index.json" />
1617
<add key="dotnet8" value="https://pkgs.dev.azure.com/dnceng/public/_packaging/dotnet8/nuget/v3/index.json" />
1718
</packageSources>
1819
<packageSourceMapping>
@@ -40,6 +41,9 @@
4041
<packageSource key="mlnet-assets">
4142
<package pattern="*" />
4243
</packageSource>
44+
<packageSource key="dotnet-libraries-transport">
45+
<package pattern="*" />
46+
</packageSource>
4347
<packageSource key="dotnet8">
4448
<package pattern="*" />
4549
</packageSource>
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
using System;
2+
using System.Collections.Generic;
3+
using System.Linq;
4+
using System.Text;
5+
using System.Text.Json;
6+
using System.Threading.Tasks;
7+
using AutoGen.Core;
8+
using Microsoft.ML.GenAI.Core;
9+
using Microsoft.ML.GenAI.Core.Extension;
10+
using Microsoft.ML.GenAI.LLaMA;
11+
using Microsoft.ML.Tokenizers;
12+
using TorchSharp;
13+
using static TorchSharp.torch;
14+
15+
namespace Microsoft.ML.GenAI.Samples.Llama;
16+
17+
/// <summary>
/// Sample that loads a Llama model from a local Hugging Face weight folder,
/// wraps it in a <see cref="LlamaCausalLMAgent"/> and sends it a single coding task.
/// </summary>
internal class LlamaSample
{
    /// <summary>
    /// Loads the tokenizer and model from disk, builds a causal LM pipeline on the
    /// selected device and sends one prompt to the agent.
    /// </summary>
    /// <remarks>
    /// Returns <see cref="Task"/> rather than <c>void</c> so the caller can await completion
    /// and observe exceptions; with <c>async void</c> a console host could exit before the
    /// agent has produced its reply.
    /// </remarks>
    /// <returns>A task that completes once <c>SendAsync</c> has returned.</returns>
    public static async Task Run()
    {
        // Change to another device id to run without CUDA; CUDA is only initialized when selected.
        var device = "cuda";
        if (device == "cuda")
        {
            torch.InitializeDeviceType(DeviceType.CUDA);
        }

        var defaultType = ScalarType.Float16;
        torch.manual_seed(1);
        torch.set_default_dtype(defaultType);

        // NOTE(review): machine-specific path; point it at your local copy of the model weights.
        var weightFolder = @"C:\Users\xiaoyuz\source\repos\Meta-Llama-3.1-8B-Instruct";
        var configName = "config.json";

        // The tokenizer is loaded from the "original" sub-folder of the weight folder.
        var originalWeightFolder = Path.Combine(weightFolder, "original");

        Console.WriteLine("Loading Llama from huggingface model weight folder");
        var tokenizer = LlamaTokenizerHelper.FromPretrained(originalWeightFolder);

        // NOTE(review): layersOnTargetDevice: -1 presumably means "all layers on the target device" - confirm.
        var model = LlamaForCausalLM.FromPretrained(weightFolder, configName, layersOnTargetDevice: -1);

        var pipeline = new CausalLMPipeline<TiktokenTokenizer, LlamaForCausalLM>(tokenizer, model, device);

        var agent = new LlamaCausalLMAgent(pipeline, "assistant")
            .RegisterPrintMessage();

        var task = """
            Write a C# program to print the sum of two numbers. Use top-level statement, put code between ```csharp and ```.
            """;

        await agent.SendAsync(task);
    }
}

docs/samples/Microsoft.ML.GenAI.Samples/Microsoft.ML.GenAI.Samples.csproj

+1
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99

1010
<ItemGroup>
1111
<ProjectReference Include="..\..\..\src\Microsoft.ML.GenAI.Core\Microsoft.ML.GenAI.Core.csproj" />
12+
<ProjectReference Include="..\..\..\src\Microsoft.ML.GenAI.LLaMA\Microsoft.ML.GenAI.LLaMA.csproj" />
1213
<ProjectReference Include="..\..\..\src\Microsoft.ML.GenAI.Phi\Microsoft.ML.GenAI.Phi.csproj" />
1314
</ItemGroup>
1415

docs/samples/Microsoft.ML.GenAI.Samples/Phi3Mini/AutoGenSample.cs

+1-1
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ public static async Task RunAsync()
2626
torch.manual_seed(1);
2727
torch.set_default_dtype(defaultType);
2828
var weightFolder = @"C:\Users\xiaoyuz\source\repos\Phi-3-mini-4k-instruct";
29-
var pipeline = Utils.LoadPhi3Mini4KFromFolder(weightFolder, device: device);
29+
var pipeline = Utils.LoadPhi3Mini4KFromFolder(weightFolder, device: device, quantizeToInt8: false);
3030

3131
// agent
3232
var agent = new Phi3Agent(pipeline, "assistant")

docs/samples/Microsoft.ML.GenAI.Samples/Phi3Mini/Utils.cs

+1-1
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ public static ICausalLMPipeline<Tokenizer, Phi3ForCasualLM> LoadPhi3Mini4KFromFo
2020
string weightFolder,
2121
string configName = "config.json",
2222
string device = "cuda",
23-
int modelSizeOnCudaInGB = 16,
23+
int modelSizeOnCudaInGB = 55,
2424
int modelSizeOnMemoryInGB = 64,
2525
int modelSizeOnDiskInGB = 200,
2626
bool quantizeToInt8 = false,

eng/Versions.props

+1-1
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@
9696
<MicrosoftMLTensorFlowTestModelsVersion>0.0.13-test</MicrosoftMLTensorFlowTestModelsVersion>
9797
<MicrosoftMLTestDatabasesVersion>0.0.6-test</MicrosoftMLTestDatabasesVersion>
9898
<MicrosoftMLTestModelsVersion>0.0.7-test</MicrosoftMLTestModelsVersion>
99-
<MicrosoftMLTestTokenizersVersion>2.0.0-beta.24219.1</MicrosoftMLTestTokenizersVersion>
99+
<MicrosoftMLTestTokenizersVersion>2.0.0-beta.24415.1</MicrosoftMLTestTokenizersVersion>
100100
<SystemDataSqlClientVersion>4.8.6</SystemDataSqlClientVersion>
101101
<SystemDataSQLiteCoreVersion>1.0.118</SystemDataSQLiteCoreVersion>
102102
<XunitCombinatorialVersion>1.6.24</XunitCombinatorialVersion>

src/Microsoft.ML.GenAI.Core/Extension/ModuleExtension.cs

+51
Original file line numberDiff line numberDiff line change
@@ -197,6 +197,57 @@ public static Dictionary<string, string> InferDeviceMapForEachLayer(
197197
return deviceMap;
198198
}
199199

200+
/// <summary>
/// Infer the device map for each layer in the model from an explicit number of layers per device.
/// The layers reported by <c>GetSizeForEachDynamicLayerInBytes</c> are sorted by size in descending order
/// and handed out to the devices in the order the devices appear in <paramref name="numberOfLayerToBePlaced"/>.
/// </summary>
/// <param name="model">the model whose layers are to be placed.</param>
/// <param name="numberOfLayerToBePlaced">a list of key-value pairs where the key is the device id (e.g. "cuda:0") and the value is the number of layers to be placed on the device.
/// If you want to place all remaining layers on the device, set that value to -1. Entries after a -1 entry are ignored.
/// e.g. [{"cuda:0", 2}, {"cpu", -1}], the 2 largest layers will be placed on "cuda:0" and the rest will be placed on "cpu".
/// </param>
/// <returns>a dictionary where the key is the layer key and the value is the device id the layer is placed on.</returns>
/// <exception cref="ArgumentNullException"><paramref name="model"/> or <paramref name="numberOfLayerToBePlaced"/> is null.</exception>
/// <exception cref="ArgumentOutOfRangeException">a layer count is less than -1.</exception>
/// <exception cref="ArgumentException">the layer counts do not cover every layer in the model.</exception>
public static Dictionary<string, string> InferDeviceMapForEachLayer(
    this nn.Module model,
    IEnumerable<KeyValuePair<string, int>> numberOfLayerToBePlaced)
{
    if (model is null)
    {
        throw new ArgumentNullException(nameof(model));
    }

    if (numberOfLayerToBePlaced is null)
    {
        throw new ArgumentNullException(nameof(numberOfLayerToBePlaced));
    }

    // Largest layers first, so the earliest devices in the list receive the biggest layers.
    var remainingLayers = model.GetSizeForEachDynamicLayerInBytes()
        .OrderByDescending(x => x.Value)
        .ToList();

    var deviceMap = new Dictionary<string, string>();
    foreach (var (device, count) in numberOfLayerToBePlaced)
    {
        // Take/Skip treat a negative count as 0, which would silently place nothing; reject it instead.
        if (count < -1)
        {
            throw new ArgumentOutOfRangeException(
                nameof(numberOfLayerToBePlaced),
                count,
                $"The layer count for device '{device}' must be -1 (all remaining layers) or a non-negative number.");
        }

        if (count == -1)
        {
            // -1 means "everything that is left"; nothing remains for later devices.
            foreach (var (key, _) in remainingLayers)
            {
                deviceMap[key] = device;
            }

            remainingLayers.Clear();
            break;
        }

        foreach (var (key, _) in remainingLayers.Take(count))
        {
            deviceMap[key] = device;
        }

        remainingLayers = remainingLayers.Skip(count).ToList();
    }

    if (remainingLayers.Count > 0)
    {
        throw new ArgumentException("The layer count is not enough to cover all layers, did you forget to set the last layer count to -1?");
    }

    return deviceMap;
}
250+
200251
internal static string Peek(this nn.Module model)
201252
{
202253
var sb = new StringBuilder();

src/Microsoft.ML.GenAI.Core/Microsoft.ML.GenAI.Core.csproj

+4-7
Original file line numberDiff line numberDiff line change
@@ -8,23 +8,20 @@
88
</PropertyGroup>
99

1010
<ItemGroup>
11+
<PackageReference Include="AutoGen.Core" Version="$(AutoGenVersion)" />
12+
<PackageReference Include="Microsoft.SemanticKernel.Abstractions" Version="$(SemanticKernelVersion)" />
1113
<PackageReference Include="System.Memory" Version="$(SystemMemoryVersion)" />
1214
<PackageReference Include="TorchSharp" Version="$(TorchSharpVersion)" />
1315
</ItemGroup>
14-
<!--
15-
16-
<ItemGroup Condition="'$(Configuration)' == 'Debug'">
17-
<PackageReference Include="libtorch-cpu-win-x64" Version="$(LibTorchVersion)" Condition="$([MSBuild]::IsOSPlatform('Windows'))" PrivateAssets="all" />
18-
<PackageReference Include="libtorch-cpu-linux-x64" Version="$(LibTorchVersion)" Condition="$([MSBuild]::IsOSPlatform('Linux'))" PrivateAssets="all" />
19-
<PackageReference Include="libtorch-cpu-osx-x64" Version="$(LibTorchVersion)" Condition="$([MSBuild]::IsOSPlatform('OSX'))" PrivateAssets="all" />
20-
</ItemGroup> -->
2116

2217
<ItemGroup>
2318
<ProjectReference Include="..\Microsoft.ML.Tokenizers\Microsoft.ML.Tokenizers.csproj" />
2419
</ItemGroup>
2520

2621
<ItemGroup>
2722
<InternalsVisibleTo Include="Microsoft.ML.GenAI.Phi" />
23+
<InternalsVisibleTo Include="Microsoft.ML.GenAI.LLaMA" />
24+
<InternalsVisibleTo Include="Microsoft.ML.GenAI.LLaMA.Tests" />
2825
<InternalsVisibleTo Include="Microsoft.ML.GenAI.Phi.Tests" />
2926
<InternalsVisibleTo Include="Microsoft.ML.GenAI.Core.Tests" />
3027
</ItemGroup>

0 commit comments

Comments
 (0)