Skip to content

Commit 6c2d800

Browse files
dcosteaPrashanth Govindarajanjoperezreerhardt
authored
Add Microsoft.Data.Analysis.nuget project (dotnet#2933)
* Add DataFrame object formatter. * Update nuget dependencies. * Apply CR fixes. * Remove ReferenceOutputAssembly added to from Microsoft.Data.Analysys.csproj. * Add Microsoft.Data.Analysis.nuget project. * Move project to src. Fix nuget project settings. * Remove NoBuild property from project. * Remove IncludeBuildOutput and IncludeSymbols from project. * Add VersionPrefix to project. * Add IncludeBuildOutput property. * Add unit tests. * Downgrade from netcoreapp3.1 to netcoreapp3.0 * Upgrade from netcoreapp3.0 to netcoreapp3.1 (dotnet interactive is not compatible with 3.0) * Add netcoreapp3.1 to global settings * Add dotnet 3.1.5 runtime to global settings * Build fixes * Moving MDAI into interactive-extensions folder of the package * Minor refactoring * Respond to PR feedback Co-authored-by: Prashanth Govindarajan <[email protected]> Co-authored-by: Jose Perez Rodriguez <[email protected]> Co-authored-by: Eric Erhardt <[email protected]>
1 parent 6e60307 commit 6c2d800

File tree

5 files changed

+276
-4
lines changed

5 files changed

+276
-4
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,155 @@
1+
// Licensed to the .NET Foundation under one or more agreements.
2+
// The .NET Foundation licenses this file to you under the MIT license.
3+
// See the LICENSE file in the project root for more information.
4+
5+
using System;
6+
using System.Collections.Generic;
7+
using System.Linq;
8+
using System.Threading.Tasks;
9+
using Microsoft.AspNetCore.Html;
10+
using Microsoft.DotNet.Interactive;
11+
using Microsoft.DotNet.Interactive.Formatting;
12+
using static Microsoft.DotNet.Interactive.Formatting.PocketViewTags;
13+
14+
namespace Microsoft.Data.Analysis.Interactive
15+
{
16+
public class DataFrameKernelExtension : IKernelExtension
17+
{
18+
/// <summary>
/// Implements <see cref="IKernelExtension"/> by registering the
/// <see cref="DataFrame"/> HTML formatter.
/// </summary>
/// <param name="kernel">The kernel passed in by the caller; this method does not read it.</param>
/// <returns>A completed task: registration itself is synchronous.</returns>
public Task OnLoadAsync(Kernel kernel)
{
    // All the work happens in the static registration method, so there is nothing to await.
    RegisterDataFrame();
    return Task.CompletedTask;
}
24+
25+
/// <summary>
/// Registers the "text/html" formatter used to display a <see cref="DataFrame"/>.
/// </summary>
/// <remarks>
/// Frames with at most 10 rows are written as a plain table. Larger frames are written as a
/// table whose body rows start hidden, followed by a script that reveals the first page; the
/// footer buttons page through the rows 10 at a time. At most 10000 rows are emitted.
/// </remarks>
public static void RegisterDataFrame()
{
    Formatter<DataFrame>.Register((df, writer) =>
    {
        const int MAX = 10000;
        const int SIZE = 10;

        // Suffix for the element ids so the generated scripts address this table only.
        // NOTE(review): tick-based ids could repeat if two frames are formatted within the
        // same clock tick - confirm whether a stronger id source is needed.
        var uniqueId = DateTime.Now.Ticks;

        var header = new List<IHtmlContent>
        {
            th(i("index"))
        };
        header.AddRange(df.Columns.Select(c => (IHtmlContent)th(c.Name)));

        if (df.Rows.Count > SIZE)
        {
            var maxMessage = df.Rows.Count > MAX ? $" (showing a max of {MAX} rows)" : string.Empty;
            var title = h3[style: "text-align: center;"]($"DataFrame - {df.Rows.Count} rows {maxMessage}");

            // table body (capped at MAX rows)
            var maxRows = Math.Min(MAX, df.Rows.Count);
            var rows = BuildRowCells(df, maxRows);

            // navigator: every button hides all rows, moves the page label, then shows that page
            var lastPage = (maxRows - 1) / SIZE;
            var hideRows = BuildHideRowsScript(uniqueId);
            var showPage = BuildPageScript(uniqueId, SIZE);
            var footer = new List<IHtmlContent>();

            var paginateScriptFirst = hideRows + GotoPageIndex(uniqueId, 0) + showPage;
            footer.Add(button[style: "margin: 2px;", onclick: paginateScriptFirst]("⏮"));

            var paginateScriptPrevTen = hideRows + UpdatePageIndex(uniqueId, -10, lastPage) + showPage;
            footer.Add(button[style: "margin: 2px;", onclick: paginateScriptPrevTen]("⏪"));

            var paginateScriptPrev = hideRows + UpdatePageIndex(uniqueId, -1, lastPage) + showPage;
            footer.Add(button[style: "margin: 2px;", onclick: paginateScriptPrev]("◀️"));

            footer.Add(b[style: "margin: 2px;"]("Page"));
            footer.Add(b[id: $"page_{uniqueId}", style: "margin: 2px;"]("1"));

            var paginateScriptNext = hideRows + UpdatePageIndex(uniqueId, 1, lastPage) + showPage;
            footer.Add(button[style: "margin: 2px;", onclick: paginateScriptNext]("▶️"));

            var paginateScriptNextTen = hideRows + UpdatePageIndex(uniqueId, 10, lastPage) + showPage;
            footer.Add(button[style: "margin: 2px;", onclick: paginateScriptNextTen]("⏩"));

            var paginateScriptLast = hideRows + GotoPageIndex(uniqueId, lastPage) + showPage;
            footer.Add(button[style: "margin: 2px;", onclick: paginateScriptLast]("⏭️"));

            // table: all body rows start hidden; the script below reveals the first page
            var t = table[id: $"table_{uniqueId}"](
                caption(title),
                thead(tr(header)),
                tbody(rows.Select(r => tr[style: "display: none"](r))),
                tfoot(tr(td[colspan: df.Columns.Count + 1, style: "text-align: center;"](footer)))
            );
            writer.Write(t);

            // show first page
            writer.Write($"<script>{showPage}</script>");
        }
        else
        {
            // Small frame: every row is visible, so no caption, footer or script is needed.
            var rows = BuildRowCells(df, df.Rows.Count);

            var t = table[id: $"table_{uniqueId}"](
                thead(tr(header)),
                tbody(rows.Select(r => tr(r)))
            );
            writer.Write(t);
        }
    }, "text/html");
}

/// <summary>
/// Builds the cells of the first <paramref name="rowCount"/> rows of <paramref name="df"/>:
/// an italic row-index cell followed by one cell per value in the row.
/// </summary>
/// <param name="df">The frame whose rows are rendered.</param>
/// <param name="rowCount">Number of leading rows to render.</param>
/// <returns>One list of cells per rendered row, in row order.</returns>
private static List<List<IHtmlContent>> BuildRowCells(DataFrame df, long rowCount)
{
    var rows = new List<List<IHtmlContent>>();
    for (var index = 0; index < rowCount; index++)
    {
        var cells = new List<IHtmlContent>
        {
            td(i((index)))
        };
        foreach (var obj in df.Rows[index])
        {
            cells.Add(td(obj));
        }
        rows.Add(cells);
    }
    return rows;
}
123+
124+
/// <summary>
/// Produces the JavaScript fragment that sets <c>display='none'</c> on every body row of
/// the table whose id is <c>table_{uniqueId}</c>.
/// </summary>
/// <param name="uniqueId">Suffix of the target table's element id.</param>
/// <returns>The script text, ending with a trailing space so fragments can be concatenated.</returns>
private static string BuildHideRowsScript(long uniqueId)
{
    // First statement collects the rows, second one hides each of them.
    var selectRows = $"var allRows = document.querySelectorAll('#table_{uniqueId} tbody tr:nth-child(n)'); ";
    const string hideEachRow = "for (let i = 0; i < allRows.length; i++) { allRows[i].style.display='none'; } ";
    return selectRows + hideEachRow;
}
130+
131+
/// <summary>
/// Produces the JavaScript fragment that reveals the rows of the page currently shown in
/// the <c>page_{uniqueId}</c> label of the table <c>table_{uniqueId}</c>.
/// </summary>
/// <param name="uniqueId">Suffix shared by the table and page-label element ids.</param>
/// <param name="size">Number of rows per page.</param>
/// <returns>The script text, ending with a trailing space so fragments can be concatenated.</returns>
private static string BuildPageScript(long uniqueId, int size)
{
    // The label is 1-based; the script converts it to a 0-based page number.
    var script = $"var page = parseInt(document.querySelector('#page_{uniqueId}').innerHTML) - 1; ";
    // nth-child(n + k) matches every row from the k-th onwards, i.e. from the first row of the page.
    script += $"var pageRows = document.querySelectorAll(`#table_{uniqueId} tbody tr:nth-child(n + ${{page * {size} + 1 }})`); ";
    // Stop at pageRows.length as well: the last page can hold fewer than `size` rows, and
    // indexing past the end of the NodeList would throw in the browser.
    script += $"for (let j = 0; j < {size} && j < pageRows.length; j++) {{ pageRows[j].style.display='table-row'; }} ";
    return script;
}
138+
139+
/// <summary>
/// Produces the JavaScript fragment that sets the <c>page_{uniqueId}</c> label to the
/// given page.
/// </summary>
/// <param name="uniqueId">Suffix of the page-label element id.</param>
/// <param name="page">Zero-based page index; the label is written one-based.</param>
/// <returns>The script text, ending with a trailing space so fragments can be concatenated.</returns>
private static string GotoPageIndex(long uniqueId, long page)
{
    long displayedPage = page + 1;
    return $"document.querySelector('#page_{uniqueId}').innerHTML = {displayedPage}; ";
}
144+
145+
/// <summary>
/// Produces the JavaScript fragment that moves the <c>page_{uniqueId}</c> label by
/// <paramref name="step"/> pages, clamped to the range 0..<paramref name="maxPage"/>.
/// </summary>
/// <param name="uniqueId">Suffix of the page-label element id.</param>
/// <param name="step">Number of pages to move; negative values move backwards.</param>
/// <param name="maxPage">Zero-based index of the last page.</param>
/// <returns>The script text, ending with a trailing space so fragments can be concatenated.</returns>
private static string UpdatePageIndex(long uniqueId, int step, long maxPage)
{
    // The same element is read at the start and written at the end of the fragment.
    var pageLabel = $"document.querySelector('#page_{uniqueId}').innerHTML";

    return string.Concat(
        $"var page = parseInt({pageLabel}) - 1; ",
        $"page = parseInt(page) + parseInt({step}); ",
        "page = page < 0 ? 0 : page; ",
        $"page = page > {maxPage} ? {maxPage} : page; ",
        $"{pageLabel} = page + 1; ");
}
154+
}
155+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
<!-- Project for the DataFrame formatter extension (presumably Microsoft.Data.Analysis.Interactive.csproj, the path referenced by the other projects in this commit). -->
<Project Sdk="Microsoft.NET.Sdk">
2+
3+
<PropertyGroup>
4+
<!-- The commit message notes that dotnet interactive is not compatible with netcoreapp3.0, hence 3.1. -->
<TargetFramework>netcoreapp3.1</TargetFramework>
5+
<!-- No package is produced from this project itself. -->
<IsPackable>false</IsPackable>
6+
</PropertyGroup>
7+
8+
<ItemGroup>
9+
<!-- PrivateAssets=all keeps this reference from flowing to projects that consume this one. -->
<PackageReference Include="Microsoft.CodeAnalysis.Analyzers" Version="3.0.0">
10+
<PrivateAssets>all</PrivateAssets>
11+
<IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
12+
</PackageReference>
13+
<!-- The two dotnet interactive packages are pinned to the same beta version. -->
<PackageReference Include="Microsoft.DotNet.Interactive" Version="1.0.0-beta.20377.1" />
14+
<PackageReference Include="Microsoft.DotNet.Interactive.Formatting" Version="1.0.0-beta.20377.1" />
15+
</ItemGroup>
16+
17+
<ItemGroup>
18+
<!-- Sibling project that this project builds against. -->
<ProjectReference Include="..\Microsoft.Data.Analysis\Microsoft.Data.Analysis.csproj" />
19+
</ItemGroup>
20+
21+
</Project>

src/Microsoft.Data.Analysis/Microsoft.Data.Analysis.csproj

+22-4
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,28 @@
1111
<PackageReleaseNotes>Initial preview of robust and extensible types and algorithms for manipulating structured data that supports aggregations, statistical functions, sorting, grouping, joins, merges, handling missing values and more. </PackageReleaseNotes>
1212
<PackageTags>ML.NET ML Machine Learning Data Science DataFrame Preparation DataView Analytics Exploration</PackageTags>
1313
<GenerateDocumentationFile>true</GenerateDocumentationFile>
14-
<!-- Documentation warnings -->
15-
<NoWarn>$(NoWarn);1591</NoWarn>
14+
<!--
15+
1591: Documentation warnings
16+
NU5100: Warning that gets triggered because a .dll is not placed under lib folder on package. This is by design as we want MDAI to be under interactive-extensions folder.
17+
-->
18+
<NoWarn>$(NoWarn);1591;NU5100</NoWarn>
19+
<TargetsForTfmSpecificContentInPackage>$(TargetsForTfmSpecificContentInPackage);AddMDAIToInteractiveExtensionsFolder</TargetsForTfmSpecificContentInPackage>
1620
</PropertyGroup>
17-
21+
22+
<!-- The following properties are set to package M.D.A.Interactive with the M.D.A nuget package. If M.D.A.I undergoes TFM or dependency changes, we need to update the TargetFramework passed in below-->
23+
<!-- Listed in TargetsForTfmSpecificContentInPackage above, so it contributes extra files to this project's package. -->
<Target Name="AddMDAIToInteractiveExtensionsFolder">
24+
<!-- Step 1: ask the Interactive project for its build output files and collect them in _ItemsToIncludeForInteractive. -->
<MSBuild Projects="./../Microsoft.Data.Analysis.Interactive/Microsoft.Data.Analysis.Interactive.csproj"
25+
Targets="_GetBuildOutputFilesWithTfm"
26+
Properties="TargetFramework=netcoreapp3.1"> <!-- Manually hardcoding the TargetFramework to netcoreapp3.1 as that is the one that MDAI targets -->
27+
<Output TaskParameter="TargetOutputs" ItemName="_ItemsToIncludeForInteractive" />
28+
</MSBuild>
29+
30+
<!-- Step 2: set the package path of those files to interactive-extensions/dotnet and add them as TFM-specific package files. -->
<ItemGroup>
31+
<_ItemsToIncludeForInteractive Update="@(_ItemsToIncludeForInteractive)" PackagePath="interactive-extensions/dotnet" />
32+
<TfmSpecificPackageFile Include="@(_ItemsToIncludeForInteractive)" />
33+
</ItemGroup>
34+
</Target>
35+
1836
<ItemGroup>
1937
<None Include="Converters.cs">
2038
<DesignTime>True</DesignTime>
@@ -30,7 +48,7 @@
3048

3149
<ItemGroup>
3250
<PackageReference Include="Apache.Arrow" Version="0.14.1" />
33-
<PackageReference Include="System.Memory" Version="4.5.2" />
51+
<PackageReference Include="System.Memory" Version="4.5.3" />
3452
<PackageReference Include="System.Runtime.CompilerServices.Unsafe" Version="4.5.2" />
3553
<PackageReference Include="System.Buffers" Version="4.5.0" />
3654
<PackageReference Include="System.Text.Encoding" Version="4.3.0" />
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
// Licensed to the .NET Foundation under one or more agreements.
2+
// The .NET Foundation licenses this file to you under the MIT license.
3+
// See the LICENSE file in the project root for more information.
4+
5+
using System.Linq;
6+
using Xunit;
7+
using Microsoft.DotNet.Interactive.Formatting;
8+
9+
namespace Microsoft.Data.Analysis.Interactive.Tests
10+
{
11+
public partial class DataFrameInteractiveTests
12+
{
13+
// Markup the tests expect only in the paginated layout, which adds navigation buttons.
private const string BUTTON_HTML_PART = "button onclick";
// Start of the <table id="table_..."> element the formatter writes for every DataFrame.
// An empty marker would make Assert.Contains succeed for any output, so real markup is pinned here.
private const string TABLE_HTML_PART = "<table id=";
15+
16+
/// <summary>
/// Creates a frame with two Int32 columns: "Int1" holding 0..length-1 and "Int2" holding
/// 10..length+9.
/// </summary>
/// <param name="length">Number of rows in each column.</param>
/// <param name="withNulls">When true, the value at index <c>length / 2</c> of both columns is set to null.</param>
/// <returns>The frame, with "Int1" as the first column and "Int2" as the second.</returns>
public static DataFrame MakeDataFrameWithTwoColumns(int length, bool withNulls = true)
{
    DataFrameColumn first = new Int32DataFrameColumn("Int1", Enumerable.Range(0, length));
    DataFrameColumn second = new Int32DataFrameColumn("Int2", Enumerable.Range(10, length));

    if (withNulls)
    {
        // Null out the middle row so rendering is exercised with a missing value.
        int middle = length / 2;
        first[middle] = null;
        second[middle] = null;
    }

    var frame = new DataFrame();
    frame.Columns.Insert(0, first);
    frame.Columns.Insert(1, second);
    return frame;
}
30+
31+
// A 5-row frame is below the formatter's page size, so the output is expected to be a
// table without any navigation buttons.
[Fact]
public void LessThanTenRowsDataFrameTest()
{
    DataFrameKernelExtension.RegisterDataFrame();
    var frame = MakeDataFrameWithTwoColumns(length: 5);

    var rendered = frame.ToDisplayString("text/html");

    Assert.Contains(TABLE_HTML_PART, rendered);
    Assert.DoesNotContain(BUTTON_HTML_PART, rendered);
}
41+
42+
// A 21-row frame exceeds the formatter's page size, so the output is expected to contain
// both the table and the navigation buttons.
[Fact]
public void MoreThanTenRowsDataFrameTest()
{
    DataFrameKernelExtension.RegisterDataFrame();
    var frame = MakeDataFrameWithTwoColumns(length: 21);

    var rendered = frame.ToDisplayString("text/html");

    Assert.Contains(TABLE_HTML_PART, rendered);
    Assert.Contains(BUTTON_HTML_PART, rendered);
}
52+
53+
// The result of Info() on a 5-row frame is formatted as HTML; the test expects a table
// and no navigation buttons.
[Fact]
public void DataFrameInfoTest()
{
    DataFrameKernelExtension.RegisterDataFrame();
    var frame = MakeDataFrameWithTwoColumns(length: 5);

    var info = frame.Info();
    var rendered = info.ToDisplayString("text/html");

    Assert.Contains(TABLE_HTML_PART, rendered);
    Assert.DoesNotContain(BUTTON_HTML_PART, rendered);
}
63+
}
64+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
<!-- Test project for the Interactive extension; it targets the same framework as the project it references. -->
<Project Sdk="Microsoft.NET.Sdk" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
2+
<PropertyGroup>
3+
<TargetFramework>netcoreapp3.1</TargetFramework>
4+
</PropertyGroup>
5+
6+
<ItemGroup>
7+
<!-- Project under test. -->
<ProjectReference Include="..\..\src\Microsoft.Data.Analysis.Interactive\Microsoft.Data.Analysis.Interactive.csproj" />
8+
</ItemGroup>
9+
10+
<!-- register for test discovery in Visual Studio -->
11+
<ItemGroup>
12+
<Service Include="{82a7f48d-3b50-4b1e-b82e-3ada8210c358}" />
13+
</ItemGroup>
14+
</Project>

0 commit comments

Comments
 (0)