Skip to content

Commit 9917ff6

Browse files
committed
Add StringStats aggregation (#4370)
This commit adds support for the string_stats aggregation introduced in Elasticsearch 7.6.0. It is a metric aggregation but does not implement IMetricAggregation because the type of its Missing field is a string and not a double value. Missing is implemented as object because the hierarchy of metric aggregations will be changed as part of #4332, after which Missing will be an object. The documentation for StringStats indicates that the distribution is returned in descending probability order, but it is modelled as a JSON object. Following internal discussion, the distribution is modelled as a dictionary on the response, as it is considered that this modelling will not diminish functionality. Closes #4369 (cherry picked from commit cc45fb1)
1 parent a50f8a1 commit 9917ff6

File tree

7 files changed

+312
-4
lines changed

7 files changed

+312
-4
lines changed

src/Nest/Aggregations/AggregateDictionary.cs

+2
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,8 @@ public ScriptedMetricAggregate ScriptedMetric(string key)
7878

7979
public StatsAggregate Stats(string key) => TryGet<StatsAggregate>(key);
8080

81+
/// <summary>
/// Returns the result of calling <c>TryGet</c> for <paramref name="key"/>, typed as a <see cref="StringStatsAggregate"/>.
/// </summary>
/// <param name="key">The name of the aggregation to retrieve.</param>
public StringStatsAggregate StringStats(string key)
{
    return TryGet<StringStatsAggregate>(key);
}
82+
8183
public StatsAggregate StatsBucket(string key) => TryGet<StatsAggregate>(key);
8284

8385
public ExtendedStatsAggregate ExtendedStats(string key) => TryGet<ExtendedStatsAggregate>(key);

src/Nest/Aggregations/AggregateFormatter.cs

+57-4
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ internal class AggregateFormatter : IJsonFormatter<IAggregate>
3636

3737
private static readonly byte[] KeysField = JsonWriter.GetEncodedPropertyNameWithoutQuotation(Parser.Keys);
3838
private static readonly byte[] MetaField = JsonWriter.GetEncodedPropertyNameWithoutQuotation(Parser.Meta);
39+
private static readonly byte[] MinLengthField = JsonWriter.GetEncodedPropertyNameWithoutQuotation(Parser.MinLength);
3940

4041
private static readonly AutomataDictionary RootFields = new AutomataDictionary
4142
{
@@ -133,7 +134,7 @@ private IAggregate ReadAggregate(ref JsonReader reader, IJsonFormatterResolver f
133134
aggregate = GetMultiBucketAggregate(ref reader, formatterResolver, ref propertyName, meta);
134135
break;
135136
case 5:
136-
aggregate = GetStatsAggregate(ref reader, meta);
137+
aggregate = GetStatsAggregate(ref reader, formatterResolver, meta);
137138
break;
138139
case 6:
139140
aggregate = GetSingleBucketAggregate(ref reader, formatterResolver, meta);
@@ -397,16 +398,67 @@ private IAggregate GetSingleBucketAggregate(ref JsonReader reader, IJsonFormatte
397398
return new SingleBucketAggregate(subAggregates) { DocCount = docCount, Meta = meta };
398399
}
399400

400-
private IAggregate GetStatsAggregate(ref JsonReader reader, IReadOnlyDictionary<string, object> meta)
401+
/// <summary>
/// Reads the remainder of a string stats aggregation response, beginning at the value of "min_length",
/// and builds a <see cref="StringStatsAggregate"/> from it.
/// </summary>
/// <remarks>
/// Properties are consumed positionally in the order min_length, max_length, avg_length, entropy and,
/// when present, distribution. A numeric value that arrives as JSON null is read as zero instead of
/// failing the parse, and a null distribution leaves the aggregate's default (empty) distribution in place.
/// </remarks>
/// <param name="reader">The reader, positioned on the "min_length" value.</param>
/// <param name="formatterResolver">Resolver used to obtain the formatter for the distribution dictionary.</param>
/// <param name="meta">Metadata to attach to the returned aggregate.</param>
/// <param name="count">The count value already read by the caller.</param>
/// <returns>The populated <see cref="StringStatsAggregate"/>.</returns>
private IAggregate GetStringStatsAggregate(ref JsonReader reader, IJsonFormatterResolver formatterResolver,
    IReadOnlyDictionary<string, object> meta, long count
)
{
    // string stats aggregation
    // NOTE(review): the statistics may arrive as JSON null (presumably when no values were collected —
    // confirm against the server response format), so each number is read null-tolerantly.
    var minLength = reader.ReadIsNull() ? 0 : reader.ReadInt32();
    reader.ReadNext(); // ,
    reader.ReadNext(); // "max_length"
    reader.ReadNext(); // :
    var maxLength = reader.ReadIsNull() ? 0 : reader.ReadInt32();
    reader.ReadNext(); // ,
    reader.ReadNext(); // "avg_length"
    reader.ReadNext(); // :
    var avgLength = reader.ReadNullableDouble().GetValueOrDefault(0d);
    reader.ReadNext(); // ,
    reader.ReadNext(); // "entropy"
    reader.ReadNext(); // :
    var entropy = reader.ReadNullableDouble().GetValueOrDefault(0d);

    var aggregate = new StringStatsAggregate
    {
        Meta = meta,
        Count = count,
        MinLength = minLength,
        MaxLength = maxLength,
        AverageLength = avgLength,
        Entropy = entropy
    };

    if (reader.ReadIsValueSeparator())
    {
        reader.ReadNext(); // "distribution"
        reader.ReadNext(); // :
        var distribution = formatterResolver
            .GetFormatter<IReadOnlyDictionary<string, double>>()
            .Deserialize(ref reader, formatterResolver);

        // Only replace the default empty dictionary when a distribution was actually returned;
        // a null value must not overwrite it.
        if (distribution != null)
            aggregate.Distribution = distribution;
    }

    return aggregate;
}
444+
445+
private IAggregate GetStatsAggregate(ref JsonReader reader, IJsonFormatterResolver formatterResolver, IReadOnlyDictionary<string, object> meta
446+
)
401447
{
402448
var count = reader.ReadNullableLong().GetValueOrDefault(0);
403449

404450
if (reader.GetCurrentJsonToken() == JsonToken.EndObject)
405451
return new GeoCentroidAggregate { Count = count, Meta = meta };
406452

407453
reader.ReadNext(); // ,
408-
reader.ReadNext(); // "min"
409-
reader.ReadNext(); // :
454+
455+
var property = reader.ReadPropertyNameSegmentRaw();
456+
457+
// string stats aggregation
458+
if (property.EqualsBytes(MinLengthField))
459+
return GetStringStatsAggregate(ref reader, formatterResolver, meta, count);
460+
461+
// stats or extended stats aggregation
410462
var min = reader.ReadNullableDouble();
411463
reader.ReadNext(); // ,
412464
reader.ReadNext(); // "max"
@@ -930,6 +982,7 @@ private static class Parser
930982
public const string Location = "location";
931983
public const string MaxScore = "max_score";
932984
public const string Meta = "meta";
985+
public const string MinLength = "min_length";
933986

934987
public const string Score = "score";
935988

src/Nest/Aggregations/AggregationContainer.cs

+13
Original file line numberDiff line numberDiff line change
@@ -259,6 +259,9 @@ public interface IAggregationContainer
259259
[DataMember(Name = "median_absolute_deviation")]
260260
IMedianAbsoluteDeviationAggregation MedianAbsoluteDeviation { get; set; }
261261

262+
[DataMember(Name = "string_stats")]
263+
IStringStatsAggregation StringStats { get; set; }
264+
262265
void Accept(IAggregationVisitor visitor);
263266
}
264267

@@ -377,6 +380,8 @@ public class AggregationContainer : IAggregationContainer
377380

378381
public IMedianAbsoluteDeviationAggregation MedianAbsoluteDeviation { get; set; }
379382

383+
public IStringStatsAggregation StringStats { get; set; }
384+
380385
public void Accept(IAggregationVisitor visitor)
381386
{
382387
if (visitor.Scope == AggregationVisitorScope.Unknown) visitor.Scope = AggregationVisitorScope.Aggregation;
@@ -526,6 +531,8 @@ public class AggregationContainerDescriptor<T> : DescriptorBase<AggregationConta
526531

527532
IMedianAbsoluteDeviationAggregation IAggregationContainer.MedianAbsoluteDeviation { get; set; }
528533

534+
IStringStatsAggregation IAggregationContainer.StringStats { get; set; }
535+
529536
public void Accept(IAggregationVisitor visitor)
530537
{
531538
if (visitor.Scope == AggregationVisitorScope.Unknown) visitor.Scope = AggregationVisitorScope.Aggregation;
@@ -818,6 +825,12 @@ Func<MedianAbsoluteDeviationAggregationDescriptor<T>, IMedianAbsoluteDeviationAg
818825
) =>
819826
_SetInnerAggregation(name, selector, (a, d) => a.MedianAbsoluteDeviation = d);
820827

828+
/// <inheritdoc cref="IStringStatsAggregation"/>
public AggregationContainerDescriptor<T> StringStats(
    string name,
    Func<StringStatsAggregationDescriptor<T>, IStringStatsAggregation> selector
)
{
    // Store the aggregation built by the selector on the container's StringStats slot.
    return _SetInnerAggregation(name, selector, (container, aggregation) => container.StringStats = aggregation);
}
833+
821834
/// <summary>
822835
/// Fluent methods do not assign to properties on `this` directly but on IAggregationContainers inside
823836
/// `this.Aggregations[string, IContainer]
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
using System.Collections.Generic;
2+
using Elasticsearch.Net;
3+
4+
namespace Nest
{
    /// <summary>
    /// Holds the values returned for a string stats aggregation.
    /// </summary>
    public class StringStatsAggregate : MetricAggregateBase
    {
        /// <summary>
        /// Average term length, computed across every term.
        /// </summary>
        public double AverageLength { get; set; }

        /// <summary>
        /// How many non-empty fields were counted.
        /// </summary>
        public long Count { get; set; }

        /// <summary>
        /// Length of the longest term.
        /// </summary>
        public int MaxLength { get; set; }

        /// <summary>
        /// Length of the shortest term.
        /// </summary>
        public int MinLength { get; set; }

        /// <summary>
        /// Shannon entropy computed over all terms collected by the aggregation.
        /// <para />
        /// Shannon entropy quantifies the amount of information contained in the field, which makes it
        /// useful for measuring properties of a data set such as diversity, similarity and randomness.
        /// </summary>
        public double Entropy { get; set; }

        /// <summary>
        /// Probability of each character appearing in all terms. Initialised to an empty dictionary.
        /// </summary>
        public IReadOnlyDictionary<string, double> Distribution { get; set; } =
            EmptyReadOnly<string, double>.Dictionary;
    }
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
using System;
2+
using System.Collections.Generic;
3+
using System.Linq.Expressions;
4+
using System.Runtime.Serialization;
5+
using Elasticsearch.Net.Utf8Json;
6+
7+
namespace Nest
8+
{
9+
/// <summary>
/// A multi-value metrics aggregation computing statistics over string values extracted from the
/// aggregated documents. The values can come from specific keyword fields in the documents or be
/// generated by a provided script.
/// <para />
/// Available in Elasticsearch 7.6.0+ with at least basic license level
/// </summary>
[InterfaceDataContract]
[ReadAs(typeof(StringStatsAggregation))]
public interface IStringStatsAggregation : IAggregation
{
    /// <summary>
    /// The field the aggregation is performed on
    /// </summary>
    [DataMember(Name = "field")]
    Field Field { get; set; }

    /// <summary>
    /// The value used for documents that have no value for the field
    /// </summary>
    [DataMember(Name = "missing")]
    object Missing { get; set; }

    /// <summary>
    /// A script on which the string stats are computed
    /// </summary>
    [DataMember(Name = "script")]
    IScript Script { get; set; }

    /// <summary>
    /// Whether the response should include the probability distribution for all characters.
    /// </summary>
    [DataMember(Name = "show_distribution")]
    bool? ShowDistribution { get; set; }
}
43+
44+
/// <inheritdoc cref="IStringStatsAggregation"/>
public class StringStatsAggregation : AggregationBase, IStringStatsAggregation
{
    // Parameterless constructor, internal to the assembly.
    internal StringStatsAggregation() { }

    /// <summary>
    /// Creates a string stats aggregation called <paramref name="name"/> over <paramref name="field"/>.
    /// </summary>
    /// <param name="name">The aggregation name, passed to the base constructor.</param>
    /// <param name="field">The field assigned to <see cref="Field"/>.</param>
    public StringStatsAggregation(string name, Field field) : base(name)
    {
        Field = field;
    }

    // Registers this instance as the container's string stats aggregation.
    internal override void WrapInContainer(AggregationContainer c)
    {
        c.StringStats = this;
    }

    /// <inheritdoc />
    public Field Field { get; set; }

    /// <inheritdoc />
    public object Missing { get; set; }

    /// <inheritdoc />
    public IScript Script { get; set; }

    /// <inheritdoc />
    public bool? ShowDistribution { get; set; }
}
65+
66+
/// <inheritdoc cref="IStringStatsAggregation"/>
public class StringStatsAggregationDescriptor<T>
    : DescriptorBase<StringStatsAggregationDescriptor<T>, IStringStatsAggregation>, IStringStatsAggregation
    where T : class
{
    // Explicit interface state written by the fluent methods below.
    Field IStringStatsAggregation.Field { get; set; }
    IDictionary<string, object> IAggregation.Meta { get; set; }
    object IStringStatsAggregation.Missing { get; set; }
    string IAggregation.Name { get; set; }

    IScript IStringStatsAggregation.Script { get; set; }

    bool? IStringStatsAggregation.ShowDistribution { get; set; }

    /// <inheritdoc cref="IStringStatsAggregation.Field"/>
    public StringStatsAggregationDescriptor<T> Field(Field field) =>
        Assign(field, (aggregation, value) => aggregation.Field = value);

    /// <inheritdoc cref="IStringStatsAggregation.Field"/>
    public StringStatsAggregationDescriptor<T> Field<TValue>(Expression<Func<T, TValue>> field) =>
        Assign(field, (aggregation, value) => aggregation.Field = value);

    /// <inheritdoc cref="IStringStatsAggregation.Script"/>
    public StringStatsAggregationDescriptor<T> Script(string script)
    {
        // Convert the string to an inline script before assigning it.
        var inlineScript = (InlineScript)script;
        return Assign(inlineScript, (aggregation, value) => aggregation.Script = value);
    }

    /// <inheritdoc cref="IStringStatsAggregation.Script"/>
    public StringStatsAggregationDescriptor<T> Script(Func<ScriptDescriptor, IScript> scriptSelector) =>
        Assign(scriptSelector, (aggregation, build) => aggregation.Script = build?.Invoke(new ScriptDescriptor()));

    /// <inheritdoc cref="IStringStatsAggregation.Missing"/>
    public StringStatsAggregationDescriptor<T> Missing(object missing) =>
        Assign(missing, (aggregation, value) => aggregation.Missing = value);

    /// <inheritdoc cref="IAggregation.Meta"/>
    public StringStatsAggregationDescriptor<T> Meta(Func<FluentDictionary<string, object>, FluentDictionary<string, object>> selector) =>
        Assign(selector, (aggregation, build) => aggregation.Meta = build?.Invoke(new FluentDictionary<string, object>()));

    /// <inheritdoc cref="IStringStatsAggregation.ShowDistribution"/>
    public StringStatsAggregationDescriptor<T> ShowDistribution(bool? showDistribution = true) =>
        Assign(showDistribution, (aggregation, value) => aggregation.ShowDistribution = value);
}
104+
}

src/Nest/Aggregations/Visitor/AggregationVisitor.cs

+4
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,8 @@ public interface IAggregationVisitor
127127
void Visit(ICompositeAggregation aggregation);
128128

129129
void Visit(IMedianAbsoluteDeviationAggregation aggregation);
130+
131+
void Visit(IStringStatsAggregation aggregation);
130132
}
131133

132134
public class AggregationVisitor : IAggregationVisitor
@@ -239,6 +241,8 @@ public virtual void Visit(ICompositeAggregation aggregation) { }
239241

240242
public virtual void Visit(IMedianAbsoluteDeviationAggregation aggregation) { }
241243

244+
/// <summary>
/// Visits a string stats aggregation. This implementation is empty.
/// </summary>
/// <param name="aggregation">The aggregation being visited.</param>
public virtual void Visit(IStringStatsAggregation aggregation)
{
}
245+
242246
public virtual void Visit(IAggregation aggregation) { }
243247

244248
public virtual void Visit(IAggregationContainer aggregationContainer) { }

0 commit comments

Comments
 (0)