Skip to content

Commit 8cbc1fe

Browse files
authored
Return distinct items from GetMany and SourceMany (#4353)
This commit fixes a bug where a GetMany or SourceMany API call with a repeated id would return a cartesian product of id and documents. - enumerate only distinct ids when retrieving hits or source from GetMany and SourceMany, so that the same id input to either will return only a single document per target index. - fix a bug in the MultiGetRequestFormatter whereby the document index is removed when a request index is specified, without checking whether the document index matches the request index. Fixes #4342
1 parent 5bca3a8 commit 8cbc1fe

File tree

4 files changed

+185
-23
lines changed

4 files changed

+185
-23
lines changed

src/Nest/Document/Multiple/MultiGet/Request/MultiGetRequestJsonConverter.cs renamed to src/Nest/Document/Multiple/MultiGet/Request/MultiGetRequestFormatter.cs

+26-9
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
using System;
2+
using System.Collections.Generic;
23
using System.Linq;
34
using Elasticsearch.Net.Utf8Json;
45

@@ -20,12 +21,28 @@ public void Serialize(ref JsonWriter writer, IMultiGetRequest value, IJsonFormat
2021
return;
2122
}
2223

23-
var docs = value.Documents.Select(d =>
24-
{
25-
if (value.Index != null) d.Index = null;
26-
return d;
27-
})
28-
.ToList();
24+
List<IMultiGetOperation> docs;
25+
26+
// if an index is specified at the request level and all documents have the same index, remove the index
27+
if (value.Index != null)
28+
{
29+
var settings = formatterResolver.GetConnectionSettings();
30+
var resolvedIndex = value.Index.GetString(settings);
31+
docs = value.Documents.Select(d =>
32+
{
33+
if (d.Index == null)
34+
return d;
35+
36+
// TODO: not nice to resolve index for each doc here for comparison, only for it to be resolved later in serialization.
37+
// Might be better to simply remove the flattening logic.
38+
var docIndex = d.Index.GetString(settings);
39+
if (string.Equals(resolvedIndex, docIndex)) d.Index = null;
40+
return d;
41+
})
42+
.ToList();
43+
}
44+
else
45+
docs = value.Documents.ToList();
2946

3047
var flatten = docs.All(p => p.CanBeFlattened);
3148

@@ -41,11 +58,11 @@ public void Serialize(ref JsonWriter writer, IMultiGetRequest value, IJsonFormat
4158
if (index > 0)
4259
writer.WriteValueSeparator();
4360

44-
var id = docs[index];
61+
var doc = docs[index];
4562
if (flatten)
46-
IdFormatter.Serialize(ref writer, id.Id, formatterResolver);
63+
IdFormatter.Serialize(ref writer, doc.Id, formatterResolver);
4764
else
48-
formatter.Serialize(ref writer, id, formatterResolver);
65+
formatter.Serialize(ref writer, doc, formatterResolver);
4966
}
5067
writer.WriteEndArray();
5168
writer.WriteEndObject();

src/Nest/Document/Multiple/MultiGet/Response/MultiGetHitJsonConverter.cs

+3
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,9 @@ public MultiGetResponse Deserialize(ref JsonReader reader, IJsonFormatterResolve
3636
responses.Add(reader.ReadNextBlockSegment());
3737
break;
3838
}
39+
40+
// skip any other properties that are not "docs"
41+
reader.ReadNextBlock();
3942
}
4043

4144
if (responses.Count == 0)

src/Nest/Document/Multiple/MultiGet/Response/MultiGetResponse.cs

+30-12
Original file line numberDiff line numberDiff line change
@@ -27,12 +27,28 @@ public FieldValues GetFieldValues<T>(string id) where T : class
2727
return multiHit?.Fields ?? FieldValues.Empty;
2828
}
2929

30+
/// <summary>
31+
/// Retrieves the hits for each distinct id.
32+
/// </summary>
33+
/// <param name="ids">The ids to retrieve source for</param>
34+
/// <typeparam name="T">The document type for the hits to return</typeparam>
35+
/// <returns>An IEnumerable{T} of hits</returns>
3036
public IEnumerable<IMultiGetHit<T>> GetMany<T>(IEnumerable<string> ids) where T : class
3137
{
32-
var docs = Hits.OfType<IMultiGetHit<T>>();
33-
return from d in docs
34-
join id in ids on d.Id equals id
35-
select d;
38+
HashSet<string> seenIndices = null;
39+
foreach (var id in ids.Distinct())
40+
{
41+
if (seenIndices == null)
42+
seenIndices = new HashSet<string>();
43+
else
44+
seenIndices.Clear();
45+
46+
foreach (var doc in Hits.OfType<IMultiGetHit<T>>())
47+
{
48+
if (string.Equals(doc.Id, id) && seenIndices.Add(doc.Index))
49+
yield return doc;
50+
}
51+
}
3652
}
3753

3854
public IEnumerable<IMultiGetHit<T>> GetMany<T>(IEnumerable<long> ids) where T : class =>
@@ -46,14 +62,16 @@ public T Source<T>(string id) where T : class
4662

4763
public T Source<T>(long id) where T : class => Source<T>(id.ToString(CultureInfo.InvariantCulture));
4864

49-
public IEnumerable<T> SourceMany<T>(IEnumerable<string> ids) where T : class
50-
{
51-
var docs = Hits.OfType<IMultiGetHit<T>>();
52-
return from d in docs
53-
join id in ids on d.Id equals id
54-
where d.Found
55-
select d.Source;
56-
}
65+
/// <summary>
66+
/// Retrieves the source, if available, for each distinct id.
67+
/// </summary>
68+
/// <param name="ids">The ids to retrieve source for</param>
69+
/// <typeparam name="T">The document type for the hits to return</typeparam>
70+
/// <returns>An IEnumerable{T} of sources</returns>
71+
public IEnumerable<T> SourceMany<T>(IEnumerable<string> ids) where T : class =>
72+
from hit in GetMany<T>(ids)
73+
where hit.Found
74+
select hit.Source;
5775

5876
public IEnumerable<T> SourceMany<T>(IEnumerable<long> ids) where T : class =>
5977
SourceMany<T>(ids.Select(i => i.ToString(CultureInfo.InvariantCulture)));

tests/Tests/Document/Multiple/MultiGet/GetManyApiTests.cs

+126-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
using System;
22
using System.Collections.Generic;
3+
using System.Globalization;
34
using System.Linq;
45
using System.Threading.Tasks;
56
using Elastic.Xunit.XunitPlumbing;
@@ -12,12 +13,12 @@
1213

1314
namespace Tests.Document.Multiple.MultiGet
1415
{
15-
public class GetManyApiTests : IClusterFixture<ReadOnlyCluster>
16+
public class GetManyApiTests : IClusterFixture<WritableCluster>
1617
{
1718
private readonly IElasticClient _client;
1819
private readonly IEnumerable<long> _ids = Developer.Developers.Select(d => d.Id).Take(10);
1920

20-
public GetManyApiTests(ReadOnlyCluster cluster) => _client = cluster.Client;
21+
public GetManyApiTests(WritableCluster cluster) => _client = cluster.Client;
2122

2223
[I] public void UsesDefaultIndexAndInferredType()
2324
{
@@ -43,6 +44,129 @@ [I] public async Task UsesDefaultIndexAndInferredTypeAsync()
4344
}
4445
}
4546

47+
[I] public async Task ReturnsDocMatchingDistinctIds()
48+
{
49+
var id = _ids.First();
50+
51+
var response = await _client.GetManyAsync<Developer>(new[] { id, id, id });
52+
response.Count().Should().Be(1);
53+
foreach (var hit in response)
54+
{
55+
hit.Index.Should().NotBeNullOrWhiteSpace();
56+
hit.Id.Should().Be(id.ToString(CultureInfo.InvariantCulture));
57+
hit.Found.Should().BeTrue();
58+
}
59+
}
60+
61+
[I] public void ReturnsDocsMatchingDistinctIdsFromDifferentIndices()
62+
{
63+
var developerIndex = Nest.Indices.Index<Developer>();
64+
var indexName = developerIndex.GetString(_client.ConnectionSettings);
65+
var reindexName = $"{indexName}-getmany-distinctids";
66+
67+
var reindexResponse = _client.ReindexOnServer(r => r
68+
.Source(s => s
69+
.Index(developerIndex)
70+
.Query<Developer>(q => q
71+
.Ids(ids => ids.Values(_ids))
72+
)
73+
)
74+
.Destination(d => d
75+
.Index(reindexName))
76+
.Refresh()
77+
);
78+
79+
if (!reindexResponse.IsValid)
80+
throw new Exception($"problem reindexing documents for integration test: {reindexResponse.DebugInformation}");
81+
82+
var id = _ids.First();
83+
84+
var multiGetResponse = _client.MultiGet(s => s
85+
.RequestConfiguration(r => r.ThrowExceptions())
86+
.Get<Developer>(m => m
87+
.Id(id)
88+
.Index(indexName)
89+
)
90+
.Get<Developer>(m => m
91+
.Id(id)
92+
.Index(reindexName)
93+
)
94+
);
95+
96+
var response = multiGetResponse.GetMany<Developer>(new [] { id, id });
97+
98+
response.Count().Should().Be(2);
99+
foreach (var hit in response)
100+
{
101+
hit.Index.Should().NotBeNullOrWhiteSpace();
102+
hit.Id.Should().NotBeNullOrWhiteSpace();
103+
hit.Found.Should().BeTrue();
104+
}
105+
}
106+
107+
[I] public void ReturnsDocsMatchingDistinctIdsFromDifferentIndicesWithRequestLevelIndex()
108+
{
109+
var developerIndex = Nest.Indices.Index<Developer>();
110+
var indexName = developerIndex.GetString(_client.ConnectionSettings);
111+
var reindexName = $"{indexName}-getmany-distinctidsindex";
112+
113+
var reindexResponse = _client.ReindexOnServer(r => r
114+
.Source(s => s
115+
.Index(developerIndex)
116+
.Query<Developer>(q => q
117+
.Ids(ids => ids.Values(_ids))
118+
)
119+
)
120+
.Destination(d => d
121+
.Index(reindexName))
122+
.Refresh()
123+
);
124+
125+
if (!reindexResponse.IsValid)
126+
throw new Exception($"problem reindexing documents for integration test: {reindexResponse.DebugInformation}");
127+
128+
var id = _ids.First();
129+
130+
var multiGetResponse = _client.MultiGet(s => s
131+
.Index(indexName)
132+
.RequestConfiguration(r => r.ThrowExceptions())
133+
.Get<Developer>(m => m
134+
.Id(id)
135+
)
136+
.Get<Developer>(m => m
137+
.Id(id)
138+
.Index(reindexName)
139+
)
140+
);
141+
142+
var response = multiGetResponse.GetMany<Developer>(new [] { id, id });
143+
144+
response.Count().Should().Be(2);
145+
var seenIndices = new HashSet<string>(2);
146+
147+
foreach (var hit in response)
148+
{
149+
hit.Index.Should().NotBeNullOrWhiteSpace();
150+
seenIndices.Add(hit.Index);
151+
hit.Id.Should().NotBeNullOrWhiteSpace();
152+
hit.Found.Should().BeTrue();
153+
}
154+
155+
seenIndices.Should().HaveCount(2).And.Contain(new [] { indexName, reindexName });
156+
}
157+
158+
[I] public async Task ReturnsSourceMatchingDistinctIds()
159+
{
160+
var id = _ids.First();
161+
162+
var sources = await _client.SourceManyAsync<Developer>(new[] { id, id, id });
163+
sources.Count().Should().Be(1);
164+
foreach (var hit in sources)
165+
{
166+
hit.Id.Should().Be(id);
167+
}
168+
}
169+
46170
[I] public async Task CanHandleNotFoundResponses()
47171
{
48172
var response = await _client.GetManyAsync<Developer>(_ids.Select(i => i * 100));

0 commit comments

Comments
 (0)