From 4623489c697d91b73d72facd644d338bc441961c Mon Sep 17 00:00:00 2001 From: Russ Cam Date: Wed, 29 Aug 2018 10:21:57 +1000 Subject: [PATCH] Support Ids in MultiTermVectors API This commit adds support for providing a set of Ids to MultiTermVectors API to be used in conjunction with index and type provided in the URI. Index() and Type() methods added to MultiTermVectorOperation to allow the default typeof(T) values to be overridden. Closes #3219 --- .../ElasticClient-MultiTermVectors.cs | 6 +- .../MultiTermVectorOperation.cs | 123 +++++++++++++++++- .../MultiTermVectorsRequest.cs | 55 +++++++- .../MultiTermVectorsApiTests.cs | 84 +++++++++++- 4 files changed, 251 insertions(+), 17 deletions(-) diff --git a/src/Nest/Document/Multiple/MultiTermVectors/ElasticClient-MultiTermVectors.cs b/src/Nest/Document/Multiple/MultiTermVectors/ElasticClient-MultiTermVectors.cs index a592e6acf18..889cda86a14 100644 --- a/src/Nest/Document/Multiple/MultiTermVectors/ElasticClient-MultiTermVectors.cs +++ b/src/Nest/Document/Multiple/MultiTermVectors/ElasticClient-MultiTermVectors.cs @@ -14,13 +14,13 @@ public partial interface IElasticClient /// The descriptor describing the multi termvectors operation IMultiTermVectorsResponse MultiTermVectors(Func selector = null); - /// + /// IMultiTermVectorsResponse MultiTermVectors(IMultiTermVectorsRequest request); - /// + /// Task MultiTermVectorsAsync(Func selector = null, CancellationToken cancellationToken = default(CancellationToken)); - /// + /// Task MultiTermVectorsAsync(IMultiTermVectorsRequest request, CancellationToken cancellationToken = default(CancellationToken)); } diff --git a/src/Nest/Document/Multiple/MultiTermVectors/MultiTermVectorOperation.cs b/src/Nest/Document/Multiple/MultiTermVectors/MultiTermVectorOperation.cs index 938f6aee29f..e5ca292d491 100644 --- a/src/Nest/Document/Multiple/MultiTermVectors/MultiTermVectorOperation.cs +++ b/src/Nest/Document/Multiple/MultiTermVectors/MultiTermVectorOperation.cs @@ -4,42 +4,121 @@ 
namespace Nest { + /// + /// An operation to define the calculation of + /// term vectors when using Multi termvectors API + /// public interface IMultiTermVectorOperation { + /// + /// The index in which the document resides + /// [JsonProperty("_index")] IndexName Index { get; set; } + + /// + /// The type of the document + /// [JsonProperty("_type")] TypeName Type { get; set; } + + /// + /// The id of the document + /// [JsonProperty("_id")] Id Id { get; set; } + + /// + /// A document not indexed in Elasticsearch, + /// to generate term vectors for + /// [JsonProperty("doc")] [JsonConverter(typeof(SourceConverter))] object Document { get; set; } + + /// + /// The document field to generate term + /// vectors for + /// [JsonProperty("fields")] + // TODO: Rename to Fields in 7.x Fields StoredFields { get; set; } + + /// + /// Whether to include the start and end offsets. + /// Default is true. + /// [JsonProperty("offsets")] bool? Offsets { get; set; } + + /// + /// Whether to include the term payloads as + /// base64 encoded bytes. Default is true + /// [JsonProperty("payloads")] bool? Payloads { get; set; } + + /// + /// Whether to include the term positions. + /// Default is true + /// [JsonProperty("positions")] bool? Positions { get; set; } + + /// + /// Whether to include term statistics. When set to true, + /// - total term frequency (how often a term occurs in all documents) + /// - document frequency (the number of documents containing the current term) + /// will be returned. Default is false since + /// term statistics can have a large performance impact. + /// [JsonProperty("term_statistics")] bool? TermStatistics { get; set; } + + /// + /// Whether to include field statistics. 
When set to false, + /// - document count (how many documents contain this field) + /// - sum of document frequencies (the sum of document frequencies for all terms in this field) + /// - sum of total term frequencies (the sum of total term frequencies of each term in this field) + /// will be omitted. Default is true. + /// [JsonProperty("field_statistics")] bool? FieldStatistics { get; set; } + + /// + /// Filter terms based on their tf-idf scores. + /// This can be useful in order to find out a good characteristic + /// vector of a document. + /// [JsonProperty("filter")] ITermVectorFilter Filter { get; set; } + + /// + /// The version number + /// [JsonProperty("version")] long? Version { get; set; } + + /// + /// The type of version + /// [JsonProperty("version_type")] VersionType? VersionType { get; set; } + + /// + /// When requesting term vectors for <see cref="Document"/>, + /// a shard to get the statistics from is randomly selected. + /// Use <see cref="Routing"/> only to hit a particular shard. + /// [JsonProperty("routing")] Routing Routing { get; set; } } + /// public class MultiTermVectorOperation : IMultiTermVectorOperation where T : class { + private Routing _routing; public MultiTermVectorOperation(Id id) { @@ -48,21 +127,33 @@ public MultiTermVectorOperation(Id id) this.Type = typeof (T); } + /// public IndexName Index { get; set; } + /// public TypeName Type { get; set; } + /// public Id Id { get; set; } + /// public object Document { get; set; } + /// public Fields StoredFields { get; set; } + /// public bool? Offsets { get; set; } + /// public bool? Payloads { get; set; } + /// public bool? Positions { get; set; } + /// public bool? TermStatistics { get; set; } + /// public bool? FieldStatistics { get; set; } + /// public ITermVectorFilter Filter { get; set; } + /// public long? Version { get; set; } + /// public VersionType? VersionType { get; set; } - - private Routing _routing; + /// public Routing Routing { get => _routing ?? (Document == null ? 
null : new Routing(Document)); @@ -70,9 +161,12 @@ public Routing Routing } } + /// public class MultiTermVectorOperationDescriptor : DescriptorBase, IMultiTermVectorOperation>, IMultiTermVectorOperation where T : class { + private Routing _routing; + IndexName IMultiTermVectorOperation.Index { get; set; } = typeof (T); TypeName IMultiTermVectorOperation.Type { get; set; } = typeof (T); Id IMultiTermVectorOperation.Id { get; set; } @@ -86,40 +180,59 @@ public class MultiTermVectorOperationDescriptor : DescriptorBase _routing ?? (Self.Document == null ? null : new Routing(Self.Document)); set => _routing = value; } + /// + // TODO: Rename to Fields in 7.x public MultiTermVectorOperationDescriptor StoredFields(Func, IPromise> fields) => Assign(a => a.StoredFields = fields?.Invoke(new FieldsDescriptor())?.Value); + /// + // TODO: Rename to Fields in 7.x public MultiTermVectorOperationDescriptor StoredFields(Fields fields) => Assign(a => a.StoredFields = fields); - public MultiTermVectorOperationDescriptor Id(Id id) => Assign(a=>a.Id = id); + /// + public MultiTermVectorOperationDescriptor Id(Id id) => Assign(a=> a.Id = id); + + /// + public MultiTermVectorOperationDescriptor Index(IndexName index) => Assign(a => a.Index = index); + + /// + public MultiTermVectorOperationDescriptor Type(TypeName type) => Assign(a=> a.Type = type); + /// public MultiTermVectorOperationDescriptor Document(T document) => Assign(a => a.Document = document); + /// public MultiTermVectorOperationDescriptor Offsets(bool? offsets = true) => Assign(a => a.Offsets = offsets); + /// public MultiTermVectorOperationDescriptor Payloads(bool? payloads = true) => Assign(a => a.Payloads = payloads); + /// public MultiTermVectorOperationDescriptor Positions(bool? positions = true) => Assign(a => a.Positions = positions); + /// public MultiTermVectorOperationDescriptor TermStatistics(bool? 
termStatistics = true) => Assign(a => a.TermStatistics = termStatistics); + /// public MultiTermVectorOperationDescriptor FieldStatistics(bool? fieldStatistics = true) => Assign(a => a.FieldStatistics = fieldStatistics); + /// public MultiTermVectorOperationDescriptor Filter(Func filterSelector) => Assign(a => a.Filter = filterSelector?.Invoke(new TermVectorFilterDescriptor())); + /// public MultiTermVectorOperationDescriptor Version(long? version) => Assign(a => a.Version = version); + /// public MultiTermVectorOperationDescriptor VersionType(VersionType? versionType) => Assign(a => a.VersionType = versionType); + /// public MultiTermVectorOperationDescriptor Routing(Routing routing) => Assign(a => a.Routing = routing); } } diff --git a/src/Nest/Document/Multiple/MultiTermVectors/MultiTermVectorsRequest.cs b/src/Nest/Document/Multiple/MultiTermVectors/MultiTermVectorsRequest.cs index 6e4f9274f4b..c385adb4d35 100644 --- a/src/Nest/Document/Multiple/MultiTermVectors/MultiTermVectorsRequest.cs +++ b/src/Nest/Document/Multiple/MultiTermVectors/MultiTermVectorsRequest.cs @@ -5,43 +5,84 @@ namespace Nest { + /// + /// A Multi termvectors API request + /// public partial interface IMultiTermVectorsRequest { + /// + /// The documents for which to generate term vectors + /// [JsonProperty("docs")] IEnumerable Documents { get; set; } + + /// + /// The ids of documents within the same index and type + /// for which to generate term vectors. Must be used in + /// conjunction with the index and type provided in the URI + /// + [JsonProperty("ids")] + IEnumerable Ids { get; set; } } + /// public partial class MultiTermVectorsRequest { + /// public IEnumerable Documents { get; set; } + + /// + public IEnumerable Ids { get; set; } } + /// [DescriptorFor("Mtermvectors")] public partial class MultiTermVectorsDescriptor { - private List _operations = new List(); + private List _operations; + + private List Operations => + this._operations ?? 
(this._operations = new List()); + IEnumerable IMultiTermVectorsRequest.Documents { - get { return this._operations; } - set { this._operations = value?.ToList(); } + get => this._operations; + set => this._operations = value?.ToList(); } + IEnumerable IMultiTermVectorsRequest.Ids { get; set; } + + // TODO: Rename to Documents in 7.x + /// + /// A document for which to generate term vectors + /// public MultiTermVectorsDescriptor Get(Func, IMultiTermVectorOperation> getSelector) where T : class => - Assign(a => this._operations.AddIfNotNull(getSelector?.Invoke(new MultiTermVectorOperationDescriptor()))); + Assign(a => this.Operations.AddIfNotNull(getSelector?.Invoke(new MultiTermVectorOperationDescriptor()))); + // TODO: Rename to Documents in 7.x + /// public MultiTermVectorsDescriptor GetMany(IEnumerable ids, Func, long, IMultiTermVectorOperation> getSelector = null) where T : class => - Assign(a => this._operations.AddRange(ids.Select(id => getSelector.InvokeOrDefault(new MultiTermVectorOperationDescriptor().Id(id), id)))); + Assign(a => this.Operations.AddRange(ids.Select(id => getSelector.InvokeOrDefault(new MultiTermVectorOperationDescriptor().Id(id), id)))); + // TODO: Rename to Documents in 7.x + /// public MultiTermVectorsDescriptor GetMany(IEnumerable ids, Func, string, IMultiTermVectorOperation> getSelector = null) where T : class => - Assign(a => this._operations.AddRange(ids.Select(id => getSelector.InvokeOrDefault(new MultiTermVectorOperationDescriptor().Id(id), id)))); + Assign(a => this.Operations.AddRange(ids.Select(id => getSelector.InvokeOrDefault(new MultiTermVectorOperationDescriptor().Id(id), id)))); + // TODO: Rename to Documents in 7.x + /// public MultiTermVectorsDescriptor GetMany(IEnumerable ids, Func, Id, IMultiTermVectorOperation> getSelector = null) where T : class => - Assign(a => this._operations.AddRange(ids.Select(id => getSelector.InvokeOrDefault(new MultiTermVectorOperationDescriptor().Id(id), id)))); + Assign(a => 
this.Operations.AddRange(ids.Select(id => getSelector.InvokeOrDefault(new MultiTermVectorOperationDescriptor().Id(id), id)))); + + /// + public MultiTermVectorsDescriptor Ids(IEnumerable ids) => Assign(a => a.Ids = ids); + /// + public MultiTermVectorsDescriptor Ids(params Id[] ids) => Assign(a => a.Ids = ids); } } diff --git a/src/Tests/Tests/Document/Multiple/MultiTermVectors/MultiTermVectorsApiTests.cs b/src/Tests/Tests/Document/Multiple/MultiTermVectors/MultiTermVectorsApiTests.cs index 32f5e3e4951..936e23a5793 100644 --- a/src/Tests/Tests/Document/Multiple/MultiTermVectors/MultiTermVectorsApiTests.cs +++ b/src/Tests/Tests/Document/Multiple/MultiTermVectors/MultiTermVectorsApiTests.cs @@ -14,9 +14,9 @@ namespace Tests.Document.Multiple.MultiTermVectors { - public class MultiTermVectorsApiTests : ApiIntegrationTestBase + public class MultiTermVectorsDocsApiTests : ApiIntegrationTestBase { - public MultiTermVectorsApiTests(ReadOnlyCluster cluster, EndpointUsage usage) : base(cluster, usage) { } + public MultiTermVectorsDocsApiTests(ReadOnlyCluster cluster, EndpointUsage usage) : base(cluster, usage) { } protected override LazyResponses ClientUsage() => Calls( fluent: (client, f) => client.MultiTermVectors(f), fluentAsync: (client, f) => client.MultiTermVectorsAsync(f), @@ -123,4 +123,84 @@ private static void AssertTermVectors(TermVector vectors) }) }; } + + public class MultiTermVectorsIdsApiTests : ApiIntegrationTestBase + { + public MultiTermVectorsIdsApiTests(ReadOnlyCluster cluster, EndpointUsage usage) : base(cluster, usage) { } + protected override LazyResponses ClientUsage() => Calls( + fluent: (client, f) => client.MultiTermVectors(f), + fluentAsync: (client, f) => client.MultiTermVectorsAsync(f), + request: (client, r) => client.MultiTermVectors(r), + requestAsync: (client, r) => client.MultiTermVectorsAsync(r) + ); + + protected override bool ExpectIsValid => true; + protected override int ExpectStatusCode => 200; + protected override HttpMethod 
HttpMethod => HttpMethod.POST; + protected override string UrlPath => + $"/devs/developer/_mtermvectors?field_statistics=true&payloads=true&term_statistics=true&positions=true&offsets=true"; + + protected override bool SupportsDeserialization => false; + + protected override object ExpectJson { get; } = new + { + ids = Developer.Developers.Select(p => (Id)p.Id).Take(2) + }; + + protected override void ExpectResponse(IMultiTermVectorsResponse response) + { + response.ShouldBeValid(); + response.Documents.Should().NotBeEmpty().And.HaveCount(2).And.OnlyContain(d => d.Found); + var termvectorDoc = response.Documents.FirstOrDefault(d => d.TermVectors.Count > 0); + + termvectorDoc.Should().NotBeNull(); + termvectorDoc.Index.Should().NotBeNull(); + termvectorDoc.Type.Should().NotBeNull(); + termvectorDoc.Id.Should().NotBeNull(); + + termvectorDoc.TermVectors.Should().NotBeEmpty().And.ContainKey("firstName"); + var vectors = termvectorDoc.TermVectors["firstName"]; + AssertTermVectors(vectors); + + vectors = termvectorDoc.TermVectors[Field(p=>p.FirstName)]; + AssertTermVectors(vectors); + } + + private static void AssertTermVectors(TermVector vectors) + { + vectors.Terms.Should().NotBeEmpty(); + foreach (var vectorTerm in vectors.Terms) + { + vectorTerm.Key.Should().NotBeNullOrWhiteSpace(); + vectorTerm.Value.Should().NotBeNull(); + vectorTerm.Value.TermFrequency.Should().BeGreaterThan(0); + vectorTerm.Value.TotalTermFrequency.Should().BeGreaterThan(0); + vectorTerm.Value.Tokens.Should().NotBeEmpty(); + + var token = vectorTerm.Value.Tokens.First(); + token.EndOffset.Should().BeGreaterThan(0); + } + } + + protected override Func Fluent => d => d + .Index() + .Type() + .Ids(Developer.Developers.Select(p => (Id)p.Id).Take(2)) + .FieldStatistics() + .Payloads() + .TermStatistics() + .Positions() + .Offsets() + ; + + protected override MultiTermVectorsRequest Initializer => new MultiTermVectorsRequest(Index(), Type()) + { + Ids = Developer.Developers.Select(p => 
(Id)p.Id).Take(2), + FieldStatistics = true, + Payloads = true, + TermStatistics = true, + Positions = true, + Offsets = true + }; + } }