Skip to content

Support Ids in MultiTermVectors API #3382

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Aug 29, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,13 @@ public partial interface IElasticClient
/// <param name="selector">The descriptor describing the multi termvectors operation</param>
IMultiTermVectorsResponse MultiTermVectors(Func<MultiTermVectorsDescriptor, IMultiTermVectorsRequest> selector = null);

/// <inheritdoc/>
/// <inheritdoc cref="MultiTermVectors(System.Func{Nest.MultiTermVectorsDescriptor,Nest.IMultiTermVectorsRequest})"/>
IMultiTermVectorsResponse MultiTermVectors(IMultiTermVectorsRequest request);

/// <inheritdoc/>
/// <inheritdoc cref="MultiTermVectors(System.Func{Nest.MultiTermVectorsDescriptor,Nest.IMultiTermVectorsRequest})"/>
Task<IMultiTermVectorsResponse> MultiTermVectorsAsync(Func<MultiTermVectorsDescriptor, IMultiTermVectorsRequest> selector = null, CancellationToken cancellationToken = default(CancellationToken));

/// <inheritdoc/>
/// <inheritdoc cref="MultiTermVectors(System.Func{Nest.MultiTermVectorsDescriptor,Nest.IMultiTermVectorsRequest})"/>
Task<IMultiTermVectorsResponse> MultiTermVectorsAsync(IMultiTermVectorsRequest request, CancellationToken cancellationToken = default(CancellationToken));
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,42 +4,121 @@

namespace Nest
{
/// <summary>
/// A single operation within a Multi termvectors API request:
/// identifies the document to calculate term vectors for and
/// the options controlling what is included in the result.
/// </summary>
public interface IMultiTermVectorOperation
{
/// <summary>
/// The index in which the document resides
/// </summary>
[JsonProperty("_index")]
IndexName Index { get; set; }

/// <summary>
/// The type of the document
/// </summary>
[JsonProperty("_type")]
TypeName Type { get; set; }

/// <summary>
/// The id of the document
/// </summary>
[JsonProperty("_id")]
Id Id { get; set; }

/// <summary>
/// A document not indexed in Elasticsearch,
/// to generate term vectors for
/// </summary>
[JsonProperty("doc")]
[JsonConverter(typeof(SourceConverter))]
object Document { get; set; }

/// <summary>
/// The document fields to generate term
/// vectors for. Serialized as <c>fields</c>,
/// despite the property name.
/// </summary>
[JsonProperty("fields")]
// TODO: Rename to Fields in 7.x
Fields StoredFields { get; set; }

/// <summary>
/// Whether to include the start and end offsets.
/// Default is <c>true</c>.
/// </summary>
[JsonProperty("offsets")]
bool? Offsets { get; set; }

/// <summary>
/// Whether to include the term payloads as
/// base64 encoded bytes. Default is <c>true</c>.
/// </summary>
[JsonProperty("payloads")]
bool? Payloads { get; set; }

/// <summary>
/// Whether to include the term positions.
/// Default is <c>true</c>.
/// </summary>
[JsonProperty("positions")]
bool? Positions { get; set; }

/// <summary>
/// Whether to include term statistics. When set to <c>true</c>,
/// <para />- total term frequency (how often a term occurs in all documents)
/// <para />- document frequency (the number of documents containing the current term)
/// <para />will be returned. Default is <c>false</c> since
/// term statistics can have a large performance impact.
/// </summary>
[JsonProperty("term_statistics")]
bool? TermStatistics { get; set; }

/// <summary>
/// Whether to include field statistics. When set to <c>false</c>,
/// <para />- document count (how many documents contain this field)
/// <para />- sum of document frequencies (the sum of document frequencies for all terms in this field)
/// <para />- sum of total term frequencies (the sum of total term frequencies of each term in this field)
/// <para />will be omitted. Default is <c>true</c>.
/// </summary>
[JsonProperty("field_statistics")]
bool? FieldStatistics { get; set; }

/// <summary>
/// Filter terms based on their tf-idf scores.
/// This can be useful in order to find a good characteristic
/// vector of a document.
/// </summary>
[JsonProperty("filter")]
ITermVectorFilter Filter { get; set; }

/// <summary>
/// The version number
/// </summary>
[JsonProperty("version")]
long? Version { get; set; }

/// <summary>
/// The type of version
/// </summary>
[JsonProperty("version_type")]
VersionType? VersionType { get; set; }

/// <summary>
/// When requesting term vectors for <see cref="Document"/>,
/// a shard to get the statistics from is randomly selected.
/// Use <see cref="Routing"/> only to hit a particular shard.
/// </summary>
[JsonProperty("routing")]
Routing Routing { get; set; }
}

/// <inheritdoc />
public class MultiTermVectorOperation<T> : IMultiTermVectorOperation
where T : class
{
private Routing _routing;

public MultiTermVectorOperation(Id id)
{
Expand All @@ -48,31 +127,46 @@ public MultiTermVectorOperation(Id id)
this.Type = typeof (T);
}

/// <inheritdoc />
public IndexName Index { get; set; }
/// <inheritdoc />
public TypeName Type { get; set; }
/// <inheritdoc />
public Id Id { get; set; }
/// <inheritdoc />
public object Document { get; set; }
/// <inheritdoc />
public Fields StoredFields { get; set; }
/// <inheritdoc />
public bool? Offsets { get; set; }
/// <inheritdoc />
public bool? Payloads { get; set; }
/// <inheritdoc />
public bool? Positions { get; set; }
/// <inheritdoc />
public bool? TermStatistics { get; set; }
/// <inheritdoc />
public bool? FieldStatistics { get; set; }
/// <inheritdoc />
public ITermVectorFilter Filter { get; set; }
/// <inheritdoc />
public long? Version { get; set; }
/// <inheritdoc />
public VersionType? VersionType { get; set; }

private Routing _routing;
/// <inheritdoc />
/// <remarks>
/// An explicitly assigned routing always takes precedence. When none has been
/// assigned and <see cref="Document"/> is set, a new routing is constructed
/// from that document; otherwise <c>null</c> is returned.
/// </remarks>
public Routing Routing
{
    get
    {
        // Only fall back to the document when no routing was assigned explicitly.
        if (_routing is null && Document != null)
        {
            return new Routing(Document);
        }

        return _routing;
    }
    set { _routing = value; }
}
}

/// <inheritdoc cref="IMultiTermVectorOperation"/>
public class MultiTermVectorOperationDescriptor<T> : DescriptorBase<MultiTermVectorOperationDescriptor<T>, IMultiTermVectorOperation>, IMultiTermVectorOperation
where T : class
{
private Routing _routing;

IndexName IMultiTermVectorOperation.Index { get; set; } = typeof (T);
TypeName IMultiTermVectorOperation.Type { get; set; } = typeof (T);
Id IMultiTermVectorOperation.Id { get; set; }
Expand All @@ -86,40 +180,59 @@ public class MultiTermVectorOperationDescriptor<T> : DescriptorBase<MultiTermVec
ITermVectorFilter IMultiTermVectorOperation.Filter { get; set; }
long? IMultiTermVectorOperation.Version { get; set; }
VersionType? IMultiTermVectorOperation.VersionType { get; set; }

private Routing _routing;
// An explicitly assigned routing always takes precedence. When none has been
// assigned and the descriptor's Document is set, a new routing is constructed
// from that document; otherwise null is returned.
Routing IMultiTermVectorOperation.Routing
{
    get
    {
        if (_routing is null && Self.Document != null)
        {
            return new Routing(Self.Document);
        }

        return _routing;
    }
    set { _routing = value; }
}

/// <inheritdoc cref="IMultiTermVectorOperation.StoredFields"/>
// TODO: Rename to Fields in 7.x
public MultiTermVectorOperationDescriptor<T> StoredFields(Func<FieldsDescriptor<T>, IPromise<Fields>> fields) =>
Assign(a => a.StoredFields = fields?.Invoke(new FieldsDescriptor<T>())?.Value);

/// <inheritdoc cref="IMultiTermVectorOperation.StoredFields"/>
// TODO: Rename to Fields in 7.x
public MultiTermVectorOperationDescriptor<T> StoredFields(Fields fields) => Assign(a => a.StoredFields = fields);

public MultiTermVectorOperationDescriptor<T> Id(Id id) => Assign(a=>a.Id = id);
/// <inheritdoc cref="IMultiTermVectorOperation.Id"/>
public MultiTermVectorOperationDescriptor<T> Id(Id id) => Assign(a=> a.Id = id);

/// <inheritdoc cref="IMultiTermVectorOperation.Index"/>
public MultiTermVectorOperationDescriptor<T> Index(IndexName index) => Assign(a => a.Index = index);

/// <inheritdoc cref="IMultiTermVectorOperation.Type"/>
public MultiTermVectorOperationDescriptor<T> Type(TypeName type) => Assign(a=> a.Type = type);

/// <inheritdoc cref="IMultiTermVectorOperation.Document"/>
public MultiTermVectorOperationDescriptor<T> Document(T document) => Assign(a => a.Document = document);

/// <inheritdoc cref="IMultiTermVectorOperation.Offsets"/>
public MultiTermVectorOperationDescriptor<T> Offsets(bool? offsets = true) => Assign(a => a.Offsets = offsets);

/// <inheritdoc cref="IMultiTermVectorOperation.Payloads"/>
public MultiTermVectorOperationDescriptor<T> Payloads(bool? payloads = true) => Assign(a => a.Payloads = payloads);

/// <inheritdoc cref="IMultiTermVectorOperation.Positions"/>
public MultiTermVectorOperationDescriptor<T> Positions(bool? positions = true) => Assign(a => a.Positions = positions);

/// <inheritdoc cref="IMultiTermVectorOperation.TermStatistics"/>
public MultiTermVectorOperationDescriptor<T> TermStatistics(bool? termStatistics = true) => Assign(a => a.TermStatistics = termStatistics);

/// <inheritdoc cref="IMultiTermVectorOperation.FieldStatistics"/>
public MultiTermVectorOperationDescriptor<T> FieldStatistics(bool? fieldStatistics = true) => Assign(a => a.FieldStatistics = fieldStatistics);

/// <inheritdoc cref="IMultiTermVectorOperation.Filter"/>
public MultiTermVectorOperationDescriptor<T> Filter(Func<TermVectorFilterDescriptor, ITermVectorFilter> filterSelector) =>
Assign(a => a.Filter = filterSelector?.Invoke(new TermVectorFilterDescriptor()));

/// <inheritdoc cref="IMultiTermVectorOperation.Version"/>
public MultiTermVectorOperationDescriptor<T> Version(long? version) => Assign(a => a.Version = version);

/// <inheritdoc cref="IMultiTermVectorOperation.VersionType"/>
public MultiTermVectorOperationDescriptor<T> VersionType(VersionType? versionType) => Assign(a => a.VersionType = versionType);

/// <inheritdoc cref="IMultiTermVectorOperation.Routing"/>
public MultiTermVectorOperationDescriptor<T> Routing(Routing routing) => Assign(a => a.Routing = routing);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -5,43 +5,84 @@

namespace Nest
{
/// <summary>
/// A Multi termvectors API request. The documents to generate term vectors
/// for are given either as individual operations (<see cref="Documents"/>)
/// or as a list of ids (<see cref="Ids"/>).
/// </summary>
public partial interface IMultiTermVectorsRequest
{
/// <summary>
/// The documents for which to generate term vectors.
/// Serialized under the <c>docs</c> JSON property.
/// </summary>
[JsonProperty("docs")]
IEnumerable<IMultiTermVectorOperation> Documents { get; set; }

/// <summary>
/// The ids of documents within the same index and type
/// for which to generate term vectors. Must be used in
/// conjunction with <see cref="Index"/> and <see cref="Type"/>.
/// Serialized under the <c>ids</c> JSON property.
/// </summary>
[JsonProperty("ids")]
IEnumerable<Id> Ids { get; set; }
}

/// <inheritdoc cref="IMultiTermVectorsRequest"/>
// NOTE(review): this partial declaration lists no base types; the
// IMultiTermVectorsRequest implementation that the <inheritdoc /> tags below
// rely on is presumably declared in another part of this partial class — confirm.
public partial class MultiTermVectorsRequest
{
/// <inheritdoc />
public IEnumerable<IMultiTermVectorOperation> Documents { get; set; }

/// <inheritdoc />
public IEnumerable<Id> Ids { get; set; }
}

/// <inheritdoc cref="IMultiTermVectorsRequest"/>
[DescriptorFor("Mtermvectors")]
public partial class MultiTermVectorsDescriptor
{
private List<IMultiTermVectorOperation> _operations = new List<IMultiTermVectorOperation>();
private List<IMultiTermVectorOperation> _operations;

private List<IMultiTermVectorOperation> Operations =>
this._operations ?? (this._operations = new List<IMultiTermVectorOperation>());

IEnumerable<IMultiTermVectorOperation> IMultiTermVectorsRequest.Documents
{
get { return this._operations; }
set { this._operations = value?.ToList(); }
get => this._operations;
set => this._operations = value?.ToList();
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just out of curiosity, is there any reason why we call .ToList() in setters like this, other than to force the execution of the IEnumerable at this point? It seems like we could save some allocations in these instances...

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't know about this specific case, but in general it would be to force execution, as you say.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is unusual with respect to the general API design in NEST is that GetMany<T>(..) adds to the operations rather than assigning them. I understand why this is, and it's not the only place where a method call is additive.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@Mpdreamz - would you be able to offer any insight?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

AFAIK, the only places where we are additive are the bulk-related API helpers, since you can call IndexMany and then IndexMany<Y> afterwards in a single bulk. That was the rationale behind the deviation. Definitely up for further discussion 😄

}

IEnumerable<Id> IMultiTermVectorsRequest.Ids { get; set; }

// TODO: Rename to Documents in 7.x
/// <summary>
/// A document for which to generate term vectors
/// </summary>
public MultiTermVectorsDescriptor Get<T>(Func<MultiTermVectorOperationDescriptor<T>, IMultiTermVectorOperation> getSelector)
where T : class =>
Assign(a => this._operations.AddIfNotNull(getSelector?.Invoke(new MultiTermVectorOperationDescriptor<T>())));
Assign(a => this.Operations.AddIfNotNull(getSelector?.Invoke(new MultiTermVectorOperationDescriptor<T>())));

// TODO: Rename to Documents in 7.x
/// <inheritdoc cref="IMultiTermVectorsRequest.Documents"/>
public MultiTermVectorsDescriptor GetMany<T>(IEnumerable<long> ids,
Func<MultiTermVectorOperationDescriptor<T>, long, IMultiTermVectorOperation> getSelector = null)
where T : class =>
Assign(a => this._operations.AddRange(ids.Select(id => getSelector.InvokeOrDefault(new MultiTermVectorOperationDescriptor<T>().Id(id), id))));
Assign(a => this.Operations.AddRange(ids.Select(id => getSelector.InvokeOrDefault(new MultiTermVectorOperationDescriptor<T>().Id(id), id))));

// TODO: Rename to Documents in 7.x
/// <inheritdoc cref="IMultiTermVectorsRequest.Documents"/>
public MultiTermVectorsDescriptor GetMany<T>(IEnumerable<string> ids, Func<MultiTermVectorOperationDescriptor<T>, string, IMultiTermVectorOperation> getSelector = null)
where T : class =>
Assign(a => this._operations.AddRange(ids.Select(id => getSelector.InvokeOrDefault(new MultiTermVectorOperationDescriptor<T>().Id(id), id))));
Assign(a => this.Operations.AddRange(ids.Select(id => getSelector.InvokeOrDefault(new MultiTermVectorOperationDescriptor<T>().Id(id), id))));

// TODO: Rename to Documents in 7.x
/// <inheritdoc cref="IMultiTermVectorsRequest.Documents"/>
public MultiTermVectorsDescriptor GetMany<T>(IEnumerable<Id> ids, Func<MultiTermVectorOperationDescriptor<T>, Id, IMultiTermVectorOperation> getSelector = null)
where T : class =>
Assign(a => this._operations.AddRange(ids.Select(id => getSelector.InvokeOrDefault(new MultiTermVectorOperationDescriptor<T>().Id(id), id))));
Assign(a => this.Operations.AddRange(ids.Select(id => getSelector.InvokeOrDefault(new MultiTermVectorOperationDescriptor<T>().Id(id), id))));

/// <inheritdoc cref="IMultiTermVectorsRequest.Ids"/>
public MultiTermVectorsDescriptor Ids(IEnumerable<Id> ids) => Assign(a => a.Ids = ids);

/// <inheritdoc cref="IMultiTermVectorsRequest.Ids"/>
public MultiTermVectorsDescriptor Ids(params Id[] ids) => Assign(a => a.Ids = ids);
}
}
Loading