Skip to content

Commit d48dc4e

Browse files
Mpdreamz authored and russcam committed
Add support for indexed_chars_field on the AttachmentProcessor (#3442)
1 parent 577888a commit d48dc4e

File tree

2 files changed

+69
-71
lines changed

2 files changed

+69
-71
lines changed

src/Nest/Ingest/Processors/Plugins/AttachmentProcessor.cs

+35-68
Original file line numberDiff line numberDiff line change
@@ -17,15 +17,11 @@ namespace Nest
1717
[JsonConverter(typeof(ProcessorJsonConverter<AttachmentProcessor>))]
1818
public interface IAttachmentProcessor : IProcessor
1919
{
20-
/// <summary>
21-
/// The field to get the base64 encoded field from
22-
/// </summary>
20+
/// <summary> The field to get the base64 encoded field from </summary>
2321
[JsonProperty("field")]
2422
Field Field { get; set; }
2523

26-
/// <summary>
27-
/// The field that will hold the attachment information
28-
/// </summary>
24+
/// <summary> The field that will hold the attachment information </summary>
2925
[JsonProperty("target_field")]
3026
Field TargetField { get; set; }
3127

@@ -43,59 +39,42 @@ public interface IAttachmentProcessor : IProcessor
4339
[JsonProperty("indexed_chars")]
4440
long? IndexedCharacters { get; set; }
4541

46-
/// <summary>
47-
/// If `true` and `field` does not exist, the processor quietly exits without modifying the document
48-
/// </summary>
42+
/// <summary> Field name from which you can overwrite the number of chars being used for extraction. </summary>
43+
[JsonProperty("indexed_chars_field")]
44+
Field IndexedCharactersField { get; set; }
45+
46+
47+
/// <summary> If `true` and `field` does not exist, the processor quietly exits without modifying the document </summary>
4948
[JsonProperty("ignore_missing")]
5049
bool? IgnoreMissing { get; set; }
5150
}
5251

53-
/// <summary>
54-
/// The ingest attachment plugin lets Elasticsearch extract file attachments in common formats
55-
/// (such as PPT, XLS, and PDF) by using the Apache text extraction library Tika.
56-
/// You can use the ingest attachment plugin as a replacement for the mapper attachment plugin.
57-
/// </summary>
58-
/// <remarks>
59-
/// Requires the Ingest Attachment Processor Plugin to be installed on the cluster.
60-
/// </remarks>
52+
/// <inheritdoc cref="IAttachmentProcessor"/>
6153
public class AttachmentProcessor : ProcessorBase, IAttachmentProcessor
6254
{
6355
protected override string Name => "attachment";
6456

65-
/// <summary>
66-
/// The field to get the base64 encoded field from
67-
/// </summary>
57+
/// <inheritdoc cref="IAttachmentProcessor.Field"/>
6858
public Field Field { get; set; }
6959

70-
/// <summary>
71-
/// The field that will hold the attachment information
72-
/// </summary>
60+
/// <inheritdoc cref="IAttachmentProcessor.TargetField"/>
7361
public Field TargetField { get; set; }
7462

75-
/// <summary>
76-
/// Properties to select to be stored. Can be content, title, name, author,
77-
/// keywords, date, content_type, content_length, language. Defaults to all
78-
/// </summary>
63+
/// <inheritdoc cref="IAttachmentProcessor.Properties"/>
7964
public IEnumerable<string> Properties { get; set; }
8065

81-
/// <summary>
82-
/// The number of chars being used for extraction to prevent huge fields. Use -1 for no limit.
83-
/// Defaults to 100000.
84-
/// </summary>
66+
/// <inheritdoc cref="IAttachmentProcessor.IndexedCharacters"/>
8567
public long? IndexedCharacters { get; set; }
8668

69+
/// <inheritdoc cref="IAttachmentProcessor.IndexedCharactersField"/>
70+
public Field IndexedCharactersField { get; set; }
71+
8772
/// <inheritdoc/>
73+
/// <inheritdoc cref="IAttachmentProcessor.IgnoreMissing"/>
8874
public bool? IgnoreMissing { get; set; }
8975
}
9076

91-
/// <summary>
92-
/// The ingest attachment plugin lets Elasticsearch extract file attachments in common formats
93-
/// (such as PPT, XLS, and PDF) by using the Apache text extraction library Tika.
94-
/// You can use the ingest attachment plugin as a replacement for the mapper attachment plugin.
95-
/// </summary>
96-
/// <remarks>
97-
/// Requires the Ingest Attachment Processor Plugin to be installed on the cluster.
98-
/// </remarks>
77+
/// <inheritdoc cref="IAttachmentProcessor"/>
9978
public class AttachmentProcessorDescriptor<T>
10079
: ProcessorDescriptorBase<AttachmentProcessorDescriptor<T>, IAttachmentProcessor>, IAttachmentProcessor
10180
where T : class
@@ -107,48 +86,36 @@ public class AttachmentProcessorDescriptor<T>
10786
IEnumerable<string> IAttachmentProcessor.Properties { get; set; }
10887
long? IAttachmentProcessor.IndexedCharacters { get; set; }
10988
bool? IAttachmentProcessor.IgnoreMissing { get; set; }
89+
Field IAttachmentProcessor.IndexedCharactersField { get; set; }
11090

111-
/// <summary>
112-
/// The field to get the base64 encoded field from
113-
/// </summary>
91+
/// <inheritdoc cref="IAttachmentProcessor.Field"/>
11492
public AttachmentProcessorDescriptor<T> Field(Field field) => Assign(a => a.Field = field);
11593

116-
/// <summary>
117-
/// The field to get the base64 encoded field from
118-
/// </summary>
119-
public AttachmentProcessorDescriptor<T> Field(Expression<Func<T, object>> objectPath) =>
120-
Assign(a => a.Field = objectPath);
94+
/// <inheritdoc cref="IAttachmentProcessor.Field"/>
95+
public AttachmentProcessorDescriptor<T> Field(Expression<Func<T, object>> objectPath) => Assign(a => a.Field = objectPath);
12196

122-
/// <summary>
123-
/// The field that will hold the attachment information
124-
/// </summary>
97+
/// <inheritdoc cref="IAttachmentProcessor.TargetField"/>
12598
public AttachmentProcessorDescriptor<T> TargetField(Field field) => Assign(a => a.TargetField = field);
12699

127-
/// <summary>
128-
/// The field that will hold the attachment information
129-
/// </summary>
130-
public AttachmentProcessorDescriptor<T> TargetField(Expression<Func<T, object>> objectPath) =>
131-
Assign(a => a.TargetField = objectPath);
100+
/// <inheritdoc cref="IAttachmentProcessor.TargetField"/>
101+
public AttachmentProcessorDescriptor<T> TargetField(Expression<Func<T, object>> objectPath) => Assign(a => a.TargetField = objectPath);
132102

133-
/// <summary>
134-
/// The number of chars being used for extraction to prevent huge fields. Use -1 for no limit.
135-
/// Defaults to 100000.
136-
/// </summary>
103+
/// <inheritdoc cref="IAttachmentProcessor.IndexedCharacters"/>
137104
public AttachmentProcessorDescriptor<T> IndexedCharacters(long? indexedCharacters) => Assign(a => a.IndexedCharacters = indexedCharacters);
138105

139-
/// <inheritdoc/>
106+
/// <inheritdoc cref="IAttachmentProcessor.IndexedCharactersField"/>
107+
public AttachmentProcessorDescriptor<T> IndexedCharactersField(Field field) => Assign(a => a.IndexedCharactersField = field);
108+
109+
/// <inheritdoc cref="IAttachmentProcessor.IndexedCharactersField"/>
110+
public AttachmentProcessorDescriptor<T> IndexedCharactersField(Expression<Func<T, object>> objectPath) => Assign(a => a.IndexedCharactersField = objectPath);
111+
112+
/// <inheritdoc cref="IAttachmentProcessor.IgnoreMissing"/>
140113
public AttachmentProcessorDescriptor<T> IgnoreMissing(bool? ignoreMissing = true) => Assign(a => a.IgnoreMissing = ignoreMissing);
141114

142-
/// <summary>
143-
/// Properties to select to be stored. Can be content, title, name, author,
144-
/// keywords, date, content_type, content_length, language. Defaults to all
145-
/// </summary>
115+
/// <inheritdoc cref="IAttachmentProcessor.Properties"/>
146116
public AttachmentProcessorDescriptor<T> Properties(IEnumerable<string> properties) => Assign(a => a.Properties = properties);
147117

148-
/// <summary>
149-
/// Properties to select to be stored. Can be content, title, name, author,
150-
/// keywords, date, content_type, content_length, language. Defaults to all
151-
/// </summary>
118+
/// <inheritdoc cref="IAttachmentProcessor.Properties"/>
152119
public AttachmentProcessorDescriptor<T> Properties(params string[] properties) => Assign(a => a.Properties = properties);
153120
}
154121
}

src/Tests/Tests/Ingest/ProcessorAssertions.cs

+34-3
Original file line numberDiff line numberDiff line change
@@ -342,6 +342,36 @@ public class UrlDecode : ProcessorAssertion
342342
);
343343
}
344344

345+
// NOTE(review): the SkipVersion reason string below is empty — consider stating why versions below 6.4.0 are skipped.
// NOTE(review): none of the three forms in this class (Json, Initializer, Fluent) sets
// indexed_chars_field / IndexedCharactersField, so that option is not exercised here.
/// <summary>
/// Assertion fixture for the ingest <c>attachment</c> processor: declares the expected JSON
/// together with the object-initializer and fluent definitions of the same processor.
/// </summary>
[SkipVersion("<6.4.0", "")]
346+
public class Attachment : ProcessorAssertion
347+
{
348+
// Processor key; the same literal that AttachmentProcessor returns from its Name property.
public override string Key => "attachment";
349+
350+
// Expected JSON body of the processor (field names are the wire names declared via JsonProperty).
public override object Json => new
351+
{
352+
field = "description",
353+
ignore_missing = true,
354+
properties = new [] {"title", "author"},
355+
indexed_chars = 100_000,
356+
};
357+
358+
// Object-initializer form carrying the same four values as Json above.
public override IProcessor Initializer => new AttachmentProcessor
359+
{
360+
Field = "description",
361+
Properties = new [] {"title", "author"},
362+
IndexedCharacters = 100_000,
363+
IgnoreMissing = true
364+
365+
};
366+
// Fluent (descriptor) form of the same processor.
// Presumably p => p.Description resolves to the "description" field name — verify against Project's mapping.
public override Func<ProcessorsDescriptor, IPromise<IList<IProcessor>>> Fluent => d => d
367+
.Attachment<Project>(ud => ud
368+
.Field(p => p.Description)
369+
.IndexedCharacters(100_000)
370+
.Properties("title", "author")
371+
// No argument: the descriptor's IgnoreMissing parameter defaults to true.
.IgnoreMissing()
372+
);
373+
}
374+
345375

346376
[SkipVersion("<6.4.0", "")]
347377
public class Bytes : ProcessorAssertion
@@ -351,12 +381,12 @@ public class Bytes : ProcessorAssertion
351381
public override object Json => new { field = "description", ignore_missing = true };
352382

353383
public override IProcessor Initializer => new BytesProcessor { Field = "description", IgnoreMissing = true };
354-
384+
355385
public override Func<ProcessorsDescriptor, IPromise<IList<IProcessor>>> Fluent => d => d
356386
.Bytes<Project>(ud => ud
357387
.Field(p => p.Description)
358-
);
359-
}
388+
);
389+
}
360390

361391
public class KeyValue : ProcessorAssertion
362392
{
@@ -386,6 +416,7 @@ public class KeyValue : ProcessorAssertion
386416
.IgnoreMissing()
387417
);
388418
}
419+
389420
[SkipVersion("<6.4.0", "trimming options were introduced later")]
390421
public class KeyValueTrimming : ProcessorAssertion
391422
{

0 commit comments

Comments
 (0)