Skip to content

Commit 343310c

Browse files
author
Christoph Büscher
committed
Add field type for version strings
This PR adds a new 'version' field type that allows indexing string values representing software versions similar to the ones defined in the Semantic Versioning definition (semver.org). The field behaves very similarly to a 'keyword' field but allows efficient sorting and range queries that take into account the special ordering needed for version strings. For example, the main version parts are sorted numerically (i.e. 2.0.0 < 11.0.0), whereas this wouldn't be possible with 'keyword' fields today. Valid version values are similar to the Semantic Versioning definition, with the notable exception that in addition to the "main" version consisting of major.minor.patch, we allow fewer or more than three numeric identifiers, i.e. "1.2" or "1.4.6.123.12" are treated as valid too. Relates to elastic#48878
1 parent d31808d commit 343310c

File tree

12 files changed

+1628
-2
lines changed

12 files changed

+1628
-2
lines changed

server/src/main/java/org/elasticsearch/index/mapper/TermBasedFieldType.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,9 +33,10 @@
3333

3434
/** Base {@link MappedFieldType} implementation for a field that is indexed
3535
* with the inverted index. */
36-
abstract class TermBasedFieldType extends SimpleMappedFieldType {
36+
public abstract class TermBasedFieldType extends SimpleMappedFieldType {
3737

38-
TermBasedFieldType(String name, boolean isSearchable, boolean hasDocValues, TextSearchInfo textSearchInfo, Map<String, String> meta) {
38+
public TermBasedFieldType(String name, boolean isSearchable, boolean hasDocValues, TextSearchInfo textSearchInfo,
39+
Map<String, String> meta) {
3940
super(name, isSearchable, hasDocValues, textSearchInfo, meta);
4041
}
4142

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
// Build script for the x-pack 'versionfield' plugin.

// x-pack core is referenced in the dependencies below, so evaluate that project first.
evaluationDependsOn(xpackModule('core'))

apply plugin: 'elasticsearch.esplugin'

esplugin {
  name 'versionfield'
  description 'A plugin for a field type to store software versions'
  classname 'org.elasticsearch.xpack.versionfield.VersionFieldPlugin'
  extendedPlugins = ['x-pack-core', 'lang-painless']
}
archivesBaseName = 'x-pack-versionfield'

dependencies {
  // compile-time only; these modules are also listed in extendedPlugins above
  compileOnly project(path: xpackModule('core'), configuration: 'default')
  compileOnly project(':modules:lang-painless:spi')
  compileOnly project(':modules:lang-painless')
  testImplementation project(path: xpackModule('core'), configuration: 'testArtifacts')
}

// the integTest task is switched off for this plugin
integTest.enabled = false
Lines changed: 232 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,232 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the Elastic License;
4+
* you may not use this file except in compliance with the Elastic License.
5+
*/
6+
7+
package org.elasticsearch.xpack.versionfield;
8+
9+
import org.apache.lucene.util.BytesRef;
10+
import org.apache.lucene.util.BytesRefBuilder;
11+
12+
import java.nio.charset.StandardCharsets;
13+
import java.util.regex.Pattern;
14+
15+
/**
16+
* Encodes a version string to a {@link BytesRef} that correctly sorts according to software version precedence rules like
17+
* the ones described in Semantiv Versioning (https://semver.org/)
18+
*
19+
* Version strings are considered to consist of three parts:
20+
* <ul>
21+
* <li> a numeric major.minor.patch part starting the version string (e.g. 1.2.3)
22+
* <li> an optional "pre-release" part that starts with a `-` character and can consist of several alpha-numerical sections
23+
* separated by dots (e.g. "-alpha.2.3")
24+
* <li> an optional "build" part that starts with a `+` character. This will simply be treated as a prefix with no guaranteed ordering,
25+
* (although the ordering should be alphabetical in most cases).
26+
* </ul>
27+
*
28+
* The version string is encoded such that the ordering works like the following:
29+
* <ul>
30+
* <li> Major, minor, and patch versions are always compared numerically
31+
* <li> pre-release version have lower precedence than a normal version. (e.g 1.0.0-alpha &lt; 1.0.0)
32+
* <li> the precedence for pre-release versions with same main version is calculated comparing each dot separated identifier from
33+
* left to right. Identifiers consisting of only digits are compared numerically and identifiers with letters or hyphens are compared
34+
* lexically in ASCII sort order. Numeric identifiers always have lower precedence than non-numeric identifiers.
35+
* </ul>
36+
*/
37+
class VersionEncoder {
38+
39+
public static final byte NUMERIC_MARKER_BYTE = (byte) 0x01;
40+
public static final byte PRERELESE_SEPARATOR_BYTE = (byte) 0x02;
41+
public static final byte NO_PRERELESE_SEPARATOR_BYTE = (byte) 0x03;
42+
43+
private static final char PRERELESE_SEPARATOR = '-';
44+
private static final char DOT_SEPARATOR = '.';
45+
private static final char BUILD_SEPARATOR = '+';
46+
47+
// Regex to test version validity: \d+(\.\d+)*(-[\-\dA-Za-z]+){0,1}(\.[-\dA-Za-z]+)*(\+[\.\-\dA-Za-z]+)?
48+
// private static Pattern LEGAL_VERSION_PATTERN = Pattern.compile(
49+
// "\\d+(\\.\\d+)*(-[\\-\\dA-Za-z]+){0,1}(\\.[\\-\\dA-Za-z]+)*(\\+[\\.\\-\\dA-Za-z]+)?"
50+
// );
51+
52+
// Regex to test strict Semver Main Version validity:
53+
// private static Pattern LEGAL_MAIN_VERSION_SEMVER = Pattern.compile("(0|[1-9]\\d*)\\.(0|[1-9]\\d*)\\.(0|[1-9]\\d*)");
54+
55+
// Regex to test relaxed Semver Main Version validity. Allows for more or less than three main version parts
56+
private static Pattern LEGAL_MAIN_VERSION_SEMVER = Pattern.compile("(0|[1-9]\\d*)(\\.(0|[1-9]\\d*))*");
57+
58+
private static Pattern LEGAL_PRERELEASE_VERSION_SEMVER = Pattern.compile(
59+
"(?:-((?:0|[1-9]\\d*|\\d*[a-zA-Z-][0-9a-zA-Z-]*)(?:\\.(?:0|[1-9]\\d*|\\d*[a-zA-Z-][0-9a-zA-Z-]*))*))"
60+
);
61+
62+
private static Pattern LEGAL_BUILDSUFFIX_SEMVER = Pattern.compile("(?:\\+([0-9a-zA-Z-]+(?:\\.[0-9a-zA-Z-]+)*))?");
63+
64+
/**
65+
* Encodes a version string.
66+
*/
67+
public static EncodedVersion encodeVersion(String versionString) {
68+
// System.out.println("encoding: " + versionString);
69+
VersionParts versionParts = VersionParts.ofVersion(versionString);
70+
71+
// don't treat non-legal versions further, just mark them as illegal and return
72+
if (legalVersionString(versionParts) == false) {
73+
return new EncodedVersion(new BytesRef(versionString), false, true, 0, 0, 0);
74+
}
75+
76+
BytesRefBuilder encodedBytes = new BytesRefBuilder();
77+
Integer[] mainVersionParts = prefixDigitGroupsWithLength(versionParts.mainVersion, encodedBytes);
78+
79+
if (versionParts.preRelease != null) {
80+
encodedBytes.append(PRERELESE_SEPARATOR_BYTE); // versions with pre-release part sort before ones without
81+
encodedBytes.append((byte) PRERELESE_SEPARATOR);
82+
String[] preReleaseParts = versionParts.preRelease.substring(1).split("\\.");
83+
boolean first = true;
84+
for (String preReleasePart : preReleaseParts) {
85+
if (first == false) {
86+
encodedBytes.append((byte) DOT_SEPARATOR);
87+
}
88+
boolean isNumeric = preReleasePart.chars().allMatch(x -> Character.isDigit(x));
89+
if (isNumeric) {
90+
prefixDigitGroupsWithLength(preReleasePart, encodedBytes);
91+
} else {
92+
encodedBytes.append(new BytesRef(preReleasePart));
93+
}
94+
first = false;
95+
}
96+
} else {
97+
encodedBytes.append(NO_PRERELESE_SEPARATOR_BYTE);
98+
}
99+
100+
if (versionParts.buildSuffix != null) {
101+
encodedBytes.append(new BytesRef(versionParts.buildSuffix));
102+
}
103+
// System.out.println("encoded: " + encodedBytes.toBytesRef());
104+
return new EncodedVersion(
105+
encodedBytes.toBytesRef(),
106+
true,
107+
versionParts.preRelease != null,
108+
mainVersionParts[0],
109+
mainVersionParts[1],
110+
mainVersionParts[2]
111+
);
112+
}
113+
114+
private static Integer[] prefixDigitGroupsWithLength(String input, BytesRefBuilder result) {
115+
int pos = 0;
116+
int mainVersionCounter = 0;
117+
Integer[] mainVersionComponents = new Integer[3];
118+
while (pos < input.length()) {
119+
if (Character.isDigit(input.charAt(pos))) {
120+
// found beginning of number block, so get its length
121+
int start = pos;
122+
BytesRefBuilder number = new BytesRefBuilder();
123+
while (pos < input.length() && Character.isDigit(input.charAt(pos))) {
124+
number.append((byte) input.charAt(pos));
125+
pos++;
126+
}
127+
int length = pos - start;
128+
if (length >= 128) {
129+
throw new IllegalArgumentException("Groups of digits cannot be longer than 127, but found: " + length);
130+
}
131+
result.append(NUMERIC_MARKER_BYTE); // ensure length byte does cause higher sort order comparing to other byte[]
132+
result.append((byte) (length | 0x80)); // add upper bit to mark as length
133+
result.append(number);
134+
135+
// if present, parse out three leftmost version parts
136+
if (mainVersionCounter < 3) {
137+
mainVersionComponents[mainVersionCounter] = Integer.valueOf(number.toBytesRef().utf8ToString());
138+
mainVersionCounter++;
139+
}
140+
} else {
141+
result.append((byte) input.charAt(pos));
142+
pos++;
143+
}
144+
}
145+
return mainVersionComponents;
146+
}
147+
148+
public static String decodeVersion(BytesRef version) {
149+
// System.out.println("decoding: " + version);
150+
int inputPos = version.offset;
151+
int resultPos = 0;
152+
byte[] result = new byte[version.length];
153+
while (inputPos < version.offset + version.length) {
154+
byte inputByte = version.bytes[inputPos];
155+
if (inputByte == NUMERIC_MARKER_BYTE) {
156+
// need to skip this byte
157+
inputPos++;
158+
// this should always be a length encoding, which is skipped by increasing inputPos at the end of the loop
159+
assert version.bytes[inputPos] < 0;
160+
} else if (inputByte != PRERELESE_SEPARATOR_BYTE && inputByte != NO_PRERELESE_SEPARATOR_BYTE) {
161+
result[resultPos] = inputByte;
162+
resultPos++;
163+
}
164+
inputPos++;
165+
}
166+
// System.out.println("decoded to: " + new String(result, 0, resultPos));
167+
return new String(result, 0, resultPos, StandardCharsets.UTF_8);
168+
}
169+
170+
private static boolean legalVersionString(VersionParts versionParts) {
171+
boolean legalMainVersion = LEGAL_MAIN_VERSION_SEMVER.matcher(versionParts.mainVersion).matches();
172+
boolean legalPreRelease = true;
173+
if (versionParts.preRelease != null) {
174+
legalPreRelease = LEGAL_PRERELEASE_VERSION_SEMVER.matcher(versionParts.preRelease).matches();
175+
}
176+
boolean legalBuildSuffix = true;
177+
if (versionParts.buildSuffix != null) {
178+
legalBuildSuffix = LEGAL_BUILDSUFFIX_SEMVER.matcher(versionParts.buildSuffix).matches();
179+
}
180+
return legalMainVersion && legalPreRelease && legalBuildSuffix;
181+
}
182+
183+
public static class EncodedVersion {
184+
185+
public final boolean isLegal;
186+
public final boolean isPreRelease;
187+
public final BytesRef bytesRef;
188+
public final Integer major;
189+
public final Integer minor;
190+
public final Integer patch;
191+
192+
EncodedVersion(BytesRef bytesRef, boolean isLegal, boolean isPreRelease, Integer major, Integer minor, Integer patch) {
193+
super();
194+
this.bytesRef = bytesRef;
195+
this.isLegal = isLegal;
196+
this.isPreRelease = isPreRelease;
197+
this.major = major;
198+
this.minor = minor;
199+
this.patch = patch;
200+
}
201+
}
202+
203+
private static class VersionParts {
204+
final String mainVersion;
205+
final String preRelease;
206+
final String buildSuffix;
207+
208+
private VersionParts(String mainVersion, String preRelease, String buildSuffix) {
209+
this.mainVersion = mainVersion;
210+
this.preRelease = preRelease;
211+
this.buildSuffix = buildSuffix;
212+
}
213+
214+
private static VersionParts ofVersion(String versionString) {
215+
String buildSuffix = extractSuffix(versionString, BUILD_SEPARATOR);
216+
if (buildSuffix != null) {
217+
versionString = versionString.substring(0, versionString.length() - buildSuffix.length());
218+
}
219+
220+
String preRelease = extractSuffix(versionString, PRERELESE_SEPARATOR);
221+
if (preRelease != null) {
222+
versionString = versionString.substring(0, versionString.length() - preRelease.length());
223+
}
224+
return new VersionParts(versionString, preRelease, buildSuffix);
225+
}
226+
227+
private static String extractSuffix(String input, char separator) {
228+
int start = input.indexOf(separator);
229+
return start > 0 ? input.substring(start) : null;
230+
}
231+
}
232+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the Elastic License;
4+
* you may not use this file except in compliance with the Elastic License.
5+
*/
6+
7+
package org.elasticsearch.xpack.versionfield;
8+
9+
import org.elasticsearch.painless.spi.PainlessExtension;
10+
import org.elasticsearch.painless.spi.Whitelist;
11+
import org.elasticsearch.painless.spi.WhitelistLoader;
12+
import org.elasticsearch.script.AggregationScript;
13+
import org.elasticsearch.script.FieldScript;
14+
import org.elasticsearch.script.FilterScript;
15+
import org.elasticsearch.script.NumberSortScript;
16+
import org.elasticsearch.script.ScoreScript;
17+
import org.elasticsearch.script.ScriptContext;
18+
import org.elasticsearch.script.StringSortScript;
19+
20+
import java.util.HashMap;
21+
import java.util.List;
22+
import java.util.Map;
23+
24+
import static java.util.Collections.singletonList;
25+
26+
public class VersionFieldDocValuesExtension implements PainlessExtension {
27+
28+
private static final Whitelist WHITELIST = WhitelistLoader.loadFromResourceFiles(VersionFieldDocValuesExtension.class, "whitelist.txt");
29+
30+
@Override
31+
public Map<ScriptContext<?>, List<Whitelist>> getContextWhitelists() {
32+
Map<ScriptContext<?>, List<Whitelist>> whitelist = new HashMap<>();
33+
List<Whitelist> list = singletonList(WHITELIST);
34+
whitelist.put(AggregationScript.CONTEXT, list);
35+
whitelist.put(ScoreScript.CONTEXT, list);
36+
whitelist.put(FilterScript.CONTEXT, list);
37+
whitelist.put(FieldScript.CONTEXT, list);
38+
whitelist.put(NumberSortScript.CONTEXT, list);
39+
whitelist.put(StringSortScript.CONTEXT, list);
40+
return whitelist;
41+
}
42+
}
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the Elastic License;
4+
* you may not use this file except in compliance with the Elastic License.
5+
*/
6+
7+
package org.elasticsearch.xpack.versionfield;
8+
9+
import org.elasticsearch.common.settings.Settings;
10+
import org.elasticsearch.index.mapper.Mapper;
11+
import org.elasticsearch.plugins.MapperPlugin;
12+
import org.elasticsearch.plugins.Plugin;
13+
14+
import java.util.Collections;
15+
import java.util.LinkedHashMap;
16+
import java.util.Map;
17+
18+
public class VersionFieldPlugin extends Plugin implements MapperPlugin {
19+
20+
public VersionFieldPlugin(Settings settings) {}
21+
22+
@Override
23+
public Map<String, Mapper.TypeParser> getMappers() {
24+
Map<String, Mapper.TypeParser> mappers = new LinkedHashMap<>();
25+
mappers.put(VersionStringFieldMapper.CONTENT_TYPE, new VersionStringFieldMapper.TypeParser());
26+
return Collections.unmodifiableMap(mappers);
27+
}
28+
}

0 commit comments

Comments
 (0)