Skip to content

Commit f491af6

Browse files
author
Andrew Stucki
authored
Registered domain processor (#67611)
1 parent d84824f commit f491af6

11 files changed

+1221
-11
lines changed

modules/ingest-common/build.gradle

+6-10
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@ dependencies {
1919
compileOnly project(':modules:lang-painless')
2020
api project(':libs:elasticsearch-grok')
2121
api project(':libs:elasticsearch-dissect')
22+
implementation "org.apache.httpcomponents:httpclient:${versions.httpclient}"
23+
implementation "org.apache.httpcomponents:httpcore:${versions.httpcore}"
2224
}
2325

2426
restResources {
@@ -35,16 +37,10 @@ testClusters.all {
3537

3638
tasks.named("thirdPartyAudit").configure {
3739
ignoreMissingClasses(
38-
// from log4j
39-
'org.osgi.framework.AdaptPermission',
40-
'org.osgi.framework.AdminPermission',
41-
'org.osgi.framework.Bundle',
42-
'org.osgi.framework.BundleActivator',
43-
'org.osgi.framework.BundleContext',
44-
'org.osgi.framework.BundleEvent',
45-
'org.osgi.framework.SynchronousBundleListener',
46-
'org.osgi.framework.wiring.BundleWire',
47-
'org.osgi.framework.wiring.BundleWiring'
40+
// commons-codec and commons-logging
41+
'org.apache.commons.codec.binary.Base64',
42+
'org.apache.commons.logging.Log',
43+
'org.apache.commons.logging.LogFactory',
4844
)
4945
}
5046

Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
7ca2e4276f4ef95e4db725a8cd4a1d1e7585b9e5

modules/ingest-common/licenses/httpclient-LICENSE.txt

+558
Large diffs are not rendered by default.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
Apache HttpComponents Client
2+
Copyright 1999-2016 The Apache Software Foundation
3+
4+
This product includes software developed at
5+
The Apache Software Foundation (http://www.apache.org/).
6+
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
21ebaf6d532bc350ba95bd81938fa5f0e511c132

modules/ingest-common/licenses/httpcore-LICENSE.txt

+241
Large diffs are not rendered by default.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
Apache HttpComponents Core
2+
Copyright 2005-2014 The Apache Software Foundation
3+
4+
This product includes software developed at
5+
The Apache Software Foundation (http://www.apache.org/).
6+
7+
This project contains annotations derived from JCIP-ANNOTATIONS
8+
Copyright (c) 2005 Brian Goetz and Tim Peierls. See http://www.jcip.net

modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/IngestCommonPlugin.java

+2-1
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,8 @@ public Map<String, Processor.Factory> getProcessors(Processor.Parameters paramet
8181
entry(UriPartsProcessor.TYPE, new UriPartsProcessor.Factory()),
8282
entry(NetworkDirectionProcessor.TYPE, new NetworkDirectionProcessor.Factory()),
8383
entry(CommunityIdProcessor.TYPE, new CommunityIdProcessor.Factory()),
84-
entry(FingerprintProcessor.TYPE, new FingerprintProcessor.Factory())
84+
entry(FingerprintProcessor.TYPE, new FingerprintProcessor.Factory()),
85+
entry(RegisteredDomainProcessor.TYPE, new RegisteredDomainProcessor.Factory())
8586
);
8687
}
8788

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,186 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the Elastic License
4+
* 2.0 and the Server Side Public License, v 1; you may not use this file except
5+
* in compliance with, at your election, the Elastic License 2.0 or the Server
6+
* Side Public License, v 1.
7+
*/
8+
9+
package org.elasticsearch.ingest.common;
10+
11+
import org.apache.http.conn.util.PublicSuffixMatcher;
12+
import org.apache.http.conn.util.PublicSuffixMatcherLoader;
13+
import org.elasticsearch.ingest.AbstractProcessor;
14+
import org.elasticsearch.ingest.ConfigurationUtils;
15+
import org.elasticsearch.ingest.IngestDocument;
16+
import org.elasticsearch.ingest.Processor;
17+
18+
import java.util.Map;
19+
20+
public class RegisteredDomainProcessor extends AbstractProcessor {
21+
private static final PublicSuffixMatcher SUFFIX_MATCHER = PublicSuffixMatcherLoader.getDefault();
22+
23+
public static final String TYPE = "registered_domain";
24+
25+
private final String field;
26+
private final String targetField;
27+
private final boolean ignoreMissing;
28+
29+
RegisteredDomainProcessor(
30+
String tag,
31+
String description,
32+
String field,
33+
String targetField,
34+
boolean ignoreMissing
35+
) {
36+
super(tag, description);
37+
this.field = field;
38+
this.targetField = targetField;
39+
this.ignoreMissing = ignoreMissing;
40+
}
41+
42+
public String getField() {
43+
return field;
44+
}
45+
46+
public String getTargetField() {
47+
return targetField;
48+
}
49+
50+
public boolean getIgnoreMissing() {
51+
return ignoreMissing;
52+
}
53+
54+
@Override
55+
public IngestDocument execute(IngestDocument ingestDocument) throws Exception {
56+
DomainInfo info = getRegisteredDomain(ingestDocument);
57+
if (info == null) {
58+
if (ignoreMissing) {
59+
return ingestDocument;
60+
} else {
61+
throw new IllegalArgumentException("unable to set domain information for document");
62+
}
63+
}
64+
String fieldPrefix = targetField;
65+
if (fieldPrefix.equals("") == false) {
66+
fieldPrefix += ".";
67+
}
68+
String domainTarget = fieldPrefix + "domain";
69+
String registeredDomainTarget = fieldPrefix + "registered_domain";
70+
String subdomainTarget = fieldPrefix + "subdomain";
71+
String topLevelDomainTarget = fieldPrefix + "top_level_domain";
72+
73+
if (info.getDomain() != null) {
74+
ingestDocument.setFieldValue(domainTarget, info.getDomain());
75+
}
76+
if (info.getRegisteredDomain() != null) {
77+
ingestDocument.setFieldValue(registeredDomainTarget, info.getRegisteredDomain());
78+
}
79+
if (info.getETLD() != null) {
80+
ingestDocument.setFieldValue(topLevelDomainTarget, info.getETLD());
81+
}
82+
if (info.getSubdomain() != null) {
83+
ingestDocument.setFieldValue(subdomainTarget, info.getSubdomain());
84+
}
85+
return ingestDocument;
86+
}
87+
88+
private DomainInfo getRegisteredDomain(IngestDocument d) {
89+
String fieldString = d.getFieldValue(field, String.class, ignoreMissing);
90+
if (fieldString == null) {
91+
return null;
92+
}
93+
String registeredDomain = SUFFIX_MATCHER.getDomainRoot(fieldString);
94+
if (registeredDomain == null) {
95+
if (SUFFIX_MATCHER.matches(fieldString)) {
96+
return new DomainInfo(fieldString);
97+
}
98+
return null;
99+
}
100+
if (registeredDomain.indexOf(".") == -1) {
101+
// we have domain with no matching public suffix, but "." in it
102+
return null;
103+
}
104+
return new DomainInfo(registeredDomain, fieldString);
105+
}
106+
107+
@Override
108+
public String getType() {
109+
return TYPE;
110+
}
111+
112+
private class DomainInfo {
113+
private final String domain;
114+
private final String registeredDomain;
115+
private final String eTLD;
116+
private final String subdomain;
117+
118+
private DomainInfo(String eTLD) {
119+
this.domain = eTLD;
120+
this.eTLD = eTLD;
121+
this.registeredDomain = null;
122+
this.subdomain = null;
123+
}
124+
125+
private DomainInfo(String registeredDomain, String domain) {
126+
int index = registeredDomain.indexOf(".") + 1;
127+
if (index > 0 && index < registeredDomain.length()) {
128+
this.domain = domain;
129+
this.eTLD = registeredDomain.substring(index);
130+
this.registeredDomain = registeredDomain;
131+
int subdomainIndex = domain.lastIndexOf("." + registeredDomain);
132+
if (subdomainIndex > 0) {
133+
this.subdomain = domain.substring(0, subdomainIndex);
134+
} else {
135+
this.subdomain = null;
136+
}
137+
} else {
138+
this.domain = null;
139+
this.eTLD = null;
140+
this.registeredDomain = null;
141+
this.subdomain = null;
142+
}
143+
}
144+
145+
public String getDomain() {
146+
return domain;
147+
}
148+
149+
public String getSubdomain() {
150+
return subdomain;
151+
}
152+
153+
public String getRegisteredDomain() {
154+
return registeredDomain;
155+
}
156+
157+
public String getETLD() {
158+
return eTLD;
159+
}
160+
}
161+
162+
public static final class Factory implements Processor.Factory {
163+
164+
static final String DEFAULT_TARGET_FIELD = "";
165+
166+
@Override
167+
public RegisteredDomainProcessor create(
168+
Map<String, Processor.Factory> registry,
169+
String processorTag,
170+
String description,
171+
Map<String, Object> config
172+
) throws Exception {
173+
String field = ConfigurationUtils.readStringProperty(TYPE, processorTag, config, "field");
174+
String targetField = ConfigurationUtils.readStringProperty(TYPE, processorTag, config, "target_field", DEFAULT_TARGET_FIELD);
175+
boolean ignoreMissing = ConfigurationUtils.readBooleanProperty(TYPE, processorTag, config, "ignore_missing", true);
176+
177+
return new RegisteredDomainProcessor(
178+
processorTag,
179+
description,
180+
field,
181+
targetField,
182+
ignoreMissing
183+
);
184+
}
185+
}
186+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the Elastic License
4+
* 2.0 and the Server Side Public License, v 1; you may not use this file except
5+
* in compliance with, at your election, the Elastic License 2.0 or the Server
6+
* Side Public License, v 1.
7+
*/
8+
9+
package org.elasticsearch.ingest.common;
10+
11+
import org.elasticsearch.ElasticsearchParseException;
12+
import org.elasticsearch.test.ESTestCase;
13+
import org.junit.Before;
14+
15+
import java.util.HashMap;
16+
import java.util.Map;
17+
18+
import static org.hamcrest.CoreMatchers.equalTo;
19+
20+
public class RegisteredDomainProcessorFactoryTests extends ESTestCase {
21+
22+
private RegisteredDomainProcessor.Factory factory;
23+
24+
@Before
25+
public void init() {
26+
factory = new RegisteredDomainProcessor.Factory();
27+
}
28+
29+
public void testCreate() throws Exception {
30+
Map<String, Object> config = new HashMap<>();
31+
32+
String field = randomAlphaOfLength(6);
33+
config.put("field", field);
34+
String targetField = randomAlphaOfLength(6);
35+
config.put("target_field", targetField);
36+
boolean ignoreMissing = randomBoolean();
37+
config.put("ignore_missing", ignoreMissing);
38+
39+
String processorTag = randomAlphaOfLength(10);
40+
RegisteredDomainProcessor publicSuffixProcessor = factory.create(null, processorTag, null, config);
41+
assertThat(publicSuffixProcessor.getTag(), equalTo(processorTag));
42+
assertThat(publicSuffixProcessor.getTargetField(), equalTo(targetField));
43+
assertThat(publicSuffixProcessor.getIgnoreMissing(), equalTo(ignoreMissing));
44+
}
45+
46+
public void testCreateDefaults() throws Exception {
47+
Map<String, Object> config = new HashMap<>();
48+
49+
String field = randomAlphaOfLength(6);
50+
config.put("field", field);
51+
52+
String processorTag = randomAlphaOfLength(10);
53+
RegisteredDomainProcessor publicSuffixProcessor = factory.create(null, processorTag, null, config);
54+
assertThat(publicSuffixProcessor.getTargetField(), equalTo(RegisteredDomainProcessor.Factory.DEFAULT_TARGET_FIELD));
55+
}
56+
57+
58+
public void testFieldRequired() throws Exception {
59+
HashMap<String, Object> config = new HashMap<>();
60+
String processorTag = randomAlphaOfLength(10);
61+
try {
62+
factory.create(null, processorTag, null, config);
63+
fail("factory create should have failed");
64+
} catch (ElasticsearchParseException e) {
65+
assertThat(e.getMessage(), equalTo("[field] required property is missing"));
66+
}
67+
}
68+
}

0 commit comments

Comments
 (0)