
Commit ee15503

nik9000 authored and jbaiera committed
Docs: Drop inline callouts (#1270)
Drops the inline callouts from the docs. An inline callout is a `<1>`-style marker written anywhere other than at the end of a line. Asciidoctor doesn't support them, and we'd very much like to move to Asciidoctor to generate the docs because it is actively maintained.
1 parent: a939a9a · commit: ee15503

File tree: 5 files changed (+53, -49 lines)

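Before the per-file hunks, a minimal before/after illustration of the change (a sketch written for this summary, not an excerpt from the commit): classic AsciiDoc tolerated a callout marker such as `<1>` in the middle of a source line, while Asciidoctor only recognizes a callout when it is the last thing on the line, so each marker moves to a line end and long statements wrap instead.

[source,java]
----
// inline callouts: dropped by Asciidoctor
Tap out = new EsTap("radio/artists" <1>, new Fields("name", "url") <2>);

// end-of-line callouts: supported everywhere
Tap out = new EsTap("radio/artists", <1>
      new Fields("name", "url")); <2>
----
<1> target resource
<2> document fields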

docs/src/reference/asciidoc/core/cascading.adoc (9 additions & 6 deletions)
@@ -95,7 +95,8 @@ Simply hook, `EsTap` into the Cascading flow:
 ----
 Tap in = new Lfs(new TextDelimited(new Fields("id", "name", "url", "picture")),
       "/resources/artists.dat");
-Tap out = new EsTap("radio/artists" <1>, new Fields("name", "url", "picture") <2>);
+Tap out = new EsTap("radio/artists", <1>
+      new Fields("name", "url", "picture")); <2>
 new HadoopFlowConnector().connect(in, out, new Pipe("write-to-Es")).complete();
 ----
 

@@ -132,8 +133,8 @@ One can index the data to a different resource, depending on the tuple being rea
 
 [source,java]
 ----
-Tap out = new EsTap("my-collection/{media.type}" <1>,
-      new Fields("name", "media.type", "year") <2>);
+Tap out = new EsTap("my-collection/{media.type}", <1>
+      new Fields("name", "media.type", "year")); <2>
 ----
 
 <1> Resource pattern using field `media.type`

@@ -146,7 +147,7 @@ The functionality is available when dealing with raw JSON as well - in this case
 [source,js]
 ----
 {
-    "media_type":"book",<1>
+    "media_type":"book", <1>
     "title":"Harry Potter",
     "year":"2010"
 }

@@ -159,7 +160,8 @@ the `Tap` declaration can be as follows:
 ----
 props.setProperty("es.input.json", "true");
 Tap in = new Lfs(new TextLine(new Fields("line")),"/archives/collection.json");
-Tap out = new EsTap("my-collection/{media_type}" <1>, new Fields("line") <2>);
+Tap out = new EsTap("my-collection/{media_type}", <1>
+      new Fields("line")); <2>
 ----
 
 <1> Resource pattern relying on fields _within_ the JSON document and _not_ on the `Tap` schema

@@ -172,7 +174,8 @@ Just the same, add `EsTap` on the other end of a pipe, to read (instead of writi
 
 [source,java]
 ----
-Tap in = new EsTap("radio/artists/"<1>,"?q=me*"<2>);
+Tap in = new EsTap("radio/artists/", <1>
+      "?q=me*"); <2>
 Tap out = new StdOut(new TextLine());
 new LocalFlowConnector().connect(in, out, new Pipe("read-from-Es")).complete();
 ----
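For readers piecing the fragments together, the corrected Cascading write flow assembles into the self-contained sketch below. The imports are filled in here for context and assume the Cascading 2.x package layout plus {eh}'s `org.elasticsearch.hadoop.cascading.EsTap`; they are not part of the commit.

[source,java]
----
import cascading.flow.hadoop.HadoopFlowConnector;
import cascading.pipe.Pipe;
import cascading.scheme.hadoop.TextDelimited;
import cascading.tap.Tap;
import cascading.tap.hadoop.Lfs;
import cascading.tuple.Fields;
import org.elasticsearch.hadoop.cascading.EsTap;

// source: a delimited file on the file-system
Tap in = new Lfs(new TextDelimited(new Fields("id", "name", "url", "picture")),
      "/resources/artists.dat");
// sink: the radio/artists index/type in Elasticsearch
Tap out = new EsTap("radio/artists",
      new Fields("name", "url", "picture"));
// wire source to sink and run the flow
new HadoopFlowConnector().connect(in, out, new Pipe("write-to-Es")).complete();
----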

docs/src/reference/asciidoc/core/hive.adoc (11 additions & 12 deletions)
@@ -56,7 +56,7 @@ When using Hive, one can use `TBLPROPERTIES` to specify the <<configuration,conf
 CREATE EXTERNAL TABLE artists (...)
 STORED BY 'org.elasticsearch.hadoop.hive.EsStorageHandler'
 TBLPROPERTIES('es.resource' = 'radio/artists',
-              'es.index.auto.create' = 'false') <1>;
+              'es.index.auto.create' = 'false'); <1>
 ----
 
 <1> {eh} setting

@@ -75,12 +75,10 @@ To wit:
 CREATE EXTERNAL TABLE artists (...)
 STORED BY 'org.elasticsearch.hadoop.hive.EsStorageHandler'
 TBLPROPERTIES('es.resource' = 'radio/artists',
-              <1>'es.mapping.names' = 'date:@timestamp <2>, url:url_123 <3>');
+              'es.mapping.names' = 'date:@timestamp, url:url_123'); <1>
 ----
 
-<1> name mapping for two fields
-<2> Hive column `date` mapped in {es} to `@timestamp`
-<3> Hive column `url` mapped in {es} to `url_123`
+<1> Hive column `date` mapped in {es} to `@timestamp`; Hive column `url` mapped in {es} to `url_123`
 
 TIP: {es} accepts only lower-case field name and, as such, {eh} will always convert Hive column names to lower-case. This poses no issue as Hive is **case insensitive**
 however it is recommended to use the default Hive style and use upper-case names only for Hive commands and avoid mixed-case names.

@@ -97,7 +95,7 @@ CREATE EXTERNAL TABLE artists (
 name STRING,
 links STRUCT<url:STRING, picture:STRING>)
 STORED BY 'org.elasticsearch.hadoop.hive.EsStorageHandler'<1>
-TBLPROPERTIES('es.resource' = 'radio/artists'<2>);
+TBLPROPERTIES('es.resource' = 'radio/artists'); <2>
 
 -- insert data to Elasticsearch from another table called 'source'
 INSERT OVERWRITE TABLE artists

@@ -136,10 +134,10 @@ IMPORTANT: Make sure the data is properly encoded, in `UTF-8`. The field content
 
 [source,java]
 ----
-CREATE EXTERNAL TABLE json (data STRING<1>)
+CREATE EXTERNAL TABLE json (data STRING) <1>
 STORED BY 'org.elasticsearch.hadoop.hive.EsStorageHandler'
 TBLPROPERTIES('es.resource' = '...',
-              'es.input.json` = 'yes'<2>);
+              'es.input.json` = 'yes'); <2>
 ...
 ----

@@ -158,7 +156,7 @@ CREATE EXTERNAL TABLE media (
 type STRING,<1>
 year STRING,
 STORED BY 'org.elasticsearch.hadoop.hive.EsStorageHandler'
-TBLPROPERTIES('es.resource' = 'my-collection/{type}'<2>);
+TBLPROPERTIES('es.resource' = 'my-collection/{type}'); <2>
 ----
 
 <1> Table field used by the resource pattern. Any of the declared fields can be used.

@@ -183,9 +181,9 @@ the table declaration can be as follows:
 
 [source,sql]
 ----
-CREATE EXTERNAL TABLE json (data STRING<1>)
+CREATE EXTERNAL TABLE json (data STRING) <1>
 STORED BY 'org.elasticsearch.hadoop.hive.EsStorageHandler'
-TBLPROPERTIES('es.resource' = 'my-collection/{media_type}'<2>,
+TBLPROPERTIES('es.resource' = 'my-collection/{media_type}', <2>
               'es.input.json` = 'yes');
 ----

@@ -204,7 +202,8 @@ CREATE EXTERNAL TABLE artists (
 name STRING,
 links STRUCT<url:STRING, picture:STRING>)
 STORED BY 'org.elasticsearch.hadoop.hive.EsStorageHandler'<1>
-TBLPROPERTIES('es.resource' = 'radio/artists'<2>, 'es.query' = '?q=me*'<3>);
+TBLPROPERTIES('es.resource' = 'radio/artists', <2>
+              'es.query' = '?q=me*'); <3>
 
 -- stream data from Elasticsearch
 SELECT * FROM artists;
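Assembled, the write path from the `INSERT OVERWRITE` hunk reads roughly as below; the `SELECT` projection is illustrative (the hunk truncates before it), with `named_struct` being the usual way to populate a Hive `STRUCT` column.

[source,sql]
----
CREATE EXTERNAL TABLE artists (
    name  STRING,
    links STRUCT<url:STRING, picture:STRING>)
STORED BY 'org.elasticsearch.hadoop.hive.EsStorageHandler'
TBLPROPERTIES('es.resource' = 'radio/artists');

-- insert data to Elasticsearch from another table called 'source'
INSERT OVERWRITE TABLE artists
    SELECT s.name, named_struct('url', s.url, 'picture', s.picture)
    FROM source s;
----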

docs/src/reference/asciidoc/core/pig.adoc (22 additions & 20 deletions)
@@ -18,7 +18,7 @@ In order to use {eh}, its jar needs to be in Pig's classpath. There are various 
 REGISTER /path/elasticsearch-hadoop.jar;
 ----
 
-NOTE: the command expects a proper URI that can be found either on the local file-system or remotely. Typically it's best to use a distributed file-system (like HDFS or Amazon S3) and use that since the script might be executed
+NOTE: The command expects a proper URI that can be found either on the local file-system or remotely. Typically it's best to use a distributed file-system (like HDFS or Amazon S3) and use that since the script might be executed
 on various machines.
 
 As an alternative, when using the command-line, one can register additional jars through the `-Dpig.additional.jars` option (that accepts an URI as well):

@@ -44,9 +44,10 @@ With Pig, one can specify the <<configuration,configuration>> properties (as an
 
 [source,sql]
 ----
-STORE B INTO 'radio/artists'<1> USING org.elasticsearch.hadoop.pig.EsStorage
-             ('es.http.timeout = 5m<2>',
-              'es.index.auto.create = false' <3>);
+STORE B INTO 'radio/artists' <1>
+       USING org.elasticsearch.hadoop.pig.EsStorage
+             ('es.http.timeout = 5m', <2>
+              'es.index.auto.create = false'); <3>
 ----
 
 <1> {eh} configuration (target resource)

@@ -163,12 +164,10 @@ For example:
 [source,sql]
 ----
 STORE B INTO '...' USING org.elasticsearch.hadoop.pig.EsStorage(
-    '<1>es.mapping.names=date:@timestamp<2>, uRL:url<3>')
+    'es.mapping.names=date:@timestamp, uRL:url') <1>
 ----
 
-<1> name mapping for two fields
-<2> Pig column `date` mapped in {es} to `@timestamp`
-<3> Pig column `url` mapped in {es} to `url_123`
+<1> Pig column `date` mapped in {es} to `@timestamp`; Pig column `uRL` mapped in {es} to `url`
 
 TIP: {es} accepts only lower-case field name and, as such, {eh} will always convert Pig column names to lower-case. Because Pig is **case sensitive**, {eh} handles the reverse
 field mapping as well. It is recommended to use the default Pig style and use upper-case names only for commands and avoid mixed-case names.

@@ -186,11 +185,13 @@ A = LOAD 'src/test/resources/artists.dat' USING PigStorage()
 -- transform data
 B = FOREACH A GENERATE name, TOTUPLE(url, picture) AS links;
 -- save the result to Elasticsearch
-STORE B INTO 'radio/artists'<1> USING org.elasticsearch.hadoop.pig.EsStorage(<2>);
+STORE B INTO 'radio/artists'<1>
+       USING org.elasticsearch.hadoop.pig.EsStorage(); <2>
 ----
 
 <1> {es} resource (index and type) associated with the given storage
-<2> additional configuration parameters can be passed here - in this case the defaults are used
+<2> additional configuration parameters can be passed inside the `()` - in this
+case the defaults are used
 
 [float]
 ==== Writing existing JSON to {es}

@@ -213,9 +214,9 @@ IMPORTANT: Make sure the data is properly encoded, in `UTF-8`. The field content
 
 [source,sql]
 ----
-A = LOAD '/resources/artists.json' USING PigStorage() AS (json:chararray<1>);"
+A = LOAD '/resources/artists.json' USING PigStorage() AS (json:chararray);" <1>
 STORE B INTO 'radio/artists'
-      USING org.elasticsearch.hadoop.pig.EsStorage('es.input.json=true'<2>...);
+      USING org.elasticsearch.hadoop.pig.EsStorage('es.input.json=true'...); <2>
 ----
 
 <1> Load the (JSON) data as a single field (`json`)

@@ -229,8 +230,9 @@ One can index the data to a different resource, depending on the 'row' being rea
 [source,sql]
 ----
 A = LOAD 'src/test/resources/media.dat' USING PigStorage()
-            AS (name:chararray, type:chararray <1>, year: chararray);
-STORE B INTO 'my-collection/{type}'<2> USING org.elasticsearch.hadoop.pig.EsStorage();
+            AS (name:chararray, type:chararray, year: chararray); <1>
+STORE B INTO 'my-collection/{type}' <2>
+       USING org.elasticsearch.hadoop.pig.EsStorage();
 ----
 
 <1> Tuple field used by the resource pattern. Any of the declared fields can be used.

@@ -256,8 +258,8 @@ the table declaration can be as follows:
 
 [source,sql]
 ----
-A = LOAD '/resources/media.json' USING PigStorage() AS (json:chararray<1>);"
-STORE B INTO 'my-collection/{media_type}'<2>
+A = LOAD '/resources/media.json' USING PigStorage() AS (json:chararray);" <1>
+STORE B INTO 'my-collection/{media_type}' <2>
      USING org.elasticsearch.hadoop.pig.EsStorage('es.input.json=true');
 ----

@@ -272,15 +274,15 @@ As you would expect, loading the data is straight forward:
 [source,sql]
 ----
 -- execute Elasticsearch query and load data into Pig
-A = LOAD 'radio/artists'<1>
-    USING org.elasticsearch.hadoop.pig.EsStorage('es.query=?me*'<2>);
+A = LOAD 'radio/artists' <1>
+    USING org.elasticsearch.hadoop.pig.EsStorage('es.query=?me*'); <2>
 DUMP A;
 ----
 
 <1> {es} resource
 <2> search query to execute
 
-IMPORTANT: Due to a https://issues.apache.org/jira/browse/PIG-3646[bug] in Pig, +LoadFunc+tions are not aware of any schema associated with them. This means +EsStorage+ is forced to fully the documents
+IMPORTANT: Due to a https://issues.apache.org/jira/browse/PIG-3646[bug] in Pig, +LoadFunctions+ are not aware of any schema associated with them. This means +EsStorage+ is forced to fully parse the documents
 from Elasticsearch before passing the data to Pig for projection. In practice, this has little impact as long as a document top-level fields are used; for nested fields consider extracting the values
 yourself in Pig.

@@ -303,7 +305,7 @@ Pig internally uses native java types for most of its types and {eh} abides to t
 | `double` | `double`
 | `float` | `float`
 | `bytearray` | `binary`
-| `tuple` | `array` or `map` (depending on <<tuple-names,this>> settings)
+| `tuple` | `array` or `map` (depending on <<tuple-names,this>> setting)
 | `bag` | `array`
 | `map` | `map`
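Likewise, the Pig write path in this file assembles into one short script; the `REGISTER` path and the `AS` schema are illustrative, since the hunks truncate before the schema.

[source,sql]
----
REGISTER /path/elasticsearch-hadoop.jar;

-- load data
A = LOAD 'src/test/resources/artists.dat' USING PigStorage()
    AS (id:long, name:chararray, url:chararray, picture:chararray);
-- transform data
B = FOREACH A GENERATE name, TOTUPLE(url, picture) AS links;
-- save the result to Elasticsearch
STORE B INTO 'radio/artists'
       USING org.elasticsearch.hadoop.pig.EsStorage();
----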

docs/src/reference/asciidoc/core/spark.adoc (7 additions & 7 deletions)
@@ -28,8 +28,8 @@ The good news is, one can easily enable a different serialization (https://githu
 SparkConf sc = new SparkConf(); //.setMaster("local");
 sc.set("spark.serializer", KryoSerializer.class.getName()); <1>
 
-// needed only when using the Java API
-JavaSparkContext jsc = new JavaSparkContext(sc);
+// needed only when using the Java API
+JavaSparkContext jsc = new JavaSparkContext(sc);
 ----
 
 <1> Enable the Kryo serialization support with Spark

@@ -60,15 +60,15 @@ JobConf conf = new JobConf(); <1>
 conf.set("es.resource", "radio/artists"); <2>
 conf.set("es.query", "?q=me*"); <3>
 
-JavaPairRDD esRDD = jsc.hadoopRDD(conf, EsInputFormat.class,
+JavaPairRDD esRDD = jsc.hadoopRDD(conf, EsInputFormat.class,
                     Text.class, MapWritable.class); <4>
 long docCount = esRDD.count();
 ----
 
 <1> Create the Hadoop object (use the old API)
 <2> Configure the source (index)
 <3> Setup the query (optional)
-<4> Create a Spark RDD on top of {es} through `EsInputFormat` - the key represent the doc id, the value the doc itself
+<4> Create a Spark +RDD+ on top of {es} through `EsInputFormat` - the key represent the doc id, the value the doc itself
 
 The Scala version is below:
 

@@ -85,7 +85,7 @@ val docCount = esRDD.count();
 <1> Create the Hadoop object (use the old API)
 <2> Configure the source (index)
 <3> Setup the query (optional)
-<4> Create a Spark RDD on top of {es} through `EsInputFormat`
+<4> Create a Spark +RDD+ on top of {es} through `EsInputFormat`
 
 [float]
 ==== 'New' (`org.apache.hadoop.mapreduce`) API

@@ -98,15 +98,15 @@ Configuration conf = new Configuration(); <1>
 conf.set("es.resource", "radio/artists"); <2>
 conf.set("es.query", "?q=me*"); <3>
 
-JavaPairRDD esRDD = jsc.newAPIHadoopRDD(conf, EsInputFormat.class,
+JavaPairRDD esRDD = jsc.newAPIHadoopRDD(conf, EsInputFormat.class,
                     Text.class, MapWritable.class); <4>
 long docCount = esRDD.count();
 ----
 
 <1> Create the Hadoop object (use the new API)
 <2> Configure the source (index)
 <3> Setup the query (optional)
-<4> Create a Spark RDD on top of {es} through `EsInputFormat` - the key represent the doc id, the value the doc itself
+<4> Create a Spark +RDD+ on top of {es} through `EsInputFormat` - the key represent the doc id, the value the doc itself
 
 The Scala version is below:
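Put together, the 'old' API read shown in these hunks amounts to the sketch below. The imports and the `SparkConf` setup are filled in for context (keeping the raw `JavaPairRDD` type the docs use) and assume {eh}'s `org.elasticsearch.hadoop.mr.EsInputFormat`, which implements both the `mapred` and `mapreduce` input formats.

[source,java]
----
import org.apache.hadoop.io.MapWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.elasticsearch.hadoop.mr.EsInputFormat;

SparkConf sparkConf = new SparkConf().setAppName("es-read"); //.setMaster("local");
JavaSparkContext jsc = new JavaSparkContext(sparkConf);

JobConf conf = new JobConf();              // 'old' (mapred) API
conf.set("es.resource", "radio/artists");  // source index/type
conf.set("es.query", "?q=me*");            // optional query

// key = doc id, value = the doc itself as a MapWritable
JavaPairRDD esRDD = jsc.hadoopRDD(conf, EsInputFormat.class,
                                  Text.class, MapWritable.class);
long docCount = esRDD.count();
----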

docs/src/reference/asciidoc/intro/download.adoc (4 additions & 4 deletions)
@@ -27,7 +27,7 @@ These are available under the same `groupId`, using an `artifactId` with the pat
 ----
 <dependency>
   <groupId>org.elasticsearch</groupId>
-  <artifactId>elasticsearch-hadoop-mr<1></artifactId>
+  <artifactId>elasticsearch-hadoop-mr</artifactId> <1>
   <version>{ver}</version>
 </dependency>
 ----

@@ -39,7 +39,7 @@ These are available under the same `groupId`, using an `artifactId` with the pat
 ----
 <dependency>
   <groupId>org.elasticsearch</groupId>
-  <artifactId>elasticsearch-hadoop-hive<1></artifactId>
+  <artifactId>elasticsearch-hadoop-hive</artifactId> <1>
   <version>{ver}</version>
 </dependency>
 ----

@@ -51,7 +51,7 @@ These are available under the same `groupId`, using an `artifactId` with the pat
 ----
 <dependency>
   <groupId>org.elasticsearch</groupId>
-  <artifactId>elasticsearch-hadoop-pig<1></artifactId>
+  <artifactId>elasticsearch-hadoop-pig</artifactId> <1>
   <version>{ver}</version>
 </dependency>
 ----

@@ -63,7 +63,7 @@ These are available under the same `groupId`, using an `artifactId` with the pat
 ----
 <dependency>
   <groupId>org.elasticsearch</groupId>
-  <artifactId>elasticsearch-hadoop-cascading<1></artifactId>
+  <artifactId>elasticsearch-hadoop-cascading</artifactId> <1>
   <version>{ver}</version>
 </dependency>
 ----
