Drops the inline callouts from the docs. These occur when you write `<1>`
anywhere but at the end of a line. Asciidoctor doesn't support them, and
we'd very much like to move to Asciidoctor to generate the docs because
it is actively maintained.
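For illustration, here is the pattern being removed next to the Asciidoctor-friendly form, using a line taken from this very diff:

[source,sql]
----
-- inline callout: Asciidoctor rejects this
USING org.elasticsearch.hadoop.pig.EsStorage('es.query=?me*'<2>);
-- callout moved to the end of the line: Asciidoctor accepts this
USING org.elasticsearch.hadoop.pig.EsStorage('es.query=?me*'); <2>
----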
-<2> Hive column `date` mapped in {es} to `@timestamp`
-<3> Hive column `url` mapped in {es} to `url_123`
+<1> Hive column `date` mapped in {es} to `@timestamp`; Hive column `url` mapped in {es} to `url_123`
 
 TIP: {es} accepts only lower-case field name and, as such, {eh} will always convert Hive column names to lower-case. This poses no issue as Hive is **case insensitive**
 however it is recommended to use the default Hive style and use upper-case names only for Hive commands and avoid mixed-case names.
@@ -97,7 +95,7 @@ CREATE EXTERNAL TABLE artists (
     name STRING,
     links STRUCT<url:STRING, picture:STRING>)
 STORED BY 'org.elasticsearch.hadoop.hive.EsStorageHandler'<1>
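For context, the hunk above is part of a table declaration along these lines — a hedged reconstruction, in which the `id` column and the `TBLPROPERTIES` line are assumptions (they are not shown in this diff):

[source,sql]
----
CREATE EXTERNAL TABLE artists (
    id      BIGINT,
    name    STRING,
    links   STRUCT<url:STRING, picture:STRING>)
STORED BY 'org.elasticsearch.hadoop.hive.EsStorageHandler'
TBLPROPERTIES('es.resource' = 'radio/artists'); -- assumed target resource
----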
docs/src/reference/asciidoc/core/pig.adoc (+22 -20)
@@ -18,7 +18,7 @@ In order to use {eh}, its jar needs to be in Pig's classpath. There are various
 REGISTER /path/elasticsearch-hadoop.jar;
 ----
 
-NOTE: the command expects a proper URI that can be found either on the local file-system or remotely. Typically it's best to use a distributed file-system (like HDFS or Amazon S3) and use that since the script might be executed
+NOTE: The command expects a proper URI that can be found either on the local file-system or remotely. Typically it's best to use a distributed file-system (like HDFS or Amazon S3) and use that since the script might be executed
 on various machines.
 
 As an alternative, when using the command-line, one can register additional jars through the `-Dpig.additional.jars` option (that accepts an URI as well):
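A sketch of that command-line alternative (the script name is hypothetical):

[source,bash]
----
pig -Dpig.additional.jars=/path/elasticsearch-hadoop.jar myscript.pig
----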
@@ -44,9 +44,10 @@ With Pig, one can specify the <<configuration,configuration>> properties (as an
 
 [source,sql]
 ----
-STORE B INTO 'radio/artists'<1> USING org.elasticsearch.hadoop.pig.EsStorage
-             ('es.http.timeout = 5m<2>',
-              'es.index.auto.create = false' <3>);
+STORE B INTO 'radio/artists' <1>
+      USING org.elasticsearch.hadoop.pig.EsStorage
+            ('es.http.timeout = 5m', <2>
+             'es.index.auto.create = false'); <3>
 ----
 
 <1> {eh} configuration (target resource)
@@ -163,12 +164,10 @@ For example:
 [source,sql]
 ----
 STORE B INTO '...' USING org.elasticsearch.hadoop.pig.EsStorage(
-<2> Pig column `date` mapped in {es} to `@timestamp`
-<3> Pig column `url` mapped in {es} to `url_123`
+<1> Pig column `date` mapped in {es} to `@timestamp`; Pig column `uRL` mapped in {es} to `url`
 
 TIP: {es} accepts only lower-case field name and, as such, {eh} will always convert Pig column names to lower-case. Because Pig is **case sensitive**, {eh} handles the reverse
 field mapping as well. It is recommended to use the default Pig style and use upper-case names only for commands and avoid mixed-case names.
@@ -186,11 +185,13 @@ A = LOAD 'src/test/resources/artists.dat' USING PigStorage()
 -- transform data
 B = FOREACH A GENERATE name, TOTUPLE(url, picture) AS links;
 -- save the result to Elasticsearch
-STORE B INTO 'radio/artists'<1> USING org.elasticsearch.hadoop.pig.EsStorage(<2>);
+STORE B INTO 'radio/artists'<1>
+      USING org.elasticsearch.hadoop.pig.EsStorage(); <2>
 ----
 
 <1> {es} resource (index and type) associated with the given storage
-<2> additional configuration parameters can be passed here - in this case the defaults are used
+<2> additional configuration parameters can be passed inside the `()` - in this
+case the defaults are used
 
 [float]
 ==== Writing existing JSON to {es}
@@ -213,9 +214,9 @@ IMPORTANT: Make sure the data is properly encoded, in `UTF-8`. The field content
 
 [source,sql]
 ----
-A = LOAD '/resources/artists.json' USING PigStorage() AS (json:chararray<1>);"
+A = LOAD '/resources/artists.json' USING PigStorage() AS (json:chararray);" <1>
 STORE B INTO 'radio/artists'
-    USING org.elasticsearch.hadoop.pig.EsStorage('es.input.json=true'<2>...);
+    USING org.elasticsearch.hadoop.pig.EsStorage('es.input.json=true'...); <2>
 ----
 
 <1> Load the (JSON) data as a single field (`json`)
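Put together, the JSON write flow might read as follows — a sketch that stores the loaded relation directly (the `B` relation is not defined in this hunk, so `A` is used here):

[source,sql]
----
A = LOAD '/resources/artists.json' USING PigStorage() AS (json:chararray);
-- each chararray is passed to {es} as-is, thanks to es.input.json=true
STORE A INTO 'radio/artists'
    USING org.elasticsearch.hadoop.pig.EsStorage('es.input.json=true');
----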
@@ -229,8 +230,9 @@ One can index the data to a different resource, depending on the 'row' being rea
 [source,sql]
 ----
 A = LOAD 'src/test/resources/media.dat' USING PigStorage()
-            AS (name:chararray, type:chararray <1>, year: chararray);
-STORE B INTO 'my-collection/{type}'<2> USING org.elasticsearch.hadoop.pig.EsStorage();
+            AS (name:chararray, type:chararray, year: chararray); <1>
+STORE B INTO 'my-collection/{type}' <2>
+    USING org.elasticsearch.hadoop.pig.EsStorage();
 ----
 
 <1> Tuple field used by the resource pattern. Any of the declared fields can be used.
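A self-contained sketch of the dynamic-resource pattern above (storing `A` directly, since `B` is not defined in this hunk):

[source,sql]
----
A = LOAD 'src/test/resources/media.dat' USING PigStorage()
        AS (name:chararray, type:chararray, year:chararray);
-- a row whose `type` field is 'book' is indexed under my-collection/book
STORE A INTO 'my-collection/{type}'
    USING org.elasticsearch.hadoop.pig.EsStorage();
----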
@@ -256,8 +258,8 @@ the table declaration can be as follows:
 
 [source,sql]
 ----
-A = LOAD '/resources/media.json' USING PigStorage() AS (json:chararray<1>);"
-STORE B INTO 'my-collection/{media_type}'<2>
+A = LOAD '/resources/media.json' USING PigStorage() AS (json:chararray);" <1>
+STORE B INTO 'my-collection/{media_type}' <2>
     USING org.elasticsearch.hadoop.pig.EsStorage('es.input.json=true');
 ----
@@ -272,15 +274,15 @@ As you would expect, loading the data is straight forward:
 [source,sql]
 ----
 -- execute Elasticsearch query and load data into Pig
-A = LOAD 'radio/artists'<1>
-    USING org.elasticsearch.hadoop.pig.EsStorage('es.query=?me*'<2>);
+A = LOAD 'radio/artists'<1>
+    USING org.elasticsearch.hadoop.pig.EsStorage('es.query=?me*'); <2>
 DUMP A;
 ----
 
 <1> {es} resource
 <2> search query to execute
 
-IMPORTANT: Due to a https://issues.apache.org/jira/browse/PIG-3646[bug] in Pig, +LoadFunc+tions are not aware of any schema associated with them. This means +EsStorage+ is forced to fully the documents
+IMPORTANT: Due to a https://issues.apache.org/jira/browse/PIG-3646[bug] in Pig, +LoadFunctions+ are not aware of any schema associated with them. This means +EsStorage+ is forced to fully parse the documents
 from Elasticsearch before passing the data to Pig for projection. In practice, this has little impact as long as a document top-level fields are used; for nested fields consider extracting the values
 yourself in Pig.
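Besides URI queries such as `?me*`, `es.query` also accepts a full query DSL document; a hedged sketch (the `name` field is an assumption, not from this diff):

[source,sql]
----
A = LOAD 'radio/artists'
    USING org.elasticsearch.hadoop.pig.EsStorage(
          'es.query = { "query" : { "wildcard" : { "name" : "me*" } } }');
----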
@@ -303,7 +305,7 @@ Pig internally uses native java types for most of its types and {eh} abides to t
 | `double`    | `double`
 | `float`     | `float`
 | `bytearray` | `binary`
-| `tuple`     | `array` or `map` (depending on <<tuple-names,this>> settings)
+| `tuple`     | `array` or `map` (depending on <<tuple-names,this>> setting)
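Assuming the setting referenced by the <<tuple-names,tuple-names>> link is `es.mapping.pig.tuple.use.field.names` (verify against the linked section — the property name is an assumption here), toggling it would look like:

[source,sql]
----
-- with the setting enabled, tuples are serialized as maps keyed by field name
STORE B INTO 'radio/artists'
    USING org.elasticsearch.hadoop.pig.EsStorage(
          'es.mapping.pig.tuple.use.field.names = true');
----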