Drops the inline callouts from the docs. This is when you write `<1>`
anywhere but the end of a line. Asciidoctor doesn't support them and
we'd very much like to move to Asciidoctor to generate the docs because
it is being actively maintained.
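For illustration, the difference looks like this (a made-up snippet mirroring the changes below, not itself part of the diff):

----
-- inline callouts: the <1>/<2> markers sit mid-line, which Asciidoctor rejects
STORE B INTO 'radio/artists'<1> USING org.elasticsearch.hadoop.pig.EsStorage(<2>);

-- end-of-line callouts: each marker is the last thing on its line
STORE B INTO 'radio/artists' <1>
      USING org.elasticsearch.hadoop.pig.EsStorage(); <2>
----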
docs/src/reference/asciidoc/core/hive.adoc

-<2> Hive column `date` mapped in {es} to `@timestamp`
-<3> Hive column `url` mapped in {es} to `url_123`
+<1> Hive column `date` mapped in {es} to `@timestamp`; Hive column `url` mapped in {es} to `url_123`

 TIP: Hive is case **insensitive** while {es} is not. The loss of information can create invalid queries (as the column in Hive might not match the one in {es}). To avoid this, {eh} will always convert Hive column names to lower-case.

 This being said, it is recommended to use the default Hive style and use upper-case names only for Hive commands and avoid mixed-case names.
@@ -99,7 +97,7 @@ CREATE EXTERNAL TABLE artists (
 name STRING,
 links STRUCT<url:STRING, picture:STRING>)
 STORED BY 'org.elasticsearch.hadoop.hive.EsStorageHandler'<1>
[...]
docs/src/reference/asciidoc/core/pig.adoc (+23 -21)
@@ -18,7 +18,7 @@ In order to use {eh}, its jar needs to be in Pig's classpath. There are various
 REGISTER /path/elasticsearch-hadoop.jar;
 ----

-NOTE: the command expects a proper URI that can be found either on the local file-system or remotely. Typically it's best to use a distributed file-system (like HDFS or Amazon S3) and use that since the script might be executed
+NOTE: The command expects a proper URI that can be found either on the local file-system or remotely. Typically it's best to use a distributed file-system (like HDFS or Amazon S3) and use that since the script might be executed
 on various machines.

 As an alternative, when using the command-line, one can register additional jars through the `-Dpig.additional.jars` option (that accepts an URI as well):
@@ -44,9 +44,10 @@ With Pig, one can specify the <<configuration,configuration>> properties (as an

 [source,sql]
 ----
-STORE B INTO 'radio/artists'<1> USING org.elasticsearch.hadoop.pig.EsStorage
-      ('es.http.timeout = 5m<2>',
-       'es.index.auto.create = false' <3>);
+STORE B INTO 'radio/artists' <1>
+      USING org.elasticsearch.hadoop.pig.EsStorage
+      ('es.http.timeout = 5m', <2>
+       'es.index.auto.create = false'); <3>
 ----

 <1> {eh} configuration (target resource)
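Pieced together from the snippets in this file, a complete write script in the new formatting could look like the sketch below; the jar path and the input file's column list are assumptions for illustration, not taken from the diff.

----
REGISTER /path/elasticsearch-hadoop.jar;

-- load a tab-separated file (the schema here is illustrative)
A = LOAD 'src/test/resources/artists.dat' USING PigStorage()
    AS (id:long, name:chararray, url:chararray, picture:chararray);
B = FOREACH A GENERATE name, TOTUPLE(url, picture) AS links;

-- write to the 'radio/artists' resource with explicit settings
STORE B INTO 'radio/artists'
      USING org.elasticsearch.hadoop.pig.EsStorage
      ('es.http.timeout = 5m',
       'es.index.auto.create = false');
----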
@@ -163,12 +164,10 @@ For example:
 [source,sql]
 ----
 STORE B INTO '...' USING org.elasticsearch.hadoop.pig.EsStorage(
[...]
-<2> Pig column `date` mapped in {es} to `@timestamp`
-<3> Pig column `url` mapped in {es} to `url_123`
+<1> Pig column `date` mapped in {es} to `@timestamp`; Pig column `uRL` mapped in {es} to `url`

 TIP: Since {eh} 2.1, the Pig schema case sensitivity is preserved to {es} and back.

@@ -185,11 +184,13 @@ A = LOAD 'src/test/resources/artists.dat' USING PigStorage()
 -- transform data
 B = FOREACH A GENERATE name, TOTUPLE(url, picture) AS links;
 -- save the result to Elasticsearch
-STORE B INTO 'radio/artists'<1> USING org.elasticsearch.hadoop.pig.EsStorage(<2>);
+STORE B INTO 'radio/artists'<1>
+      USING org.elasticsearch.hadoop.pig.EsStorage(); <2>
 ----

 <1> {es} resource (index and type) associated with the given storage
-<2> additional configuration parameters can be passed here - in this case the defaults are used
+<2> additional configuration parameters can be passed inside the `()` - in this
+    case the defaults are used

 For cases where the id (or other metadata fields like +ttl+ or +timestamp+) of the document needs to be specified, one can do so by setting the appropriate <<cfg-mapping, mapping>> namely +es.mapping.id+. Following the previous example, to indicate to {es} to use the field +id+ as the document id, update the +Storage+ configuration:

@@ -219,9 +220,9 @@ IMPORTANT: Make sure the data is properly encoded, in `UTF-8`. The field content

 [source,sql]
 ----
-A = LOAD '/resources/artists.json' USING PigStorage() AS (json:chararray<1>);"
+A = LOAD '/resources/artists.json' USING PigStorage() AS (json:chararray);" <1>
 STORE B INTO 'radio/artists'
-     USING org.elasticsearch.hadoop.pig.EsStorage('es.input.json=true'<2>...);
+     USING org.elasticsearch.hadoop.pig.EsStorage('es.input.json=true'...); <2>
 ----

 <1> Load the (JSON) data as a single field (`json`)
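A self-contained variant of the JSON example above, with two small liberties: the stray trailing `"` (which looks like a pre-existing typo in the docs) is dropped, and the loaded relation is stored directly instead of the undefined `B`.

----
-- each input line is expected to hold one JSON document, sent to {es} as-is
A = LOAD '/resources/artists.json' USING PigStorage() AS (json:chararray);
STORE A INTO 'radio/artists'
      USING org.elasticsearch.hadoop.pig.EsStorage('es.input.json=true');
----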
@@ -235,8 +236,9 @@ One can index the data to a different resource, depending on the 'row' being rea

 [source,sql]
 ----
 A = LOAD 'src/test/resources/media.dat' USING PigStorage()
-            AS (name:chararray, type:chararray <1>, year: chararray);
-STORE B INTO 'my-collection/{type}'<2> USING org.elasticsearch.hadoop.pig.EsStorage();
+            AS (name:chararray, type:chararray, year: chararray); <1>
+STORE B INTO 'my-collection/{type}' <2>
+      USING org.elasticsearch.hadoop.pig.EsStorage();
 ----

 <1> Tuple field used by the resource pattern. Any of the declared fields can be used.
@@ -262,8 +264,8 @@ the table declaration can be as follows:

 [source,sql]
 ----
-A = LOAD '/resources/media.json' USING PigStorage() AS (json:chararray<1>);"
-STORE B INTO 'my-collection/{media_type}'<2>
+A = LOAD '/resources/media.json' USING PigStorage() AS (json:chararray);" <1>
+STORE B INTO 'my-collection/{media_type}'<2>
      USING org.elasticsearch.hadoop.pig.EsStorage('es.input.json=true');
 ----

@@ -278,23 +280,23 @@ As you would expect, loading the data is straight forward:
 [source,sql]
 ----
 -- execute Elasticsearch query and load data into Pig
-A = LOAD 'radio/artists'<1>
-    USING org.elasticsearch.hadoop.pig.EsStorage('es.query=?me*'<2>);
+A = LOAD 'radio/artists'<1>
+    USING org.elasticsearch.hadoop.pig.EsStorage('es.query=?me*'); <2>
 DUMP A;
 ----

 <1> {es} resource
 <2> search query to execute

-IMPORTANT: Due to a https://issues.apache.org/jira/browse/PIG-3646[bug] in Pig, +LoadFunctions+ are not aware of any schema associated with them. This means +EsStorage+ is forced to fully the documents
+IMPORTANT: Due to a https://issues.apache.org/jira/browse/PIG-3646[bug] in Pig, +LoadFunctions+ are not aware of any schema associated with them. This means +EsStorage+ is forced to fully parse the documents
 from Elasticsearch before passing the data to Pig for projection. In practice, this has little impact as long as a document top-level fields are used; for nested fields consider extracting the values
 yourself in Pig.


 [float]
 === Reading data from {es} as JSON

-In case where the results from {es} need to be in JSON format (typically to be sent down the wire to some other system), one can instruct the {eh} to return the data as is. By setting `es.output.json` to `true`, the connector will parse the response from {es}, identify the documents and, without converting them, return their content to the user as +String/chararray+ objects.
+In the case where the results from {es} need to be in JSON format (typically to be sent down the wire to some other system), one can instruct {eh} to return the data as is. By setting `es.output.json` to `true`, the connector will parse the response from {es}, identify the documents and, without converting them, return their content to the user as +String/chararray+ objects.


 [[pig-type-conversion]]
@@ -316,7 +318,7 @@ Pig internally uses native java types for most of its types and {eh} abides to t
 | `double` | `double`
 | `float` | `float`
 | `bytearray` | `binary`
-| `tuple` | `array` or `map` (depending on <<tuple-names,this>> settings)
+| `tuple` | `array` or `map` (depending on <<tuple-names,this>> setting)
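The `es.output.json` paragraph above carries no example in this diff; a minimal sketch of what it describes might look like the following (the output path is illustrative):

----
-- with es.output.json=true, each record is a single chararray holding
-- one document returned verbatim as raw JSON
A = LOAD 'radio/artists'
    USING org.elasticsearch.hadoop.pig.EsStorage('es.output.json=true');
STORE A INTO '/tmp/artists-json' USING PigStorage();
----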