syntax-tree
diff --git a/‎.editorconfig
+9 b/‎.editorconfig
+9
diff --git a/‎.gitignore
+7 b/‎.gitignore
+7
diff --git a/‎.travis.yml
+23 b/‎.travis.yml
+23
diff --git a/‎LICENSE
+22 b/‎LICENSE
+22
diff --git a/‎index.js
+287 b/‎index.js
+287
@@ -0,0 +1,9 @@
+root = true
+
+[*]
+indent_style = space
+indent_size = 2
+end_of_line = lf
+charset = utf-8
+trim_trailing_whitespace = true
+insert_final_newline = true
@@ -0,0 +1,7 @@
+.DS_Store
+*.log
+.nyc_output/
+coverage/
+node_modules/
+hast-util-to-nlcst.js
+hast-util-to-nlcst.min.js
@@ -0,0 +1,23 @@
+language: node_js
+node_js:
+- '0.11'
+- '5.0'
+- '6.0'
+after_script: bash <(curl -s https://codecov.io/bash)
+deploy:
+  - provider: npm
+    email: tituswormer@gmail.com
+    api_key:
+      secure: pLTvdNPqZttWGiXd6hDD9jK2V+TDwUNiE4DYTT+2sk04GTX2DwmE9iGHUaKfNn8Y7bxw0yvMQSOrR1iIPZWqz4nqAXxnap7ADAdbo0exJ+hQptzUJGn9nXw3qqQ0ANSzWdlztEcpVvPQ+imIr8LnkR4+KiPhmajJbR4vi3HOBOvDeYkB3mMZr+FAocZH48Hj8zRfsF/PJsgfCQ5zrxk4eFwCHIwdw7Pu8zpoGtR0K1yvRhiiimSltYl+cdeKgEpDJr1fwGWgDc/XCKXp8QmuCYLnCJDPPDtAOlCB7ryFb5WmpLRNE6EvjzrtyW1o8PskDOdFyz65zyECf6N9bbxr7UWMck2dhQtpa8/9Kl9de1JNn59ZbJ2jJFH1EqEHpUOIFUb1h1QzWsUvm2nqxq6zBhMG1nwNk2/PiStGADvTh0xgtoDiRktJ+KrCkbQOuqeGuQoCfkjO1V4+ujA8ZTJPFhPabeD0hFwe2djMFC5iwHOUOjbVbSf3uxJS/gWZqgSKhwNUSgJifgL5hIitr2WJxs0UvtNOSLDBHSAZZxuGm/2fWHTXHEerRizOSPNngJlXHIV1Icf6y++ZF+oZ29OrbQmaPWSlV4ICI5buZPlJ91Iza5RXwI4fTqbqHp77MLe2sGKzRETI1uX6aTT4HxR1GmF64HMbJYR2LWCdZp2Gz44=
+    on:
+      tags: true
+      node: '5.0'
+  - provider: releases
+    api_key:
+      secure: LiIn+EKMvCmLq2ylwp26kaTLrkgGy0iBp5UAaQNW1BmDlbxtRclRUfercyA/e51TMS7ZGbSGCyTC4C3n+Kvn4CouFvm1g52ynUjKDs0y5U6bE0nmbzFZrZAEaR3Ai7z9AvGauW54XihTgmylpcC1aRUQ1Gqu3Ax7epNNToobJL/g71sLQfCZOwvYkIOs498+8eNYeS8afJrSYmbQDoq9E5Cu4NGR/vJZWevkjMaJCh320g3hQAd4XNkKp0tuvR6or4evgTpFeE1sZhvqYLLQiTHMki47A12VDfJyl32R7Gu7przibbpca8YjfCawXd490lRLkj93Gwkl4OqN37+b/JWTweLtJwCMzi39AOrrYvqevQ/I1fPAIE3UrT6GJ4Sa9sto70DXWpgCKn5iIhg1ZKFB18gjENZHoJZZYLa9Hl7igi9QrCv6w1Jcrb1IQi7YySXp5etwGsmltVmDtrV81+QRMokwGVOgY5tNYfBckJbXsIBP3+Mfhja8iIktLUXvyvvpUCPgpq7B68vlPSarKp0MNiDZBHGE+HJEDyQxZdY8tXEB7mJsqMGTYAMv2E5n57zj4ersSO1kImAFG71P6tblNLZBNUrzeePtjE8Xzuu1yaQro+cQf9nabZLuj9A27vGRbmm3n6RWRgL2FY4JWGVzMPKwvMb9mKoOBVINjbI=
+    file:
+      - "hast-util-to-nlcst.js"
+      - "hast-util-to-nlcst.min.js"
+    on:
+      tags: true
+      node: '6.0'
@@ -0,0 +1,22 @@
+(The MIT License)
+
+Copyright (c) 2016 Titus Wormer <tituswormer@gmail.com>
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+'Software'), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,287 @@
+/**
+ * @author Titus Wormer
+ * @copyright 2016 Titus Wormer
+ * @license MIT
+ * @module hast:to-nlcst
+ * @fileoverview Transform HAST to NLCST.
+ */
+
+'use strict';
+
+/* Dependencies. */
+var vfileLocation = require('vfile-location');
+var toString = require('nlcst-to-string');
+var position = require('unist-util-position');
+var phrasing = require('hast-util-phrasing');
+var embedded = require('hast-util-embedded');
+var whitespace = require('hast-util-whitespace');
+var textContent = require('hast-util-to-string');
+var is = require('hast-util-is-element');
+
+/* Expose `toNLCST` as the module's sole export. */
+module.exports = toNLCST;
+
+/* Tag names of elements representing source: their text
+ * content is turned into a single source node instead of
+ * being tokenized as natural language. */
+var SOURCE = ['code'];
+/* Tag names of elements which are not searched for
+ * paragraphs and whose content is dropped when converting
+ * to NLCST. */
+var IGNORE = ['script', 'style', 'svg', 'math', 'del'];
+/* Tag names of elements which are always one paragraph. */
+var EXPLICIT = ['p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6'];
+
+/* Tag names of elements which accept flow content.
+ * Runs of phrasing content found directly inside these
+ * elements are treated as implicit paragraphs; any other
+ * children are searched for paragraphs on their own. */
+var FLOW_ACCEPTING = [
+  'body',
+  'article',
+  'section',
+  'blockquote',
+  'nav',
+  'aside',
+  'header',
+  'footer',
+  'address',
+  'li',
+  'dt',
+  'dd',
+  'figure',
+  'figcaption',
+  'div',
+  'main',
+  'caption',
+  'td',
+  'th',
+  'form',
+  'fieldset',
+  'details',
+  'dialog'
+];
+
+/**
+ * Transform `tree` into `nlcst`.
+ *
+ * @param {Node} tree - HAST node.
+ * @param {File} file - Virtual file.
+ * @param {Parser|Function} Parser - (Instance of) NLCST
+ *   parser.
+ * @return {NLCSTNode} - NLCST.
+ */
+function toNLCST(tree, file, Parser) {
+  var parser;
+  var location;
+  var results;
+  var doc;
+
+  /* Warn for invalid parameters. */
+  if (!tree || !tree.type) {
+    throw new Error('hast-util-to-nlcst expected node');
+  }
+
+  if (!file || !file.messages) {
+    throw new Error('hast-util-to-nlcst expected file');
+  }
+
+  /* Construct parser. */
+  if (!Parser) {
+    throw new Error('hast-util-to-nlcst expected parser');
+  }
+
+  if (
+    !position.start(tree).line ||
+    !position.start(tree).column
+  ) {
+    throw new Error('hast-util-to-nlcst expected position on nodes');
+  }
+
+  location = vfileLocation(file);
+  doc = String(file);
+  parser = 'parse' in Parser ? Parser : new Parser();
+
+  /* Transform HAST into NLCST tokens, and pass these
+   * into `parser.parse` to insert sentences, paragraphs
+   * where needed. */
+  results = [];
+
+  find(tree);
+
+  return {
+    type: 'root',
+    children: results,
+    position: {
+      start: location.toPosition(0),
+      end: location.toPosition(doc.length)
+    }
+  };
+
+  function find(node) {
+    var children = node.children;
+
+    if (node.type === 'root') {
+      findAll(children);
+    } else if (is(node) && !is(node, IGNORE)) {
+      /* Explicit paragraph. */
+      if (is(node, EXPLICIT)) {
+        add(node);
+      /* Slightly simplified version of:
+       * https://html.spec.whatwg.org/#paragraphs */
+      } else if (is(node, FLOW_ACCEPTING)) {
+        implicit(flattenAll(children));
+      /* Dig deeper. */
+      } else {
+        findAll(children);
+      }
+    }
+  }
+
+  function findAll(children) {
+    var length = children.length;
+    var index = -1;
+
+    while (++index < length) {
+      find(children[index]);
+    }
+  }
+
+  function flatten(node) {
+    if (is(node, ['a', 'ins', 'del', 'map'])) {
+      return flattenAll(node.children);
+    }
+
+    return node;
+  }
+
+  function flattenAll(children) {
+    var results = [];
+    var length = children.length;
+    var index = -1;
+
+    while (++index < length) {
+      results = results.concat(flatten(children[index]));
+    }
+
+    return results;
+  }
+
+  function add(node) {
+    var result = ('length' in node ? all : one)(node);
+
+    if (result.length) {
+      results.push(parser.tokenizeParagraph(result));
+    }
+  }
+
+  function implicit(children) {
+    var length = children.length + 1;
+    var index = -1;
+    var viable = false;
+    var start = -1;
+    var child;
+
+    while (++index < length) {
+      child = children[index];
+
+      if (child && phrasing(child)) {
+        if (start === -1) {
+          start = index;
+        }
+
+        if (!viable && !embedded(child) && !whitespace(child)) {
+          viable = true;
+        }
+      } else if (child && start === -1) {
+        find(child);
+      } else {
+        (viable ? add : findAll)(children.slice(start, index));
+
+        if (child) {
+          find(child);
+        }
+
+        viable = false;
+        start = -1;
+      }
+    }
+  }
+
+  /* Convert `node` (HAST) to NLCST. */
+  function one(node) {
+    var type = node.type;
+    var tagName = type === 'element' ? node.tagName : null;
+    var change;
+    var replacement;
+
+    if (type === 'text') {
+      change = true;
+      replacement = parser.tokenize(node.value);
+    } else if (tagName === 'wbr') {
+      change = true;
+      replacement = [parser.tokenizeWhiteSpace(' ')];
+    } else if (tagName === 'br') {
+      change = true;
+      replacement = [parser.tokenizeWhiteSpace('\n')];
+    } else if (SOURCE.indexOf(tagName) !== -1) {
+      change = true;
+      replacement = [parser.tokenizeSource(textContent(node))];
+    } else if (type === 'root' || IGNORE.indexOf(tagName) === -1) {
+      replacement = all(node.children);
+    } else {
+      return;
+    }
+
+    if (!change) {
+      return replacement;
+    }
+
+    return patch(replacement, location, location.toOffset(position.start(node)));
+  }
+
+  /* Convert all `children` (HAST) to NLCST. */
+  function all(children) {
+    var length = children && children.length;
+    var index = -1;
+    var result = [];
+    var child;
+
+    while (++index < length) {
+      child = one(children[index]);
+
+      if (child) {
+        result = result.concat(child);
+      }
+    }
+
+    return result;
+  }
+
+  /* Patch a position on each node in `nodes`.
+   * `offset` is the offset in `file` this run of content
+   * starts at.
+   *
+   * Note that NLCST nodes are concrete, meaning that their
+   * starting and ending positions can be inferred from their
+   * content. */
+  function patch(nodes, location, offset) {
+    var length = nodes.length;
+    var index = -1;
+    var start = offset;
+    var children;
+    var node;
+    var end;
+
+    while (++index < length) {
+      node = nodes[index];
+      children = node.children;
+
+      if (children) {
+        patch(children, location, start);
+      }
+
+      end = start + toString(node).length;
+
+      node.position = {
+        start: location.toPosition(start),
+        end: location.toPosition(end)
+      };
+
+      start = end;
+    }
+
+    return nodes;
+  }
+}