Skip to content

Commit 9e0c942

Browse files
committed
Initial commit
0 parents  commit 9e0c942

30 files changed

+12296
-0
lines changed

.editorconfig

+9
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
root = true
2+
3+
[*]
4+
indent_style = space
5+
indent_size = 2
6+
end_of_line = lf
7+
charset = utf-8
8+
trim_trailing_whitespace = true
9+
insert_final_newline = true

.gitignore

+7
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
.DS_Store
2+
*.log
3+
.nyc_output/
4+
coverage/
5+
node_modules/
6+
hast-util-to-nlcst.js
7+
hast-util-to-nlcst.min.js

.travis.yml

+23
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
language: node_js
2+
node_js:
3+
- '0.11'
4+
- '5.0'
5+
- '6.0'
6+
after_script: bash <(curl -s https://codecov.io/bash)
7+
deploy:
8+
- provider: npm
9+
10+
api_key:
11+
secure: pLTvdNPqZttWGiXd6hDD9jK2V+TDwUNiE4DYTT+2sk04GTX2DwmE9iGHUaKfNn8Y7bxw0yvMQSOrR1iIPZWqz4nqAXxnap7ADAdbo0exJ+hQptzUJGn9nXw3qqQ0ANSzWdlztEcpVvPQ+imIr8LnkR4+KiPhmajJbR4vi3HOBOvDeYkB3mMZr+FAocZH48Hj8zRfsF/PJsgfCQ5zrxk4eFwCHIwdw7Pu8zpoGtR0K1yvRhiiimSltYl+cdeKgEpDJr1fwGWgDc/XCKXp8QmuCYLnCJDPPDtAOlCB7ryFb5WmpLRNE6EvjzrtyW1o8PskDOdFyz65zyECf6N9bbxr7UWMck2dhQtpa8/9Kl9de1JNn59ZbJ2jJFH1EqEHpUOIFUb1h1QzWsUvm2nqxq6zBhMG1nwNk2/PiStGADvTh0xgtoDiRktJ+KrCkbQOuqeGuQoCfkjO1V4+ujA8ZTJPFhPabeD0hFwe2djMFC5iwHOUOjbVbSf3uxJS/gWZqgSKhwNUSgJifgL5hIitr2WJxs0UvtNOSLDBHSAZZxuGm/2fWHTXHEerRizOSPNngJlXHIV1Icf6y++ZF+oZ29OrbQmaPWSlV4ICI5buZPlJ91Iza5RXwI4fTqbqHp77MLe2sGKzRETI1uX6aTT4HxR1GmF64HMbJYR2LWCdZp2Gz44=
12+
on:
13+
tags: true
14+
node: '5.0'
15+
- provider: releases
16+
api_key:
17+
secure: LiIn+EKMvCmLq2ylwp26kaTLrkgGy0iBp5UAaQNW1BmDlbxtRclRUfercyA/e51TMS7ZGbSGCyTC4C3n+Kvn4CouFvm1g52ynUjKDs0y5U6bE0nmbzFZrZAEaR3Ai7z9AvGauW54XihTgmylpcC1aRUQ1Gqu3Ax7epNNToobJL/g71sLQfCZOwvYkIOs498+8eNYeS8afJrSYmbQDoq9E5Cu4NGR/vJZWevkjMaJCh320g3hQAd4XNkKp0tuvR6or4evgTpFeE1sZhvqYLLQiTHMki47A12VDfJyl32R7Gu7przibbpca8YjfCawXd490lRLkj93Gwkl4OqN37+b/JWTweLtJwCMzi39AOrrYvqevQ/I1fPAIE3UrT6GJ4Sa9sto70DXWpgCKn5iIhg1ZKFB18gjENZHoJZZYLa9Hl7igi9QrCv6w1Jcrb1IQi7YySXp5etwGsmltVmDtrV81+QRMokwGVOgY5tNYfBckJbXsIBP3+Mfhja8iIktLUXvyvvpUCPgpq7B68vlPSarKp0MNiDZBHGE+HJEDyQxZdY8tXEB7mJsqMGTYAMv2E5n57zj4ersSO1kImAFG71P6tblNLZBNUrzeePtjE8Xzuu1yaQro+cQf9nabZLuj9A27vGRbmm3n6RWRgL2FY4JWGVzMPKwvMb9mKoOBVINjbI=
18+
file:
19+
- "hast-util-to-nlcst.js"
20+
- "hast-util-to-nlcst.min.js"
21+
on:
22+
tags: true
23+
node: '6.0'

LICENSE

+22
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
(The MIT License)
2+
3+
Copyright (c) 2016 Titus Wormer <[email protected]>
4+
5+
Permission is hereby granted, free of charge, to any person obtaining
6+
a copy of this software and associated documentation files (the
7+
'Software'), to deal in the Software without restriction, including
8+
without limitation the rights to use, copy, modify, merge, publish,
9+
distribute, sublicense, and/or sell copies of the Software, and to
10+
permit persons to whom the Software is furnished to do so, subject to
11+
the following conditions:
12+
13+
The above copyright notice and this permission notice shall be
14+
included in all copies or substantial portions of the Software.
15+
16+
THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
17+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19+
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
20+
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21+
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22+
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

index.js

+287
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,287 @@
1+
/**
2+
* @author Titus Wormer
3+
* @copyright 2016 Titus Wormer
4+
* @license MIT
5+
* @module hast:to-nlcst
6+
* @fileoverview Transform HAST to NLCST.
7+
*/
8+
9+
'use strict';
10+
11+
/* Dependencies. */
12+
var vfileLocation = require('vfile-location');
13+
var toString = require('nlcst-to-string');
14+
var position = require('unist-util-position');
15+
var phrasing = require('hast-util-phrasing');
16+
var embedded = require('hast-util-embedded');
17+
var whitespace = require('hast-util-whitespace');
18+
var textContent = require('hast-util-to-string');
19+
var is = require('hast-util-is-element');
20+
21+
/* Expose. */
22+
module.exports = toNLCST;
23+
24+
/* Element name lists consulted while transforming. */
25+
var SOURCE = ['code']; /* Text content is handed to `parser.tokenizeSource`. */
26+
var IGNORE = ['script', 'style', 'svg', 'math', 'del']; /* Neither searched for paragraphs nor converted to tokens. */
27+
var EXPLICIT = ['p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6']; /* Each such element becomes one paragraph. */
28+
29+
/* Elements whose (flattened) children are scanned for
 * implicit paragraphs: runs of phrasing content that are
 * not wrapped in an explicit paragraph element. */
30+
var FLOW_ACCEPTING = [
31+
'body',
32+
'article',
33+
'section',
34+
'blockquote',
35+
'nav',
36+
'aside',
37+
'header',
38+
'footer',
39+
'address',
40+
'li',
41+
'dt',
42+
'dd',
43+
'figure',
44+
'figcaption',
45+
'div',
46+
'main',
47+
'caption',
48+
'td',
49+
'th',
50+
'form',
51+
'fieldset',
52+
'details',
53+
'dialog'
54+
];
55+
56+
/**
 * Transform `tree` into `nlcst`.
 *
 * Paragraphs are collected from explicit paragraph elements
 * (`EXPLICIT`) and from runs of phrasing content found
 * directly inside flow-accepting elements (`FLOW_ACCEPTING`).
 * Each paragraph is tokenized by the given parser and every
 * produced node is patched with a position in `file`.
 *
 * @param {Node} tree - HAST node, with positional information.
 * @param {File} file - Virtual file; its string contents are
 *   used to compute positions.
 * @param {Parser|Function} Parser - (Instance of) NLCST
 *   parser.
 * @return {NLCSTNode} - NLCST root whose children are the
 *   tokenized paragraphs.
 * @throws {Error} - When `tree`, `file`, or `Parser` is
 *   missing, or when `tree` has no start position.
 */
function toNLCST(tree, file, Parser) {
  var parser;
  var location;
  var results;
  var doc;

  /* Throw for invalid parameters. */
  if (!tree || !tree.type) {
    throw new Error('hast-util-to-nlcst expected node');
  }

  if (!file || !file.messages) {
    throw new Error('hast-util-to-nlcst expected file');
  }

  if (!Parser) {
    throw new Error('hast-util-to-nlcst expected parser');
  }

  /* Positions are required to map tokens back onto `file`. */
  if (
    !position.start(tree).line ||
    !position.start(tree).column
  ) {
    throw new Error('hast-util-to-nlcst expected position on nodes');
  }

  location = vfileLocation(file);
  doc = String(file);

  /* Construct parser: an object with `parse` is used as-is,
   * anything else is treated as a constructor. */
  parser = 'parse' in Parser ? Parser : new Parser();

  /* Transform HAST into NLCST tokens, and pass these
   * into the parser to insert sentences, paragraphs
   * where needed. */
  results = [];

  find(tree);

  return {
    type: 'root',
    children: results,
    position: {
      start: location.toPosition(0),
      end: location.toPosition(doc.length)
    }
  };

  /* Search `node` for paragraphs, adding them to `results`. */
  function find(node) {
    var children = node.children;

    if (node.type === 'root') {
      findAll(children);
    } else if (is(node) && !is(node, IGNORE)) {
      /* Explicit paragraph. */
      if (is(node, EXPLICIT)) {
        add(node);
      /* Slightly simplified version of:
       * https://html.spec.whatwg.org/#paragraphs */
      } else if (is(node, FLOW_ACCEPTING)) {
        implicit(flattenAll(children));
      /* Dig deeper. */
      } else {
        findAll(children);
      }
    }
  }

  /* Search each node in `children` for paragraphs. */
  function findAll(children) {
    var length = children.length;
    var index = -1;

    while (++index < length) {
      find(children[index]);
    }
  }

  /* Replace `a`, `ins`, `del`, and `map` elements by their
   * (recursively flattened) children; return any other node
   * untouched.
   * NOTE(review): `del` is also listed in `IGNORE`, yet its
   * children end up in the surrounding run here; confirm
   * this is intended. */
  function flatten(node) {
    if (is(node, ['a', 'ins', 'del', 'map'])) {
      return flattenAll(node.children);
    }

    return node;
  }

  /* Flatten all `children` into a single list. */
  function flattenAll(children) {
    var results = [];
    var length = children.length;
    var index = -1;

    while (++index < length) {
      results = results.concat(flatten(children[index]));
    }

    return results;
  }

  /* Tokenize `node` (a single HAST node or a list of them)
   * as one paragraph; empty paragraphs are dropped. */
  function add(node) {
    var result = ('length' in node ? all : one)(node);

    if (result.length) {
      results.push(parser.tokenizeParagraph(result));
    }
  }

  /* Split `children` into runs of phrasing content. A run
   * becomes a paragraph when it holds something other than
   * embedded content and white-space; otherwise its nodes
   * are searched individually. Non-phrasing children are
   * searched for paragraphs themselves. */
  function implicit(children) {
    /* One extra iteration (with `child` undefined) closes a
     * run which reaches the end of `children`. */
    var length = children.length + 1;
    var index = -1;
    var viable = false;
    var start = -1;
    var child;

    while (++index < length) {
      child = children[index];

      if (child && phrasing(child)) {
        if (start === -1) {
          start = index;
        }

        if (!viable && !embedded(child) && !whitespace(child)) {
          viable = true;
        }

        continue;
      }

      /* Close the open run, if any. Without this guard,
       * `slice(-1, index)` would yield the last child and
       * process it a second time. */
      if (start !== -1) {
        (viable ? add : findAll)(children.slice(start, index));
        viable = false;
        start = -1;
      }

      if (child) {
        find(child);
      }
    }
  }

  /* Convert `node` (HAST) to NLCST. Returns a list of NLCST
   * nodes, or nothing for ignored elements. */
  function one(node) {
    var type = node.type;
    var tagName = type === 'element' ? node.tagName : null;
    var change;
    var replacement;

    if (type === 'text') {
      change = true;
      replacement = parser.tokenize(node.value);
    } else if (tagName === 'wbr') {
      change = true;
      replacement = [parser.tokenizeWhiteSpace(' ')];
    } else if (tagName === 'br') {
      change = true;
      replacement = [parser.tokenizeWhiteSpace('\n')];
    } else if (SOURCE.indexOf(tagName) !== -1) {
      change = true;
      replacement = [parser.tokenizeSource(textContent(node))];
    } else if (type === 'root' || IGNORE.indexOf(tagName) === -1) {
      /* Children were already patched by their own `one`. */
      replacement = all(node.children);
    } else {
      return;
    }

    if (!change) {
      return replacement;
    }

    return patch(replacement, location, location.toOffset(position.start(node)));
  }

  /* Convert all `children` (HAST) to NLCST. */
  function all(children) {
    var length = children && children.length;
    var index = -1;
    var result = [];
    var child;

    while (++index < length) {
      child = one(children[index]);

      if (child) {
        result = result.concat(child);
      }
    }

    return result;
  }

  /* Patch a position on each node in `nodes`.
   * `offset` is the offset in `file` this run of content
   * starts at.
   *
   * Note that NLCST nodes are concrete, meaning that their
   * starting and ending positions can be inferred from their
   * content. */
  function patch(nodes, location, offset) {
    var length = nodes.length;
    var index = -1;
    var start = offset;
    var children;
    var node;
    var end;

    while (++index < length) {
      node = nodes[index];
      children = node.children;

      if (children) {
        patch(children, location, start);
      }

      end = start + toString(node).length;

      node.position = {
        start: location.toPosition(start),
        end: location.toPosition(end)
      };

      start = end;
    }

    return nodes;
  }
}

0 commit comments

Comments
 (0)