1
1
'use strict'
2
2
3
- var vfileLocation = require ( 'vfile-location' )
4
- var toString = require ( 'nlcst-to-string' )
5
- var position = require ( 'unist-util-position' )
6
- var phrasing = require ( 'hast-util-phrasing' )
7
3
var embedded = require ( 'hast-util-embedded' )
8
- var whitespace = require ( 'hast-util-whitespace' )
4
+ var convert = require ( 'hast-util-is-element/convert' )
5
+ var phrasing = require ( 'hast-util-phrasing' )
9
6
var textContent = require ( 'hast-util-to-string' )
10
- var is = require ( 'hast-util-is-element' )
7
+ var whitespace = require ( 'hast-util-whitespace' )
8
+ var toString = require ( 'nlcst-to-string' )
9
+ var position = require ( 'unist-util-position' )
10
+ var vfileLocation = require ( 'vfile-location' )
11
11
12
12
module . exports = toNlcst
13
13
14
- var source = [ 'code' ]
15
- var ignore = [ 'script' , 'style' , 'svg' , 'math' , 'del' ]
16
- var explicit = [ 'p' , 'h1' , 'h2' , 'h3' , 'h4' , 'h5' , 'h6' ]
14
+ var push = [ ] . push
17
15
18
- var flowAccepting = [
16
+ var source = convert ( [ 'code' , dataNlcstSourced ] )
17
+ var ignore = convert ( [ 'script' , 'style' , 'svg' , 'math' , 'del' , dataNlcstIgnore ] )
18
+ var explicit = convert ( [ 'p' , 'h1' , 'h2' , 'h3' , 'h4' , 'h5' , 'h6' ] )
19
+
20
+ var flowAccepting = convert ( [
19
21
'body' ,
20
22
'article' ,
21
23
'section' ,
@@ -39,7 +41,10 @@ var flowAccepting = [
39
41
'fieldset' ,
40
42
'details' ,
41
43
'dialog'
42
- ]
44
+ ] )
45
+
46
+ // See: <https://html.spec.whatwg.org/multipage/dom.html#paragraphs>
47
+ var unravelInParagraph = convert ( [ 'a' , 'ins' , 'del' , 'map' ] )
43
48
44
49
// Transform `tree` to nlcst.
45
50
function toNlcst ( tree , file , Parser ) {
@@ -66,8 +71,8 @@ function toNlcst(tree, file, Parser) {
66
71
throw new Error ( 'hast-util-to-nlcst expected position on nodes' )
67
72
}
68
73
69
- location = vfileLocation ( file )
70
74
doc = String ( file )
75
+ location = vfileLocation ( doc )
71
76
parser = 'parse' in Parser ? Parser : new Parser ( )
72
77
73
78
// Transform hast to nlcst, and pass these into `parser.parse` to insert
@@ -79,55 +84,44 @@ function toNlcst(tree, file, Parser) {
79
84
return {
80
85
type : 'RootNode' ,
81
86
children : results ,
82
- position : {
83
- start : location . toPosition ( 0 ) ,
84
- end : location . toPosition ( doc . length )
85
- }
87
+ position : { start : location . toPoint ( 0 ) , end : location . toPoint ( doc . length ) }
86
88
}
87
89
88
90
function find ( node ) {
89
- var children = node . children
90
-
91
91
if ( node . type === 'root' ) {
92
- findAll ( children )
93
- } else if ( is ( node ) && ! ignored ( node ) ) {
94
- if ( is ( node , explicit ) ) {
92
+ findAll ( node . children )
93
+ } else if ( node . type === 'element' && ! ignore ( node ) ) {
94
+ if ( explicit ( node ) ) {
95
95
// Explicit paragraph.
96
96
add ( node )
97
- } else if ( is ( node , flowAccepting ) ) {
97
+ } else if ( flowAccepting ( node ) ) {
98
98
// Slightly simplified version of: <https://html.spec.whatwg.org/#paragraphs>.
99
- implicit ( flattenAll ( children ) )
99
+ implicit ( flattenAll ( node . children ) )
100
100
} else {
101
101
// Dig deeper.
102
- findAll ( children )
102
+ findAll ( node . children )
103
103
}
104
104
}
105
105
}
106
106
107
107
function findAll ( children ) {
108
- var length = children . length
109
108
var index = - 1
110
109
111
- while ( ++ index < length ) {
110
+ while ( ++ index < children . length ) {
112
111
find ( children [ index ] )
113
112
}
114
113
}
115
114
116
- function flatten ( node ) {
117
- if ( is ( node , [ 'a' , 'ins' , 'del' , 'map' ] ) ) {
118
- return flattenAll ( node . children )
119
- }
120
-
121
- return node
122
- }
123
-
124
115
function flattenAll ( children ) {
125
116
var results = [ ]
126
- var length = children . length
127
117
var index = - 1
128
118
129
- while ( ++ index < length ) {
130
- results = results . concat ( flatten ( children [ index ] ) )
119
+ while ( ++ index < children . length ) {
120
+ if ( unravelInParagraph ( children [ index ] ) ) {
121
+ push . apply ( results , flattenAll ( children [ index ] . children ) )
122
+ } else {
123
+ results . push ( children [ index ] )
124
+ }
131
125
}
132
126
133
127
return results
@@ -136,25 +130,22 @@ function toNlcst(tree, file, Parser) {
136
130
function add ( node ) {
137
131
var result = ( 'length' in node ? all : one ) ( node )
138
132
139
- if ( result . length > 0 ) {
133
+ if ( result . length ) {
140
134
results . push ( parser . tokenizeParagraph ( result ) )
141
135
}
142
136
}
143
137
144
138
function implicit ( children ) {
145
- var length = children . length + 1
146
139
var index = - 1
147
- var viable = false
148
140
var start = - 1
141
+ var viable
149
142
var child
150
143
151
- while ( ++ index < length ) {
144
+ while ( ++ index <= children . length ) {
152
145
child = children [ index ]
153
146
154
147
if ( child && phrasing ( child ) ) {
155
- if ( start === - 1 ) {
156
- start = index
157
- }
148
+ if ( start === - 1 ) start = index
158
149
159
150
if ( ! viable && ! embedded ( child ) && ! whitespace ( child ) ) {
160
151
viable = true
@@ -169,60 +160,50 @@ function toNlcst(tree, file, Parser) {
169
160
find ( child )
170
161
}
171
162
172
- viable = false
163
+ viable = null
173
164
start = - 1
174
165
}
175
166
}
176
167
}
177
168
178
169
// Convert `node` (hast) to nlcst.
179
170
function one ( node ) {
180
- var type = node . type
181
- var tagName = type === 'element' ? node . tagName : null
182
- var change
183
171
var replacement
172
+ var change
184
173
185
- if ( type === 'text' ) {
186
- change = true
174
+ if ( node . type === 'text' ) {
187
175
replacement = parser . tokenize ( node . value )
188
- } else if ( tagName === 'wbr' ) {
189
176
change = true
190
- replacement = [ parser . tokenizeWhiteSpace ( ' ' ) ]
191
- } else if ( tagName === 'br' ) {
192
- change = true
193
- replacement = [ parser . tokenizeWhiteSpace ( '\n' ) ]
194
- } else if ( sourced ( node ) ) {
195
- change = true
196
- replacement = [ parser . tokenizeSource ( textContent ( node ) ) ]
197
- } else if ( type === 'root' || ! ignored ( node ) ) {
198
- replacement = all ( node . children )
199
- } else {
200
- return
201
- }
202
-
203
- if ( ! change ) {
204
- return replacement
177
+ } else if ( node . type === 'element' && ! ignore ( node ) ) {
178
+ if ( node . tagName === 'wbr' ) {
179
+ replacement = [ parser . tokenizeWhiteSpace ( ' ' ) ]
180
+ change = true
181
+ } else if ( node . tagName === 'br' ) {
182
+ replacement = [ parser . tokenizeWhiteSpace ( '\n' ) ]
183
+ change = true
184
+ } else if ( source ( node ) ) {
185
+ replacement = [ parser . tokenizeSource ( textContent ( node ) ) ]
186
+ change = true
187
+ } else {
188
+ replacement = all ( node . children )
189
+ }
205
190
}
206
191
207
- return patch ( replacement , location , location . toOffset ( position . start ( node ) ) )
192
+ return change
193
+ ? patch ( replacement , location , location . toOffset ( position . start ( node ) ) )
194
+ : replacement
208
195
}
209
196
210
197
// Convert all `children` (hast) to nlcst.
211
198
function all ( children ) {
212
- var length = children && children . length
199
+ var results = [ ]
213
200
var index = - 1
214
- var result = [ ]
215
- var child
216
-
217
- while ( ++ index < length ) {
218
- child = one ( children [ index ] )
219
201
220
- if ( child ) {
221
- result = result . concat ( child )
222
- }
202
+ while ( ++ index < children . length ) {
203
+ push . apply ( results , one ( children [ index ] ) || [ ] )
223
204
}
224
205
225
- return result
206
+ return results
226
207
}
227
208
228
209
// Patch a position on each node in `nodes`.
@@ -231,26 +212,23 @@ function toNlcst(tree, file, Parser) {
231
212
// Note that nlcst nodes are concrete, meaning that their starting and ending
232
213
// positions can be inferred from their content.
233
214
function patch ( nodes , location , offset ) {
234
- var length = nodes . length
235
215
var index = - 1
236
216
var start = offset
237
- var children
238
- var node
239
217
var end
218
+ var node
240
219
241
- while ( ++ index < length ) {
220
+ while ( ++ index < nodes . length ) {
242
221
node = nodes [ index ]
243
- children = node . children
244
222
245
- if ( children ) {
246
- patch ( children , location , start )
223
+ if ( node . children ) {
224
+ patch ( node . children , location , start )
247
225
}
248
226
249
227
end = start + toString ( node ) . length
250
228
251
229
node . position = {
252
- start : location . toPosition ( start ) ,
253
- end : location . toPosition ( end )
230
+ start : location . toPoint ( start ) ,
231
+ end : location . toPoint ( end )
254
232
}
255
233
256
234
start = end
@@ -260,12 +238,10 @@ function toNlcst(tree, file, Parser) {
260
238
}
261
239
}
262
240
263
- function sourced ( node ) {
264
- var props = node . properties
265
- return is ( node ) && ( is ( node , source ) || props . dataNlcst === 'source' )
241
+ function dataNlcstSourced ( node ) {
242
+ return node . properties . dataNlcst === 'source'
266
243
}
267
244
268
- function ignored ( node ) {
269
- var props = node . properties
270
- return is ( node ) && ( is ( node , ignore ) || props . dataNlcst === 'ignore' )
245
+ function dataNlcstIgnore ( node ) {
246
+ return node . properties . dataNlcst === 'ignore'
271
247
}
0 commit comments