|
| 1 | +import { promises as fs } from 'node:fs' |
| 2 | +import { Octokit } from '@octokit/rest' |
| 3 | +import fetch from 'node-fetch' |
| 4 | +import { unified } from 'unified' |
| 5 | +import rehypeParse from 'rehype-parse' |
| 6 | +import { select, selectAll } from 'hast-util-select' |
| 7 | +import { toMarkdown } from 'mdast-util-to-markdown' |
| 8 | +import { gfmToMarkdown } from 'mdast-util-gfm' |
| 9 | + |
// Note: the GH token needs `gists` access!
// The token is read from `GH_TOKEN`, falling back to `GITHUB_TOKEN` when the
// former is unset or empty.
const ghToken = process.env.GH_TOKEN || process.env.GITHUB_TOKEN

if (!ghToken) {
  throw new Error('Missing GitHub token: expected `GH_TOKEN` or `GITHUB_TOKEN` in env')
}

// Authenticated GitHub API client.
const octo = new Octokit({ auth: 'token ' + ghToken })
// Directory (as a URL, resolved relative to this module) that holds one
// folder of Unicode data per general category.
const categoryBase = new URL('../node_modules/@unicode/unicode-12.1.0/General_Category/', import.meta.url)

// Take up to N samples from each category.
const samples = 400
| 22 | + |
// Hand-written test cases.
// Each case has a `name` and an `input` (the heading text).
// `markdownOverwrite`, when present, is used verbatim as the markdown for that
// case instead of serializing `input` as a heading.
const otherTests = [
  { name: 'Basic usage', input: 'alpha' },
  { name: 'Basic usage (again)', input: 'alpha' },
  { name: 'Camelcase', input: 'bravoCharlieDelta' },
  { name: 'Prototypal injection: proto', input: '__proto__' },
  { name: 'Prototypal injection: proto (again)', input: '__proto__' },
  { name: 'Prototypal injection: has own', input: 'hasOwnProperty' },
  { name: 'Repetition (1)', input: 'echo' },
  { name: 'Repetition (2)', input: 'echo' },
  { name: 'Repetition (3)', input: 'echo 1' },
  { name: 'Repetition (4)', input: 'echo-1' },
  { name: 'Repetition (5)', input: 'echo' },
  { name: 'More repetition (1)', input: 'foxtrot-1' },
  { name: 'More repetition (2)', input: 'foxtrot' },
  { name: 'More repetition (3)', input: 'foxtrot' },
  { name: 'Characters: dash', input: 'heading with a - dash' },
  { name: 'Characters: underscore', input: 'heading with an _ underscore' },
  { name: 'Characters: dot', input: 'heading with a period.txt' },
  { name: 'Characters: dots, parents, brackets', input: 'exchange.bind_headers(exchange, routing [, bindCallback])' },
  // Markdown strips leading and trailing spaces from ATX heading content, so
  // the spaces under test are written as `&#x20;` character references to make
  // the rendered heading text equal `input`.
  { name: 'Characters: space', input: ' ', markdownOverwrite: '# &#x20;' },
  { name: 'Characters: initial space', input: ' a', markdownOverwrite: '# &#x20;a' },
  { name: 'Characters: final space', input: 'a ', markdownOverwrite: '# a&#x20;' },
  { name: 'Characters: initial and final spaces', input: ' a ', markdownOverwrite: '# &#x20;a&#x20;' },
  { name: 'Characters: initial and final dashes', input: '-a-' },
  { name: 'Characters: apostrophe', input: 'apostrophe’s should be trimmed' },
  { name: 'Some more duplicates (1)', input: 'golf' },
  { name: 'Some more duplicates (2)', input: 'golf' },
  { name: 'Some more duplicates (3)', input: 'golf' },
  { name: 'Non-ascii: ♥', input: 'I ♥ unicode' },
  { name: 'Non-ascii: -', input: 'dash-dash' },
  { name: 'Non-ascii: –', input: 'en–dash' },
  // NOTE(review): this case appears to use the same dash character as the
  // en dash case above; if an em dash (U+2014) was intended, confirm and
  // update both `name` and `input`.
  { name: 'Non-ascii: –', input: 'em–dash' },
  { name: 'Non-ascii: 😄', input: '😄 unicode emoji' },
  { name: 'Non-ascii: 😄-😄', input: '😄-😄 unicode emoji' },
  { name: 'Non-ascii: 😄_😄', input: '😄_😄 unicode emoji' },
  { name: 'Non-ascii: 😄', input: '😄 - an emoji' },
  { name: 'Non-ascii: :smile:', input: ':smile: - a gemoji' },
  { name: 'Non-ascii: Cyrillic (1)', input: 'Привет' },
  { name: 'Non-ascii: Cyrillic (2)', input: 'Профили пользователей' },
  { name: 'Non-ascii: Cyrillic + Han', input: 'Привет non-latin 你好' },
  { name: 'Gemoji (1)', input: ':ok: No underscore' },
  { name: 'Gemoji (2)', input: ':ok_hand: Single' },
  { name: 'Gemoji (3)', input: ':ok_hand::hatched_chick: Two in a row with no spaces' },
  { name: 'Gemoji (4)', input: ':ok_hand: :hatched_chick: Two in a row' }
]
| 68 | + |
main()

/**
 * Generate `test/fixtures.json`.
 *
 * Collects the hand-written `otherTests` plus one generated case per Unicode
 * general category, uploads them as headings in a markdown Gist, fetches the
 * rendered Gist page, and stores the `href` of each heading anchor (without
 * its first character) as the `expected` value of the matching test.
 *
 * @returns {Promise<void>}
 *   Resolves once the fixtures file is written.
 * @throws {Error}
 *   When GitHub treats the markdown as binary, the page cannot be fetched,
 *   no rendered markdown is found, or the number of heading anchors does not
 *   match the number of tests.
 */
async function main () {
  const files = await fs.readdir(categoryBase)
  const tests = [...otherTests]

  // Create a test case with a bunch of examples.
  for (const name of files) {
    if (name === 'index.js') continue

    // These result in Git(Hub) thinking it’s a binary file.
    if (name === 'Control' || name === 'Surrogate') continue

    // This prevents GH from rendering markdown to HTML.
    if (name === 'Other') continue

    const fp = `./${name}/code-points.js`
    const { default: codePoints } = await import(new URL(fp, categoryBase))
    const subs = []

    // Pick evenly spaced code points across the category…
    for (let n = 0; n < samples; n++) {
      subs.push(codePoints[Math.floor(codePoints.length / samples * n)])
    }

    // …and always include the last one.
    subs.push(codePoints[codePoints.length - 1])

    // Small categories yield repeated picks, so dedupe before joining.
    const characters = [...new Set(subs)].map(d => String.fromCodePoint(d))

    tests.push({ name, input: 'a' + characters.join(' ') + 'b' })
  }

  // Create a Gist.
  const filename = 'readme.md'
  const content = tests
    .map(d => {
      return d.markdownOverwrite || toMarkdown(
        { type: 'heading', depth: 1, children: [{ type: 'text', value: d.input }] },
        { extensions: [gfmToMarkdown()] }
      )
    })
    .join('\n\n')
  const gistResult = await octo.gists.create({
    files: { [filename]: { content } }
  })

  let doc

  // From here on the Gist exists: make sure it is removed even when one of
  // the steps below fails.
  try {
    const file = gistResult.data.files[filename]

    if (!file.language) {
      throw new Error('The generated markdown was seen as binary data instead of text by GitHub. This is likely because there are weird characters (such as control characters or lone surrogates) in it')
    }

    // Fetch the rendered page.
    const response = await fetch(gistResult.data.html_url, {
      headers: { Authorization: 'token ' + ghToken }
    })

    // An error page would otherwise be parsed as if it were the Gist.
    if (!response.ok) {
      throw new Error('Could not fetch the rendered Gist: ' + response.status + ' ' + response.statusText)
    }

    doc = await response.text()
  } finally {
    // Remove the Gist.
    await octo.gists.delete({ gist_id: gistResult.data.id })
  }

  const tree = unified().use(rehypeParse).parse(doc)
  const markdownBody = select('.markdown-body', tree)

  if (!markdownBody) {
    throw new Error('The generated markdown could not be rendered by GitHub as HTML. This is likely because there are weird characters in it')
  }

  const anchors = selectAll('h1 .anchor', markdownBody)

  // Anchors are matched to tests by position, so a missing or extra anchor
  // would silently shift every following `expected` value.
  if (anchors.length !== tests.length) {
    throw new Error('Expected ' + tests.length + ' heading anchors in the rendered Gist, but found ' + anchors.length)
  }

  anchors.forEach((node, i) => {
    // Drop the first character of the `href` (for example the `#` of `#alpha`).
    tests[i].expected = node.properties.href.slice(1)
  })

  await fs.writeFile(new URL('../test/fixtures.json', import.meta.url), JSON.stringify(tests, null, 2) + '\n')
}
0 commit comments