
Commit f24a6ac

zeripath and lafriks committed
Fix several render issues (go-gitea#14986)
Backport go-gitea#14986

* Fix an issue with panics related to attributes
* Wrap goldmark render in a recovery function
* Reduce memory use in render emoji
* Use a pipe for rendering goldmark - still needs more work and a limiter

Signed-off-by: Andrew Thornton <[email protected]>
Co-authored-by: Lauris BH <[email protected]>
1 parent fff66eb commit f24a6ac

File tree

6 files changed: +216 additions, -44 deletions


modules/emoji/emoji.go

Lines changed: 48 additions & 27 deletions
@@ -30,6 +30,9 @@ var (
 	// aliasMap provides a map of the alias to its emoji data.
 	aliasMap map[string]int
 
+	// emptyReplacer is the string replacer for emoji codes.
+	emptyReplacer *strings.Replacer
+
 	// codeReplacer is the string replacer for emoji codes.
 	codeReplacer *strings.Replacer
 
@@ -49,6 +52,7 @@ func loadMap() {
 
 		// process emoji codes and aliases
 		codePairs := make([]string, 0)
+		emptyPairs := make([]string, 0)
 		aliasPairs := make([]string, 0)
 
 		// sort from largest to small so we match combined emoji first
@@ -64,6 +68,7 @@ func loadMap() {
 			// setup codes
 			codeMap[e.Emoji] = i
 			codePairs = append(codePairs, e.Emoji, ":"+e.Aliases[0]+":")
+			emptyPairs = append(emptyPairs, e.Emoji, e.Emoji)
 
 			// setup aliases
 			for _, a := range e.Aliases {
@@ -77,6 +82,7 @@ func loadMap() {
 		}
 
 		// create replacers
+		emptyReplacer = strings.NewReplacer(emptyPairs...)
 		codeReplacer = strings.NewReplacer(codePairs...)
 		aliasReplacer = strings.NewReplacer(aliasPairs...)
 	})
@@ -127,38 +133,53 @@ func ReplaceAliases(s string) string {
 	return aliasReplacer.Replace(s)
 }
 
-// FindEmojiSubmatchIndex returns index pair of longest emoji in a string
-func FindEmojiSubmatchIndex(s string) []int {
-	loadMap()
-	found := make(map[int]int)
-	keys := make([]int, 0)
+type rememberSecondWriteWriter struct {
+	pos        int
+	idx        int
+	end        int
+	writecount int
+}
 
-	//see if there are any emoji in string before looking for position of specific ones
-	//no performance difference when there is a match but 10x faster when there are not
-	if s == ReplaceCodes(s) {
-		return nil
+func (n *rememberSecondWriteWriter) Write(p []byte) (int, error) {
+	n.writecount++
+	if n.writecount == 2 {
+		n.idx = n.pos
+		n.end = n.pos + len(p)
 	}
+	n.pos += len(p)
+	return len(p), nil
+}
 
-	// get index of first emoji occurrence while also checking for longest combination
-	for j := range GemojiData {
-		i := strings.Index(s, GemojiData[j].Emoji)
-		if i != -1 {
-			if _, ok := found[i]; !ok {
-				if len(keys) == 0 || i < keys[0] {
-					found[i] = j
-					keys = []int{i}
-				}
-				if i == 0 {
-					break
-				}
-			}
-		}
+func (n *rememberSecondWriteWriter) WriteString(s string) (int, error) {
+	n.writecount++
+	if n.writecount == 2 {
+		n.idx = n.pos
+		n.end = n.pos + len(s)
 	}
+	n.pos += len(s)
+	return len(s), nil
+}
 
-	if len(keys) > 0 {
-		index := keys[0]
-		return []int{index, index + len(GemojiData[found[index]].Emoji)}
+// FindEmojiSubmatchIndex returns index pair of longest emoji in a string
+func FindEmojiSubmatchIndex(s string) []int {
+	loadMap()
+	secondWriteWriter := rememberSecondWriteWriter{}
+
+	// A faster and clean implementation would copy the trie tree formation in strings.NewReplacer but
+	// we can be lazy here.
+	//
+	// The implementation of strings.Replacer.WriteString is such that the first index of the emoji
+	// submatch is simply the second thing that is written to WriteString in the writer.
+	//
+	// Therefore we can simply take the index of the second write as our first emoji
+	//
+	// FIXME: just copy the trie implementation from strings.NewReplacer
+	_, _ = emptyReplacer.WriteString(&secondWriteWriter, s)
+
+	// if we wrote less than twice then we never "replaced"
+	if secondWriteWriter.writecount < 2 {
+		return nil
 	}
 
-	return nil
+	return []int{secondWriteWriter.idx, secondWriteWriter.end}
 }
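Aside (not part of the commit): the second-write trick above leans on how strings.Replacer writes its output - for each match it first writes the unmatched text before the match, then the replacement. With a replacer that maps every emoji to itself, the second chunk written is therefore the first emoji, and its byte offsets are the submatch indices. A minimal standalone sketch of the same idea, with illustrative names (secondWriteRecorder is not from the commit):

package main

import (
	"fmt"
	"strings"
)

// secondWriteRecorder notes where the second chunk handed to Write lands in
// the output stream. With a replacer whose pairs map each emoji to itself,
// that second chunk is the first match, so its offsets are the submatch.
type secondWriteRecorder struct {
	pos, idx, end, writes int
}

func (w *secondWriteRecorder) Write(p []byte) (int, error) {
	w.writes++
	if w.writes == 2 {
		w.idx = w.pos
		w.end = w.pos + len(p)
	}
	w.pos += len(p)
	return len(p), nil
}

func main() {
	// Identity replacer over two "emoji": the output is unchanged, but the
	// write pattern (prefix, match, rest, ...) still reveals where the match was.
	r := strings.NewReplacer("🍺", "🍺", "👍", "👍")

	rec := &secondWriteRecorder{}
	_, _ = r.WriteString(rec, "cheers 🍺!")
	if rec.writes >= 2 {
		fmt.Println(rec.idx, rec.end) // byte offsets of the first emoji: 7 11
	}
}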

modules/emoji/emoji_test.go

Lines changed: 33 additions & 0 deletions
@@ -8,6 +8,8 @@ package emoji
 import (
 	"reflect"
 	"testing"
+
+	"github.com/stretchr/testify/assert"
 )
 
 func TestDumpInfo(t *testing.T) {
@@ -65,3 +67,34 @@ func TestReplacers(t *testing.T) {
 		}
 	}
 }
+
+func TestFindEmojiSubmatchIndex(t *testing.T) {
+	type testcase struct {
+		teststring string
+		expected   []int
+	}
+
+	testcases := []testcase{
+		{
+			"\U0001f44d",
+			[]int{0, len("\U0001f44d")},
+		},
+		{
+			"\U0001f44d +1 \U0001f44d \U0001f37a",
+			[]int{0, 4},
+		},
+		{
+			" \U0001f44d",
+			[]int{1, 1 + len("\U0001f44d")},
+		},
+		{
+			string([]byte{'\u0001'}) + "\U0001f44d",
+			[]int{1, 1 + len("\U0001f44d")},
+		},
+	}
+
+	for _, kase := range testcases {
+		actual := FindEmojiSubmatchIndex(kase.teststring)
+		assert.Equal(t, kase.expected, actual)
+	}
+}

modules/markup/html.go

Lines changed: 16 additions & 8 deletions
@@ -298,19 +298,27 @@ func RenderEmoji(
 	return ctx.postProcess(rawHTML)
 }
 
+var tagCleaner = regexp.MustCompile(`<((?:/?\w+/\w+)|(?:/[\w ]+/)|(/?[hH][tT][mM][lL][ />]))`)
+var nulCleaner = strings.NewReplacer("\000", "")
+
 func (ctx *postProcessCtx) postProcess(rawHTML []byte) ([]byte, error) {
 	if ctx.procs == nil {
 		ctx.procs = defaultProcessors
 	}
 
 	// give a generous extra 50 bytes
-	res := make([]byte, 0, len(rawHTML)+50)
-	res = append(res, "<html><body>"...)
-	res = append(res, rawHTML...)
-	res = append(res, "</body></html>"...)
+	res := bytes.NewBuffer(make([]byte, 0, len(rawHTML)+50))
+	// prepend "<html><body>"
+	_, _ = res.WriteString("<html><body>")
+
+	// Strip out nuls - they're always invalid
+	_, _ = nulCleaner.WriteString(res, string(tagCleaner.ReplaceAll(rawHTML, []byte("&lt;$1"))))
+
+	// close the tags
+	_, _ = res.WriteString("</body></html>")
 
 	// parse the HTML
-	nodes, err := html.ParseFragment(bytes.NewReader(res), nil)
+	nodes, err := html.ParseFragment(res, nil)
 	if err != nil {
 		return nil, &postProcessError{"invalid HTML", err}
 	}
@@ -347,17 +355,17 @@ func (ctx *postProcessCtx) postProcess(rawHTML []byte) ([]byte, error) {
 	// Create buffer in which the data will be placed again. We know that the
 	// length will be at least that of res; to spare a few alloc+copy, we
 	// reuse res, resetting its length to 0.
-	buf := bytes.NewBuffer(res[:0])
+	res.Reset()
 	// Render everything to buf.
 	for _, node := range nodes {
-		err = html.Render(buf, node)
+		err = html.Render(res, node)
 		if err != nil {
 			return nil, &postProcessError{"error rendering processed HTML", err}
 		}
 	}
 
 	// Everything done successfully, return parsed data.
-	return buf.Bytes(), nil
+	return res.Bytes(), nil
 }
 
 func (ctx *postProcessCtx) visitNode(node *html.Node, visitText bool) {
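Aside (not part of the commit): the new pre-pass escapes pseudo-tags that html.ParseFragment would otherwise mis-nest (things like "<owner/repo>" or a stray "</html>") and strips NUL bytes, which are never valid. A rough standalone sketch of the same regex and replacer applied to hostile input, using Replace instead of WriteString for brevity:

package main

import (
	"fmt"
	"regexp"
	"strings"
)

// Same expressions as in the diff above: escape pseudo-tags before parsing,
// and drop NUL bytes entirely.
var tagCleaner = regexp.MustCompile(`<((?:/?\w+/\w+)|(?:/[\w ]+/)|(/?[hH][tT][mM][lL][ />]))`)
var nulCleaner = strings.NewReplacer("\000", "")

func main() {
	raw := []byte("keep <b>bold</b>, escape <owner/repo> and </HTML>, drop \000")

	cleaned := nulCleaner.Replace(string(tagCleaner.ReplaceAll(raw, []byte("&lt;$1"))))
	fmt.Println(cleaned)
	// keep <b>bold</b>, escape &lt;owner/repo> and &lt;/HTML>, drop
}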

modules/markup/markdown/goldmark.go

Lines changed: 6 additions & 0 deletions
@@ -77,6 +77,12 @@ func (g *ASTTransformer) Transform(node *ast.Document, reader text.Reader, pc pa
 				header.ID = util.BytesToReadOnlyString(id.([]byte))
 			}
 			toc = append(toc, header)
+		} else {
+			for _, attr := range v.Attributes() {
+				if _, ok := attr.Value.([]byte); !ok {
+					v.SetAttribute(attr.Name, []byte(fmt.Sprintf("%v", attr.Value)))
+				}
+			}
 		}
 	case *ast.Image:
 		// Images need two things:
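Aside (not part of the commit): goldmark's attribute syntax (for example "{class=5}") can parse an attribute value as an int, while downstream rendering assumes []byte, which was one source of the panics this commit fixes. The new else branch coerces every non-[]byte heading attribute value through fmt.Sprintf. A tiny sketch of that coercion in isolation (coerceToBytes is a hypothetical helper, not from the commit):

package main

import "fmt"

// coerceToBytes mirrors the fix: leave []byte values alone and stringify
// everything else, so a renderer that expects []byte cannot panic on an int.
func coerceToBytes(v interface{}) []byte {
	if b, ok := v.([]byte); ok {
		return b
	}
	return []byte(fmt.Sprintf("%v", v))
}

func main() {
	fmt.Printf("%s\n", coerceToBytes(5))           // "5" (e.g. from {class=5})
	fmt.Printf("%s\n", coerceToBytes([]byte("x"))) // "x" unchanged
}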

modules/markup/markdown/markdown.go

Lines changed: 93 additions & 9 deletions
@@ -6,7 +6,8 @@
 package markdown
 
 import (
-	"bytes"
+	"fmt"
+	"io"
 	"strings"
 	"sync"
 
@@ -18,7 +19,7 @@ import (
 
 	chromahtml "github.com/alecthomas/chroma/formatters/html"
 	"github.com/yuin/goldmark"
-	"github.com/yuin/goldmark-highlighting"
+	highlighting "github.com/yuin/goldmark-highlighting"
 	meta "github.com/yuin/goldmark-meta"
 	"github.com/yuin/goldmark/extension"
 	"github.com/yuin/goldmark/parser"
@@ -34,6 +35,44 @@ var urlPrefixKey = parser.NewContextKey()
 var isWikiKey = parser.NewContextKey()
 var renderMetasKey = parser.NewContextKey()
 
+type closesWithError interface {
+	io.WriteCloser
+	CloseWithError(err error) error
+}
+
+type limitWriter struct {
+	w     closesWithError
+	sum   int64
+	limit int64
+}
+
+// Write implements the standard Write interface:
+func (l *limitWriter) Write(data []byte) (int, error) {
+	leftToWrite := l.limit - l.sum
+	if leftToWrite < int64(len(data)) {
+		n, err := l.w.Write(data[:leftToWrite])
+		l.sum += int64(n)
+		if err != nil {
+			return n, err
+		}
+		_ = l.w.Close()
+		return n, fmt.Errorf("Rendered content too large - truncating render")
+	}
+	n, err := l.w.Write(data)
+	l.sum += int64(n)
+	return n, err
+}
+
+// Close closes the writer
+func (l *limitWriter) Close() error {
+	return l.w.Close()
+}
+
+// CloseWithError closes the writer
+func (l *limitWriter) CloseWithError(err error) error {
+	return l.w.CloseWithError(err)
+}
+
 // NewGiteaParseContext creates a parser.Context with the gitea context set
 func NewGiteaParseContext(urlPrefix string, metas map[string]string, isWiki bool) parser.Context {
 	pc := parser.NewContext(parser.WithIDs(newPrefixedIDs()))
@@ -43,8 +82,8 @@ func NewGiteaParseContext(urlPrefix string, metas map[string]string, isWiki bool
 	return pc
 }
 
-// render renders Markdown to HTML without handling special links.
-func render(body []byte, urlPrefix string, metas map[string]string, wikiMarkdown bool) []byte {
+// actualRender renders Markdown to HTML without handling special links.
+func actualRender(body []byte, urlPrefix string, metas map[string]string, wikiMarkdown bool) []byte {
 	once.Do(func() {
 		converter = goldmark.New(
 			goldmark.WithExtensions(extension.Table,
@@ -119,12 +158,57 @@ func render(body []byte, urlPrefix string, metas map[string]string, wikiMarkdown
 
 	})
 
-	pc := NewGiteaParseContext(urlPrefix, metas, wikiMarkdown)
-	var buf bytes.Buffer
-	if err := converter.Convert(giteautil.NormalizeEOL(body), &buf, parser.WithContext(pc)); err != nil {
-		log.Error("Unable to render: %v", err)
+	rd, wr := io.Pipe()
+	defer func() {
+		_ = rd.Close()
+		_ = wr.Close()
+	}()
+
+	lw := &limitWriter{
+		w:     wr,
+		limit: setting.UI.MaxDisplayFileSize * 3,
 	}
-	return markup.SanitizeReader(&buf).Bytes()
+
+	// FIXME: should we include a timeout that closes the pipe to abort the parser and sanitizer if it takes too long?
+	go func() {
+		defer func() {
+			err := recover()
+			if err == nil {
+				return
+			}
+
+			log.Warn("Unable to render markdown due to panic in goldmark: %v", err)
+			if log.IsDebug() {
+				log.Debug("Panic in markdown: %v\n%s", err, string(log.Stack(2)))
+			}
+			_ = lw.CloseWithError(fmt.Errorf("%v", err))
+		}()
+
+		pc := NewGiteaParseContext(urlPrefix, metas, wikiMarkdown)
+		if err := converter.Convert(giteautil.NormalizeEOL(body), lw, parser.WithContext(pc)); err != nil {
+			log.Error("Unable to render: %v", err)
+			_ = lw.CloseWithError(err)
+			return
+		}
+		_ = lw.Close()
+	}()
+	return markup.SanitizeReader(rd).Bytes()
+}
+
+func render(body []byte, urlPrefix string, metas map[string]string, wikiMarkdown bool) (ret []byte) {
+	defer func() {
+		err := recover()
+		if err == nil {
+			return
+		}
+
+		log.Warn("Unable to render markdown due to panic in goldmark - will return sanitized raw bytes")
+		if log.IsDebug() {
+			log.Debug("Panic in markdown: %v\n%s", err, string(log.Stack(2)))
+		}
+		ret = markup.SanitizeBytes(body)
+	}()
+	return actualRender(body, urlPrefix, metas, wikiMarkdown)
 }
 
 var (
modules/markup/markdown/markdown_test.go

Lines changed: 20 additions & 0 deletions
@@ -309,6 +309,25 @@ func TestRender_RenderParagraphs(t *testing.T) {
 	test(t, "A\n\n\nB\nC\n", 2)
 }
 
+func TestMarkdownRenderRaw(t *testing.T) {
+	testcases := [][]byte{
+		{ // clusterfuzz_testcase_minimized_fuzz_markdown_render_raw_6267570554535936
+			0x2a, 0x20, 0x2d, 0x0a, 0x09, 0x20, 0x60, 0x5b, 0x0a, 0x09, 0x20, 0x60,
+			0x5b,
+		},
+		{ // clusterfuzz_testcase_minimized_fuzz_markdown_render_raw_6278827345051648
+			0x2d, 0x20, 0x2d, 0x0d, 0x09, 0x60, 0x0d, 0x09, 0x60,
+		},
+		{ // clusterfuzz_testcase_minimized_fuzz_markdown_render_raw_6016973788020736[] = {
+			0x7b, 0x63, 0x6c, 0x61, 0x73, 0x73, 0x3d, 0x35, 0x7d, 0x0a, 0x3d,
+		},
+	}
+
+	for _, testcase := range testcases {
+		_ = RenderRaw(testcase, "", false)
+	}
+}
+
 func TestRenderSiblingImages_Issue12925(t *testing.T) {
 	testcase := `![image1](/image1)
 ![image2](/image2)
@@ -318,4 +337,5 @@ func TestRenderSiblingImages_Issue12925(t *testing.T) {
 `
 	res := string(RenderRaw([]byte(testcase), "", false))
 	assert.Equal(t, expected, res)
+
 }
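Aside (not part of the commit): the fuzz inputs above are just the raw bytes of small markdown documents, and decoding them makes the failure modes easier to see. For instance, the third testcase appears to be the integer-attribute case that the goldmark.go change guards against:

package main

import "fmt"

func main() {
	// clusterfuzz_testcase_minimized_fuzz_markdown_render_raw_6016973788020736
	input := []byte{0x7b, 0x63, 0x6c, 0x61, 0x73, 0x73, 0x3d, 0x35, 0x7d, 0x0a, 0x3d}
	fmt.Printf("%q\n", input) // "{class=5}\n=" - a heading with an integer attribute value
}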
