Skip to content

Fix several render issues highlighted during fuzzing #14986

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 12 commits into from
Mar 15, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
75 changes: 48 additions & 27 deletions modules/emoji/emoji.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,9 @@ var (
// aliasMap provides a map of the alias to its emoji data.
aliasMap map[string]int

// emptyReplacer is the string replacer for emoji codes.
emptyReplacer *strings.Replacer

// codeReplacer is the string replacer for emoji codes.
codeReplacer *strings.Replacer

Expand All @@ -49,6 +52,7 @@ func loadMap() {

// process emoji codes and aliases
codePairs := make([]string, 0)
emptyPairs := make([]string, 0)
aliasPairs := make([]string, 0)

// sort from largest to small so we match combined emoji first
Expand All @@ -64,6 +68,7 @@ func loadMap() {
// setup codes
codeMap[e.Emoji] = i
codePairs = append(codePairs, e.Emoji, ":"+e.Aliases[0]+":")
emptyPairs = append(emptyPairs, e.Emoji, e.Emoji)

// setup aliases
for _, a := range e.Aliases {
Expand All @@ -77,6 +82,7 @@ func loadMap() {
}

// create replacers
emptyReplacer = strings.NewReplacer(emptyPairs...)
codeReplacer = strings.NewReplacer(codePairs...)
aliasReplacer = strings.NewReplacer(aliasPairs...)
})
Expand Down Expand Up @@ -127,38 +133,53 @@ func ReplaceAliases(s string) string {
return aliasReplacer.Replace(s)
}

// FindEmojiSubmatchIndex returns index pair of longest emoji in a string
func FindEmojiSubmatchIndex(s string) []int {
loadMap()
found := make(map[int]int)
keys := make([]int, 0)
type rememberSecondWriteWriter struct {
pos int
idx int
end int
writecount int
}

//see if there are any emoji in string before looking for position of specific ones
//no performance difference when there is a match but 10x faster when there are not
if s == ReplaceCodes(s) {
return nil
func (n *rememberSecondWriteWriter) Write(p []byte) (int, error) {
n.writecount++
if n.writecount == 2 {
n.idx = n.pos
n.end = n.pos + len(p)
}
n.pos += len(p)
return len(p), nil
}

// get index of first emoji occurrence while also checking for longest combination
for j := range GemojiData {
i := strings.Index(s, GemojiData[j].Emoji)
if i != -1 {
if _, ok := found[i]; !ok {
if len(keys) == 0 || i < keys[0] {
found[i] = j
keys = []int{i}
}
if i == 0 {
break
}
}
}
func (n *rememberSecondWriteWriter) WriteString(s string) (int, error) {
n.writecount++
if n.writecount == 2 {
n.idx = n.pos
n.end = n.pos + len(s)
}
n.pos += len(s)
return len(s), nil
}

if len(keys) > 0 {
index := keys[0]
return []int{index, index + len(GemojiData[found[index]].Emoji)}
// FindEmojiSubmatchIndex returns index pair of longest emoji in a string
func FindEmojiSubmatchIndex(s string) []int {
loadMap()
secondWriteWriter := rememberSecondWriteWriter{}

// A faster and clean implementation would copy the trie tree formation in strings.NewReplacer but
// we can be lazy here.
//
// The implementation of strings.Replacer.WriteString is such that the first index of the emoji
// submatch is simply the second thing that is written to WriteString in the writer.
//
// Therefore we can simply take the index of the second write as our first emoji
//
// FIXME: just copy the trie implementation from strings.NewReplacer
_, _ = emptyReplacer.WriteString(&secondWriteWriter, s)

// if we wrote less than twice then we never "replaced"
if secondWriteWriter.writecount < 2 {
return nil
}

return nil
return []int{secondWriteWriter.idx, secondWriteWriter.end}
}
33 changes: 33 additions & 0 deletions modules/emoji/emoji_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ package emoji
import (
"reflect"
"testing"

"github.com/stretchr/testify/assert"
)

func TestDumpInfo(t *testing.T) {
Expand Down Expand Up @@ -65,3 +67,34 @@ func TestReplacers(t *testing.T) {
}
}
}

func TestFindEmojiSubmatchIndex(t *testing.T) {
type testcase struct {
teststring string
expected []int
}

testcases := []testcase{
{
"\U0001f44d",
[]int{0, len("\U0001f44d")},
},
{
"\U0001f44d +1 \U0001f44d \U0001f37a",
[]int{0, 4},
},
{
" \U0001f44d",
[]int{1, 1 + len("\U0001f44d")},
},
{
string([]byte{'\u0001'}) + "\U0001f44d",
[]int{1, 1 + len("\U0001f44d")},
},
}

for _, kase := range testcases {
actual := FindEmojiSubmatchIndex(kase.teststring)
assert.Equal(t, kase.expected, actual)
}
}
36 changes: 11 additions & 25 deletions modules/markup/html.go
Original file line number Diff line number Diff line change
Expand Up @@ -313,41 +313,27 @@ func RenderEmoji(
return ctx.postProcess(rawHTML)
}

var tagCleaner = regexp.MustCompile(`<((?:/?\w+/\w+)|(?:/[\w ]+/)|(/?[hH][tT][mM][lL][ />]))`)
var nulCleaner = strings.NewReplacer("\000", "")

func (ctx *postProcessCtx) postProcess(rawHTML []byte) ([]byte, error) {
if ctx.procs == nil {
ctx.procs = defaultProcessors
}

// give a generous extra 50 bytes
res := make([]byte, 0, len(rawHTML)+50)

res := bytes.NewBuffer(make([]byte, 0, len(rawHTML)+50))
// prepend "<html><body>"
res = append(res, "<html><body>"...)
_, _ = res.WriteString("<html><body>")

// Strip out nuls - they're always invalid
start := bytes.IndexByte(rawHTML, '\000')
if start >= 0 {
res = append(res, rawHTML[:start]...)
start++
for start < len(rawHTML) {
end := bytes.IndexByte(rawHTML[start:], '\000')
if end < 0 {
res = append(res, rawHTML[start:]...)
break
} else if end > 0 {
res = append(res, rawHTML[start:start+end]...)
}
start += end + 1
}
} else {
res = append(res, rawHTML...)
}
_, _ = nulCleaner.WriteString(res, string(tagCleaner.ReplaceAll(rawHTML, []byte("&lt;$1"))))

// close the tags
res = append(res, "</body></html>"...)
_, _ = res.WriteString("</body></html>")

// parse the HTML
nodes, err := html.ParseFragment(bytes.NewReader(res), nil)
nodes, err := html.ParseFragment(res, nil)
if err != nil {
return nil, &postProcessError{"invalid HTML", err}
}
Expand Down Expand Up @@ -384,17 +370,17 @@ func (ctx *postProcessCtx) postProcess(rawHTML []byte) ([]byte, error) {
// Create buffer in which the data will be placed again. We know that the
// length will be at least that of res; to spare a few alloc+copy, we
// reuse res, resetting its length to 0.
buf := bytes.NewBuffer(res[:0])
res.Reset()
// Render everything to buf.
for _, node := range nodes {
err = html.Render(buf, node)
err = html.Render(res, node)
if err != nil {
return nil, &postProcessError{"error rendering processed HTML", err}
}
}

// Everything done successfully, return parsed data.
return buf.Bytes(), nil
return res.Bytes(), nil
}

func (ctx *postProcessCtx) visitNode(node *html.Node, visitText bool) {
Expand Down
6 changes: 6 additions & 0 deletions modules/markup/markdown/goldmark.go
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,12 @@ func (g *ASTTransformer) Transform(node *ast.Document, reader text.Reader, pc pa
header.ID = util.BytesToReadOnlyString(id.([]byte))
}
toc = append(toc, header)
} else {
for _, attr := range v.Attributes() {
if _, ok := attr.Value.([]byte); !ok {
v.SetAttribute(attr.Name, []byte(fmt.Sprintf("%v", attr.Value)))
}
}
}
case *ast.Image:
// Images need two things:
Expand Down
102 changes: 93 additions & 9 deletions modules/markup/markdown/markdown.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@
package markdown

import (
"bytes"
"fmt"
"io"
"strings"
"sync"

Expand All @@ -18,7 +19,7 @@ import (

chromahtml "github.com/alecthomas/chroma/formatters/html"
"github.com/yuin/goldmark"
"github.com/yuin/goldmark-highlighting"
highlighting "github.com/yuin/goldmark-highlighting"
meta "github.com/yuin/goldmark-meta"
"github.com/yuin/goldmark/extension"
"github.com/yuin/goldmark/parser"
Expand All @@ -34,6 +35,44 @@ var urlPrefixKey = parser.NewContextKey()
var isWikiKey = parser.NewContextKey()
var renderMetasKey = parser.NewContextKey()

type closesWithError interface {
io.WriteCloser
CloseWithError(err error) error
}

type limitWriter struct {
w closesWithError
sum int64
limit int64
}

// Write implements the standard Write interface:
func (l *limitWriter) Write(data []byte) (int, error) {
leftToWrite := l.limit - l.sum
if leftToWrite < int64(len(data)) {
n, err := l.w.Write(data[:leftToWrite])
l.sum += int64(n)
if err != nil {
return n, err
}
_ = l.w.Close()
return n, fmt.Errorf("Rendered content too large - truncating render")
}
n, err := l.w.Write(data)
l.sum += int64(n)
return n, err
}

// Close closes the writer
func (l *limitWriter) Close() error {
return l.w.Close()
}

// CloseWithError closes the writer
func (l *limitWriter) CloseWithError(err error) error {
return l.w.CloseWithError(err)
}

// NewGiteaParseContext creates a parser.Context with the gitea context set
func NewGiteaParseContext(urlPrefix string, metas map[string]string, isWiki bool) parser.Context {
pc := parser.NewContext(parser.WithIDs(newPrefixedIDs()))
Expand All @@ -43,8 +82,8 @@ func NewGiteaParseContext(urlPrefix string, metas map[string]string, isWiki bool
return pc
}

// render renders Markdown to HTML without handling special links.
func render(body []byte, urlPrefix string, metas map[string]string, wikiMarkdown bool) []byte {
// actualRender renders Markdown to HTML without handling special links.
func actualRender(body []byte, urlPrefix string, metas map[string]string, wikiMarkdown bool) []byte {
once.Do(func() {
converter = goldmark.New(
goldmark.WithExtensions(extension.Table,
Expand Down Expand Up @@ -119,12 +158,57 @@ func render(body []byte, urlPrefix string, metas map[string]string, wikiMarkdown

})

pc := NewGiteaParseContext(urlPrefix, metas, wikiMarkdown)
var buf bytes.Buffer
if err := converter.Convert(giteautil.NormalizeEOL(body), &buf, parser.WithContext(pc)); err != nil {
log.Error("Unable to render: %v", err)
rd, wr := io.Pipe()
defer func() {
_ = rd.Close()
_ = wr.Close()
}()

lw := &limitWriter{
w: wr,
limit: setting.UI.MaxDisplayFileSize * 3,
}
return markup.SanitizeReader(&buf).Bytes()

// FIXME: should we include a timeout that closes the pipe to abort the parser and sanitizer if it takes too long?
go func() {
defer func() {
err := recover()
if err == nil {
return
}

log.Warn("Unable to render markdown due to panic in goldmark: %v", err)
if log.IsDebug() {
log.Debug("Panic in markdown: %v\n%s", err, string(log.Stack(2)))
}
_ = lw.CloseWithError(fmt.Errorf("%v", err))
}()

pc := NewGiteaParseContext(urlPrefix, metas, wikiMarkdown)
if err := converter.Convert(giteautil.NormalizeEOL(body), lw, parser.WithContext(pc)); err != nil {
log.Error("Unable to render: %v", err)
_ = lw.CloseWithError(err)
return
}
_ = lw.Close()
}()
return markup.SanitizeReader(rd).Bytes()
}

func render(body []byte, urlPrefix string, metas map[string]string, wikiMarkdown bool) (ret []byte) {
defer func() {
err := recover()
if err == nil {
return
}

log.Warn("Unable to render markdown due to panic in goldmark - will return sanitized raw bytes")
if log.IsDebug() {
log.Debug("Panic in markdown: %v\n%s", err, string(log.Stack(2)))
}
ret = markup.SanitizeBytes(body)
}()
return actualRender(body, urlPrefix, metas, wikiMarkdown)
}

var (
Expand Down
Loading