Skip to content

Commit 3310dd1

Browse files
authored
Improve code diff highlight, fix incorrect rendered diff result (#19958)
Use Unicode placeholders to replace HTML tags and HTML entities first, then do diff, then recover the HTML tags and HTML entities. Now the code diff with highlight has stable behavior, and won't emit broken tags.
1 parent 14178c5 commit 3310dd1

File tree

5 files changed

+379
-378
lines changed

5 files changed

+379
-378
lines changed

Diff for: modules/highlight/highlight.go

+5-3
Original file line numberDiff line numberDiff line change
@@ -40,9 +40,11 @@ var (
4040
// NewContext loads custom highlight map from local config
4141
func NewContext() {
4242
once.Do(func() {
43-
keys := setting.Cfg.Section("highlight.mapping").Keys()
44-
for i := range keys {
45-
highlightMapping[keys[i].Name()] = keys[i].Value()
43+
if setting.Cfg != nil {
44+
keys := setting.Cfg.Section("highlight.mapping").Keys()
45+
for i := range keys {
46+
highlightMapping[keys[i].Name()] = keys[i].Value()
47+
}
4648
}
4749

4850
// The size 512 is simply a conservative rule of thumb

Diff for: services/gitdiff/gitdiff.go

+21-291
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@ import (
1515
"io"
1616
"net/url"
1717
"os"
18-
"regexp"
1918
"sort"
2019
"strings"
2120
"time"
@@ -40,7 +39,7 @@ import (
4039
"golang.org/x/text/transform"
4140
)
4241

43-
// DiffLineType represents the type of a DiffLine.
42+
// DiffLineType represents the type of DiffLine.
4443
type DiffLineType uint8
4544

4645
// DiffLineType possible values.
@@ -51,7 +50,7 @@ const (
5150
DiffLineSection
5251
)
5352

54-
// DiffFileType represents the type of a DiffFile.
53+
// DiffFileType represents the type of DiffFile.
5554
type DiffFileType uint8
5655

5756
// DiffFileType possible values.
@@ -100,12 +99,12 @@ type DiffLineSectionInfo struct {
10099
// BlobExcerptChunkSize represent max lines of excerpt
101100
const BlobExcerptChunkSize = 20
102101

103-
// GetType returns the type of a DiffLine.
102+
// GetType returns the type of DiffLine.
104103
func (d *DiffLine) GetType() int {
105104
return int(d.Type)
106105
}
107106

108-
// CanComment returns whether or not a line can get commented
107+
// CanComment returns whether a line can get commented
109108
func (d *DiffLine) CanComment() bool {
110109
return len(d.Comments) == 0 && d.Type != DiffLineSection
111110
}
@@ -191,287 +190,13 @@ var (
191190
codeTagSuffix = []byte(`</span>`)
192191
)
193192

194-
var (
195-
unfinishedtagRegex = regexp.MustCompile(`<[^>]*$`)
196-
trailingSpanRegex = regexp.MustCompile(`<span\s*[[:alpha:]="]*?[>]?$`)
197-
entityRegex = regexp.MustCompile(`&[#]*?[0-9[:alpha:]]*$`)
198-
)
199-
200-
// shouldWriteInline represents combinations where we manually write inline changes
201-
func shouldWriteInline(diff diffmatchpatch.Diff, lineType DiffLineType) bool {
202-
if true &&
203-
diff.Type == diffmatchpatch.DiffEqual ||
204-
diff.Type == diffmatchpatch.DiffInsert && lineType == DiffLineAdd ||
205-
diff.Type == diffmatchpatch.DiffDelete && lineType == DiffLineDel {
206-
return true
207-
}
208-
return false
209-
}
210-
211-
func fixupBrokenSpans(diffs []diffmatchpatch.Diff) []diffmatchpatch.Diff {
212-
// Create a new array to store our fixed up blocks
213-
fixedup := make([]diffmatchpatch.Diff, 0, len(diffs))
214-
215-
// semantically label some numbers
216-
const insert, delete, equal = 0, 1, 2
217-
218-
// record the positions of the last type of each block in the fixedup blocks
219-
last := []int{-1, -1, -1}
220-
operation := []diffmatchpatch.Operation{diffmatchpatch.DiffInsert, diffmatchpatch.DiffDelete, diffmatchpatch.DiffEqual}
221-
222-
// create a writer for insert and deletes
223-
toWrite := []strings.Builder{
224-
{},
225-
{},
226-
}
227-
228-
// make some flags for insert and delete
229-
unfinishedTag := []bool{false, false}
230-
unfinishedEnt := []bool{false, false}
231-
232-
// store stores the provided text in the writer for the typ
233-
store := func(text string, typ int) {
234-
(&(toWrite[typ])).WriteString(text)
235-
}
236-
237-
// hasStored returns true if there is stored content
238-
hasStored := func(typ int) bool {
239-
return (&toWrite[typ]).Len() > 0
240-
}
241-
242-
// stored will return that content
243-
stored := func(typ int) string {
244-
return (&toWrite[typ]).String()
245-
}
246-
247-
// empty will empty the stored content
248-
empty := func(typ int) {
249-
(&toWrite[typ]).Reset()
250-
}
251-
252-
// pop will remove the stored content appending to a diff block for that typ
253-
pop := func(typ int, fixedup []diffmatchpatch.Diff) []diffmatchpatch.Diff {
254-
if hasStored(typ) {
255-
if last[typ] > last[equal] {
256-
fixedup[last[typ]].Text += stored(typ)
257-
} else {
258-
fixedup = append(fixedup, diffmatchpatch.Diff{
259-
Type: operation[typ],
260-
Text: stored(typ),
261-
})
262-
}
263-
empty(typ)
264-
}
265-
return fixedup
266-
}
267-
268-
// Now we walk the provided diffs and check the type of each block in turn
269-
for _, diff := range diffs {
270-
271-
typ := delete // flag for handling insert or delete typs
272-
switch diff.Type {
273-
case diffmatchpatch.DiffEqual:
274-
// First check if there is anything stored
275-
if hasStored(insert) || hasStored(delete) {
276-
// There are two reasons for storing content:
277-
// 1. Unfinished Entity <- Could be more efficient here by not doing this if we're looking for a tag
278-
if unfinishedEnt[insert] || unfinishedEnt[delete] {
279-
// we look for a ';' to finish an entity
280-
idx := strings.IndexRune(diff.Text, ';')
281-
if idx >= 0 {
282-
// if we find a ';' store the preceding content to both insert and delete
283-
store(diff.Text[:idx+1], insert)
284-
store(diff.Text[:idx+1], delete)
285-
286-
// and remove it from this block
287-
diff.Text = diff.Text[idx+1:]
288-
289-
// reset the ent flags
290-
unfinishedEnt[insert] = false
291-
unfinishedEnt[delete] = false
292-
} else {
293-
// otherwise store it all on insert and delete
294-
store(diff.Text, insert)
295-
store(diff.Text, delete)
296-
// and empty this block
297-
diff.Text = ""
298-
}
299-
}
300-
// 2. Unfinished Tag
301-
if unfinishedTag[insert] || unfinishedTag[delete] {
302-
// we look for a '>' to finish a tag
303-
idx := strings.IndexRune(diff.Text, '>')
304-
if idx >= 0 {
305-
store(diff.Text[:idx+1], insert)
306-
store(diff.Text[:idx+1], delete)
307-
diff.Text = diff.Text[idx+1:]
308-
unfinishedTag[insert] = false
309-
unfinishedTag[delete] = false
310-
} else {
311-
store(diff.Text, insert)
312-
store(diff.Text, delete)
313-
diff.Text = ""
314-
}
315-
}
316-
317-
// If we've completed the required tag/entities
318-
if !(unfinishedTag[insert] || unfinishedTag[delete] || unfinishedEnt[insert] || unfinishedEnt[delete]) {
319-
// pop off the stack
320-
fixedup = pop(insert, fixedup)
321-
fixedup = pop(delete, fixedup)
322-
}
323-
324-
// If that has left this diff block empty then shortcut
325-
if len(diff.Text) == 0 {
326-
continue
327-
}
328-
}
329-
330-
// check if this block ends in an unfinished tag?
331-
idx := unfinishedtagRegex.FindStringIndex(diff.Text)
332-
if idx != nil {
333-
unfinishedTag[insert] = true
334-
unfinishedTag[delete] = true
335-
} else {
336-
// otherwise does it end in an unfinished entity?
337-
idx = entityRegex.FindStringIndex(diff.Text)
338-
if idx != nil {
339-
unfinishedEnt[insert] = true
340-
unfinishedEnt[delete] = true
341-
}
342-
}
343-
344-
// If there is an unfinished component
345-
if idx != nil {
346-
// Store the fragment
347-
store(diff.Text[idx[0]:], insert)
348-
store(diff.Text[idx[0]:], delete)
349-
// and remove it from this block
350-
diff.Text = diff.Text[:idx[0]]
351-
}
352-
353-
// If that hasn't left the block empty
354-
if len(diff.Text) > 0 {
355-
// store the position of the last equal block and store it in our diffs
356-
last[equal] = len(fixedup)
357-
fixedup = append(fixedup, diff)
358-
}
359-
continue
360-
case diffmatchpatch.DiffInsert:
361-
typ = insert
362-
fallthrough
363-
case diffmatchpatch.DiffDelete:
364-
// First check if there is anything stored for this type
365-
if hasStored(typ) {
366-
// if there is prepend it to this block, empty the storage and reset our flags
367-
diff.Text = stored(typ) + diff.Text
368-
empty(typ)
369-
unfinishedEnt[typ] = false
370-
unfinishedTag[typ] = false
371-
}
372-
373-
// check if this block ends in an unfinished tag
374-
idx := unfinishedtagRegex.FindStringIndex(diff.Text)
375-
if idx != nil {
376-
unfinishedTag[typ] = true
377-
} else {
378-
// otherwise does it end in an unfinished entity
379-
idx = entityRegex.FindStringIndex(diff.Text)
380-
if idx != nil {
381-
unfinishedEnt[typ] = true
382-
}
383-
}
384-
385-
// If there is an unfinished component
386-
if idx != nil {
387-
// Store the fragment
388-
store(diff.Text[idx[0]:], typ)
389-
// and remove it from this block
390-
diff.Text = diff.Text[:idx[0]]
391-
}
392-
393-
// If that hasn't left the block empty
394-
if len(diff.Text) > 0 {
395-
// if the last block of this type was after the last equal block
396-
if last[typ] > last[equal] {
397-
// store this blocks content on that block
398-
fixedup[last[typ]].Text += diff.Text
399-
} else {
400-
// otherwise store the position of the last block of this type and store the block
401-
last[typ] = len(fixedup)
402-
fixedup = append(fixedup, diff)
403-
}
404-
}
405-
continue
406-
}
407-
}
408-
409-
// pop off any remaining stored content
410-
fixedup = pop(insert, fixedup)
411-
fixedup = pop(delete, fixedup)
412-
413-
return fixedup
414-
}
415-
416-
func diffToHTML(fileName string, diffs []diffmatchpatch.Diff, lineType DiffLineType) DiffInline {
193+
func diffToHTML(lineWrapperTags []string, diffs []diffmatchpatch.Diff, lineType DiffLineType) string {
417194
buf := bytes.NewBuffer(nil)
418-
match := ""
419-
420-
diffs = fixupBrokenSpans(diffs)
421-
195+
// restore the line wrapper tags <span class="line"> and <span class="cl">, if necessary
196+
for _, tag := range lineWrapperTags {
197+
buf.WriteString(tag)
198+
}
422199
for _, diff := range diffs {
423-
if shouldWriteInline(diff, lineType) {
424-
if len(match) > 0 {
425-
diff.Text = match + diff.Text
426-
match = ""
427-
}
428-
// Chroma HTML syntax highlighting is done before diffing individual lines in order to maintain consistency.
429-
// Since inline changes might split in the middle of a chroma span tag or HTML entity, make we manually put it back together
430-
// before writing so we don't try insert added/removed code spans in the middle of one of those
431-
// and create broken HTML. This is done by moving incomplete HTML forward until it no longer matches our pattern of
432-
// a line ending with an incomplete HTML entity or partial/opening <span>.
433-
434-
// EX:
435-
// diffs[{Type: dmp.DiffDelete, Text: "language</span><span "},
436-
// {Type: dmp.DiffEqual, Text: "c"},
437-
// {Type: dmp.DiffDelete, Text: "lass="p">}]
438-
439-
// After first iteration
440-
// diffs[{Type: dmp.DiffDelete, Text: "language</span>"}, //write out
441-
// {Type: dmp.DiffEqual, Text: "<span c"},
442-
// {Type: dmp.DiffDelete, Text: "lass="p">,</span>}]
443-
444-
// After second iteration
445-
// {Type: dmp.DiffEqual, Text: ""}, // write out
446-
// {Type: dmp.DiffDelete, Text: "<span class="p">,</span>}]
447-
448-
// Final
449-
// {Type: dmp.DiffDelete, Text: "<span class="p">,</span>}]
450-
// end up writing <span class="removed-code"><span class="p">,</span></span>
451-
// Instead of <span class="removed-code">lass="p",</span></span>
452-
453-
m := trailingSpanRegex.FindStringSubmatchIndex(diff.Text)
454-
if m != nil {
455-
match = diff.Text[m[0]:m[1]]
456-
diff.Text = strings.TrimSuffix(diff.Text, match)
457-
}
458-
m = entityRegex.FindStringSubmatchIndex(diff.Text)
459-
if m != nil {
460-
match = diff.Text[m[0]:m[1]]
461-
diff.Text = strings.TrimSuffix(diff.Text, match)
462-
}
463-
// Print an existing closing span first before opening added/remove-code span so it doesn't unintentionally close it
464-
if strings.HasPrefix(diff.Text, "</span>") {
465-
buf.WriteString("</span>")
466-
diff.Text = strings.TrimPrefix(diff.Text, "</span>")
467-
}
468-
// If we weren't able to fix it then this should avoid broken HTML by not inserting more spans below
469-
// The previous/next diff section will contain the rest of the tag that is missing here
470-
if strings.Count(diff.Text, "<") != strings.Count(diff.Text, ">") {
471-
buf.WriteString(diff.Text)
472-
continue
473-
}
474-
}
475200
switch {
476201
case diff.Type == diffmatchpatch.DiffEqual:
477202
buf.WriteString(diff.Text)
@@ -485,7 +210,10 @@ func diffToHTML(fileName string, diffs []diffmatchpatch.Diff, lineType DiffLineT
485210
buf.Write(codeTagSuffix)
486211
}
487212
}
488-
return DiffInlineWithUnicodeEscape(template.HTML(buf.String()))
213+
for range lineWrapperTags {
214+
buf.WriteString("</span>")
215+
}
216+
return buf.String()
489217
}
490218

491219
// GetLine gets a specific line by type (add or del) and file line number
@@ -597,10 +325,12 @@ func (diffSection *DiffSection) GetComputedInlineDiffFor(diffLine *DiffLine) Dif
597325
return DiffInlineWithHighlightCode(diffSection.FileName, language, diffLine.Content)
598326
}
599327

600-
diffRecord := diffMatchPatch.DiffMain(highlight.Code(diffSection.FileName, language, diff1[1:]), highlight.Code(diffSection.FileName, language, diff2[1:]), true)
601-
diffRecord = diffMatchPatch.DiffCleanupEfficiency(diffRecord)
602-
603-
return diffToHTML(diffSection.FileName, diffRecord, diffLine.Type)
328+
hcd := newHighlightCodeDiff()
329+
diffRecord := hcd.diffWithHighlight(diffSection.FileName, language, diff1[1:], diff2[1:])
330+
// it seems that Gitea doesn't need the line wrapper of Chroma, so do not add them back
331+
// if the line wrappers are still needed in the future, it can be added back by "diffToHTML(hcd.lineWrapperTags. ...)"
332+
diffHTML := diffToHTML(nil, diffRecord, diffLine.Type)
333+
return DiffInlineWithUnicodeEscape(template.HTML(diffHTML))
604334
}
605335

606336
// DiffFile represents a file diff.
@@ -1289,7 +1019,7 @@ func readFileName(rd *strings.Reader) (string, bool) {
12891019
if char == '"' {
12901020
fmt.Fscanf(rd, "%q ", &name)
12911021
if len(name) == 0 {
1292-
log.Error("Reader has no file name: %v", rd)
1022+
log.Error("Reader has no file name: reader=%+v", rd)
12931023
return "", true
12941024
}
12951025

@@ -1311,7 +1041,7 @@ func readFileName(rd *strings.Reader) (string, bool) {
13111041
}
13121042
}
13131043
if len(name) < 2 {
1314-
log.Error("Unable to determine name from reader: %v", rd)
1044+
log.Error("Unable to determine name from reader: reader=%+v", rd)
13151045
return "", true
13161046
}
13171047
return name[2:], ambiguity

0 commit comments

Comments
 (0)