From 785097218acd97f630e08cbf3b56ab64bb00635c Mon Sep 17 00:00:00 2001 From: wxiaoguang Date: Mon, 13 Jun 2022 15:09:11 +0800 Subject: [PATCH 01/19] Improve code highlight --- services/gitdiff/gitdiff.go | 454 ++++++++++++------------------- services/gitdiff/gitdiff_test.go | 84 ++---- 2 files changed, 185 insertions(+), 353 deletions(-) diff --git a/services/gitdiff/gitdiff.go b/services/gitdiff/gitdiff.go index e56c2de8fada2..a616788f57f2e 100644 --- a/services/gitdiff/gitdiff.go +++ b/services/gitdiff/gitdiff.go @@ -15,7 +15,6 @@ import ( "io" "net/url" "os" - "regexp" "sort" "strings" "time" @@ -40,7 +39,7 @@ import ( "golang.org/x/text/transform" ) -// DiffLineType represents the type of a DiffLine. +// DiffLineType represents the type of DiffLine. type DiffLineType uint8 // DiffLineType possible values. @@ -51,7 +50,7 @@ const ( DiffLineSection ) -// DiffFileType represents the type of a DiffFile. +// DiffFileType represents the type of DiffFile. type DiffFileType uint8 // DiffFileType possible values. @@ -100,12 +99,12 @@ type DiffLineSectionInfo struct { // BlobExcerptChunkSize represent max lines of excerpt const BlobExcerptChunkSize = 20 -// GetType returns the type of a DiffLine. +// GetType returns the type of DiffLine. func (d *DiffLine) GetType() int { return int(d.Type) } -// CanComment returns whether or not a line can get commented +// CanComment returns whether a line can get commented func (d *DiffLine) CanComment() bool { return len(d.Comments) == 0 && d.Type != DiffLineSection } @@ -191,287 +190,14 @@ var ( codeTagSuffix = []byte(``) ) -var ( - unfinishedtagRegex = regexp.MustCompile(`<[^>]*$`) - trailingSpanRegex = regexp.MustCompile(`]?$`) - entityRegex = regexp.MustCompile(`&[#]*?[0-9[:alpha:]]*$`) -) - -// shouldWriteInline represents combinations where we manually write inline changes -func shouldWriteInline(diff diffmatchpatch.Diff, lineType DiffLineType) bool { - if true && - diff.Type == diffmatchpatch.DiffEqual || - diff.Type == diffmatchpatch.DiffInsert && lineType == DiffLineAdd || - diff.Type == diffmatchpatch.DiffDelete && lineType == DiffLineDel { - return true - } - return false -} - -func fixupBrokenSpans(diffs []diffmatchpatch.Diff) []diffmatchpatch.Diff { - // Create a new array to store our fixed up blocks - fixedup := make([]diffmatchpatch.Diff, 0, len(diffs)) - - // semantically label some numbers - const insert, delete, equal = 0, 1, 2 - - // record the positions of the last type of each block in the fixedup blocks - last := []int{-1, -1, -1} - operation := []diffmatchpatch.Operation{diffmatchpatch.DiffInsert, diffmatchpatch.DiffDelete, diffmatchpatch.DiffEqual} - - // create a writer for insert and deletes - toWrite := []strings.Builder{ - {}, - {}, - } - - // make some flags for insert and delete - unfinishedTag := []bool{false, false} - unfinishedEnt := []bool{false, false} - - // store stores the provided text in the writer for the typ - store := func(text string, typ int) { - (&(toWrite[typ])).WriteString(text) - } - - // hasStored returns true if there is stored content - hasStored := func(typ int) bool { - return (&toWrite[typ]).Len() > 0 - } - - // stored will return that content - stored := func(typ int) string { - return (&toWrite[typ]).String() - } - - // empty will empty the stored content - empty := func(typ int) { - (&toWrite[typ]).Reset() - } - - // pop will remove the stored content appending to a diff block for that typ - pop := func(typ int, fixedup []diffmatchpatch.Diff) []diffmatchpatch.Diff { - if hasStored(typ) { - if last[typ] > last[equal] { - fixedup[last[typ]].Text += stored(typ) - } else { - fixedup = append(fixedup, diffmatchpatch.Diff{ - Type: operation[typ], - Text: stored(typ), - }) - } - empty(typ) - } - return fixedup - } - - // Now we walk the provided diffs and check the type of each block in turn - for _, diff := range diffs { - - typ := delete // flag for handling insert or delete typs - switch diff.Type { - case diffmatchpatch.DiffEqual: - // First check if there is anything stored - if hasStored(insert) || hasStored(delete) { - // There are two reasons for storing content: - // 1. Unfinished Entity <- Could be more efficient here by not doing this if we're looking for a tag - if unfinishedEnt[insert] || unfinishedEnt[delete] { - // we look for a ';' to finish an entity - idx := strings.IndexRune(diff.Text, ';') - if idx >= 0 { - // if we find a ';' store the preceding content to both insert and delete - store(diff.Text[:idx+1], insert) - store(diff.Text[:idx+1], delete) - - // and remove it from this block - diff.Text = diff.Text[idx+1:] - - // reset the ent flags - unfinishedEnt[insert] = false - unfinishedEnt[delete] = false - } else { - // otherwise store it all on insert and delete - store(diff.Text, insert) - store(diff.Text, delete) - // and empty this block - diff.Text = "" - } - } - // 2. Unfinished Tag - if unfinishedTag[insert] || unfinishedTag[delete] { - // we look for a '>' to finish a tag - idx := strings.IndexRune(diff.Text, '>') - if idx >= 0 { - store(diff.Text[:idx+1], insert) - store(diff.Text[:idx+1], delete) - diff.Text = diff.Text[idx+1:] - unfinishedTag[insert] = false - unfinishedTag[delete] = false - } else { - store(diff.Text, insert) - store(diff.Text, delete) - diff.Text = "" - } - } - - // If we've completed the required tag/entities - if !(unfinishedTag[insert] || unfinishedTag[delete] || unfinishedEnt[insert] || unfinishedEnt[delete]) { - // pop off the stack - fixedup = pop(insert, fixedup) - fixedup = pop(delete, fixedup) - } - - // If that has left this diff block empty then shortcut - if len(diff.Text) == 0 { - continue - } - } - - // check if this block ends in an unfinished tag? - idx := unfinishedtagRegex.FindStringIndex(diff.Text) - if idx != nil { - unfinishedTag[insert] = true - unfinishedTag[delete] = true - } else { - // otherwise does it end in an unfinished entity? - idx = entityRegex.FindStringIndex(diff.Text) - if idx != nil { - unfinishedEnt[insert] = true - unfinishedEnt[delete] = true - } - } - - // If there is an unfinished component - if idx != nil { - // Store the fragment - store(diff.Text[idx[0]:], insert) - store(diff.Text[idx[0]:], delete) - // and remove it from this block - diff.Text = diff.Text[:idx[0]] - } - - // If that hasn't left the block empty - if len(diff.Text) > 0 { - // store the position of the last equal block and store it in our diffs - last[equal] = len(fixedup) - fixedup = append(fixedup, diff) - } - continue - case diffmatchpatch.DiffInsert: - typ = insert - fallthrough - case diffmatchpatch.DiffDelete: - // First check if there is anything stored for this type - if hasStored(typ) { - // if there is prepend it to this block, empty the storage and reset our flags - diff.Text = stored(typ) + diff.Text - empty(typ) - unfinishedEnt[typ] = false - unfinishedTag[typ] = false - } - - // check if this block ends in an unfinished tag - idx := unfinishedtagRegex.FindStringIndex(diff.Text) - if idx != nil { - unfinishedTag[typ] = true - } else { - // otherwise does it end in an unfinished entity - idx = entityRegex.FindStringIndex(diff.Text) - if idx != nil { - unfinishedEnt[typ] = true - } - } - - // If there is an unfinished component - if idx != nil { - // Store the fragment - store(diff.Text[idx[0]:], typ) - // and remove it from this block - diff.Text = diff.Text[:idx[0]] - } - - // If that hasn't left the block empty - if len(diff.Text) > 0 { - // if the last block of this type was after the last equal block - if last[typ] > last[equal] { - // store this blocks content on that block - fixedup[last[typ]].Text += diff.Text - } else { - // otherwise store the position of the last block of this type and store the block - last[typ] = len(fixedup) - fixedup = append(fixedup, diff) - } - } - continue +func diffToHTML(hcd *HighlightCodeDiff, diffs []diffmatchpatch.Diff, lineType DiffLineType) DiffInline { + buf := bytes.NewBuffer(nil) + if hcd != nil { + for _, tag := range hcd.lineWrapperTags { + buf.WriteString(tag) } } - - // pop off any remaining stored content - fixedup = pop(insert, fixedup) - fixedup = pop(delete, fixedup) - - return fixedup -} - -func diffToHTML(fileName string, diffs []diffmatchpatch.Diff, lineType DiffLineType) DiffInline { - buf := bytes.NewBuffer(nil) - match := "" - - diffs = fixupBrokenSpans(diffs) - for _, diff := range diffs { - if shouldWriteInline(diff, lineType) { - if len(match) > 0 { - diff.Text = match + diff.Text - match = "" - } - // Chroma HTML syntax highlighting is done before diffing individual lines in order to maintain consistency. - // Since inline changes might split in the middle of a chroma span tag or HTML entity, make we manually put it back together - // before writing so we don't try insert added/removed code spans in the middle of one of those - // and create broken HTML. This is done by moving incomplete HTML forward until it no longer matches our pattern of - // a line ending with an incomplete HTML entity or partial/opening . - - // EX: - // diffs[{Type: dmp.DiffDelete, Text: "language}] - - // After first iteration - // diffs[{Type: dmp.DiffDelete, Text: "language"}, //write out - // {Type: dmp.DiffEqual, Text: ",}] - - // After second iteration - // {Type: dmp.DiffEqual, Text: ""}, // write out - // {Type: dmp.DiffDelete, Text: ",}] - - // Final - // {Type: dmp.DiffDelete, Text: ",}] - // end up writing , - // Instead of lass="p", - - m := trailingSpanRegex.FindStringSubmatchIndex(diff.Text) - if m != nil { - match = diff.Text[m[0]:m[1]] - diff.Text = strings.TrimSuffix(diff.Text, match) - } - m = entityRegex.FindStringSubmatchIndex(diff.Text) - if m != nil { - match = diff.Text[m[0]:m[1]] - diff.Text = strings.TrimSuffix(diff.Text, match) - } - // Print an existing closing span first before opening added/remove-code span so it doesn't unintentionally close it - if strings.HasPrefix(diff.Text, "") { - buf.WriteString("") - diff.Text = strings.TrimPrefix(diff.Text, "") - } - // If we weren't able to fix it then this should avoid broken HTML by not inserting more spans below - // The previous/next diff section will contain the rest of the tag that is missing here - if strings.Count(diff.Text, "<") != strings.Count(diff.Text, ">") { - buf.WriteString(diff.Text) - continue - } - } switch { case diff.Type == diffmatchpatch.DiffEqual: buf.WriteString(diff.Text) @@ -485,6 +211,11 @@ func diffToHTML(fileName string, diffs []diffmatchpatch.Diff, lineType DiffLineT buf.Write(codeTagSuffix) } } + if hcd != nil { + for range hcd.lineWrapperTags { + buf.WriteString("") + } + } return DiffInlineWithUnicodeEscape(template.HTML(buf.String())) } @@ -555,6 +286,146 @@ func DiffInlineWithHighlightCode(fileName, language, code string) DiffInline { return DiffInline{EscapeStatus: status, Content: template.HTML(content)} } +type HighlightCodeDiff struct { + placeholderBegin rune + placeholderMaxCount int + placeholderCounter int + placeholderTagMap map[rune]string + tagPlaceholderMap map[string]rune + + lineWrapperTags []string +} + +func NewHighlightCodeDiff() *HighlightCodeDiff { + return &HighlightCodeDiff{ + placeholderBegin: rune(0xE000), // Private Use Unicode: U+E000..U+F8FF, BMP(0), 6400 + placeholderMaxCount: 6400, + placeholderTagMap: map[rune]string{}, + tagPlaceholderMap: map[string]rune{}, + } +} + +func (hcd *HighlightCodeDiff) nextPlaceholder() rune { + // TODO: handle placeholder rune conflicts ( case 1: counter reaches placeholderMaxCount, case 2: there are placeholder runes in code ) + r := hcd.placeholderBegin + rune(hcd.placeholderCounter%hcd.placeholderMaxCount) + hcd.placeholderCounter++ + hcd.placeholderCounter %= hcd.placeholderMaxCount + return r +} + +func (hcd *HighlightCodeDiff) convertToPlaceholders(highlightCode string) string { + var tagStack []string + res := strings.Builder{} + s := highlightCode + + firstRunForLineTags := hcd.lineWrapperTags == nil + + // the standard chroma highlight HTML is " ... " + for { + // find the next HTML tag + pos1 := strings.IndexByte(s, '<') + pos2 := strings.IndexByte(s, '>') + if pos1 == -1 || pos2 == -1 || pos2 < pos1 { + break + } + tag := s[pos1 : pos2+1] + + // write the content before the tag into result string, and consume the tag in the string + res.WriteString(s[:pos1]) + s = s[pos2+1:] + + // the line wrapper tags should be removed before diff + if strings.HasPrefix(tag, `") + continue + } + + var tagInMap string + if tag[1] == '/' { + // closed tag + if len(tagStack) == 0 { + break // error, no open tag but see close tag + } + // make sure the closed tag in map is related to the open tag, to make the diff algorithm can match the open/closed tags + // the closed tag should be "" for "" + tagInMap = tag + "" + tagStack = tagStack[:len(tagStack)-1] + } else { + tagInMap = tag + tagStack = append(tagStack, tag) + } + placeholder, ok := hcd.tagPlaceholderMap[tagInMap] + if !ok { + placeholder = hcd.nextPlaceholder() + hcd.tagPlaceholderMap[tagInMap] = placeholder + hcd.placeholderTagMap[placeholder] = tagInMap + } + res.WriteRune(placeholder) + } + res.WriteString(s) + return res.String() +} + +func (hcd *HighlightCodeDiff) recoverOneDiff(lastActiveTag string, diff *diffmatchpatch.Diff) (activeTag string) { + sb := strings.Builder{} + var tagStack []string + + if lastActiveTag != "" { + tagStack = append(tagStack, lastActiveTag) + sb.WriteString(lastActiveTag) + } + + for _, r := range diff.Text { + tag, ok := hcd.placeholderTagMap[r] + if !ok { + sb.WriteRune(r) + continue + } + var tagToRecover string + if tag[1] == '/' { + tagToRecover = tag[:strings.IndexByte(tag, '>')+1] + if len(tagStack) == 0 { + continue // if no open tag, skip the closed tag + } + tagStack = tagStack[:len(tagStack)-1] + } else { + tagToRecover = tag + tagStack = append(tagStack, tag) + } + sb.WriteString(tagToRecover) + } + + if len(tagStack) > 0 { + // at the moment, only one-level (non-nested) tag is supported, aka only the last level is used as active tag for next diff + tagStack = tagStack[:len(tagStack)-1] + // close all open tags + for i := len(tagStack) - 1; i >= 0; i-- { + tagToClose := tagStack[i] + pos := strings.IndexByte(tagToClose, ' ') + if pos == -1 { + pos = strings.IndexByte(tagToClose, '>') + } + if pos != -1 { + sb.WriteString("") + } + } + } + + diff.Text = sb.String() + return activeTag +} + +func (hcd *HighlightCodeDiff) recoverFromPlaceholders(diffs []diffmatchpatch.Diff) { + var lastActiveTag string + for i := range diffs { + lastActiveTag = hcd.recoverOneDiff(lastActiveTag, &diffs[i]) + } +} + // GetComputedInlineDiffFor computes inline diff for the given line. func (diffSection *DiffSection) GetComputedInlineDiffFor(diffLine *DiffLine) DiffInline { if setting.Git.DisableDiffHighlight { @@ -597,10 +468,19 @@ func (diffSection *DiffSection) GetComputedInlineDiffFor(diffLine *DiffLine) Dif return DiffInlineWithHighlightCode(diffSection.FileName, language, diffLine.Content) } - diffRecord := diffMatchPatch.DiffMain(highlight.Code(diffSection.FileName, language, diff1[1:]), highlight.Code(diffSection.FileName, language, diff2[1:]), true) + highlightCodeA := highlight.Code(diffSection.FileName, language, diff1[1:]) + highlightCodeB := highlight.Code(diffSection.FileName, language, diff2[1:]) + + hcd := NewHighlightCodeDiff() + highlightCodeA = hcd.convertToPlaceholders(highlightCodeA) + highlightCodeB = hcd.convertToPlaceholders(highlightCodeB) + + diffRecord := diffMatchPatch.DiffMain(highlightCodeA, highlightCodeB, true) diffRecord = diffMatchPatch.DiffCleanupEfficiency(diffRecord) - return diffToHTML(diffSection.FileName, diffRecord, diffLine.Type) + hcd.recoverFromPlaceholders(diffRecord) + + return diffToHTML(hcd, diffRecord, diffLine.Type) } // DiffFile represents a file diff. @@ -1290,7 +1170,7 @@ func readFileName(rd *strings.Reader) (string, bool) { if char == '"' { fmt.Fscanf(rd, "%q ", &name) if len(name) == 0 { - log.Error("Reader has no file name: %v", rd) + log.Error("Reader has no file name: reader=%+v", rd) return "", true } @@ -1312,7 +1192,7 @@ func readFileName(rd *strings.Reader) (string, bool) { } } if len(name) < 2 { - log.Error("Unable to determine name from reader: %v", rd) + log.Error("Unable to determine name from reader: reader=%+v", rd) return "", true } return name[2:], ambiguity diff --git a/services/gitdiff/gitdiff_test.go b/services/gitdiff/gitdiff_test.go index 3457785e5de60..3f81d1d0b8d12 100644 --- a/services/gitdiff/gitdiff_test.go +++ b/services/gitdiff/gitdiff_test.go @@ -26,84 +26,27 @@ import ( "gopkg.in/ini.v1" ) -func assertEqual(t *testing.T, s1 string, s2 template.HTML) { - if s1 != string(s2) { - t.Errorf("Did not receive expected results:\nExpected: %s\nActual: %s", s1, s2) +func assertEqual(t *testing.T, expected string, actual template.HTML) { + if expected != string(actual) { + t.Errorf("Did not receive expected results:\nExpected: %s\nActual: %s", expected, actual) } } func TestDiffToHTML(t *testing.T) { setting.Cfg = ini.Empty() - assertEqual(t, "foo bar biz", diffToHTML("", []dmp.Diff{ + assertEqual(t, "foo bar biz", diffToHTML(nil, []dmp.Diff{ {Type: dmp.DiffEqual, Text: "foo "}, {Type: dmp.DiffInsert, Text: "bar"}, {Type: dmp.DiffDelete, Text: " baz"}, {Type: dmp.DiffEqual, Text: " biz"}, }, DiffLineAdd).Content) - assertEqual(t, "foo bar biz", diffToHTML("", []dmp.Diff{ + assertEqual(t, "foo bar biz", diffToHTML(nil, []dmp.Diff{ {Type: dmp.DiffEqual, Text: "foo "}, {Type: dmp.DiffDelete, Text: "bar"}, {Type: dmp.DiffInsert, Text: " baz"}, {Type: dmp.DiffEqual, Text: " biz"}, }, DiffLineDel).Content) - - assertEqual(t, "if !nohl && (lexer != nil || r.GuessLanguage) {", diffToHTML("", []dmp.Diff{ - {Type: dmp.DiffEqual, Text: "if !nohl && (lexer != nil"}, - {Type: dmp.DiffInsert, Text: " || r.GuessLanguage)"}, - {Type: dmp.DiffEqual, Text: " {"}, - }, DiffLineAdd).Content) - - assertEqual(t, "tagURL := fmt.Sprintf("## [%s](%s/%s/%s/%s?q=&type=all&state=closed&milestone=%d) - %s", ge.Milestone\", ge.BaseURL, ge.Owner, ge.Repo, from, milestoneID, time.Now().Format("2006-01-02"))", diffToHTML("", []dmp.Diff{ - {Type: dmp.DiffEqual, Text: "tagURL := fmt.Sprintf("## [%s](%s/%s/%s/%s?q=&type=all&state=closed&milestone=%d) - %s", ge.Milestone\""}, - {Type: dmp.DiffInsert, Text: "f\">getGiteaTagURL(client"}, - {Type: dmp.DiffEqual, Text: ", ge.BaseURL, ge.Owner, ge.Repo, "}, - {Type: dmp.DiffDelete, Text: "from, milestoneID, time.Now().Format("2006-01-02")"}, - {Type: dmp.DiffInsert, Text: "ge.Milestone, from, milestoneID"}, - {Type: dmp.DiffEqual, Text: ")"}, - }, DiffLineDel).Content) - - assertEqual(t, "r.WrapperRenderer(w, language, true, attrs, false)", diffToHTML("", []dmp.Diff{ - {Type: dmp.DiffEqual, Text: "r.WrapperRenderer(w, "}, - {Type: dmp.DiffDelete, Text: "language, true, attrs"}, - {Type: dmp.DiffEqual, Text: ", false)"}, - }, DiffLineDel).Content) - - assertEqual(t, "language, true, attrs, false)", diffToHTML("", []dmp.Diff{ - {Type: dmp.DiffInsert, Text: "language, true, attrs"}, - {Type: dmp.DiffEqual, Text: ", false)"}, - }, DiffLineAdd).Content) - - assertEqual(t, "print("// ", sys.argv)", diffToHTML("", []dmp.Diff{ - {Type: dmp.DiffEqual, Text: "print"}, - {Type: dmp.DiffInsert, Text: "("}, - {Type: dmp.DiffEqual, Text: ""// ", sys.argv"}, - {Type: dmp.DiffInsert, Text: ")"}, - }, DiffLineAdd).Content) - - assertEqual(t, "sh 'useradd -u $(stat -c "%u" .gitignore) jenkins'", diffToHTML("", []dmp.Diff{ - {Type: dmp.DiffEqual, Text: "sh "}, - {Type: dmp.DiffDelete, Text: "4;useradd -u 111 jenkins""}, - {Type: dmp.DiffInsert, Text: "9;useradd -u $(stat -c "%u" .gitignore) jenkins'"}, - {Type: dmp.DiffEqual, Text: ";"}, - }, DiffLineAdd).Content) - - assertEqual(t, " <h4 class="release-list-title df ac">", diffToHTML("", []dmp.Diff{ - {Type: dmp.DiffEqual, Text: " <h"}, - {Type: dmp.DiffInsert, Text: "4 class=&#"}, - {Type: dmp.DiffEqual, Text: "3"}, - {Type: dmp.DiffInsert, Text: "4;release-list-title df ac""}, - {Type: dmp.DiffEqual, Text: ">"}, - }, DiffLineAdd).Content) } func TestParsePatch_skipTo(t *testing.T) { @@ -592,7 +535,6 @@ index 0000000..6bb8f39 if err != nil { t.Errorf("ParsePatch failed: %s", err) } - println(result) diff2 := `diff --git "a/A \\ B" "b/A \\ B" --- "a/A \\ B" @@ -714,12 +656,22 @@ func TestGetDiffRangeWithWhitespaceBehavior(t *testing.T) { func TestDiffToHTML_14231(t *testing.T) { setting.Cfg = ini.Empty() - diffRecord := diffMatchPatch.DiffMain(highlight.Code("main.v", "", " run()\n"), highlight.Code("main.v", "", " run(db)\n"), true) + + hcd := NewHighlightCodeDiff() + + highlightCodeA := highlight.Code("main.v", "", " run()\n") + highlightCodeB := highlight.Code("main.v", "", " run(db)\n") + highlightCodeA = hcd.convertToPlaceholders(highlightCodeA) + highlightCodeB = hcd.convertToPlaceholders(highlightCodeB) + + diffRecord := diffMatchPatch.DiffMain(highlightCodeA, highlightCodeB, true) diffRecord = diffMatchPatch.DiffCleanupEfficiency(diffRecord) - expected := ` run(db) + hcd.recoverFromPlaceholders(diffRecord) + + expected := ` run(db) ` - output := diffToHTML("main.v", diffRecord, DiffLineAdd) + output := diffToHTML(hcd, diffRecord, DiffLineAdd) assertEqual(t, expected, output.Content) } From f0e2f97b4079701396e676c7bb7e5fe57361eb8c Mon Sep 17 00:00:00 2001 From: wxiaoguang Date: Mon, 13 Jun 2022 22:10:50 +0800 Subject: [PATCH 02/19] remove unnecessary code, add more comments --- services/gitdiff/gitdiff.go | 31 ++++++++++++------------------- 1 file changed, 12 insertions(+), 19 deletions(-) diff --git a/services/gitdiff/gitdiff.go b/services/gitdiff/gitdiff.go index 7dcf4c0c0decc..0d098965dfa6a 100644 --- a/services/gitdiff/gitdiff.go +++ b/services/gitdiff/gitdiff.go @@ -194,7 +194,7 @@ func diffToHTML(hcd *HighlightCodeDiff, diffs []diffmatchpatch.Diff, lineType Di buf := bytes.NewBuffer(nil) if hcd != nil { for _, tag := range hcd.lineWrapperTags { - buf.WriteString(tag) + buf.WriteString(tag) // restore the line wrapper tags and } } for _, diff := range diffs { @@ -345,40 +345,37 @@ func (hcd *HighlightCodeDiff) convertToPlaceholders(highlightCode string) string } var tagInMap string - if tag[1] == '/' { - // closed tag + if tag[1] == '/' { // for closed tag if len(tagStack) == 0 { - break // error, no open tag but see close tag + break // invalid diff result, no open tag but see close tag } // make sure the closed tag in map is related to the open tag, to make the diff algorithm can match the open/closed tags - // the closed tag should be "" for "" + // the closed tag will be recorded in the map by key "" for "" tagInMap = tag + "" tagStack = tagStack[:len(tagStack)-1] - } else { + } else { // for open tag tagInMap = tag tagStack = append(tagStack, tag) } + + // remember the placeholder and tag in the map placeholder, ok := hcd.tagPlaceholderMap[tagInMap] if !ok { placeholder = hcd.nextPlaceholder() hcd.tagPlaceholderMap[tagInMap] = placeholder hcd.placeholderTagMap[placeholder] = tagInMap } - res.WriteRune(placeholder) + + res.WriteRune(placeholder) // use the placeholder to replace the tag } res.WriteString(s) return res.String() } -func (hcd *HighlightCodeDiff) recoverOneDiff(lastActiveTag string, diff *diffmatchpatch.Diff) (activeTag string) { +func (hcd *HighlightCodeDiff) recoverOneDiff(diff *diffmatchpatch.Diff) { sb := strings.Builder{} var tagStack []string - if lastActiveTag != "" { - tagStack = append(tagStack, lastActiveTag) - sb.WriteString(lastActiveTag) - } - for _, r := range diff.Text { tag, ok := hcd.placeholderTagMap[r] if !ok { @@ -389,7 +386,7 @@ func (hcd *HighlightCodeDiff) recoverOneDiff(lastActiveTag string, diff *diffmat if tag[1] == '/' { tagToRecover = tag[:strings.IndexByte(tag, '>')+1] if len(tagStack) == 0 { - continue // if no open tag, skip the closed tag + continue // if no open tag yet, skip the closed tag } tagStack = tagStack[:len(tagStack)-1] } else { @@ -400,8 +397,6 @@ func (hcd *HighlightCodeDiff) recoverOneDiff(lastActiveTag string, diff *diffmat } if len(tagStack) > 0 { - // at the moment, only one-level (non-nested) tag is supported, aka only the last level is used as active tag for next diff - tagStack = tagStack[:len(tagStack)-1] // close all open tags for i := len(tagStack) - 1; i >= 0; i-- { tagToClose := tagStack[i] @@ -416,13 +411,11 @@ func (hcd *HighlightCodeDiff) recoverOneDiff(lastActiveTag string, diff *diffmat } diff.Text = sb.String() - return activeTag } func (hcd *HighlightCodeDiff) recoverFromPlaceholders(diffs []diffmatchpatch.Diff) { - var lastActiveTag string for i := range diffs { - lastActiveTag = hcd.recoverOneDiff(lastActiveTag, &diffs[i]) + hcd.recoverOneDiff(&diffs[i]) } } From 62eb155186d0e6cdf8e3b4d33ba6c7ba9346300c Mon Sep 17 00:00:00 2001 From: wxiaoguang Date: Tue, 14 Jun 2022 10:45:56 +0800 Subject: [PATCH 03/19] clear edge cases, fine tune tests --- modules/highlight/highlight.go | 8 +-- services/gitdiff/gitdiff.go | 89 +++++++++++++++++++++----------- services/gitdiff/gitdiff_test.go | 66 ++++++++++++----------- 3 files changed, 100 insertions(+), 63 deletions(-) diff --git a/modules/highlight/highlight.go b/modules/highlight/highlight.go index a72f26d5f0755..254c3acb9bfc4 100644 --- a/modules/highlight/highlight.go +++ b/modules/highlight/highlight.go @@ -40,9 +40,11 @@ var ( // NewContext loads custom highlight map from local config func NewContext() { once.Do(func() { - keys := setting.Cfg.Section("highlight.mapping").Keys() - for i := range keys { - highlightMapping[keys[i].Name()] = keys[i].Value() + if setting.Cfg != nil { + keys := setting.Cfg.Section("highlight.mapping").Keys() + for i := range keys { + highlightMapping[keys[i].Name()] = keys[i].Value() + } } // The size 512 is simply a conservative rule of thumb diff --git a/services/gitdiff/gitdiff.go b/services/gitdiff/gitdiff.go index 0d098965dfa6a..861b459d2d6a3 100644 --- a/services/gitdiff/gitdiff.go +++ b/services/gitdiff/gitdiff.go @@ -190,7 +190,7 @@ var ( codeTagSuffix = []byte(``) ) -func diffToHTML(hcd *HighlightCodeDiff, diffs []diffmatchpatch.Diff, lineType DiffLineType) DiffInline { +func diffToHTML(hcd *HighlightCodeDiff, diffs []diffmatchpatch.Diff, lineType DiffLineType) string { buf := bytes.NewBuffer(nil) if hcd != nil { for _, tag := range hcd.lineWrapperTags { @@ -216,7 +216,7 @@ func diffToHTML(hcd *HighlightCodeDiff, diffs []diffmatchpatch.Diff, lineType Di buf.WriteString("") } } - return DiffInlineWithUnicodeEscape(template.HTML(buf.String())) + return buf.String() } // GetLine gets a specific line by type (add or del) and file line number @@ -289,7 +289,7 @@ func DiffInlineWithHighlightCode(fileName, language, code string) DiffInline { type HighlightCodeDiff struct { placeholderBegin rune placeholderMaxCount int - placeholderCounter int + placeholderIndex int placeholderTagMap map[rune]string tagPlaceholderMap map[string]rune @@ -305,12 +305,49 @@ func NewHighlightCodeDiff() *HighlightCodeDiff { } } +// nextPlaceholder returns 0 if no more placeholder can be used func (hcd *HighlightCodeDiff) nextPlaceholder() rune { - // TODO: handle placeholder rune conflicts ( case 1: counter reaches placeholderMaxCount, case 2: there are placeholder runes in code ) - r := hcd.placeholderBegin + rune(hcd.placeholderCounter%hcd.placeholderMaxCount) - hcd.placeholderCounter++ - hcd.placeholderCounter %= hcd.placeholderMaxCount - return r + for hcd.placeholderIndex < hcd.placeholderMaxCount { + r := hcd.placeholderBegin + rune(hcd.placeholderIndex) + hcd.placeholderIndex++ + // only use non-existing (not used by code) rune as placeholders + if _, ok := hcd.placeholderTagMap[r]; !ok { + return r + } + } + return 0 // no more available placeholder +} + +func (hcd *HighlightCodeDiff) isInPlaceholderRange(r rune) bool { + return hcd.placeholderBegin <= r && r < hcd.placeholderBegin+rune(hcd.placeholderMaxCount) +} + +func (hcd *HighlightCodeDiff) collectUsedRunes(code string) { + for _, r := range code { + if hcd.isInPlaceholderRange(r) { + // put the existing rune (used by code) in map, then this rune won't be used a placeholder anymore. + hcd.placeholderTagMap[r] = "" + } + } +} + +func (hcd *HighlightCodeDiff) diffWithHighlight(filename, language, codeA, codeB string) []diffmatchpatch.Diff { + hcd.collectUsedRunes(codeA) + hcd.collectUsedRunes(codeB) + + highlightCodeA := highlight.Code(filename, language, codeA) + highlightCodeB := highlight.Code(filename, language, codeB) + + highlightCodeA = hcd.convertToPlaceholders(highlightCodeA) + highlightCodeB = hcd.convertToPlaceholders(highlightCodeB) + + diffs := diffMatchPatch.DiffMain(highlightCodeA, highlightCodeB, true) + diffs = diffMatchPatch.DiffCleanupEfficiency(diffs) + + for i := range diffs { + hcd.recoverOneDiff(&diffs[i]) + } + return diffs } func (hcd *HighlightCodeDiff) convertToPlaceholders(highlightCode string) string { @@ -362,12 +399,19 @@ func (hcd *HighlightCodeDiff) convertToPlaceholders(highlightCode string) string placeholder, ok := hcd.tagPlaceholderMap[tagInMap] if !ok { placeholder = hcd.nextPlaceholder() - hcd.tagPlaceholderMap[tagInMap] = placeholder - hcd.placeholderTagMap[placeholder] = tagInMap + if placeholder != 0 { + hcd.tagPlaceholderMap[tagInMap] = placeholder + hcd.placeholderTagMap[placeholder] = tagInMap + } } - res.WriteRune(placeholder) // use the placeholder to replace the tag + if placeholder != 0 { + res.WriteRune(placeholder) // use the placeholder to replace the tag + } else { + res.WriteString(tag) // unfortunately, all private use runes has been exhausted, no more placeholder could be used, so do not covert the tag + } } + res.WriteString(s) return res.String() } @@ -378,7 +422,7 @@ func (hcd *HighlightCodeDiff) recoverOneDiff(diff *diffmatchpatch.Diff) { for _, r := range diff.Text { tag, ok := hcd.placeholderTagMap[r] - if !ok { + if !ok || tag == "" { sb.WriteRune(r) continue } @@ -413,12 +457,6 @@ func (hcd *HighlightCodeDiff) recoverOneDiff(diff *diffmatchpatch.Diff) { diff.Text = sb.String() } -func (hcd *HighlightCodeDiff) recoverFromPlaceholders(diffs []diffmatchpatch.Diff) { - for i := range diffs { - hcd.recoverOneDiff(&diffs[i]) - } -} - // GetComputedInlineDiffFor computes inline diff for the given line. func (diffSection *DiffSection) GetComputedInlineDiffFor(diffLine *DiffLine) DiffInline { if setting.Git.DisableDiffHighlight { @@ -461,19 +499,10 @@ func (diffSection *DiffSection) GetComputedInlineDiffFor(diffLine *DiffLine) Dif return DiffInlineWithHighlightCode(diffSection.FileName, language, diffLine.Content) } - highlightCodeA := highlight.Code(diffSection.FileName, language, diff1[1:]) - highlightCodeB := highlight.Code(diffSection.FileName, language, diff2[1:]) - hcd := NewHighlightCodeDiff() - highlightCodeA = hcd.convertToPlaceholders(highlightCodeA) - highlightCodeB = hcd.convertToPlaceholders(highlightCodeB) - - diffRecord := diffMatchPatch.DiffMain(highlightCodeA, highlightCodeB, true) - diffRecord = diffMatchPatch.DiffCleanupEfficiency(diffRecord) - - hcd.recoverFromPlaceholders(diffRecord) - - return diffToHTML(hcd, diffRecord, diffLine.Type) + diffRecord := hcd.diffWithHighlight(diffSection.FileName, language, diff1[1:], diff2[1:]) + diffHTML := diffToHTML(hcd, diffRecord, diffLine.Type) + return DiffInlineWithUnicodeEscape(template.HTML(diffHTML)) } // DiffFile represents a file diff. diff --git a/services/gitdiff/gitdiff_test.go b/services/gitdiff/gitdiff_test.go index 8ec9c0f3fe577..8983079c9f6d3 100644 --- a/services/gitdiff/gitdiff_test.go +++ b/services/gitdiff/gitdiff_test.go @@ -7,7 +7,6 @@ package gitdiff import ( "fmt" - "html/template" "strconv" "strings" "testing" @@ -17,36 +16,27 @@ import ( "code.gitea.io/gitea/models/unittest" user_model "code.gitea.io/gitea/models/user" "code.gitea.io/gitea/modules/git" - "code.gitea.io/gitea/modules/highlight" "code.gitea.io/gitea/modules/json" "code.gitea.io/gitea/modules/setting" dmp "github.com/sergi/go-diff/diffmatchpatch" "github.com/stretchr/testify/assert" - "gopkg.in/ini.v1" ) -func assertEqual(t *testing.T, expected string, actual template.HTML) { - if expected != string(actual) { - t.Errorf("Did not receive expected results:\nExpected: %s\nActual: %s", expected, actual) - } -} - func TestDiffToHTML(t *testing.T) { - setting.Cfg = ini.Empty() - assertEqual(t, "foo bar biz", diffToHTML(nil, []dmp.Diff{ + assert.Equal(t, "foo bar biz", diffToHTML(nil, []dmp.Diff{ {Type: dmp.DiffEqual, Text: "foo "}, {Type: dmp.DiffInsert, Text: "bar"}, {Type: dmp.DiffDelete, Text: " baz"}, {Type: dmp.DiffEqual, Text: " biz"}, - }, DiffLineAdd).Content) + }, DiffLineAdd)) - assertEqual(t, "foo bar biz", diffToHTML(nil, []dmp.Diff{ + assert.Equal(t, "foo bar biz", diffToHTML(nil, []dmp.Diff{ {Type: dmp.DiffEqual, Text: "foo "}, {Type: dmp.DiffDelete, Text: "bar"}, {Type: dmp.DiffInsert, Text: " baz"}, {Type: dmp.DiffEqual, Text: " biz"}, - }, DiffLineDel).Content) + }, DiffLineDel)) } func TestParsePatch_skipTo(t *testing.T) { @@ -654,26 +644,42 @@ func TestGetDiffRangeWithWhitespaceBehavior(t *testing.T) { } } -func TestDiffToHTML_14231(t *testing.T) { - setting.Cfg = ini.Empty() - +func TestDiffWithHighlight(t *testing.T) { hcd := NewHighlightCodeDiff() - - highlightCodeA := highlight.Code("main.v", "", " run()\n") - highlightCodeB := highlight.Code("main.v", "", " run(db)\n") - highlightCodeA = hcd.convertToPlaceholders(highlightCodeA) - highlightCodeB = hcd.convertToPlaceholders(highlightCodeB) - - diffRecord := diffMatchPatch.DiffMain(highlightCodeA, highlightCodeB, true) - diffRecord = diffMatchPatch.DiffCleanupEfficiency(diffRecord) - - hcd.recoverFromPlaceholders(diffRecord) - + diffs := hcd.diffWithHighlight( + "main.v", "", + " run()\n", + " run(db)\n", + ) expected := ` run(db) ` - output := diffToHTML(hcd, diffRecord, DiffLineAdd) + output := diffToHTML(hcd, diffs, DiffLineAdd) + assert.Equal(t, expected, output) +} - assertEqual(t, expected, output.Content) +func TestDiffWithHighlightPlaceholder(t *testing.T) { + hcd := NewHighlightCodeDiff() + diffs := hcd.diffWithHighlight( + "main.js", "", + "a='\uE000'", + "a='\uF8FF'", + ) + assert.Equal(t, "", hcd.placeholderTagMap[0xE000]) + assert.Equal(t, "", hcd.placeholderTagMap[0xF8FF]) + + expected := fmt.Sprintf(`a='%s'`, "\uF8FF") + output := diffToHTML(hcd, diffs, DiffLineAdd) + assert.Equal(t, expected, output) + + hcd = NewHighlightCodeDiff() + diffs = hcd.diffWithHighlight( + "main.js", "", + "a='\uE000'", + "a='\uF8FF'", + ) + expected = fmt.Sprintf(`a='%s'`, "\uE000") + output = diffToHTML(hcd, diffs, DiffLineDel) + assert.Equal(t, expected, output) } func TestNoCrashes(t *testing.T) { From bb64dff5c4c70f49259c9c79c33b75003f541a13 Mon Sep 17 00:00:00 2001 From: wxiaoguang Date: Wed, 15 Jun 2022 08:59:43 +0800 Subject: [PATCH 04/19] remove line wrapper tags when doing highlight code diff --- services/gitdiff/gitdiff.go | 18 ++++++++---------- services/gitdiff/gitdiff_test.go | 13 ++++++------- 2 files changed, 14 insertions(+), 17 deletions(-) diff --git a/services/gitdiff/gitdiff.go b/services/gitdiff/gitdiff.go index 861b459d2d6a3..42e84f1e2d979 100644 --- a/services/gitdiff/gitdiff.go +++ b/services/gitdiff/gitdiff.go @@ -190,12 +190,10 @@ var ( codeTagSuffix = []byte(``) ) -func diffToHTML(hcd *HighlightCodeDiff, diffs []diffmatchpatch.Diff, lineType DiffLineType) string { +func diffToHTML(lineWrapperTags []string, diffs []diffmatchpatch.Diff, lineType DiffLineType) string { buf := bytes.NewBuffer(nil) - if hcd != nil { - for _, tag := range hcd.lineWrapperTags { - buf.WriteString(tag) // restore the line wrapper tags and - } + for _, tag := range lineWrapperTags { + buf.WriteString(tag) // restore the line wrapper tags and } for _, diff := range diffs { switch { @@ -211,10 +209,8 @@ func diffToHTML(hcd *HighlightCodeDiff, diffs []diffmatchpatch.Diff, lineType Di buf.Write(codeTagSuffix) } } - if hcd != nil { - for range hcd.lineWrapperTags { - buf.WriteString("") - } + for range lineWrapperTags { + buf.WriteString("") } return buf.String() } @@ -501,7 +497,9 @@ func (diffSection *DiffSection) GetComputedInlineDiffFor(diffLine *DiffLine) Dif hcd := NewHighlightCodeDiff() diffRecord := hcd.diffWithHighlight(diffSection.FileName, language, diff1[1:], diff2[1:]) - diffHTML := diffToHTML(hcd, diffRecord, diffLine.Type) + // it seems that Gitea doesn't need the line wrapper of Chroma, so do not add them back + // if the line wrappers are still needed in the future, it can be added back by "diffToHTML(hcd.lineWrapperTags. ...)" + diffHTML := diffToHTML(nil, diffRecord, diffLine.Type) return DiffInlineWithUnicodeEscape(template.HTML(diffHTML)) } diff --git a/services/gitdiff/gitdiff_test.go b/services/gitdiff/gitdiff_test.go index 8983079c9f6d3..ee19385b7bcf3 100644 --- a/services/gitdiff/gitdiff_test.go +++ b/services/gitdiff/gitdiff_test.go @@ -651,9 +651,8 @@ func TestDiffWithHighlight(t *testing.T) { " run()\n", " run(db)\n", ) - expected := ` run(db) -` - output := diffToHTML(hcd, diffs, DiffLineAdd) + expected := ` run(db)` + "\n" + output := diffToHTML(nil, diffs, DiffLineAdd) assert.Equal(t, expected, output) } @@ -667,8 +666,8 @@ func TestDiffWithHighlightPlaceholder(t *testing.T) { assert.Equal(t, "", hcd.placeholderTagMap[0xE000]) assert.Equal(t, "", hcd.placeholderTagMap[0xF8FF]) - expected := fmt.Sprintf(`a='%s'`, "\uF8FF") - output := diffToHTML(hcd, diffs, DiffLineAdd) + expected := fmt.Sprintf(`a='%s'`, "\uE000") + output := diffToHTML(hcd.lineWrapperTags, diffs, DiffLineDel) assert.Equal(t, expected, output) hcd = NewHighlightCodeDiff() @@ -677,8 +676,8 @@ func TestDiffWithHighlightPlaceholder(t *testing.T) { "a='\uE000'", "a='\uF8FF'", ) - expected = fmt.Sprintf(`a='%s'`, "\uE000") - output = diffToHTML(hcd, diffs, DiffLineDel) + expected = fmt.Sprintf(`a='%s'`, "\uF8FF") + output = diffToHTML(nil, diffs, DiffLineAdd) assert.Equal(t, expected, output) } From adaf320aef3ccee47f6b752bbf346847eaeaa863 Mon Sep 17 00:00:00 2001 From: wxiaoguang Date: Fri, 17 Jun 2022 11:25:37 +0800 Subject: [PATCH 05/19] fine tune --- services/gitdiff/gitdiff.go | 42 ++++++++++++++++++++----------------- 1 file changed, 23 insertions(+), 19 deletions(-) diff --git a/services/gitdiff/gitdiff.go b/services/gitdiff/gitdiff.go index d4eac88509163..0dcb3a37218e8 100644 --- a/services/gitdiff/gitdiff.go +++ b/services/gitdiff/gitdiff.go @@ -192,8 +192,9 @@ var ( func diffToHTML(lineWrapperTags []string, diffs []diffmatchpatch.Diff, lineType DiffLineType) string { buf := bytes.NewBuffer(nil) + // restore the line wrapper tags and , if necessary for _, tag := range lineWrapperTags { - buf.WriteString(tag) // restore the line wrapper tags and + buf.WriteString(tag) } for _, diff := range diffs { switch { @@ -282,6 +283,11 @@ func DiffInlineWithHighlightCode(fileName, language, code string) DiffInline { return DiffInline{EscapeStatus: status, Content: template.HTML(content)} } +// HighlightCodeDiff is used to do diff with highlighted HTML code. +// The HTML tags will be replaced by Unicode placeholders: "{TEXT}" => "\uE000{TEXT}\uE001" +// These Unicode placeholders are friendly to the diff. +// Then after diff, the placeholders in diff result will be recovered to the HTML tags. +// It's guaranteed that the tags in final diff result are paired correctly. type HighlightCodeDiff struct { placeholderBegin rune placeholderMaxCount int @@ -346,26 +352,25 @@ func (hcd *HighlightCodeDiff) diffWithHighlight(filename, language, codeA, codeB return diffs } -func (hcd *HighlightCodeDiff) convertToPlaceholders(highlightCode string) string { +func (hcd *HighlightCodeDiff) convertToPlaceholders(htmlCode string) string { var tagStack []string res := strings.Builder{} - s := highlightCode firstRunForLineTags := hcd.lineWrapperTags == nil // the standard chroma highlight HTML is " ... " for { // find the next HTML tag - pos1 := strings.IndexByte(s, '<') - pos2 := strings.IndexByte(s, '>') + pos1 := strings.IndexByte(htmlCode, '<') + pos2 := strings.IndexByte(htmlCode, '>') if pos1 == -1 || pos2 == -1 || pos2 < pos1 { break } - tag := s[pos1 : pos2+1] + tag := htmlCode[pos1 : pos2+1] // write the content before the tag into result string, and consume the tag in the string - res.WriteString(s[:pos1]) - s = s[pos2+1:] + res.WriteString(htmlCode[:pos1]) + htmlCode = htmlCode[pos2+1:] // the line wrapper tags should be removed before diff if strings.HasPrefix(tag, `") + htmlCode = strings.TrimSuffix(htmlCode, "") continue } @@ -404,11 +409,11 @@ func (hcd *HighlightCodeDiff) convertToPlaceholders(highlightCode string) string if placeholder != 0 { res.WriteRune(placeholder) // use the placeholder to replace the tag } else { - res.WriteString(tag) // unfortunately, all private use runes has been exhausted, no more placeholder could be used, so do not covert the tag + res.WriteString(tag) // unfortunately, all private use runes has been exhausted, no more placeholder could be used, so do not convert the tag } } - - res.WriteString(s) + // write the remaining string + res.WriteString(htmlCode) return res.String() } @@ -419,14 +424,15 @@ func (hcd *HighlightCodeDiff) recoverOneDiff(diff *diffmatchpatch.Diff) { for _, r := range diff.Text { tag, ok := hcd.placeholderTagMap[r] if !ok || tag == "" { - sb.WriteRune(r) + sb.WriteRune(r) // if the run is not a placeholder, write it as it is continue } var tagToRecover string if tag[1] == '/' { + // only get the tag itself, ignore the trailing comment (for how the comment is generated, see the code in `convert` function) tagToRecover = tag[:strings.IndexByte(tag, '>')+1] if len(tagStack) == 0 { - continue // if no open tag yet, skip the closed tag + continue // if no open tag in stack yet, skip the closed tag } tagStack = tagStack[:len(tagStack)-1] } else { @@ -440,13 +446,11 @@ func (hcd *HighlightCodeDiff) recoverOneDiff(diff *diffmatchpatch.Diff) { // close all open tags for i := len(tagStack) - 1; i >= 0; i-- { tagToClose := tagStack[i] - pos := strings.IndexByte(tagToClose, ' ') - if pos == -1 { - pos = strings.IndexByte(tagToClose, '>') - } + // get the closed tag "" from "" or "" + pos := strings.IndexAny(tagToClose, " >") if pos != -1 { sb.WriteString("") - } + } // else: impossible. every tag was pushed into the stack by the code above and is valid HTML open tag } } From 4aa94e849a7c0aaf044c76d98abe42e37fc2c73b Mon Sep 17 00:00:00 2001 From: wxiaoguang Date: Fri, 17 Jun 2022 17:21:35 +0800 Subject: [PATCH 06/19] Update services/gitdiff/gitdiff.go Co-authored-by: delvh --- services/gitdiff/gitdiff.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/gitdiff/gitdiff.go b/services/gitdiff/gitdiff.go index 0dcb3a37218e8..d0b9005e34551 100644 --- a/services/gitdiff/gitdiff.go +++ b/services/gitdiff/gitdiff.go @@ -428,7 +428,7 @@ func (hcd *HighlightCodeDiff) recoverOneDiff(diff *diffmatchpatch.Diff) { continue } var tagToRecover string - if tag[1] == '/' { + if tag[1] == '/' { // Closing tag // only get the tag itself, ignore the trailing comment (for how the comment is generated, see the code in `convert` function) tagToRecover = tag[:strings.IndexByte(tag, '>')+1] if len(tagStack) == 0 { From 77445c2e918a708258dc01c34038c5cc692ddc7b Mon Sep 17 00:00:00 2001 From: wxiaoguang Date: Sat, 18 Jun 2022 19:02:09 +0800 Subject: [PATCH 07/19] fine tune comments --- services/gitdiff/gitdiff.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/services/gitdiff/gitdiff.go b/services/gitdiff/gitdiff.go index d0b9005e34551..4f7dca4f3e444 100644 --- a/services/gitdiff/gitdiff.go +++ b/services/gitdiff/gitdiff.go @@ -308,6 +308,8 @@ func NewHighlightCodeDiff() *HighlightCodeDiff { } // nextPlaceholder returns 0 if no more placeholder can be used +// the diff is done line by line, usually there are only a few (no more than 10) placeholders in one line +// so the placeholderMaxCount is impossible to be exhausted in real cases. func (hcd *HighlightCodeDiff) nextPlaceholder() rune { for hcd.placeholderIndex < hcd.placeholderMaxCount { r := hcd.placeholderBegin + rune(hcd.placeholderIndex) @@ -424,7 +426,7 @@ func (hcd *HighlightCodeDiff) recoverOneDiff(diff *diffmatchpatch.Diff) { for _, r := range diff.Text { tag, ok := hcd.placeholderTagMap[r] if !ok || tag == "" { - sb.WriteRune(r) // if the run is not a placeholder, write it as it is + sb.WriteRune(r) // if the rune is not a placeholder, write it as it is continue } var tagToRecover string From df5fef8a4c1e76c3a4e46c80f027222ce7fe345e Mon Sep 17 00:00:00 2001 From: wxiaoguang Date: Fri, 8 Jul 2022 06:49:30 +0800 Subject: [PATCH 08/19] Update gitdiff.go --- services/gitdiff/gitdiff.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/services/gitdiff/gitdiff.go b/services/gitdiff/gitdiff.go index eb0364838df05..6f01711218f29 100644 --- a/services/gitdiff/gitdiff.go +++ b/services/gitdiff/gitdiff.go @@ -410,9 +410,9 @@ func (hcd *HighlightCodeDiff) convertToPlaceholders(htmlCode string) string { if placeholder != 0 { res.WriteRune(placeholder) // use the placeholder to replace the tag - } else { - res.WriteString(tag) // unfortunately, all private use runes has been exhausted, no more placeholder could be used, so do not convert the tag - } + } + // else: unfortunately, all private use runes has been exhausted, no more placeholder could be used, no more converting + // usually, the exhausting won't occur in real cases, the used placeholders are not more than the CSS classes outputted by chroma. } // write the remaining string res.WriteString(htmlCode) From 843579ae92efa6cec0e75e58482e3902d1711936 Mon Sep 17 00:00:00 2001 From: wxiaoguang Date: Fri, 8 Jul 2022 06:52:25 +0800 Subject: [PATCH 09/19] Update gitdiff.go --- services/gitdiff/gitdiff.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/gitdiff/gitdiff.go b/services/gitdiff/gitdiff.go index 6f01711218f29..762bac85f36ee 100644 --- a/services/gitdiff/gitdiff.go +++ b/services/gitdiff/gitdiff.go @@ -412,7 +412,7 @@ func (hcd *HighlightCodeDiff) convertToPlaceholders(htmlCode string) string { res.WriteRune(placeholder) // use the placeholder to replace the tag } // else: unfortunately, all private use runes has been exhausted, no more placeholder could be used, no more converting - // usually, the exhausting won't occur in real cases, the used placeholders are not more than the CSS classes outputted by chroma. + // usually, the exhausting won't occur in real cases, the magnitude of used placeholders is not larger than that of the CSS classes outputted by chroma. } // write the remaining string res.WriteString(htmlCode) From 2e798d88ebacef522821e2eec18f6c7fb4dee9a7 Mon Sep 17 00:00:00 2001 From: wxiaoguang Date: Fri, 8 Jul 2022 06:53:37 +0800 Subject: [PATCH 10/19] Update gitdiff.go --- services/gitdiff/gitdiff.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/gitdiff/gitdiff.go b/services/gitdiff/gitdiff.go index 762bac85f36ee..a934805c20972 100644 --- a/services/gitdiff/gitdiff.go +++ b/services/gitdiff/gitdiff.go @@ -410,7 +410,7 @@ func (hcd *HighlightCodeDiff) convertToPlaceholders(htmlCode string) string { if placeholder != 0 { res.WriteRune(placeholder) // use the placeholder to replace the tag - } + } // else: unfortunately, all private use runes has been exhausted, no more placeholder could be used, no more converting // usually, the exhausting won't occur in real cases, the magnitude of used placeholders is not larger than that of the CSS classes outputted by chroma. } From ece0e3f5100de610920c5813595a28e4f10368f7 Mon Sep 17 00:00:00 2001 From: wxiaoguang Date: Sat, 9 Jul 2022 09:21:14 +0800 Subject: [PATCH 11/19] more tests --- services/gitdiff/gitdiff_test.go | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/services/gitdiff/gitdiff_test.go b/services/gitdiff/gitdiff_test.go index ee19385b7bcf3..ba055cdbd5150 100644 --- a/services/gitdiff/gitdiff_test.go +++ b/services/gitdiff/gitdiff_test.go @@ -648,11 +648,16 @@ func TestDiffWithHighlight(t *testing.T) { hcd := NewHighlightCodeDiff() diffs := hcd.diffWithHighlight( "main.v", "", - " run()\n", + " run('<>')\n", " run(db)\n", ) - expected := ` run(db)` + "\n" - output := diffToHTML(nil, diffs, DiffLineAdd) + + expected := ` run('<>')` + "\n" + output := diffToHTML(nil, diffs, DiffLineDel) + assert.Equal(t, expected, output) + + expected = ` run(db)` + "\n" + output = diffToHTML(nil, diffs, DiffLineAdd) assert.Equal(t, expected, output) } From c9a15668a9bc28fa00c24acdb1d60103e1cb75bc Mon Sep 17 00:00:00 2001 From: wxiaoguang Date: Sat, 9 Jul 2022 09:24:29 +0800 Subject: [PATCH 12/19] private function & private struct --- services/gitdiff/gitdiff.go | 22 +++++++++++----------- services/gitdiff/gitdiff_test.go | 6 +++--- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/services/gitdiff/gitdiff.go b/services/gitdiff/gitdiff.go index a934805c20972..9cf281e744fae 100644 --- a/services/gitdiff/gitdiff.go +++ b/services/gitdiff/gitdiff.go @@ -283,12 +283,12 @@ func DiffInlineWithHighlightCode(fileName, language, code string) DiffInline { return DiffInline{EscapeStatus: status, Content: template.HTML(content)} } -// HighlightCodeDiff is used to do diff with highlighted HTML code. +// highlightCodeDiff is used to do diff with highlighted HTML code. // The HTML tags will be replaced by Unicode placeholders: "{TEXT}" => "\uE000{TEXT}\uE001" // These Unicode placeholders are friendly to the diff. // Then after diff, the placeholders in diff result will be recovered to the HTML tags. // It's guaranteed that the tags in final diff result are paired correctly. -type HighlightCodeDiff struct { +type highlightCodeDiff struct { placeholderBegin rune placeholderMaxCount int placeholderIndex int @@ -298,8 +298,8 @@ type HighlightCodeDiff struct { lineWrapperTags []string } -func NewHighlightCodeDiff() *HighlightCodeDiff { - return &HighlightCodeDiff{ +func newHighlightCodeDiff() *highlightCodeDiff { + return &highlightCodeDiff{ placeholderBegin: rune(0xE000), // Private Use Unicode: U+E000..U+F8FF, BMP(0), 6400 placeholderMaxCount: 6400, placeholderTagMap: map[rune]string{}, @@ -310,7 +310,7 @@ func NewHighlightCodeDiff() *HighlightCodeDiff { // nextPlaceholder returns 0 if no more placeholder can be used // the diff is done line by line, usually there are only a few (no more than 10) placeholders in one line // so the placeholderMaxCount is impossible to be exhausted in real cases. -func (hcd *HighlightCodeDiff) nextPlaceholder() rune { +func (hcd *highlightCodeDiff) nextPlaceholder() rune { for hcd.placeholderIndex < hcd.placeholderMaxCount { r := hcd.placeholderBegin + rune(hcd.placeholderIndex) hcd.placeholderIndex++ @@ -322,11 +322,11 @@ func (hcd *HighlightCodeDiff) nextPlaceholder() rune { return 0 // no more available placeholder } -func (hcd *HighlightCodeDiff) isInPlaceholderRange(r rune) bool { +func (hcd *highlightCodeDiff) isInPlaceholderRange(r rune) bool { return hcd.placeholderBegin <= r && r < hcd.placeholderBegin+rune(hcd.placeholderMaxCount) } -func (hcd *HighlightCodeDiff) collectUsedRunes(code string) { +func (hcd *highlightCodeDiff) collectUsedRunes(code string) { for _, r := range code { if hcd.isInPlaceholderRange(r) { // put the existing rune (used by code) in map, then this rune won't be used a placeholder anymore. @@ -335,7 +335,7 @@ func (hcd *HighlightCodeDiff) collectUsedRunes(code string) { } } -func (hcd *HighlightCodeDiff) diffWithHighlight(filename, language, codeA, codeB string) []diffmatchpatch.Diff { +func (hcd *highlightCodeDiff) diffWithHighlight(filename, language, codeA, codeB string) []diffmatchpatch.Diff { hcd.collectUsedRunes(codeA) hcd.collectUsedRunes(codeB) @@ -354,7 +354,7 @@ func (hcd *HighlightCodeDiff) diffWithHighlight(filename, language, codeA, codeB return diffs } -func (hcd *HighlightCodeDiff) convertToPlaceholders(htmlCode string) string { +func (hcd *highlightCodeDiff) convertToPlaceholders(htmlCode string) string { var tagStack []string res := strings.Builder{} @@ -419,7 +419,7 @@ func (hcd *HighlightCodeDiff) convertToPlaceholders(htmlCode string) string { return res.String() } -func (hcd *HighlightCodeDiff) recoverOneDiff(diff *diffmatchpatch.Diff) { +func (hcd *highlightCodeDiff) recoverOneDiff(diff *diffmatchpatch.Diff) { sb := strings.Builder{} var tagStack []string @@ -501,7 +501,7 @@ func (diffSection *DiffSection) GetComputedInlineDiffFor(diffLine *DiffLine) Dif return DiffInlineWithHighlightCode(diffSection.FileName, language, diffLine.Content) } - hcd := NewHighlightCodeDiff() + hcd := newHighlightCodeDiff() diffRecord := hcd.diffWithHighlight(diffSection.FileName, language, diff1[1:], diff2[1:]) // it seems that Gitea doesn't need the line wrapper of Chroma, so do not add them back // if the line wrappers are still needed in the future, it can be added back by "diffToHTML(hcd.lineWrapperTags. ...)" diff --git a/services/gitdiff/gitdiff_test.go b/services/gitdiff/gitdiff_test.go index ba055cdbd5150..9346f422a1b2e 100644 --- a/services/gitdiff/gitdiff_test.go +++ b/services/gitdiff/gitdiff_test.go @@ -645,7 +645,7 @@ func TestGetDiffRangeWithWhitespaceBehavior(t *testing.T) { } func TestDiffWithHighlight(t *testing.T) { - hcd := NewHighlightCodeDiff() + hcd := newHighlightCodeDiff() diffs := hcd.diffWithHighlight( "main.v", "", " run('<>')\n", @@ -662,7 +662,7 @@ func TestDiffWithHighlight(t *testing.T) { } func TestDiffWithHighlightPlaceholder(t *testing.T) { - hcd := NewHighlightCodeDiff() + hcd := newHighlightCodeDiff() diffs := hcd.diffWithHighlight( "main.js", "", "a='\uE000'", @@ -675,7 +675,7 @@ func TestDiffWithHighlightPlaceholder(t *testing.T) { output := diffToHTML(hcd.lineWrapperTags, diffs, DiffLineDel) assert.Equal(t, expected, output) - hcd = NewHighlightCodeDiff() + hcd = newHighlightCodeDiff() diffs = hcd.diffWithHighlight( "main.js", "", "a='\uE000'", From f21f422baf56f32a9b6656eacd056ebdc7ad4c62 Mon Sep 17 00:00:00 2001 From: wxiaoguang Date: Sat, 9 Jul 2022 20:39:20 +0800 Subject: [PATCH 13/19] demo for why the tags are all closed --- services/gitdiff/gitdiff.go | 8 ++++++-- services/gitdiff/gitdiff_test.go | 23 +++++++++++++++++++++++ 2 files changed, 29 insertions(+), 2 deletions(-) diff --git a/services/gitdiff/gitdiff.go b/services/gitdiff/gitdiff.go index 9cf281e744fae..8b1085b855135 100644 --- a/services/gitdiff/gitdiff.go +++ b/services/gitdiff/gitdiff.go @@ -295,6 +295,8 @@ type highlightCodeDiff struct { placeholderTagMap map[rune]string tagPlaceholderMap map[string]rune + placeholderOverflowCount int + lineWrapperTags []string } @@ -410,9 +412,11 @@ func (hcd *highlightCodeDiff) convertToPlaceholders(htmlCode string) string { if placeholder != 0 { res.WriteRune(placeholder) // use the placeholder to replace the tag + } else { + // unfortunately, all private use runes has been exhausted, no more placeholder could be used, no more converting + // usually, the exhausting won't occur in real cases, the magnitude of used placeholders is not larger than that of the CSS classes outputted by chroma. + hcd.placeholderOverflowCount++ } - // else: unfortunately, all private use runes has been exhausted, no more placeholder could be used, no more converting - // usually, the exhausting won't occur in real cases, the magnitude of used placeholders is not larger than that of the CSS classes outputted by chroma. } // write the remaining string res.WriteString(htmlCode) diff --git a/services/gitdiff/gitdiff_test.go b/services/gitdiff/gitdiff_test.go index 9346f422a1b2e..0fad46ba2f097 100644 --- a/services/gitdiff/gitdiff_test.go +++ b/services/gitdiff/gitdiff_test.go @@ -684,6 +684,29 @@ func TestDiffWithHighlightPlaceholder(t *testing.T) { expected = fmt.Sprintf(`a='%s'`, "\uF8FF") output = diffToHTML(nil, diffs, DiffLineAdd) assert.Equal(t, expected, output) + + totalOverflow := 0 + for i := 0; i < 100; i++ { + hcd = newHighlightCodeDiff() + hcd.placeholderMaxCount = i + diffs = hcd.diffWithHighlight( + "main.js", "", + "a='1'", + "b='2'", + ) + totalOverflow += hcd.placeholderOverflowCount + + output = diffToHTML(nil, diffs, DiffLineDel) + c1 := strings.Count(output, " Date: Sat, 9 Jul 2022 20:47:50 +0800 Subject: [PATCH 14/19] add comment --- services/gitdiff/gitdiff.go | 1 + 1 file changed, 1 insertion(+) diff --git a/services/gitdiff/gitdiff.go b/services/gitdiff/gitdiff.go index 8b1085b855135..e1de9574703f6 100644 --- a/services/gitdiff/gitdiff.go +++ b/services/gitdiff/gitdiff.go @@ -284,6 +284,7 @@ func DiffInlineWithHighlightCode(fileName, language, code string) DiffInline { } // highlightCodeDiff is used to do diff with highlighted HTML code. +// It totally depends on Chroma's valid HTML output and its structure, do not use these functions for other purposes. // The HTML tags will be replaced by Unicode placeholders: "{TEXT}" => "\uE000{TEXT}\uE001" // These Unicode placeholders are friendly to the diff. // Then after diff, the placeholders in diff result will be recovered to the HTML tags. From 90c644096b89f78cd1c5e5f1305adda9e76eb7f4 Mon Sep 17 00:00:00 2001 From: wxiaoguang Date: Sat, 9 Jul 2022 21:29:22 +0800 Subject: [PATCH 15/19] placeholder for html entity --- services/gitdiff/gitdiff.go | 181 -------------------- services/gitdiff/gitdiff_test.go | 65 -------- services/gitdiff/highlightdiff.go | 220 +++++++++++++++++++++++++ services/gitdiff/highlightdiff_test.go | 93 +++++++++++ 4 files changed, 313 insertions(+), 246 deletions(-) create mode 100644 services/gitdiff/highlightdiff.go create mode 100644 services/gitdiff/highlightdiff_test.go diff --git a/services/gitdiff/gitdiff.go b/services/gitdiff/gitdiff.go index e1de9574703f6..6dd237bbc8d47 100644 --- a/services/gitdiff/gitdiff.go +++ b/services/gitdiff/gitdiff.go @@ -283,187 +283,6 @@ func DiffInlineWithHighlightCode(fileName, language, code string) DiffInline { return DiffInline{EscapeStatus: status, Content: template.HTML(content)} } -// highlightCodeDiff is used to do diff with highlighted HTML code. -// It totally depends on Chroma's valid HTML output and its structure, do not use these functions for other purposes. -// The HTML tags will be replaced by Unicode placeholders: "{TEXT}" => "\uE000{TEXT}\uE001" -// These Unicode placeholders are friendly to the diff. -// Then after diff, the placeholders in diff result will be recovered to the HTML tags. -// It's guaranteed that the tags in final diff result are paired correctly. -type highlightCodeDiff struct { - placeholderBegin rune - placeholderMaxCount int - placeholderIndex int - placeholderTagMap map[rune]string - tagPlaceholderMap map[string]rune - - placeholderOverflowCount int - - lineWrapperTags []string -} - -func newHighlightCodeDiff() *highlightCodeDiff { - return &highlightCodeDiff{ - placeholderBegin: rune(0xE000), // Private Use Unicode: U+E000..U+F8FF, BMP(0), 6400 - placeholderMaxCount: 6400, - placeholderTagMap: map[rune]string{}, - tagPlaceholderMap: map[string]rune{}, - } -} - -// nextPlaceholder returns 0 if no more placeholder can be used -// the diff is done line by line, usually there are only a few (no more than 10) placeholders in one line -// so the placeholderMaxCount is impossible to be exhausted in real cases. -func (hcd *highlightCodeDiff) nextPlaceholder() rune { - for hcd.placeholderIndex < hcd.placeholderMaxCount { - r := hcd.placeholderBegin + rune(hcd.placeholderIndex) - hcd.placeholderIndex++ - // only use non-existing (not used by code) rune as placeholders - if _, ok := hcd.placeholderTagMap[r]; !ok { - return r - } - } - return 0 // no more available placeholder -} - -func (hcd *highlightCodeDiff) isInPlaceholderRange(r rune) bool { - return hcd.placeholderBegin <= r && r < hcd.placeholderBegin+rune(hcd.placeholderMaxCount) -} - -func (hcd *highlightCodeDiff) collectUsedRunes(code string) { - for _, r := range code { - if hcd.isInPlaceholderRange(r) { - // put the existing rune (used by code) in map, then this rune won't be used a placeholder anymore. - hcd.placeholderTagMap[r] = "" - } - } -} - -func (hcd *highlightCodeDiff) diffWithHighlight(filename, language, codeA, codeB string) []diffmatchpatch.Diff { - hcd.collectUsedRunes(codeA) - hcd.collectUsedRunes(codeB) - - highlightCodeA := highlight.Code(filename, language, codeA) - highlightCodeB := highlight.Code(filename, language, codeB) - - highlightCodeA = hcd.convertToPlaceholders(highlightCodeA) - highlightCodeB = hcd.convertToPlaceholders(highlightCodeB) - - diffs := diffMatchPatch.DiffMain(highlightCodeA, highlightCodeB, true) - diffs = diffMatchPatch.DiffCleanupEfficiency(diffs) - - for i := range diffs { - hcd.recoverOneDiff(&diffs[i]) - } - return diffs -} - -func (hcd *highlightCodeDiff) convertToPlaceholders(htmlCode string) string { - var tagStack []string - res := strings.Builder{} - - firstRunForLineTags := hcd.lineWrapperTags == nil - - // the standard chroma highlight HTML is " ... " - for { - // find the next HTML tag - pos1 := strings.IndexByte(htmlCode, '<') - pos2 := strings.IndexByte(htmlCode, '>') - if pos1 == -1 || pos2 == -1 || pos2 < pos1 { - break - } - tag := htmlCode[pos1 : pos2+1] - - // write the content before the tag into result string, and consume the tag in the string - res.WriteString(htmlCode[:pos1]) - htmlCode = htmlCode[pos2+1:] - - // the line wrapper tags should be removed before diff - if strings.HasPrefix(tag, `") - continue - } - - var tagInMap string - if tag[1] == '/' { // for closed tag - if len(tagStack) == 0 { - break // invalid diff result, no open tag but see close tag - } - // make sure the closed tag in map is related to the open tag, to make the diff algorithm can match the open/closed tags - // the closed tag will be recorded in the map by key "" for "" - tagInMap = tag + "" - tagStack = tagStack[:len(tagStack)-1] - } else { // for open tag - tagInMap = tag - tagStack = append(tagStack, tag) - } - - // remember the placeholder and tag in the map - placeholder, ok := hcd.tagPlaceholderMap[tagInMap] - if !ok { - placeholder = hcd.nextPlaceholder() - if placeholder != 0 { - hcd.tagPlaceholderMap[tagInMap] = placeholder - hcd.placeholderTagMap[placeholder] = tagInMap - } - } - - if placeholder != 0 { - res.WriteRune(placeholder) // use the placeholder to replace the tag - } else { - // unfortunately, all private use runes has been exhausted, no more placeholder could be used, no more converting - // usually, the exhausting won't occur in real cases, the magnitude of used placeholders is not larger than that of the CSS classes outputted by chroma. - hcd.placeholderOverflowCount++ - } - } - // write the remaining string - res.WriteString(htmlCode) - return res.String() -} - -func (hcd *highlightCodeDiff) recoverOneDiff(diff *diffmatchpatch.Diff) { - sb := strings.Builder{} - var tagStack []string - - for _, r := range diff.Text { - tag, ok := hcd.placeholderTagMap[r] - if !ok || tag == "" { - sb.WriteRune(r) // if the rune is not a placeholder, write it as it is - continue - } - var tagToRecover string - if tag[1] == '/' { // Closing tag - // only get the tag itself, ignore the trailing comment (for how the comment is generated, see the code in `convert` function) - tagToRecover = tag[:strings.IndexByte(tag, '>')+1] - if len(tagStack) == 0 { - continue // if no open tag in stack yet, skip the closed tag - } - tagStack = tagStack[:len(tagStack)-1] - } else { - tagToRecover = tag - tagStack = append(tagStack, tag) - } - sb.WriteString(tagToRecover) - } - - if len(tagStack) > 0 { - // close all open tags - for i := len(tagStack) - 1; i >= 0; i-- { - tagToClose := tagStack[i] - // get the closed tag "" from "" or "" - pos := strings.IndexAny(tagToClose, " >") - if pos != -1 { - sb.WriteString("") - } // else: impossible. every tag was pushed into the stack by the code above and is valid HTML open tag - } - } - - diff.Text = sb.String() -} - // GetComputedInlineDiffFor computes inline diff for the given line. func (diffSection *DiffSection) GetComputedInlineDiffFor(diffLine *DiffLine) DiffInline { if setting.Git.DisableDiffHighlight { diff --git a/services/gitdiff/gitdiff_test.go b/services/gitdiff/gitdiff_test.go index 0fad46ba2f097..e88d831759b7b 100644 --- a/services/gitdiff/gitdiff_test.go +++ b/services/gitdiff/gitdiff_test.go @@ -644,71 +644,6 @@ func TestGetDiffRangeWithWhitespaceBehavior(t *testing.T) { } } -func TestDiffWithHighlight(t *testing.T) { - hcd := newHighlightCodeDiff() - diffs := hcd.diffWithHighlight( - "main.v", "", - " run('<>')\n", - " run(db)\n", - ) - - expected := ` run('<>')` + "\n" - output := diffToHTML(nil, diffs, DiffLineDel) - assert.Equal(t, expected, output) - - expected = ` run(db)` + "\n" - output = diffToHTML(nil, diffs, DiffLineAdd) - assert.Equal(t, expected, output) -} - -func TestDiffWithHighlightPlaceholder(t *testing.T) { - hcd := newHighlightCodeDiff() - diffs := hcd.diffWithHighlight( - "main.js", "", - "a='\uE000'", - "a='\uF8FF'", - ) - assert.Equal(t, "", hcd.placeholderTagMap[0xE000]) - assert.Equal(t, "", hcd.placeholderTagMap[0xF8FF]) - - expected := fmt.Sprintf(`a='%s'`, "\uE000") - output := diffToHTML(hcd.lineWrapperTags, diffs, DiffLineDel) - assert.Equal(t, expected, output) - - hcd = newHighlightCodeDiff() - diffs = hcd.diffWithHighlight( - "main.js", "", - "a='\uE000'", - "a='\uF8FF'", - ) - expected = fmt.Sprintf(`a='%s'`, "\uF8FF") - output = diffToHTML(nil, diffs, DiffLineAdd) - assert.Equal(t, expected, output) - - totalOverflow := 0 - for i := 0; i < 100; i++ { - hcd = newHighlightCodeDiff() - hcd.placeholderMaxCount = i - diffs = hcd.diffWithHighlight( - "main.js", "", - "a='1'", - "b='2'", - ) - totalOverflow += hcd.placeholderOverflowCount - - output = diffToHTML(nil, diffs, DiffLineDel) - c1 := strings.Count(output, "') + if pos2 == -1 { + return "", "", s, false + } + return s[:pos1], s[pos1 : pos1+pos2+1], s[pos1+pos2+1:], true + } else if s[pos1] == '&' { + pos2 := strings.IndexByte(s[pos1:], ';') + if pos2 == -1 { + return "", "", s, false + } + return s[:pos1], s[pos1 : pos1+pos2+1], s[pos1+pos2+1:], true + } + } + return "", "", s, true +} + +// highlightCodeDiff is used to do diff with highlighted HTML code. +// It totally depends on Chroma's valid HTML output and its structure, do not use these functions for other purposes. +// The HTML tags will be replaced by Unicode placeholders: "{TEXT}" => "\uE000{TEXT}\uE001" +// These Unicode placeholders are friendly to the diff. +// Then after diff, the placeholders in diff result will be recovered to the HTML tags. +// It's guaranteed that the tags in final diff result are paired correctly. +type highlightCodeDiff struct { + placeholderBegin rune + placeholderMaxCount int + placeholderIndex int + placeholderTokenMap map[rune]string + tokenPlaceholderMap map[string]rune + + placeholderOverflowCount int + + lineWrapperTags []string +} + +func newHighlightCodeDiff() *highlightCodeDiff { + return &highlightCodeDiff{ + placeholderBegin: rune(0x100000), // Plane 16: Supplementary Private Use Area B (U+100000..U+10FFFD) + placeholderMaxCount: 64000, + placeholderTokenMap: map[rune]string{}, + tokenPlaceholderMap: map[string]rune{}, + } +} + +// nextPlaceholder returns 0 if no more placeholder can be used +// the diff is done line by line, usually there are only a few (no more than 10) placeholders in one line +// so the placeholderMaxCount is impossible to be exhausted in real cases. +func (hcd *highlightCodeDiff) nextPlaceholder() rune { + for hcd.placeholderIndex < hcd.placeholderMaxCount { + r := hcd.placeholderBegin + rune(hcd.placeholderIndex) + hcd.placeholderIndex++ + // only use non-existing (not used by code) rune as placeholders + if _, ok := hcd.placeholderTokenMap[r]; !ok { + return r + } + } + return 0 // no more available placeholder +} + +func (hcd *highlightCodeDiff) isInPlaceholderRange(r rune) bool { + return hcd.placeholderBegin <= r && r < hcd.placeholderBegin+rune(hcd.placeholderMaxCount) +} + +func (hcd *highlightCodeDiff) collectUsedRunes(code string) { + for _, r := range code { + if hcd.isInPlaceholderRange(r) { + // put the existing rune (used by code) in map, then this rune won't be used a placeholder anymore. + hcd.placeholderTokenMap[r] = "" + } + } +} + +func (hcd *highlightCodeDiff) diffWithHighlight(filename, language, codeA, codeB string) []diffmatchpatch.Diff { + hcd.collectUsedRunes(codeA) + hcd.collectUsedRunes(codeB) + + highlightCodeA := highlight.Code(filename, language, codeA) + highlightCodeB := highlight.Code(filename, language, codeB) + + highlightCodeA = hcd.convertToPlaceholders(highlightCodeA) + highlightCodeB = hcd.convertToPlaceholders(highlightCodeB) + + diffs := diffMatchPatch.DiffMain(highlightCodeA, highlightCodeB, true) + diffs = diffMatchPatch.DiffCleanupEfficiency(diffs) + + for i := range diffs { + hcd.recoverOneDiff(&diffs[i]) + } + return diffs +} + +func (hcd *highlightCodeDiff) convertToPlaceholders(htmlCode string) string { + var tagStack []string + res := strings.Builder{} + + firstRunForLineTags := hcd.lineWrapperTags == nil + + var beforeToken, token string + var valid bool + + // the standard chroma highlight HTML is " ... " + for { + beforeToken, token, htmlCode, valid = extractHTMLToken(htmlCode) + if !valid || token == "" { + break + } + // write the content before the tag into result string, and consume the tag in the string + res.WriteString(beforeToken) + + // the line wrapper tags should be removed before diff + if strings.HasPrefix(token, `") + continue + } + + var tokenInMap string + if strings.HasSuffix(token, "" for "" + tokenInMap = token + "" + tagStack = tagStack[:len(tagStack)-1] + } else if strings.HasPrefix(token, "<") { // for open tag + tokenInMap = token + tagStack = append(tagStack, token) + } else if strings.HasPrefix(token, "&") { // for html entity + tokenInMap = token + } + + // remember the placeholder and tag in the map + placeholder, ok := hcd.tokenPlaceholderMap[tokenInMap] + if !ok { + placeholder = hcd.nextPlaceholder() + if placeholder != 0 { + hcd.tokenPlaceholderMap[tokenInMap] = placeholder + hcd.placeholderTokenMap[placeholder] = tokenInMap + } + } + + if placeholder != 0 { + res.WriteRune(placeholder) // use the placeholder to replace the tag + } else { + // unfortunately, all private use runes has been exhausted, no more placeholder could be used, no more converting + // usually, the exhausting won't occur in real cases, the magnitude of used placeholders is not larger than that of the CSS classes outputted by chroma. + hcd.placeholderOverflowCount++ + if strings.HasPrefix(token, "&") { + // when the token is a html entity, something must be outputted even if there is no placeholder. + res.WriteRune(0xFFFD) // replacement character TODO: how to handle this case more gracefully? + } + } + } + + // write the remaining string + res.WriteString(htmlCode) + return res.String() +} + +func (hcd *highlightCodeDiff) recoverOneDiff(diff *diffmatchpatch.Diff) { + sb := strings.Builder{} + var tagStack []string + + for _, r := range diff.Text { + token, ok := hcd.placeholderTokenMap[r] + if !ok || token == "" { + sb.WriteRune(r) // if the rune is not a placeholder, write it as it is + continue + } + var tokenToRecover string + if token[1] == '/' { // Closing tag + // only get the tag itself, ignore the trailing comment (for how the comment is generated, see the code in `convert` function) + tokenToRecover = token[:strings.IndexByte(token, '>')+1] + if len(tagStack) == 0 { + continue // if no open tag in stack yet, skip the closed tag + } + tagStack = tagStack[:len(tagStack)-1] + } else if token[0] == '<' { + tokenToRecover = token + tagStack = append(tagStack, token) + } else { + tokenToRecover = token + } + sb.WriteString(tokenToRecover) + } + + if len(tagStack) > 0 { + // close all open tags + for i := len(tagStack) - 1; i >= 0; i-- { + tagToClose := tagStack[i] + // get the closed tag "" from "" or "" + pos := strings.IndexAny(tagToClose, " >") + if pos != -1 { + sb.WriteString("") + } // else: impossible. every tag was pushed into the stack by the code above and is valid HTML open tag + } + } + + diff.Text = sb.String() +} diff --git a/services/gitdiff/highlightdiff_test.go b/services/gitdiff/highlightdiff_test.go new file mode 100644 index 0000000000000..c011cd52b4bf8 --- /dev/null +++ b/services/gitdiff/highlightdiff_test.go @@ -0,0 +1,93 @@ +// Copyright 2022 The Gitea Authors. All rights reserved. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. + +package gitdiff + +import ( + "fmt" + "strings" + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestDiffWithHighlight(t *testing.T) { + hcd := newHighlightCodeDiff() + diffs := hcd.diffWithHighlight( + "main.v", "", + " run('<>')\n", + " run(db)\n", + ) + + expected := ` run('<>')` + "\n" + output := diffToHTML(nil, diffs, DiffLineDel) + assert.Equal(t, expected, output) + + expected = ` run(db)` + "\n" + output = diffToHTML(nil, diffs, DiffLineAdd) + assert.Equal(t, expected, output) +} + +func TestDiffWithHighlightPlaceholder(t *testing.T) { + hcd := newHighlightCodeDiff() + diffs := hcd.diffWithHighlight( + "main.js", "", + "a='\U00100000'", + "a='\U0010FFFD''", + ) + assert.Equal(t, "", hcd.placeholderTokenMap[0x00100000]) + assert.Equal(t, "", hcd.placeholderTokenMap[0x0010FFFD]) + + expected := fmt.Sprintf(`a='%s'`, "\U00100000") + output := diffToHTML(hcd.lineWrapperTags, diffs, DiffLineDel) + assert.Equal(t, expected, output) + + hcd = newHighlightCodeDiff() + diffs = hcd.diffWithHighlight( + "main.js", "", + "a='\U00100000'", + "a='\U0010FFFD'", + ) + expected = fmt.Sprintf(`a='%s'`, "\U0010FFFD") + output = diffToHTML(nil, diffs, DiffLineAdd) + assert.Equal(t, expected, output) +} + +func TestDiffWithHighlightPlaceholderExhausted(t *testing.T) { + hcd := newHighlightCodeDiff() + hcd.placeholderMaxCount = 0 + diffs := hcd.diffWithHighlight( + "main.js", "", + "'", + ``, + ) + output := diffToHTML(nil, diffs, DiffLineDel) + expected := fmt.Sprintf(`%s`, "\uFFFD") + assert.Equal(t, expected, output) +} + +func TestDiffWithHighlightTagMatch(t *testing.T) { + totalOverflow := 0 + for i := 0; i < 100; i++ { + hcd := newHighlightCodeDiff() + hcd.placeholderMaxCount = i + diffs := hcd.diffWithHighlight( + "main.js", "", + "a='1'", + "b='2'", + ) + totalOverflow += hcd.placeholderOverflowCount + + output := diffToHTML(nil, diffs, DiffLineDel) + c1 := strings.Count(output, " Date: Sat, 9 Jul 2022 22:01:54 +0800 Subject: [PATCH 16/19] fine tune comments --- services/gitdiff/highlightdiff.go | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/services/gitdiff/highlightdiff.go b/services/gitdiff/highlightdiff.go index e387d85edf642..c532c439a5ca9 100644 --- a/services/gitdiff/highlightdiff.go +++ b/services/gitdiff/highlightdiff.go @@ -12,6 +12,7 @@ import ( "github.com/sergi/go-diff/diffmatchpatch" ) +// token is a html tag or entity, eg: "", "", "<" func extractHTMLToken(s string) (before, token, after string, valid bool) { for pos1 := 0; pos1 < len(s); pos1++ { if s[pos1] == '<' { @@ -33,9 +34,9 @@ func extractHTMLToken(s string) (before, token, after string, valid bool) { // highlightCodeDiff is used to do diff with highlighted HTML code. // It totally depends on Chroma's valid HTML output and its structure, do not use these functions for other purposes. -// The HTML tags will be replaced by Unicode placeholders: "{TEXT}" => "\uE000{TEXT}\uE001" +// The HTML tags and entities will be replaced by Unicode placeholders: "{TEXT}" => "\uE000{TEXT}\uE001" // These Unicode placeholders are friendly to the diff. -// Then after diff, the placeholders in diff result will be recovered to the HTML tags. +// Then after diff, the placeholders in diff result will be recovered to the HTML tags and entities. // It's guaranteed that the tags in final diff result are paired correctly. type highlightCodeDiff struct { placeholderBegin rune @@ -105,6 +106,7 @@ func (hcd *highlightCodeDiff) diffWithHighlight(filename, language, codeA, codeB return diffs } +// convertToPlaceholders totally depends on Chroma's valid HTML output and its structure, do not use these functions for other purposes. func (hcd *highlightCodeDiff) convertToPlaceholders(htmlCode string) string { var tagStack []string res := strings.Builder{} @@ -120,7 +122,7 @@ func (hcd *highlightCodeDiff) convertToPlaceholders(htmlCode string) string { if !valid || token == "" { break } - // write the content before the tag into result string, and consume the tag in the string + // write the content before the token into result string, and consume the token in the string res.WriteString(beforeToken) // the line wrapper tags should be removed before diff @@ -149,7 +151,7 @@ func (hcd *highlightCodeDiff) convertToPlaceholders(htmlCode string) string { tokenInMap = token } - // remember the placeholder and tag in the map + // remember the placeholder and token in the map placeholder, ok := hcd.tokenPlaceholderMap[tokenInMap] if !ok { placeholder = hcd.nextPlaceholder() @@ -160,7 +162,7 @@ func (hcd *highlightCodeDiff) convertToPlaceholders(htmlCode string) string { } if placeholder != 0 { - res.WriteRune(placeholder) // use the placeholder to replace the tag + res.WriteRune(placeholder) // use the placeholder to replace the token } else { // unfortunately, all private use runes has been exhausted, no more placeholder could be used, no more converting // usually, the exhausting won't occur in real cases, the magnitude of used placeholders is not larger than that of the CSS classes outputted by chroma. @@ -198,7 +200,7 @@ func (hcd *highlightCodeDiff) recoverOneDiff(diff *diffmatchpatch.Diff) { } else if token[0] == '<' { tokenToRecover = token tagStack = append(tagStack, token) - } else { + } else { // html entity tokenToRecover = token } sb.WriteString(tokenToRecover) From 4afb1ccf002c51397791fd692d0f97b494e3a4fe Mon Sep 17 00:00:00 2001 From: wxiaoguang Date: Sat, 9 Jul 2022 22:15:28 +0800 Subject: [PATCH 17/19] always output the code/name part of the html entities if the placeholders are exhausted. --- services/gitdiff/highlightdiff.go | 3 ++- services/gitdiff/highlightdiff_test.go | 17 ++++++++++++++++- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/services/gitdiff/highlightdiff.go b/services/gitdiff/highlightdiff.go index c532c439a5ca9..83fb5bb6cbc6c 100644 --- a/services/gitdiff/highlightdiff.go +++ b/services/gitdiff/highlightdiff.go @@ -169,7 +169,8 @@ func (hcd *highlightCodeDiff) convertToPlaceholders(htmlCode string) string { hcd.placeholderOverflowCount++ if strings.HasPrefix(token, "&") { // when the token is a html entity, something must be outputted even if there is no placeholder. - res.WriteRune(0xFFFD) // replacement character TODO: how to handle this case more gracefully? + res.WriteRune(0xFFFD) // replacement character TODO: how to handle this case more gracefully? + res.WriteString(token[1:]) // still output the entity code part, otherwise there will be no diff result. } } } diff --git a/services/gitdiff/highlightdiff_test.go b/services/gitdiff/highlightdiff_test.go index c011cd52b4bf8..65b4ecc9b66e0 100644 --- a/services/gitdiff/highlightdiff_test.go +++ b/services/gitdiff/highlightdiff_test.go @@ -63,7 +63,22 @@ func TestDiffWithHighlightPlaceholderExhausted(t *testing.T) { ``, ) output := diffToHTML(nil, diffs, DiffLineDel) - expected := fmt.Sprintf(`%s`, "\uFFFD") + expected := fmt.Sprintf(`%s#39;`, "\uFFFD") + assert.Equal(t, expected, output) + + hcd = newHighlightCodeDiff() + hcd.placeholderMaxCount = 0 + diffs = hcd.diffWithHighlight( + "main.js", "", + "a < b", + "a > b", + ) + output = diffToHTML(nil, diffs, DiffLineDel) + expected = fmt.Sprintf(`a %slt; b`, "\uFFFD") + assert.Equal(t, expected, output) + + output = diffToHTML(nil, diffs, DiffLineAdd) + expected = fmt.Sprintf(`a %sgt; b`, "\uFFFD") assert.Equal(t, expected, output) } From 4d1d8a4a527396c00cc5cf9848573af17f21ee4d Mon Sep 17 00:00:00 2001 From: wxiaoguang Date: Sat, 9 Jul 2022 22:43:28 +0800 Subject: [PATCH 18/19] fine tune comments --- services/gitdiff/highlightdiff.go | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/services/gitdiff/highlightdiff.go b/services/gitdiff/highlightdiff.go index 83fb5bb6cbc6c..4ceada4d7ec95 100644 --- a/services/gitdiff/highlightdiff.go +++ b/services/gitdiff/highlightdiff.go @@ -136,20 +136,20 @@ func (hcd *highlightCodeDiff) convertToPlaceholders(htmlCode string) string { } var tokenInMap string - if strings.HasSuffix(token, "" for "" + // make sure the closing tag in map is related to the open tag, to make the diff algorithm can match the opening/closing tags + // the closing tag will be recorded in the map by key "" for "" tokenInMap = token + "" tagStack = tagStack[:len(tagStack)-1] - } else if strings.HasPrefix(token, "<") { // for open tag + } else if token[0] == '<' { // for opening tag tokenInMap = token tagStack = append(tagStack, token) - } else if strings.HasPrefix(token, "&") { // for html entity + } else if token[0] == '&' { // for html entity tokenInMap = token - } + } // else: impossible // remember the placeholder and token in the map placeholder, ok := hcd.tokenPlaceholderMap[tokenInMap] @@ -191,31 +191,31 @@ func (hcd *highlightCodeDiff) recoverOneDiff(diff *diffmatchpatch.Diff) { continue } var tokenToRecover string - if token[1] == '/' { // Closing tag + if strings.HasPrefix(token, "')+1] if len(tagStack) == 0 { - continue // if no open tag in stack yet, skip the closed tag + continue // if no opening tag in stack yet, skip the closing tag } tagStack = tagStack[:len(tagStack)-1] - } else if token[0] == '<' { + } else if token[0] == '<' { // for opening tag tokenToRecover = token tagStack = append(tagStack, token) - } else { // html entity + } else if token[0] == '&' { // for html entity tokenToRecover = token - } + } // else: impossible sb.WriteString(tokenToRecover) } if len(tagStack) > 0 { - // close all open tags + // close all opening tags for i := len(tagStack) - 1; i >= 0; i-- { tagToClose := tagStack[i] - // get the closed tag "" from "" or "" + // get the closing tag "" from "" or "" pos := strings.IndexAny(tagToClose, " >") if pos != -1 { sb.WriteString("") - } // else: impossible. every tag was pushed into the stack by the code above and is valid HTML open tag + } // else: impossible. every tag was pushed into the stack by the code above and is valid HTML opening tag } } From 9cf358b01a089fea78d476d1714265dda5f23204 Mon Sep 17 00:00:00 2001 From: wxiaoguang Date: Sun, 10 Jul 2022 00:36:19 +0800 Subject: [PATCH 19/19] demo why the tags are always balanced --- services/gitdiff/highlightdiff_test.go | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/services/gitdiff/highlightdiff_test.go b/services/gitdiff/highlightdiff_test.go index 65b4ecc9b66e0..1cd78bc94272d 100644 --- a/services/gitdiff/highlightdiff_test.go +++ b/services/gitdiff/highlightdiff_test.go @@ -9,6 +9,7 @@ import ( "strings" "testing" + "github.com/sergi/go-diff/diffmatchpatch" "github.com/stretchr/testify/assert" ) @@ -27,6 +28,23 @@ func TestDiffWithHighlight(t *testing.T) { expected = ` run(db)` + "\n" output = diffToHTML(nil, diffs, DiffLineAdd) assert.Equal(t, expected, output) + + hcd = newHighlightCodeDiff() + hcd.placeholderTokenMap['O'] = "" + hcd.placeholderTokenMap['C'] = "" + diff := diffmatchpatch.Diff{} + + diff.Text = "OC" + hcd.recoverOneDiff(&diff) + assert.Equal(t, "", diff.Text) + + diff.Text = "O" + hcd.recoverOneDiff(&diff) + assert.Equal(t, "", diff.Text) + + diff.Text = "C" + hcd.recoverOneDiff(&diff) + assert.Equal(t, "", diff.Text) } func TestDiffWithHighlightPlaceholder(t *testing.T) {