Skip to content

Commit a34826b

Browse files
lafriksCirnoT
andauthored
Change language statistics to save size instead of percentage (#11681) (#11690)
* Change language statistics to save size instead of percentage (#11681) * Change language statistics to save size instead of percentage in database Co-Authored-By: Cirno the Strongest <[email protected]> * Do not exclude if only language * Fix edge cases with special languages Co-authored-by: Cirno the Strongest <[email protected]> * Fix language stat calculation (#11692) * Fix language stat calculation * Group languages and ignore 0 size files * remove unneeded code Co-authored-by: Cirno the Strongest <[email protected]>
1 parent 70739c3 commit a34826b

File tree

5 files changed

+139
-39
lines changed

5 files changed

+139
-39
lines changed

models/migrations/migrations.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -212,6 +212,8 @@ var migrations = []Migration{
212212
NewMigration("Add ResolveDoerID to Comment table", addResolveDoerIDCommentColumn),
213213
// v139 -> v140
214214
NewMigration("prepend refs/heads/ to issue refs", prependRefsHeadsToIssueRefs),
215+
// v140 -> v141
216+
NewMigration("Save detected language file size to database instead of percent", fixLanguageStatsToSaveSize),
215217
}
216218

217219
// GetCurrentDBVersion returns the current db version

models/migrations/v140.go

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
// Copyright 2020 The Gitea Authors. All rights reserved.
2+
// Use of this source code is governed by a MIT-style
3+
// license that can be found in the LICENSE file.
4+
5+
package migrations
6+
7+
import (
8+
"fmt"
9+
10+
"code.gitea.io/gitea/modules/setting"
11+
12+
"xorm.io/xorm"
13+
)
14+
15+
func fixLanguageStatsToSaveSize(x *xorm.Engine) error {
16+
// LanguageStat see models/repo_language_stats.go
17+
type LanguageStat struct {
18+
Size int64 `xorm:"NOT NULL DEFAULT 0"`
19+
}
20+
21+
// RepoIndexerType specifies the repository indexer type
22+
type RepoIndexerType int
23+
24+
const (
25+
// RepoIndexerTypeCode code indexer
26+
RepoIndexerTypeCode RepoIndexerType = iota // 0
27+
// RepoIndexerTypeStats repository stats indexer
28+
RepoIndexerTypeStats // 1
29+
)
30+
31+
// RepoIndexerStatus see models/repo_indexer.go
32+
type RepoIndexerStatus struct {
33+
IndexerType RepoIndexerType `xorm:"INDEX(s) NOT NULL DEFAULT 0"`
34+
}
35+
36+
if err := x.Sync2(new(LanguageStat)); err != nil {
37+
return fmt.Errorf("Sync2: %v", err)
38+
}
39+
40+
x.Delete(&RepoIndexerStatus{IndexerType: RepoIndexerTypeStats})
41+
42+
// Delete language stat statuses
43+
truncExpr := "TRUNCATE TABLE"
44+
if setting.Database.UseSQLite3 {
45+
truncExpr = "DELETE FROM"
46+
}
47+
48+
// Delete language stats
49+
if _, err := x.Exec(fmt.Sprintf("%s language_stat", truncExpr)); err != nil {
50+
return err
51+
}
52+
53+
sess := x.NewSession()
54+
defer sess.Close()
55+
return dropTableColumns(sess, "language_stat", "percentage")
56+
}

models/repo_language_stats.go

Lines changed: 49 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,8 @@ type LanguageStat struct {
2020
CommitID string
2121
IsPrimary bool
2222
Language string `xorm:"VARCHAR(30) UNIQUE(s) INDEX NOT NULL"`
23-
Percentage float32 `xorm:"NUMERIC(5,2) NOT NULL DEFAULT 0"`
23+
Percentage float32 `xorm:"-"`
24+
Size int64 `xorm:"NOT NULL DEFAULT 0"`
2425
Color string `xorm:"-"`
2526
CreatedUnix timeutil.TimeStamp `xorm:"INDEX CREATED"`
2627
}
@@ -34,12 +35,36 @@ func (stats LanguageStatList) loadAttributes() {
3435
}
3536
}
3637

38+
func (stats LanguageStatList) getLanguagePercentages() map[string]float32 {
39+
langPerc := make(map[string]float32)
40+
var otherPerc float32 = 100
41+
var total int64
42+
43+
for _, stat := range stats {
44+
total += stat.Size
45+
}
46+
if total > 0 {
47+
for _, stat := range stats {
48+
perc := float32(math.Round(float64(stat.Size)/float64(total)*1000) / 10)
49+
if perc <= 0.1 {
50+
continue
51+
}
52+
otherPerc -= perc
53+
langPerc[stat.Language] = perc
54+
}
55+
otherPerc = float32(math.Round(float64(otherPerc)*10) / 10)
56+
}
57+
if otherPerc > 0 {
58+
langPerc["other"] = otherPerc
59+
}
60+
return langPerc
61+
}
62+
3763
func (repo *Repository) getLanguageStats(e Engine) (LanguageStatList, error) {
3864
stats := make(LanguageStatList, 0, 6)
39-
if err := e.Where("`repo_id` = ?", repo.ID).Desc("`percentage`").Find(&stats); err != nil {
65+
if err := e.Where("`repo_id` = ?", repo.ID).Desc("`size`").Find(&stats); err != nil {
4066
return nil, err
4167
}
42-
stats.loadAttributes()
4368
return stats, nil
4469
}
4570

@@ -54,13 +79,18 @@ func (repo *Repository) GetTopLanguageStats(limit int) (LanguageStatList, error)
5479
if err != nil {
5580
return nil, err
5681
}
82+
perc := stats.getLanguagePercentages()
5783
topstats := make(LanguageStatList, 0, limit)
5884
var other float32
5985
for i := range stats {
86+
if _, ok := perc[stats[i].Language]; !ok {
87+
continue
88+
}
6089
if stats[i].Language == "other" || len(topstats) >= limit {
61-
other += stats[i].Percentage
90+
other += perc[stats[i].Language]
6291
continue
6392
}
93+
stats[i].Percentage = perc[stats[i].Language]
6494
topstats = append(topstats, stats[i])
6595
}
6696
if other > 0 {
@@ -71,11 +101,12 @@ func (repo *Repository) GetTopLanguageStats(limit int) (LanguageStatList, error)
71101
Percentage: float32(math.Round(float64(other)*10) / 10),
72102
})
73103
}
104+
topstats.loadAttributes()
74105
return topstats, nil
75106
}
76107

77108
// UpdateLanguageStats updates the language statistics for repository
78-
func (repo *Repository) UpdateLanguageStats(commitID string, stats map[string]float32) error {
109+
func (repo *Repository) UpdateLanguageStats(commitID string, stats map[string]int64) error {
79110
sess := x.NewSession()
80111
if err := sess.Begin(); err != nil {
81112
return err
@@ -87,24 +118,24 @@ func (repo *Repository) UpdateLanguageStats(commitID string, stats map[string]fl
87118
return err
88119
}
89120
var topLang string
90-
var p float32
91-
for lang, perc := range stats {
92-
if perc > p {
93-
p = perc
121+
var s int64
122+
for lang, size := range stats {
123+
if size > s {
124+
s = size
94125
topLang = strings.ToLower(lang)
95126
}
96127
}
97128

98-
for lang, perc := range stats {
129+
for lang, size := range stats {
99130
upd := false
100131
llang := strings.ToLower(lang)
101132
for _, s := range oldstats {
102133
// Update already existing language
103134
if strings.ToLower(s.Language) == llang {
104135
s.CommitID = commitID
105136
s.IsPrimary = llang == topLang
106-
s.Percentage = perc
107-
if _, err := sess.ID(s.ID).Cols("`commit_id`", "`percentage`", "`is_primary`").Update(s); err != nil {
137+
s.Size = size
138+
if _, err := sess.ID(s.ID).Cols("`commit_id`", "`size`", "`is_primary`").Update(s); err != nil {
108139
return err
109140
}
110141
upd = true
@@ -114,11 +145,11 @@ func (repo *Repository) UpdateLanguageStats(commitID string, stats map[string]fl
114145
// Insert new language
115146
if !upd {
116147
if _, err := sess.Insert(&LanguageStat{
117-
RepoID: repo.ID,
118-
CommitID: commitID,
119-
IsPrimary: llang == topLang,
120-
Language: lang,
121-
Percentage: perc,
148+
RepoID: repo.ID,
149+
CommitID: commitID,
150+
IsPrimary: llang == topLang,
151+
Language: lang,
152+
Size: size,
122153
}); err != nil {
123154
return err
124155
}
@@ -153,7 +184,7 @@ func CopyLanguageStat(originalRepo, destRepo *Repository) error {
153184
return err
154185
}
155186
RepoLang := make(LanguageStatList, 0, 6)
156-
if err := sess.Where("`repo_id` = ?", originalRepo.ID).Desc("`percentage`").Find(&RepoLang); err != nil {
187+
if err := sess.Where("`repo_id` = ?", originalRepo.ID).Desc("`size`").Find(&RepoLang); err != nil {
157188
return err
158189
}
159190
if len(RepoLang) > 0 {

modules/git/repo_language_stats.go

Lines changed: 28 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@ import (
88
"bytes"
99
"io"
1010
"io/ioutil"
11-
"math"
1211

1312
"code.gitea.io/gitea/modules/analyze"
1413

@@ -20,8 +19,22 @@ import (
2019

2120
const fileSizeLimit int64 = 16 * 1024 * 1024
2221

22+
// specialLanguages defines the list of languages that are excluded from the calculation
23+
// unless they are the only language present in the repository. Only languages which under
24+
// normal circumstances are not considered to be code should be listed here.
25+
var specialLanguages = []string{
26+
"XML",
27+
"JSON",
28+
"TOML",
29+
"YAML",
30+
"INI",
31+
"SVG",
32+
"Text",
33+
"Markdown",
34+
}
35+
2336
// GetLanguageStats calculates language stats for git repository at specified commit
24-
func (repo *Repository) GetLanguageStats(commitID string) (map[string]float32, error) {
37+
func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, error) {
2538
r, err := git.PlainOpen(repo.Path)
2639
if err != nil {
2740
return nil, err
@@ -43,9 +56,8 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]float32, e
4356
}
4457

4558
sizes := make(map[string]int64)
46-
var total int64
4759
err = tree.Files().ForEach(func(f *object.File) error {
48-
if enry.IsVendor(f.Name) || enry.IsDotFile(f.Name) ||
60+
if f.Size == 0 || enry.IsVendor(f.Name) || enry.IsDotFile(f.Name) ||
4961
enry.IsDocumentation(f.Name) || enry.IsConfiguration(f.Name) {
5062
return nil
5163
}
@@ -63,30 +75,28 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]float32, e
6375
return nil
6476
}
6577

78+
// group languages, such as Pug -> HTML; SCSS -> CSS
79+
group := enry.GetLanguageGroup(language)
80+
if group != "" {
81+
language = group
82+
}
83+
6684
sizes[language] += f.Size
67-
total += f.Size
6885

6986
return nil
7087
})
7188
if err != nil {
7289
return nil, err
7390
}
7491

75-
stats := make(map[string]float32)
76-
var otherPerc float32 = 100
77-
for language, size := range sizes {
78-
perc := float32(math.Round(float64(size)/float64(total)*1000) / 10)
79-
if perc <= 0.1 {
80-
continue
92+
// filter special languages unless they are the only language
93+
if len(sizes) > 1 {
94+
for _, language := range specialLanguages {
95+
delete(sizes, language)
8196
}
82-
otherPerc -= perc
83-
stats[language] = perc
84-
}
85-
otherPerc = float32(math.Round(float64(otherPerc)*10) / 10)
86-
if otherPerc > 0 {
87-
stats["other"] = otherPerc
8897
}
89-
return stats, nil
98+
99+
return sizes, nil
90100
}
91101

92102
func readFile(f *object.File, limit int64) ([]byte, error) {

modules/indexer/stats/indexer_test.go

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -34,9 +34,10 @@ func TestRepoStatsIndex(t *testing.T) {
3434

3535
repo, err := models.GetRepositoryByID(1)
3636
assert.NoError(t, err)
37+
status, err := repo.GetIndexerStatus(models.RepoIndexerTypeStats)
38+
assert.NoError(t, err)
39+
assert.Equal(t, "65f1bf27bc3bf70f64657658635e66094edbcb4d", status.CommitSha)
3740
langs, err := repo.GetTopLanguageStats(5)
3841
assert.NoError(t, err)
39-
assert.Len(t, langs, 1)
40-
assert.Equal(t, "other", langs[0].Language)
41-
assert.Equal(t, float32(100), langs[0].Percentage)
42+
assert.Empty(t, langs)
4243
}

0 commit comments

Comments
 (0)