Skip to content

function: use new caches from go-mysql-server #957

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Aug 28, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 29 additions & 28 deletions internal/function/language.go
Original file line number Diff line number Diff line change
@@ -1,13 +1,12 @@
package function

import (
"encoding/binary"
"fmt"
"hash/crc32"
"os"
"strconv"
"sync"

lru "github.com/hashicorp/golang-lru"
enry "github.com/src-d/enry/v2"
"github.com/src-d/go-mysql-server/sql"
)
Expand All @@ -27,14 +26,21 @@ func languageCacheSize() int {
return size
}

var languageCache *lru.TwoQueueCache
// Package-level state backing the language detection cache.
var (
	// languageCache stays nil until getLanguageCache creates it on first use.
	languageCache sql.KeyValueCache
	// languageMut serializes the lazy creation of languageCache.
	languageMut sync.Mutex
)

func init() {
var err error
languageCache, err = lru.New2Q(languageCacheSize())
if err != nil {
panic(fmt.Errorf("cannot initialize language cache: %s", err))
// getLanguageCache returns the package-level language cache, creating it
// through ctx.Memory on the first call. Creation happens while holding
// languageMut, so concurrent callers all receive the same cache instance.
func getLanguageCache(ctx *sql.Context) sql.KeyValueCache {
	languageMut.Lock()
	defer languageMut.Unlock()

	if languageCache != nil {
		return languageCache
	}

	// The dispose function returned alongside the cache is deliberately
	// dropped: the cache is never disposed until the program dies.
	cache, _ := ctx.Memory.NewLRUCache(uint(languageCacheSize()))
	languageCache = cache

	return languageCache
}

// Language gets the language of a file given its path and
Expand Down Expand Up @@ -136,11 +142,13 @@ func (f *Language) Eval(ctx *sql.Context, row sql.Row) (interface{}, error) {
blob = right.([]byte)
}

var hash [8]byte
languageCache := getLanguageCache(ctx)

var hash uint64
if len(blob) > 0 {
hash = languageHash(path, blob)
value, ok := languageCache.Get(hash)
if ok {
value, err := languageCache.Get(hash)
if err == nil {
return value, nil
}
}
Expand All @@ -151,38 +159,31 @@ func (f *Language) Eval(ctx *sql.Context, row sql.Row) (interface{}, error) {
}

if len(blob) > 0 {
languageCache.Add(hash, lang)
if err := languageCache.Put(hash, lang); err != nil {
return nil, err
}
}

return lang, nil
}

func languageHash(filename string, blob []byte) [8]byte {
func languageHash(filename string, blob []byte) uint64 {
fh := filenameHash(filename)
bh := blobHash(blob)

var result [8]byte
copy(result[:], fh)
copy(result[4:], bh)
return result
return uint64(fh)<<32 | uint64(bh)
}

func blobHash(blob []byte) []byte {
func blobHash(blob []byte) uint32 {
if len(blob) == 0 {
return nil
return 0
}

n := crc32.ChecksumIEEE(blob)
hash := make([]byte, 4)
binary.LittleEndian.PutUint32(hash, n)
return hash
return crc32.ChecksumIEEE(blob)
}

func filenameHash(filename string) []byte {
n := crc32.ChecksumIEEE([]byte(filename))
hash := make([]byte, 4)
binary.LittleEndian.PutUint32(hash, n)
return hash
func filenameHash(filename string) uint32 {
return crc32.ChecksumIEEE([]byte(filename))
}

// Children implements the Expression interface.
Expand Down
21 changes: 14 additions & 7 deletions internal/function/loc.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ var languages = gocloc.NewDefinedLanguages()

var errEmptyInputValues = errors.New("empty input values")

// LOC is a function that returns the count of different types of lines of code.
type LOC struct {
Left sql.Expression
Right sql.Expression
Expand Down Expand Up @@ -74,7 +75,11 @@ func (f *LOC) Eval(ctx *sql.Context, row sql.Row) (interface{}, error) {
return nil, err
}

lang := f.getLanguage(path, blob)
lang, err := f.getLanguage(path, blob)
if err != nil {
return nil, err
}

if lang == "" || languages.Langs[lang] == nil {
return nil, nil
}
Expand Down Expand Up @@ -137,20 +142,22 @@ func (f *LOC) getInputValues(ctx *sql.Context, row sql.Row) (string, []byte, err
return path, blob, nil
}

// getLanguage resolves the language for the given path and blob. It looks the
// pair up in the package-level languageCache under the key produced by
// languageHash; when the lookup does not succeed it falls back to
// enry.GetLanguage and, for non-empty blobs, stores the result in the cache.
//
// NOTE(review): this reads the package-level languageCache directly instead
// of going through getLanguageCache. In the visible code languageCache is only
// assigned lazily inside getLanguageCache, so if LOC is evaluated before
// anything has called getLanguageCache the variable would presumably still be
// nil and the Get call below would panic. Confirm, and consider passing the
// *sql.Context in so the cache can be obtained via getLanguageCache(ctx).
func (f *LOC) getLanguage(path string, blob []byte) string {
func (f *LOC) getLanguage(path string, blob []byte) (string, error) {
hash := languageHash(path, blob)

value, ok := languageCache.Get(hash)
if ok {
return value.(string)
// Any error from Get is treated as a cache miss; only a nil error
// short-circuits with the cached value.
value, err := languageCache.Get(hash)
if err == nil {
return value.(string), nil
}

lang := enry.GetLanguage(path, blob)
// Results for empty blobs are not cached.
if len(blob) > 0 {
languageCache.Add(hash, lang)
if err := languageCache.Put(hash, lang); err != nil {
return "", err
}
}

return lang
return lang, nil
}

// Children implements the Expression interface.
Expand Down
60 changes: 40 additions & 20 deletions internal/function/uast.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
package function

import (
"crypto/sha1"
"encoding/json"
"fmt"
"hash"
Expand All @@ -16,7 +15,6 @@ import (
"github.com/bblfsh/sdk/v3/uast"
"github.com/bblfsh/sdk/v3/uast/nodes"
"github.com/go-kit/kit/metrics/discard"
lru "github.com/hashicorp/golang-lru"
"github.com/sirupsen/logrus"

"github.com/src-d/go-mysql-server/sql"
Expand Down Expand Up @@ -53,8 +51,24 @@ func observeQuery(lang, xpath string, t time.Time) func(bool) {
}
}

var uastCache *lru.Cache
var uastMaxBlobSize int
// Package-level state for the UAST cache and its limits.
var (
	// uastCacheSize is the capacity used when the UAST cache is created; it is
	// assigned in init.
	uastCacheSize int
	// uastMaxBlobSize is parsed in init from the environment variable named by
	// uastMaxBlobSizeKey.
	uastMaxBlobSize int

	// uastCache stays nil until getUASTCache creates it on first use.
	uastCache sql.KeyValueCache
	// uastmut serializes the lazy creation of uastCache.
	uastmut sync.Mutex
)

// getUASTCache returns the package-level UAST cache, creating it through
// ctx.Memory with capacity uastCacheSize on the first call. Creation happens
// while holding uastmut, so concurrent callers all receive the same cache.
func getUASTCache(ctx *sql.Context) sql.KeyValueCache {
	uastmut.Lock()
	defer uastmut.Unlock()

	if uastCache != nil {
		return uastCache
	}

	// The dispose function returned alongside the cache is deliberately
	// dropped: the cache is never disposed until the program dies.
	cache, _ := ctx.Memory.NewLRUCache(uint(uastCacheSize))
	uastCache = cache

	return uastCache
}

func init() {
s := os.Getenv(uastCacheSizeKey)
Expand All @@ -63,10 +77,7 @@ func init() {
size = defaultUASTCacheSize
}

uastCache, err = lru.New(size)
if err != nil {
panic(fmt.Errorf("cannot initialize UAST cache: %s", err))
}
uastCacheSize = size

uastMaxBlobSize, err = strconv.Atoi(os.Getenv(uastMaxBlobSizeKey))
if err != nil {
Expand All @@ -83,7 +94,7 @@ type uastFunc struct {
Lang sql.Expression
XPath sql.Expression

h hash.Hash
h hash.Hash64
m sync.Mutex
}

Expand Down Expand Up @@ -151,7 +162,7 @@ func (u *uastFunc) WithChildren(children ...sql.Expression) (sql.Expression, err
Blob: blob,
XPath: xpath,
Lang: lang,
h: sha1.New(),
h: newHash(),
}, nil
}

Expand Down Expand Up @@ -234,6 +245,13 @@ func (u *uastFunc) Eval(ctx *sql.Context, row sql.Row) (out interface{}, err err
return u.getUAST(ctx, bytes, lang, xpath, mode)
}

// computeKey derives the cache key for the given mode, language and blob by
// delegating to the package-level computeKey with this function's hasher.
func (u *uastFunc) computeKey(mode, lang string, blob []byte) (uint64, error) {
	// u.h is a single stateful hasher owned by u, so u.m is held for the
	// whole computation.
	u.m.Lock()
	defer u.m.Unlock()

	key, err := computeKey(u.h, mode, lang, blob)
	return key, err
}

func (u *uastFunc) getUAST(
ctx *sql.Context,
blob []byte,
Expand All @@ -242,17 +260,17 @@ func (u *uastFunc) getUAST(
) (interface{}, error) {
finish := observeQuery(lang, xpath, time.Now())

u.m.Lock()
key, err := computeKey(u.h, mode.String(), lang, blob)
u.m.Unlock()

key, err := u.computeKey(mode.String(), lang, blob)
if err != nil {
return nil, err
}

uastCache := getUASTCache(ctx)

var node nodes.Node
value, ok := uastCache.Get(key)
if ok {
value, err := uastCache.Get(key)
cacheMiss := err != nil
if !cacheMiss {
node = value.(nodes.Node)
} else {
var err error
Expand All @@ -265,7 +283,9 @@ func (u *uastFunc) getUAST(
return nil, err
}

uastCache.Add(key, node)
if err := uastCache.Put(key, node); err != nil {
return nil, err
}
}

var nodeArray nodes.Array
Expand All @@ -288,7 +308,7 @@ func (u *uastFunc) getUAST(
return nil, nil
}

finish(ok)
finish(!cacheMiss)

return result, nil
}
Expand Down Expand Up @@ -321,7 +341,7 @@ func NewUAST(args ...sql.Expression) (sql.Expression, error) {
Blob: blob,
Lang: lang,
XPath: xpath,
h: sha1.New(),
h: newHash(),
}}, nil
}

Expand Down Expand Up @@ -380,7 +400,7 @@ func NewUASTMode(mode, blob, lang sql.Expression) sql.Expression {
Blob: blob,
Lang: lang,
XPath: nil,
h: sha1.New(),
h: newHash(),
}}
}

Expand Down
19 changes: 13 additions & 6 deletions internal/function/uast_utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,17 @@ import (
"bytes"
"fmt"
"hash"
"hash/crc64"

"github.com/bblfsh/go-client/v4/tools"
"github.com/bblfsh/sdk/v3/uast/nodes/nodesproto"

"github.com/sirupsen/logrus"
"github.com/src-d/gitbase"
bblfsh "github.com/bblfsh/go-client/v4"
"github.com/bblfsh/sdk/v3/uast/nodes"
errors "gopkg.in/src-d/go-errors.v1"
"github.com/sirupsen/logrus"
"github.com/src-d/gitbase"
"github.com/src-d/go-mysql-server/sql"
errors "gopkg.in/src-d/go-errors.v1"
)

var (
Expand Down Expand Up @@ -53,17 +54,23 @@ func exprToString(
return x.(string), nil
}

func computeKey(h hash.Hash, mode, lang string, blob []byte) (string, error) {
// crcTable is the CRC-64 table for the ISO polynomial, built once and shared
// by every hasher returned from newHash.
var crcTable = crc64.MakeTable(crc64.ISO)

// newHash returns a fresh 64-bit CRC hasher backed by crcTable.
func newHash() hash.Hash64 {
	hasher := crc64.New(crcTable)
	return hasher
}

func computeKey(h hash.Hash64, mode, lang string, blob []byte) (uint64, error) {
h.Reset()
if err := writeToHash(h, [][]byte{
[]byte(mode),
[]byte(lang),
blob,
}); err != nil {
return "", err
return 0, err
}

return string(h.Sum(nil)), nil
return h.Sum64(), nil
}

func writeToHash(h hash.Hash, elements [][]byte) error {
Expand Down