Skip to content
This repository was archived by the owner on Jan 28, 2021. It is now read-only.

Commit 8e04eac

Browse files
committed
function: implement regexp_matches
Signed-off-by: Miguel Molina <[email protected]>
1 parent 550cc54 commit 8e04eac

File tree

5 files changed

+380
-0
lines changed

5 files changed

+380
-0
lines changed

Diff for: README.md

+1
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,7 @@ We support and actively test against certain third-party clients to ensure compa
103103
|`NOW()`|Returns the current timestamp.|
104104
|`NULLIF(expr1, expr2)`|Returns NULL if expr1 = expr2 is true, otherwise returns expr1.|
105105
|`POW(X, Y)`|Returns the value of X raised to the power of Y.|
106+
|`REGEXP_MATCHES(text, pattern, [flags])`|Returns an array with the matches of the pattern in the given text. Flags can be given to control certain behaviours of the regular expression. Currently, only the `i` flag is supported, which makes the matching case-insensitive.|
106107
|`REPEAT(str, count)`|Returns a string consisting of the string str repeated count times.|
107108
|`REPLACE(str,from_str,to_str)`|Returns the string str with all occurrences of the string from_str replaced by the string to_str.|
108109
|`REVERSE(str)`|Returns the string str with the order of the characters reversed.|

Diff for: engine_test.go

+28
Original file line numberDiff line numberDiff line change
@@ -1454,6 +1454,34 @@ var queries = []struct {
14541454
ORDER BY table_type, table_schema, table_name`,
14551455
[]sql.Row{{"mydb", "mytable", "TABLE"}},
14561456
},
1457+
{
1458+
`SELECT REGEXP_MATCHES("bopbeepbop", "bop")`,
1459+
[]sql.Row{{[]interface{}{"bop", "bop"}}},
1460+
},
1461+
{
1462+
`SELECT EXPLODE(REGEXP_MATCHES("bopbeepbop", "bop"))`,
1463+
[]sql.Row{{"bop"}, {"bop"}},
1464+
},
1465+
{
1466+
`SELECT EXPLODE(REGEXP_MATCHES("helloworld", "bop"))`,
1467+
[]sql.Row{},
1468+
},
1469+
{
1470+
`SELECT EXPLODE(REGEXP_MATCHES("", ""))`,
1471+
[]sql.Row{{""}},
1472+
},
1473+
{
1474+
`SELECT REGEXP_MATCHES(NULL, "")`,
1475+
[]sql.Row{{nil}},
1476+
},
1477+
{
1478+
`SELECT REGEXP_MATCHES("", NULL)`,
1479+
[]sql.Row{{nil}},
1480+
},
1481+
{
1482+
`SELECT REGEXP_MATCHES("", "", NULL)`,
1483+
[]sql.Row{{nil}},
1484+
},
14571485
}
14581486

14591487
func TestQueries(t *testing.T) {

Diff for: sql/expression/function/regexp_matches.go

+204
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,204 @@
1+
package function
2+
3+
import (
4+
"fmt"
5+
"regexp"
6+
"strings"
7+
8+
"github.com/src-d/go-mysql-server/sql"
9+
"github.com/src-d/go-mysql-server/sql/expression"
10+
errors "gopkg.in/src-d/go-errors.v1"
11+
)
12+
13+
// RegexpMatches returns the matches of a regular expression.
14+
type RegexpMatches struct {
15+
Text sql.Expression
16+
Pattern sql.Expression
17+
Flags sql.Expression
18+
19+
cacheable bool
20+
re *regexp.Regexp
21+
}
22+
23+
// NewRegexpMatches creates a new RegexpMatches expression.
24+
func NewRegexpMatches(args ...sql.Expression) (sql.Expression, error) {
25+
var r RegexpMatches
26+
switch len(args) {
27+
case 3:
28+
r.Flags = args[2]
29+
fallthrough
30+
case 2:
31+
r.Text = args[0]
32+
r.Pattern = args[1]
33+
default:
34+
return nil, sql.ErrInvalidArgumentNumber.New("regexp_matches", "2 or 3", len(args))
35+
}
36+
37+
if canBeCached(r.Pattern) && (r.Flags == nil || canBeCached(r.Flags)) {
38+
r.cacheable = true
39+
}
40+
41+
return &r, nil
42+
}
43+
44+
// Type implements the sql.Expression interface.
45+
func (r *RegexpMatches) Type() sql.Type { return sql.Array(sql.Text) }
46+
47+
// IsNullable implements the sql.Expression interface.
48+
func (r *RegexpMatches) IsNullable() bool { return true }
49+
50+
// Children implements the sql.Expression interface.
51+
func (r *RegexpMatches) Children() []sql.Expression {
52+
var result = []sql.Expression{r.Text, r.Pattern}
53+
if r.Flags != nil {
54+
result = append(result, r.Flags)
55+
}
56+
return result
57+
}
58+
59+
// Resolved implements the sql.Expression interface.
60+
func (r *RegexpMatches) Resolved() bool {
61+
return r.Text.Resolved() && r.Pattern.Resolved() && (r.Flags == nil || r.Flags.Resolved())
62+
}
63+
64+
// WithChildren implements the sql.Expression interface.
65+
func (r *RegexpMatches) WithChildren(children ...sql.Expression) (sql.Expression, error) {
66+
required := 2
67+
if r.Flags != nil {
68+
required = 3
69+
}
70+
71+
if len(children) != required {
72+
return nil, sql.ErrInvalidChildrenNumber.New(r, len(children), required)
73+
}
74+
75+
return NewRegexpMatches(children...)
76+
}
77+
78+
func (r *RegexpMatches) String() string {
79+
var args []string
80+
for _, e := range r.Children() {
81+
args = append(args, e.String())
82+
}
83+
return fmt.Sprintf("regexp_matches(%s)", strings.Join(args, ", "))
84+
}
85+
86+
// Eval implements the sql.Expression interface.
87+
func (r *RegexpMatches) Eval(ctx *sql.Context, row sql.Row) (interface{}, error) {
88+
span, ctx := ctx.Span("function.RegexpMatches")
89+
defer span.Finish()
90+
91+
var re *regexp.Regexp
92+
var err error
93+
if r.cacheable {
94+
if r.re == nil {
95+
r.re, err = r.compileRegex(ctx, nil)
96+
if err != nil {
97+
return nil, err
98+
}
99+
100+
if r.re == nil {
101+
return nil, nil
102+
}
103+
}
104+
re = r.re
105+
} else {
106+
re, err = r.compileRegex(ctx, row)
107+
if err != nil {
108+
return nil, err
109+
}
110+
111+
if re == nil {
112+
return nil, nil
113+
}
114+
}
115+
116+
text, err := r.Text.Eval(ctx, row)
117+
if err != nil {
118+
return nil, err
119+
}
120+
121+
if text == nil {
122+
return nil, nil
123+
}
124+
125+
text, err = sql.Text.Convert(text)
126+
if err != nil {
127+
return nil, err
128+
}
129+
130+
matches := re.FindAllStringSubmatch(text.(string), -1)
131+
if len(matches) == 0 {
132+
return nil, nil
133+
}
134+
135+
var result []interface{}
136+
for _, m := range matches {
137+
for _, sm := range m {
138+
result = append(result, sm)
139+
}
140+
}
141+
142+
return result, nil
143+
}
144+
145+
func (r *RegexpMatches) compileRegex(ctx *sql.Context, row sql.Row) (*regexp.Regexp, error) {
146+
pattern, err := r.Pattern.Eval(ctx, row)
147+
if err != nil {
148+
return nil, err
149+
}
150+
151+
if pattern == nil {
152+
return nil, nil
153+
}
154+
155+
pattern, err = sql.Text.Convert(pattern)
156+
if err != nil {
157+
return nil, err
158+
}
159+
160+
var flags string
161+
if r.Flags != nil {
162+
f, err := r.Flags.Eval(ctx, row)
163+
if err != nil {
164+
return nil, err
165+
}
166+
167+
if f == nil {
168+
return nil, nil
169+
}
170+
171+
f, err = sql.Text.Convert(f)
172+
if err != nil {
173+
return nil, err
174+
}
175+
176+
flags = f.(string)
177+
for _, f := range flags {
178+
if !validRegexpFlags[f] {
179+
return nil, errInvalidRegexpFlag.New(f)
180+
}
181+
}
182+
183+
flags = fmt.Sprintf("(?%s)", flags)
184+
}
185+
186+
return regexp.Compile(flags + pattern.(string))
187+
}
188+
189+
// errInvalidRegexpFlag is the error kind returned by compileRegex when the
// flags contain a character that is not listed in validRegexpFlags. Its
// message takes the offending flag as its only argument.
var errInvalidRegexpFlag = errors.NewKind("invalid regexp flag: %v")
190+
191+
// validRegexpFlags is the set of flag characters accepted in the flags
// argument of regexp_matches. Any character missing from it is rejected.
var validRegexpFlags = map[rune]bool{'i': true}
194+
195+
func canBeCached(e sql.Expression) bool {
196+
var hasCols bool
197+
expression.Inspect(e, func(e sql.Expression) bool {
198+
if _, ok := e.(*expression.GetField); ok {
199+
hasCols = true
200+
}
201+
return true
202+
})
203+
return !hasCols
204+
}

Diff for: sql/expression/function/regexp_matches_test.go

+146
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,146 @@
1+
package function
2+
3+
import (
4+
"testing"
5+
6+
"github.com/src-d/go-mysql-server/sql"
7+
"github.com/src-d/go-mysql-server/sql/expression"
8+
"github.com/stretchr/testify/require"
9+
10+
errors "gopkg.in/src-d/go-errors.v1"
11+
)
12+
13+
func TestRegexpMatches(t *testing.T) {
14+
testCases := []struct {
15+
pattern interface{}
16+
text interface{}
17+
flags interface{}
18+
expected interface{}
19+
err *errors.Kind
20+
}{
21+
{
22+
`^foobar(.*)bye$`,
23+
"foobarhellobye",
24+
"",
25+
[]interface{}{"foobarhellobye", "hello"},
26+
nil,
27+
},
28+
{
29+
"bop",
30+
"bopbeepbop",
31+
"",
32+
[]interface{}{"bop", "bop"},
33+
nil,
34+
},
35+
{
36+
"bop",
37+
"bopbeepBop",
38+
"i",
39+
[]interface{}{"bop", "Bop"},
40+
nil,
41+
},
42+
{
43+
"bop",
44+
"helloworld",
45+
"",
46+
nil,
47+
nil,
48+
},
49+
{
50+
"foo",
51+
"",
52+
"",
53+
nil,
54+
nil,
55+
},
56+
{
57+
"",
58+
"",
59+
"",
60+
[]interface{}{""},
61+
nil,
62+
},
63+
{
64+
"bop",
65+
nil,
66+
"",
67+
nil,
68+
nil,
69+
},
70+
{
71+
"bop",
72+
"beep",
73+
nil,
74+
nil,
75+
nil,
76+
},
77+
{
78+
nil,
79+
"bop",
80+
"",
81+
nil,
82+
nil,
83+
},
84+
{
85+
"bop",
86+
"bopbeepBop",
87+
"ix",
88+
nil,
89+
errInvalidRegexpFlag,
90+
},
91+
}
92+
93+
t.Run("cacheable", func(t *testing.T) {
94+
for _, tt := range testCases {
95+
var flags sql.Expression
96+
if tt.flags != "" {
97+
flags = expression.NewLiteral(tt.flags, sql.Text)
98+
}
99+
f, err := NewRegexpMatches(
100+
expression.NewLiteral(tt.text, sql.Text),
101+
expression.NewLiteral(tt.pattern, sql.Text),
102+
flags,
103+
)
104+
require.NoError(t, err)
105+
106+
t.Run(f.String(), func(t *testing.T) {
107+
require := require.New(t)
108+
result, err := f.Eval(sql.NewEmptyContext(), nil)
109+
if tt.err == nil {
110+
require.NoError(err)
111+
require.Equal(tt.expected, result)
112+
} else {
113+
require.Error(err)
114+
require.True(tt.err.Is(err))
115+
}
116+
})
117+
}
118+
})
119+
120+
t.Run("not cacheable", func(t *testing.T) {
121+
for _, tt := range testCases {
122+
var flags sql.Expression
123+
if tt.flags != "" {
124+
flags = expression.NewGetField(2, sql.Text, "x", false)
125+
}
126+
f, err := NewRegexpMatches(
127+
expression.NewGetField(0, sql.Text, "x", false),
128+
expression.NewGetField(1, sql.Text, "x", false),
129+
flags,
130+
)
131+
require.NoError(t, err)
132+
133+
t.Run(f.String(), func(t *testing.T) {
134+
require := require.New(t)
135+
result, err := f.Eval(sql.NewEmptyContext(), sql.Row{tt.text, tt.pattern, tt.flags})
136+
if tt.err == nil {
137+
require.NoError(err)
138+
require.Equal(tt.expected, result)
139+
} else {
140+
require.Error(err)
141+
require.True(tt.err.Is(err))
142+
}
143+
})
144+
}
145+
})
146+
}

0 commit comments

Comments
 (0)