Skip to content

Commit a019c46

Browse files
feat: Pattern generation
1 parent 1494de3 commit a019c46

File tree

6 files changed

+72
-5
lines changed

6 files changed

+72
-5
lines changed

Diff for: Makefile

+3
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,9 @@ data_match:
3333
data_each_match:
3434
go run dg.go -c ./examples/each_match_test/config.yaml -o ./csvs/each_match -i import.sql
3535

36+
data_pattern:
37+
go run dg.go -c ./examples/pattern_test/config.yaml -o ./csvs/pattern_test -i import.sql
38+
3639
data: data_many_to_many data_person data_range_test data_input_test data_unique_test data_const_test
3740
echo "done"
3841

Diff for: examples/pattern_test/config.yaml

+13
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
# Example configuration exercising the regex-based pattern generator.
tables:
  # Generate data for a person table using the pattern generator.
  - name: person
    # Number of rows to generate.
    count: 1000
    columns:
      # Identifier column filled from the ${uuid} placeholder.
      - name: id
        type: gen
        processor:
          value: ${uuid}
      # Phone-number-shaped column: every value is produced from the regular
      # expression below (three digits, three digits, four digits).
      - name: mobile
        type: gen
        processor:
          pattern: \d{3}-\d{3}-\d{4}

Diff for: go.mod

+1
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ require (
1111

1212
require (
1313
github.com/davecgh/go-spew v1.1.1 // indirect
14+
github.com/lucasjones/reggen v0.0.0-20200904144131-37ba4fa293bb
1415
github.com/pmezard/go-difflib v1.0.0 // indirect
1516
golang.org/x/exp v0.0.0-20220303212507-bbda1eaf7a17 // indirect
1617
)

Diff for: go.sum

+2
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@ github.com/brianvoe/gofakeit/v6 v6.22.0 h1:BzOsDot1o3cufTfOk+fWKE9nFYojyDV+XHdCW
22
github.com/brianvoe/gofakeit/v6 v6.22.0/go.mod h1:Ow6qC71xtwm79anlwKRlWZW6zVq9D2XHE4QSSMP/rU8=
33
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
44
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
5+
github.com/lucasjones/reggen v0.0.0-20200904144131-37ba4fa293bb h1:w1g9wNDIE/pHSTmAaUhv4TZQuPBS6GV3mMz5hkgziIU=
6+
github.com/lucasjones/reggen v0.0.0-20200904144131-37ba4fa293bb/go.mod h1:5ELEyG+X8f+meRWHuqUOewBOhvHkl7M76pdGEansxW4=
57
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
68
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
79
github.com/samber/lo v1.38.1 h1:j2XEAqXKb09Am4ebOg31SpvzUTTs6EN3VfgeLUhPdXM=

Diff for: internal/pkg/generator/gen_generator.go

+23-2
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,23 @@
11
package generator
22

33
import (
4+
"fmt"
45
"strings"
56

67
"github.com/codingconcepts/dg/internal/pkg/model"
78
"github.com/codingconcepts/dg/internal/pkg/random"
9+
"github.com/lucasjones/reggen"
810
"github.com/samber/lo"
911
)
1012

1113
// GenGenerator provides additional context to a gen column.
1214
type GenGenerator struct {
1315
Value string `yaml:"value"`
16+
Pattern string `yaml:"pattern"`
1417
NullPercentage int `yaml:"null_percentage"`
1518
Format string `yaml:"format"`
19+
20+
patternGenerator *reggen.Generator
1621
}
1722

1823
func (g GenGenerator) GetFormat() string {
@@ -21,27 +26,43 @@ func (g GenGenerator) GetFormat() string {
2126

2227
// Generate random data for a given column.
2328
func (g GenGenerator) Generate(t model.Table, c model.Column, files map[string]model.CSVFile) error {
29+
if g.Value == "" && g.Pattern == "" {
30+
return fmt.Errorf("gen must have either 'value' or 'pattern'")
31+
}
32+
2433
if t.Count == 0 {
2534
t.Count = len(lo.MaxBy(files[t.Name].Lines, func(a, b []string) bool {
2635
return len(a) > len(b)
2736
}))
2837
}
2938

39+
if g.Pattern != "" {
40+
var err error
41+
if g.patternGenerator, err = reggen.NewGenerator(g.Pattern); err != nil {
42+
return fmt.Errorf("creating regex generator: %w", err)
43+
}
44+
}
45+
3046
var line []string
3147
for i := 0; i < t.Count; i++ {
32-
line = append(line, g.replacePlaceholders())
48+
s := g.generate()
49+
line = append(line, s)
3350
}
3451

3552
AddTable(t, c.Name, line, files)
3653
return nil
3754
}
3855

39-
func (pg GenGenerator) replacePlaceholders() string {
56+
func (pg GenGenerator) generate() string {
4057
r := random.Intn(100)
4158
if r < pg.NullPercentage {
4259
return ""
4360
}
4461

62+
if pg.Pattern != "" {
63+
return pg.patternGenerator.Generate(255)
64+
}
65+
4566
s := pg.Value
4667

4768
// Look for quick single-replacements.

Diff for: internal/pkg/generator/gen_generator_test.go

+30-3
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,14 @@
11
package generator
22

33
import (
4+
"regexp"
45
"strconv"
56
"strings"
67
"testing"
78
"time"
89

910
"github.com/codingconcepts/dg/internal/pkg/model"
11+
"github.com/lucasjones/reggen"
1012

1113
"github.com/stretchr/testify/assert"
1214
)
@@ -15,6 +17,7 @@ func TestGenerateGenColumn(t *testing.T) {
1517
cases := []struct {
1618
name string
1719
value string
20+
pattern string
1821
format string
1922
expShapeFunc func(val string) bool
2023
}{
@@ -40,11 +43,19 @@ func TestGenerateGenColumn(t *testing.T) {
4043
expShapeFunc: func(val string) bool {
4144
_, err := strconv.Atoi(val)
4245
if err != nil {
43-
(panic(err))
46+
t.Fatal(err)
4447
}
4548
return err == nil
4649
},
4750
},
51+
{
52+
name: "pattern",
53+
pattern: `[a-z]{3}-[A-Z]{3}-\d{3}`,
54+
expShapeFunc: func(val string) bool {
55+
re := regexp.MustCompile(`[a-z]{3}-[A-Z]{3}-\d{3}`)
56+
return re.MatchString(val)
57+
},
58+
},
4859
}
4960

5061
for _, c := range cases {
@@ -59,8 +70,9 @@ func TestGenerateGenColumn(t *testing.T) {
5970
}
6071

6172
g := GenGenerator{
62-
Value: c.value,
63-
Format: c.format,
73+
Value: c.value,
74+
Pattern: c.pattern,
75+
Format: c.format,
6476
}
6577

6678
files := map[string]model.CSVFile{}
@@ -70,3 +82,18 @@ func TestGenerateGenColumn(t *testing.T) {
7082
})
7183
}
7284
}
85+
86+
func BenchmarkGeneratePattern(b *testing.B) {
87+
pattern := `[a-z]{3}-[A-Z]{3}-\d{3}`
88+
patternGenerator, err := reggen.NewGenerator(pattern)
89+
assert.NoError(b, err)
90+
91+
g := GenGenerator{
92+
Pattern: pattern,
93+
patternGenerator: patternGenerator,
94+
}
95+
96+
for i := 0; i < b.N; i++ {
97+
g.generate()
98+
}
99+
}

0 commit comments

Comments
 (0)