Skip to content

Commit 00e1ca8

Browse files
committed
Break words by dots
1 parent 833e5b2 commit 00e1ca8

File tree

4 files changed

+54
-3
lines changed

4 files changed

+54
-3
lines changed

Diff for: .gitignore

+3
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
*.o
2+
*.so
3+
results

Diff for: expected/pg_tsparser.out

+28-1
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,9 @@ SELECT * FROM ts_parse('tsparser', '345 qwe@efd.r '' http://www.com/ http://aew.
4040
12 | '
4141
14 | http://
4242
6 | www.com
43+
1 | www
44+
12 | .
45+
1 | com
4346
12 | /
4447
14 | http://
4548
5 | aew.werc.ewr/?ad=qwe&dw
@@ -51,6 +54,12 @@ SELECT * FROM ts_parse('tsparser', '345 qwe@efd.r '' http://www.com/ http://aew.
5154
18 | /?ad=qwe&dw
5255
12 |
5356
6 | 2aew.werc.ewr
57+
3 | 2aew
58+
12 | .
59+
6 | werc.ewr
60+
1 | werc
61+
12 | .
62+
1 | ewr
5463
12 |
5564
14 | http://
5665
5 | 3aew.werc.ewr/?ad=qwe&dw
@@ -59,6 +68,12 @@ SELECT * FROM ts_parse('tsparser', '345 qwe@efd.r '' http://www.com/ http://aew.
5968
12 |
6069
14 | http://
6170
6 | 4aew.werc.ewr
71+
3 | 4aew
72+
12 | .
73+
6 | werc.ewr
74+
1 | werc
75+
12 | .
76+
1 | ewr
6277
12 |
6378
14 | http://
6479
5 | 5aew.werc.ewr:8100/?
@@ -177,7 +192,7 @@ SELECT * FROM ts_parse('tsparser', '345 qwe@efd.r '' http://www.com/ http://aew.
177192
12 |
178193
12 | <>
179194
1 | qwerty
180-
(143 rows)
195+
(158 rows)
181196

182197
-- Test text search configuration with parser
183198
CREATE TEXT SEARCH CONFIGURATION english_ts (
@@ -209,3 +224,15 @@ SELECT to_tsvector('english_ts', '12-abc');
209224
'12':2 '12-abc':1 'abc':3
210225
(1 row)
211226

227+
SELECT to_tsvector('english_ts', 'test.com');
228+
to_tsvector
229+
-------------------------------
230+
'com':3 'test':2 'test.com':1
231+
(1 row)
232+
233+
SELECT to_tsvector('english_ts', 'test2.com');
234+
to_tsvector
235+
---------------------------------
236+
'com':3 'test2':2 'test2.com':1
237+
(1 row)
238+

Diff for: sql/pg_tsparser.sql

+2
Original file line numberDiff line numberDiff line change
@@ -24,3 +24,5 @@ ALTER TEXT SEARCH CONFIGURATION english_ts
2424
SELECT to_tsvector('english_ts', 'pg_trgm');
2525
SELECT to_tsvector('english_ts', '12_abc');
2626
SELECT to_tsvector('english_ts', '12-abc');
27+
SELECT to_tsvector('english_ts', 'test.com');
28+
SELECT to_tsvector('english_ts', 'test2.com');

Diff for: tsparser.c

+21-2
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,8 @@ typedef enum
171171
TPS_InPort,
172172
TPS_InHostFirstAN,
173173
TPS_InHost,
174+
TPS_InHostAsciiWord,
175+
TPS_InHostNumWord,
174176
TPS_InEmail,
175177
TPS_InFileFirst,
176178
TPS_InFileTwiddle,
@@ -1443,7 +1445,7 @@ static const TParserStateActionItem actionTPS_InHostDomainSecond[] = {
14431445
};
14441446

14451447
static const TParserStateActionItem actionTPS_InHostDomain[] = {
1446-
{p_isEOF, 0, A_BINGO | A_CLRALL, TPS_Base, HOST, NULL},
1448+
{p_isEOF, 0, A_BINGO | A_CLRALL, TPS_InHostAsciiWord, HOST, SpecialHyphen},
14471449
{p_isasclet, 0, A_NEXT, TPS_InHostDomain, 0, NULL},
14481450
{p_isdigit, 0, A_PUSH, TPS_InHost, 0, NULL},
14491451
{p_iseqC, ':', A_PUSH, TPS_InPortFirst, 0, NULL},
@@ -1454,7 +1456,22 @@ static const TParserStateActionItem actionTPS_InHostDomain[] = {
14541456
{p_isdigit, 0, A_POP, TPS_Null, 0, NULL},
14551457
{p_isstophost, 0, A_BINGO | A_CLRALL, TPS_InURLPathStart, HOST, NULL},
14561458
{p_iseqC, '/', A_PUSH, TPS_InFURL, 0, NULL},
1457-
{NULL, 0, A_BINGO | A_CLRALL, TPS_Base, HOST, NULL}
1459+
{NULL, 0, A_BINGO | A_CLRALL, TPS_InHostAsciiWord, HOST, SpecialHyphen}
1460+
};
1461+
1462+
static const TParserStateActionItem actionTPS_InHostAsciiWord[] = {
1463+
{p_isEOF, 0, A_BINGO, TPS_Base, ASCIIWORD, NULL},
1464+
{p_isasclet, 0, A_NEXT, TPS_Null, 0, NULL},
1465+
{p_isdigit, 0, A_NEXT, TPS_InHostNumWord, 0, NULL},
1466+
{p_isalpha, 0, A_NEXT, TPS_InWord, 0, NULL},
1467+
{p_isspecial, 0, A_NEXT, TPS_InWord, 0, NULL},
1468+
{NULL, 0, A_BINGO, TPS_Base, ASCIIWORD, NULL}
1469+
};
1470+
1471+
static const TParserStateActionItem actionTPS_InHostNumWord[] = {
1472+
{p_isEOF, 0, A_BINGO, TPS_Base, NUMWORD, NULL},
1473+
{p_isalnum, 0, A_NEXT, TPS_InHostNumWord, 0, NULL},
1474+
{NULL, 0, A_BINGO, TPS_Base, NUMWORD, NULL}
14581475
};
14591476

14601477
static const TParserStateActionItem actionTPS_InPortFirst[] = {
@@ -1782,6 +1799,8 @@ static const TParserStateAction Actions[] = {
17821799
TPARSERSTATEACTION(TPS_InPort),
17831800
TPARSERSTATEACTION(TPS_InHostFirstAN),
17841801
TPARSERSTATEACTION(TPS_InHost),
1802+
TPARSERSTATEACTION(TPS_InHostAsciiWord),
1803+
TPARSERSTATEACTION(TPS_InHostNumWord),
17851804
TPARSERSTATEACTION(TPS_InEmail),
17861805
TPARSERSTATEACTION(TPS_InFileFirst),
17871806
TPARSERSTATEACTION(TPS_InFileTwiddle),

0 commit comments

Comments
 (0)