Skip to content

Commit 9ac931a

Browse files
committed
Fix support for phrase search in the <=> operator. PGPRO-618
1 parent ead7ae4 commit 9ac931a

File tree

3 files changed

+82
-27
lines changed

3 files changed

+82
-27
lines changed

Diff for: expected/rum.out

+12
Original file line numberDiff line numberDiff line change
@@ -350,3 +350,15 @@ SELECT a <=> to_tsquery('pg_catalog.english', 'b:*'), *
350350
16.4493 | the few that escaped destruction in 1693. It is a beautiful, highly | '1693':7 'beauti':11 'destruct':5 'escap':4 'high':12
351351
(20 rows)
352352

353+
select 'bjarn:6237 stroustrup:6238'::tsvector <=> 'bjarn <-> stroustrup'::tsquery;
354+
?column?
355+
----------
356+
8.22467
357+
(1 row)
358+
359+
SELECT 'stroustrup:5508B,6233B,6238B bjarn:6235B,6237B' <=> 'bjarn <-> stroustrup'::tsquery;
360+
?column?
361+
----------
362+
2.05617
363+
(1 row)
364+

Diff for: sql/rum.sql

+2
Original file line numberDiff line numberDiff line change
@@ -115,3 +115,5 @@ SELECT a <=> to_tsquery('pg_catalog.english', 'b:*'), *
115115
WHERE a @@ to_tsquery('pg_catalog.english', 'b:*')
116116
ORDER BY a <=> to_tsquery('pg_catalog.english', 'b:*');
117117

118+
select 'bjarn:6237 stroustrup:6238'::tsvector <=> 'bjarn <-> stroustrup'::tsquery;
119+
SELECT 'stroustrup:5508B,6233B,6238B bjarn:6235B,6237B' <=> 'bjarn <-> stroustrup'::tsquery;

Diff for: src/rum_ts_utils.c

+68-27
Original file line numberDiff line numberDiff line change
@@ -99,14 +99,21 @@ typedef struct
9999
int32 pos;
100100
} DocRepresentation;
101101

102+
typedef struct
103+
{
104+
bool operandexist;
105+
WordEntryPos pos;
106+
}
107+
QueryRepresentationOperand;
108+
102109
typedef struct
103110
{
104111
TSQuery query;
105112
/* Used in rum_tsquery_distance() */
106113
int *map_item_operand;
107114

108-
bool *operandexist;
109-
int lenght;
115+
QueryRepresentationOperand *operandData;
116+
int length;
110117
} QueryRepresentation;
111118

112119
typedef struct
@@ -135,8 +142,8 @@ static WordEntryPosVector POSNULL = {
135142
#define RANK_NORM_RDIVRPLUS1 0x20
136143
#define DEF_NORM_METHOD RANK_NO_NORM
137144

138-
#define QR_GET_OPERAND_EXISTS(q, v) ( (q)->operandexist[ ((QueryItem*)(v)) - GETQUERY((q)->query) ] )
139-
#define QR_SET_OPERAND_EXISTS(q, v) QR_GET_OPERAND_EXISTS(q,v) = true
145+
#define QR_GET_OPERAND(q, v) \
146+
(&((q)->operandData[ ((QueryItem*)(v)) - GETQUERY((q)->query) ]))
140147

141148
static bool
142149
pre_checkcondition_rum(void *checkval, QueryOperand *val, ExecPhraseData *data)
@@ -826,15 +833,23 @@ checkcondition_QueryOperand(void *checkval, QueryOperand *val,
826833
ExecPhraseData *data)
827834
{
828835
QueryRepresentation *qr = (QueryRepresentation *) checkval;
836+
QueryRepresentationOperand *qro;
829837

830838
/* Check for rum_tsquery_distance() */
831839
if (qr->map_item_operand != NULL)
840+
qro = qr->operandData +
841+
qr->map_item_operand[(QueryItem *) val - GETQUERY(qr->query)];
842+
else
843+
qro = QR_GET_OPERAND(qr, val);
844+
845+
if (data && qro->operandexist)
832846
{
833-
int i = (QueryItem *) val - GETQUERY(qr->query);
834-
return qr->operandexist[qr->map_item_operand[i]];
847+
data->npos = 1;
848+
data->pos = &qro->pos;
849+
data->allocated = false;
835850
}
836851

837-
return QR_GET_OPERAND_EXISTS(qr, val);
852+
return qro->operandexist;
838853
}
839854

840855
static bool
@@ -850,7 +865,7 @@ Cover(DocRepresentation *doc, uint32 len, QueryRepresentation *qr,
850865
lastpos = ext->pos;
851866
found = false;
852867

853-
memset(qr->operandexist, 0, sizeof(bool) * qr->lenght);
868+
memset(qr->operandData, 0, sizeof(qr->operandData[0]) * qr->length);
854869

855870
ext->p = 0x7fffffff;
856871
ext->q = 0;
@@ -859,16 +874,28 @@ Cover(DocRepresentation *doc, uint32 len, QueryRepresentation *qr,
859874
/* find upper bound of cover from current position, move up */
860875
while (ptr - doc < len)
861876
{
877+
QueryRepresentationOperand *qro;
878+
862879
if (qr->map_item_operand != NULL)
863880
{
864-
qr->operandexist[ptr->data.key.keyn] = true;
881+
qro = qr->operandData + ptr->data.key.keyn;
882+
qro->operandexist = true;
883+
WEP_SETPOS(qro->pos, ptr->pos);
884+
WEP_SETWEIGHT(qro->pos, ptr->wclass);
865885
}
866886
else
867887
{
868888
for (i = 0; i < ptr->data.item.nitem; i++)
869-
QR_SET_OPERAND_EXISTS(qr, ptr->data.item.item[i]);
889+
{
890+
qro = QR_GET_OPERAND(qr, ptr->data.item.item[i]);
891+
qro->operandexist = true;
892+
WEP_SETPOS(qro->pos, ptr->pos);
893+
WEP_SETWEIGHT(qro->pos, ptr->wclass);
894+
}
870895
}
871-
if (TS_execute(GETQUERY(qr->query), (void *) qr, false,
896+
897+
898+
if (TS_execute(GETQUERY(qr->query), (void *) qr, TS_EXEC_EMPTY,
872899
checkcondition_QueryOperand))
873900
{
874901
if (ptr->pos > ext->q)
@@ -886,7 +913,7 @@ Cover(DocRepresentation *doc, uint32 len, QueryRepresentation *qr,
886913
if (!found)
887914
return false;
888915

889-
memset(qr->operandexist, 0, sizeof(bool) * qr->lenght);
916+
memset(qr->operandData, 0, sizeof(qr->operandData[0]) * qr->length);
890917

891918
ptr = doc + lastpos;
892919

@@ -895,14 +922,21 @@ Cover(DocRepresentation *doc, uint32 len, QueryRepresentation *qr,
895922
{
896923
if (qr->map_item_operand != NULL)
897924
{
898-
qr->operandexist[ptr->data.key.keyn] = true;
925+
qr->operandData[ptr->data.key.keyn].operandexist = true;
899926
}
900927
else
901928
{
902929
for (i = 0; i < ptr->data.item.nitem; i++)
903-
QR_SET_OPERAND_EXISTS(qr, ptr->data.item.item[i]);
930+
{
931+
QueryRepresentationOperand *qro =
932+
QR_GET_OPERAND(qr, ptr->data.item.item[i]);
933+
934+
qro->operandexist = true;
935+
WEP_SETPOS(qro->pos, ptr->pos);
936+
WEP_SETWEIGHT(qro->pos, ptr->wclass);
937+
}
904938
}
905-
if (TS_execute(GETQUERY(qr->query), (void *) qr, true,
939+
if (TS_execute(GETQUERY(qr->query), (void *) qr, TS_EXEC_CALC_NOT,
906940
checkcondition_QueryOperand))
907941
{
908942
if (ptr->pos < ext->p)
@@ -1083,7 +1117,7 @@ get_docrep(TSVector txt, QueryRepresentation *qr, uint32 *doclen)
10831117

10841118
curoperand = &item[i].qoperand;
10851119

1086-
if (QR_GET_OPERAND_EXISTS(qr, &item[i]))
1120+
if (QR_GET_OPERAND(qr, &item[i])->operandexist)
10871121
continue;
10881122

10891123
firstentry = entry = find_wordentry(txt, qr->query, curoperand, &nitem);
@@ -1128,14 +1162,21 @@ get_docrep(TSVector txt, QueryRepresentation *qr, uint32 *doclen)
11281162
(item[k].type == QI_VAL &&
11291163
compareQueryOperand(&kptr, &iptr, operand) == 0))
11301164
{
1165+
QueryRepresentationOperand *qro;
1166+
11311167
/*
11321168
* if k == i, we've already checked above that
11331169
* its type == QI_VAL
11341170
*/
11351171
doc[cur].data.item.item[doc[cur].data.item.nitem] =
11361172
item + k;
11371173
doc[cur].data.item.nitem++;
1138-
QR_SET_OPERAND_EXISTS(qr, item + k);
1174+
1175+
qro = QR_GET_OPERAND(qr, item + k);
1176+
1177+
qro->operandexist = true;
1178+
qro->pos = post[j];
1179+
11391180
}
11401181
}
11411182
}
@@ -1236,8 +1277,8 @@ calc_score_docr(float4 *arrdata, DocRepresentation *doc, uint32 doclen,
12361277
cover_keys[new_cover_idx] = new_cover_key;
12371278

12381279
/* Compute the number of query terms in the cover */
1239-
for (i = 0; i < qr->lenght; i++)
1240-
if (qr->operandexist[i])
1280+
for (i = 0; i < qr->length; i++)
1281+
if (qr->operandData[i].operandexist)
12411282
nitems++;
12421283

12431284
Cpos = ((double) (ext.end - ext.begin + 1)) / InvSum;
@@ -1298,20 +1339,20 @@ calc_score_addinfo(float4 *arrdata, bool *check, TSQuery query,
12981339

12991340
qr.query = query;
13001341
qr.map_item_operand = map_item_operand;
1301-
qr.operandexist = (bool *) palloc0(sizeof(bool) * nkeys);
1302-
qr.lenght = nkeys;
1342+
qr.operandData = palloc0(sizeof(qr.operandData[0]) * nkeys);
1343+
qr.length = nkeys;
13031344

13041345
doc = get_docrep_addinfo(check, &qr, addInfo, addInfoIsNull, &doclen);
13051346
if (!doc)
13061347
{
1307-
pfree(qr.operandexist);
1348+
pfree(qr.operandData);
13081349
return 0.0;
13091350
}
13101351

13111352
Wdoc = calc_score_docr(arrdata, doc, doclen, &qr, DEF_NORM_METHOD);
13121353

13131354
pfree(doc);
1314-
pfree(qr.operandexist);
1355+
pfree(qr.operandData);
13151356

13161357
return (float4) Wdoc;
13171358
}
@@ -1327,13 +1368,13 @@ calc_score(float4 *arrdata, TSVector txt, TSQuery query, int method)
13271368

13281369
qr.query = query;
13291370
qr.map_item_operand = NULL;
1330-
qr.operandexist = (bool *) palloc0(sizeof(bool) * query->size);
1331-
qr.lenght = query->size;
1371+
qr.operandData = palloc0(sizeof(qr.operandData[0]) * query->size);
1372+
qr.length = query->size;
13321373

13331374
doc = get_docrep(txt, &qr, &doclen);
13341375
if (!doc)
13351376
{
1336-
pfree(qr.operandexist);
1377+
pfree(qr.operandData);
13371378
return 0.0;
13381379
}
13391380

@@ -1356,7 +1397,7 @@ calc_score(float4 *arrdata, TSVector txt, TSQuery query, int method)
13561397
Wdoc /= log((double) (txt->size + 1)) / log(2.0);
13571398

13581399
pfree(doc);
1359-
pfree(qr.operandexist);
1400+
pfree(qr.operandData);
13601401

13611402
return (float4) Wdoc;
13621403
}

0 commit comments

Comments
 (0)