@@ -33,6 +33,9 @@ namespace {
33
33
} else if (input.Ptr ()->IsCallable (" FromPg" )) {
34
34
auto child = TExprBase (input.Ptr ()->ChildRef (0 ));
35
35
return IsAttribute (child, attributeName);
36
+ } else if (auto exists = input.Maybe <TCoExists>()) {
37
+ auto child = TExprBase (input.Ptr ()->ChildRef (0 ));
38
+ return IsAttribute (child, attributeName);
36
39
}
37
40
38
41
return false ;
@@ -182,30 +185,47 @@ namespace {
182
185
}
183
186
}
184
187
188
+ template <typename T>
189
+ TExprNode::TPtr FindNode (const TExprBase& input) {
190
+ for (const auto & child : input.Ptr ()->Children ()) {
191
+ if (TExprBase (child).Maybe <T>()) {
192
+ return child;
193
+ }
194
+
195
+ auto tmp = FindNode<T>(TExprBase (child));
196
+ if (tmp != nullptr ) {
197
+ return tmp;
198
+ }
199
+ }
200
+
201
+ return nullptr ;
202
+ }
203
+
185
204
/* *
186
205
* Compute the selectivity of a predicate given statistics about the input it operates on
187
206
*/
188
207
double NYql::NDq::ComputePredicateSelectivity (const TExprBase& input, const std::shared_ptr<TOptimizerStatistics>& stats) {
189
- double result = 1.0 ;
208
+ std::optional< double > resSelectivity ;
190
209
191
210
// Process OptionalIf, just return the predicate statistics
192
211
if (auto optIf = input.Maybe <TCoOptionalIf>()) {
193
- result = ComputePredicateSelectivity (optIf.Cast ().Predicate (), stats);
212
+ resSelectivity = ComputePredicateSelectivity (optIf.Cast ().Predicate (), stats);
194
213
}
195
214
196
215
// Same with Coalesce
197
216
else if (auto coalesce = input.Maybe <TCoCoalesce>()) {
198
- result = ComputePredicateSelectivity (coalesce.Cast ().Predicate (), stats);
217
+ resSelectivity = ComputePredicateSelectivity (coalesce.Cast ().Predicate (), stats);
199
218
}
200
219
201
- else if (input.Ptr ()->IsCallable (" FromPg" )) {
220
+ else if (
221
+ input.Ptr ()->IsCallable (" FromPg" ) ||
222
+ input.Ptr ()->IsCallable (" Exists" ) ||
223
+ input.Ptr ()->IsCallable (" AssumeStrict" ) ||
224
+ input.Ptr ()->IsCallable (" Apply" ) ||
225
+ input.Ptr ()->IsCallable (" Udf" )
226
+ ) {
202
227
auto child = TExprBase (input.Ptr ()->ChildRef (0 ));
203
- result = ComputePredicateSelectivity (child, stats);
204
- }
205
-
206
- else if (input.Ptr ()->IsCallable (" Exists" )) {
207
- auto child = TExprBase (input.Ptr ()->ChildRef (0 ));
208
- result = ComputePredicateSelectivity (child, stats);
228
+ resSelectivity = ComputePredicateSelectivity (child, stats);
209
229
}
210
230
211
231
else if (input.Ptr ()->IsCallable (" Find" ) || input.Ptr ()->IsCallable (" StringContains" )) {
@@ -214,7 +234,7 @@ double NYql::NDq::ComputePredicateSelectivity(const TExprBase& input, const std:
214
234
215
235
TString attributeName;
216
236
if (IsAttribute (member, attributeName) && IsConstantExpr (stringPred.Ptr ())) {
217
- result = 0.1 ;
237
+ resSelectivity = 0.1 ;
218
238
}
219
239
}
220
240
@@ -224,35 +244,35 @@ double NYql::NDq::ComputePredicateSelectivity(const TExprBase& input, const std:
224
244
// In case of NOT we subtract the argument's selectivity from 1.0
225
245
226
246
else if (auto andNode = input.Maybe <TCoAnd>()) {
227
- double res = 1.0 ;
247
+ double tmpSelectivity = 1.0 ;
228
248
for (size_t i = 0 ; i < andNode.Cast ().ArgCount (); i++) {
229
- res *= ComputePredicateSelectivity (andNode.Cast ().Arg (i), stats);
249
+ tmpSelectivity *= ComputePredicateSelectivity (andNode.Cast ().Arg (i), stats);
230
250
}
231
- result = res ;
251
+ resSelectivity = tmpSelectivity ;
232
252
} else if (auto orNode = input.Maybe <TCoOr>()) {
233
- double res = 0.0 ;
253
+ double tmpSelectivity = 0.0 ;
234
254
for (size_t i = 0 ; i < orNode.Cast ().ArgCount (); i++) {
235
- res += ComputePredicateSelectivity (orNode.Cast ().Arg (i), stats);
255
+ tmpSelectivity += ComputePredicateSelectivity (orNode.Cast ().Arg (i), stats);
236
256
}
237
- result = std::max (res, 1.0 ) ;
257
+ resSelectivity = tmpSelectivity ;
238
258
} else if (auto notNode = input.Maybe <TCoNot>()) {
239
259
double argSel = ComputePredicateSelectivity (notNode.Cast ().Value (), stats);
240
- result = 1.0 - (argSel == 1.0 ? 0.95 : argSel);
260
+ resSelectivity = 1.0 - (argSel == 1.0 ? 0.95 : argSel);
241
261
}
242
262
243
263
// Process the equality predicate
244
264
else if (auto equality = input.Maybe <TCoCmpEqual>()) {
245
265
auto left = equality.Cast ().Left ();
246
266
auto right = equality.Cast ().Right ();
247
267
248
- result = ComputeEqualitySelectivity (left, right, stats);
268
+ resSelectivity = ComputeEqualitySelectivity (left, right, stats);
249
269
}
250
270
251
271
else if (input.Ptr ()->IsCallable (" PgResolvedOp" ) && input.Ptr ()->ChildPtr (0 )->Content ()==" =" ) {
252
272
auto left = TExprBase (input.Ptr ()->ChildPtr (2 ));
253
273
auto right = TExprBase (input.Ptr ()->ChildPtr (3 ));
254
274
255
- result = ComputeEqualitySelectivity (left, right, stats);
275
+ resSelectivity = ComputeEqualitySelectivity (left, right, stats);
256
276
}
257
277
258
278
// Process the not equal predicate
@@ -261,55 +281,78 @@ double NYql::NDq::ComputePredicateSelectivity(const TExprBase& input, const std:
261
281
auto right = equality.Cast ().Right ();
262
282
263
283
double eqSel = ComputeEqualitySelectivity (left, right, stats);
264
- result = 1.0 - (eqSel == 1.0 ? 0.95 : eqSel);
284
+ resSelectivity = 1.0 - (eqSel == 1.0 ? 0.95 : eqSel);
265
285
}
266
286
267
287
else if (input.Ptr ()->IsCallable (" PgResolvedOp" ) && input.Ptr ()->ChildPtr (0 )->Content ()==" <>" ) {
268
288
auto left = TExprBase (input.Ptr ()->ChildPtr (2 ));
269
289
auto right = TExprBase (input.Ptr ()->ChildPtr (3 ));
270
290
271
291
double eqSel = ComputeEqualitySelectivity (left, right, stats);
272
- result = 1.0 - (eqSel == 1.0 ? 0.95 : eqSel);
292
+ resSelectivity = 1.0 - (eqSel == 1.0 ? 0.95 : eqSel);
273
293
}
274
294
275
295
// Process all other comparison predicates
276
296
else if (auto comparison = input.Maybe <TCoCompare>()) {
277
297
auto left = comparison.Cast ().Left ();
278
298
auto right = comparison.Cast ().Right ();
279
299
280
- result = ComputeComparisonSelectivity (left, right, stats);
300
+ resSelectivity = ComputeComparisonSelectivity (left, right, stats);
281
301
}
282
302
283
303
else if (input.Ptr ()->IsCallable (" PgResolvedOp" ) && PgInequalityPreds.contains (input.Ptr ()->ChildPtr (0 )->Content ())){
284
304
auto left = TExprBase (input.Ptr ()->ChildPtr (2 ));
285
305
auto right = TExprBase (input.Ptr ()->ChildPtr (3 ));
286
306
287
- result = ComputeComparisonSelectivity (left, right, stats);
307
+ resSelectivity = ComputeComparisonSelectivity (left, right, stats);
288
308
}
289
309
290
310
// Process SqlIn
291
311
else if (input.Ptr ()->IsCallable (" SqlIn" )) {
292
- auto left = TExprBase (input.Ptr ()->ChildPtr (0 ));
293
- auto right = TExprBase (input.Ptr ()->ChildPtr (1 ));
312
+ auto list = input.Ptr ()->ChildPtr (0 );
294
313
295
- TString attributeName;
314
+ double tmpSelectivity = 0.0 ;
315
+ auto lhs = TExprBase (input.Ptr ()->ChildPtr (1 ));
316
+ for (const auto & child: list->Children ()) {
317
+ TExprBase rhs = TExprBase (child);
318
+ tmpSelectivity += ComputeEqualitySelectivity (lhs, rhs, stats);
319
+ }
320
+ resSelectivity = tmpSelectivity;
321
+ }
296
322
297
- if (IsAttribute (right, attributeName) && IsConstantExpr (left.Ptr ())) {
298
- std::swap (left, right);
323
+ else if (input.Maybe <TCoAtom>()) {
324
+ auto atom = input.Cast <TCoAtom>();
325
+ // regexp
326
+ if (atom.StringValue ().StartsWith (" Re2" )) {
327
+ resSelectivity = 0.5 ;
299
328
}
329
+ }
300
330
301
- if (IsAttribute (left, attributeName) && IsConstantExpr (right.Ptr ())) {
302
- if (right.Ptr ()->IsCallable (" AsList" )) {
303
- auto size = right.Ptr ()->Child (0 )->ChildrenSize ();
304
- if (stats->KeyColumns && stats->KeyColumns ->Data .size ()==1 && attributeName==stats->KeyColumns ->Data [0 ]) {
305
- result = size / stats->Nrows ;
306
- } else {
307
- result = 0.1 + 0.2 / (1 + std::exp (size));
331
+ else if (auto maybeIfExpr = input.Maybe <TCoIf>()) {
332
+ auto ifExpr = maybeIfExpr.Cast ();
333
+
334
+ // attr in ('a', 'b', 'c' ...)
335
+ if (ifExpr.Predicate ().Maybe <TCoExists>() && ifExpr.ThenValue ().Maybe <TCoJust>() && ifExpr.ElseValue ().Maybe <TCoNothing>()) {
336
+ auto list = FindNode<TExprList>(ifExpr.ThenValue ());
337
+
338
+ if (list != nullptr ) {
339
+ double tmpSelectivity = 0.0 ;
340
+ TExprBase lhs = ifExpr.Predicate ();
341
+ for (const auto & child: list->Children ()) {
342
+ TExprBase rhs = TExprBase (child);
343
+ tmpSelectivity += ComputeEqualitySelectivity (lhs, rhs, stats);
308
344
}
309
345
346
+ resSelectivity = tmpSelectivity;
310
347
}
311
348
}
312
349
}
313
350
314
- return result;
351
+ if (!resSelectivity.has_value ()) {
352
+ auto dumped = input.Raw ()->Dump ();
353
+ YQL_CLOG (WARN, CoreDq) << " ComputePredicateSelectivity NOT FOUND : " << dumped;
354
+ return 1.0 ;
355
+ }
356
+
357
+ return std::min (1.0 , resSelectivity.value ());
315
358
}
0 commit comments