|
3 | 3 | from functools import reduce
|
4 | 4 | from typing import TYPE_CHECKING
|
5 | 5 |
|
| 6 | +from dask.utils_test import hlg_layer |
| 7 | + |
6 | 8 | from dask_sql.datacontainer import DataContainer
|
7 | 9 | from dask_sql.physical.rel.base import BaseRelPlugin
|
8 | 10 | from dask_sql.physical.rel.logical.filter import filter_or_scalar
|
9 | 11 | from dask_sql.physical.rex import RexConverter
|
| 12 | +from dask_sql.physical.utils.filter import attempt_predicate_pushdown |
10 | 13 |
|
11 | 14 | if TYPE_CHECKING:
|
12 | 15 | import dask_sql
|
@@ -81,25 +84,45 @@ def _apply_filters(self, table_scan, rel, dc, context):
|
81 | 84 | conjunctive_dnf_filters = table_scan.getDNFFilters().filtered_exprs
|
82 | 85 | non_dnf_filters = table_scan.getDNFFilters().io_unfilterable_exprs
|
83 | 86 | # All filters here are applied in conjunction (&)
|
84 |
| - if non_dnf_filters or conjunctive_dnf_filters: |
85 |
| - df_condition = reduce( |
86 |
| - operator.and_, |
87 |
| - [ |
88 |
| - RexConverter.convert(rel, rex, dc, context=context) |
89 |
| - for rex in non_dnf_filters |
90 |
| - ], |
91 |
| - ) |
92 |
| - df = filter_or_scalar( |
93 |
| - df, df_condition, conjunctive_filters=conjunctive_dnf_filters |
94 |
| - ) |
95 |
| - if conjunctive_dnf_filters: |
| 87 | + if conjunctive_dnf_filters: |
| 88 | + if non_dnf_filters: |
96 | 89 | df_condition = reduce(
|
97 | 90 | operator.and_,
|
98 | 91 | [
|
99 | 92 | RexConverter.convert(rel, rex, dc, context=context)
|
100 |
| - for rex in all_filters |
| 93 | + for rex in non_dnf_filters |
101 | 94 | ],
|
102 | 95 | )
|
103 |
| - df = filter_or_scalar(df, df_condition) |
| 96 | + df = filter_or_scalar( |
| 97 | + df, df_condition, conjunctive_filters=conjunctive_dnf_filters |
| 98 | + ) |
| 99 | + else: |
| 100 | + df = attempt_predicate_pushdown( |
| 101 | + df, conjunctive_filters=conjunctive_dnf_filters |
| 102 | + ) |
| 103 | + |
| 104 | + df_condition = reduce( |
| 105 | + operator.and_, |
| 106 | + [ |
| 107 | + RexConverter.convert( |
| 108 | + rel, rex, DataContainer(df, cc), context=context |
| 109 | + ) |
| 110 | + for rex in all_filters |
| 111 | + ], |
| 112 | + ) |
| 113 | + df = filter_or_scalar(df, df_condition) |
| 114 | + elif all_filters: |
| 115 | + df_condition = reduce( |
| 116 | + operator.and_, |
| 117 | + [ |
| 118 | + RexConverter.convert(rel, rex, dc, context=context) |
| 119 | + for rex in all_filters |
| 120 | + ], |
| 121 | + ) |
| 122 | + df = filter_or_scalar(df, df_condition) |
| 123 | + try: |
| 124 | + logger.debug(hlg_layer(df.dask, "read-parquet").creation_info) |
| 125 | + except KeyError: |
| 126 | + pass |
104 | 127 |
|
105 | 128 | return DataContainer(df, cc)
|
0 commit comments