Skip to content

Commit 9a31a22

Browse files
Merge branch 'main' into web_benchmarks
2 parents 79d88db + 31dc138 commit 9a31a22

File tree

588 files changed

+15633
-26362
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

588 files changed

+15633
-26362
lines changed

.github/actions/run-tests/action.yml

-5
Original file line numberDiff line numberDiff line change
@@ -20,11 +20,6 @@ runs:
2020
path: test-data.xml
2121
if: failure()
2222

23-
- name: Report Coverage
24-
run: coverage report -m
25-
shell: bash -el {0}
26-
if: failure()
27-
2823
- name: Upload coverage to Codecov
2924
uses: codecov/codecov-action@v3
3025
with:

.github/workflows/code-checks.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@ jobs:
8686
if: ${{ steps.build.outcome == 'success' && always() }}
8787

8888
- name: Typing + pylint
89-
uses: pre-commit/action@v3.0.0
89+
uses: pre-commit/action@v3.0.1
9090
with:
9191
extra_args: --verbose --hook-stage manual --all-files
9292
if: ${{ steps.build.outcome == 'success' && always() }}

.github/workflows/comment-commands.yml

+4-6
Original file line numberDiff line numberDiff line change
@@ -24,12 +24,10 @@ jobs:
2424
concurrency:
2525
group: ${{ github.actor }}-preview-docs
2626
steps:
27-
- run: |
28-
if curl --output /dev/null --silent --head --fail "https://pandas.pydata.org/preview/${{ github.event.issue.number }}/"; then
29-
curl -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" -d '{"body": "Website preview of this PR available at: https://pandas.pydata.org/preview/${{ github.event.issue.number }}/"}' https://api.github.com/repos/${{ github.repository }}/issues/${{ github.event.issue.number }}/comments
30-
else
31-
curl -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" -d '{"body": "No preview found for PR #${{ github.event.issue.number }}. Did the docs build complete?"}' https://api.github.com/repos/${{ github.repository }}/issues/${{ github.event.issue.number }}/comments
32-
fi
27+
- uses: pandas-dev/[email protected]
28+
with:
29+
previewer-server: "https://pandas.pydata.org/preview"
30+
artifact-job: "Doc Build and Upload"
3331
asv_run:
3432
runs-on: ubuntu-22.04
3533
# TODO: Support more benchmarking options later, against different branches, against self, etc

.github/workflows/docbuild-and-upload.yml

-7
Original file line numberDiff line numberDiff line change
@@ -90,10 +90,3 @@ jobs:
9090
name: website
9191
path: web/build
9292
retention-days: 14
93-
94-
- name: Trigger web/doc preview
95-
run: curl -X POST https://pandas.pydata.org/preview/submit/$RUN_ID/$PR_ID/
96-
env:
97-
RUN_ID: ${{ github.run_id }}
98-
PR_ID: ${{ github.event.pull_request.number }}
99-
if: github.event_name == 'pull_request'

.github/workflows/package-checks.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ jobs:
2424
runs-on: ubuntu-22.04
2525
strategy:
2626
matrix:
27-
extra: ["test", "performance", "computation", "fss", "aws", "gcp", "excel", "parquet", "feather", "hdf5", "spss", "postgresql", "mysql", "sql-other", "html", "xml", "plot", "output-formatting", "clipboard", "compression", "consortium-standard", "all"]
27+
extra: ["test", "pyarrow", "performance", "computation", "fss", "aws", "gcp", "excel", "parquet", "feather", "hdf5", "spss", "postgresql", "mysql", "sql-other", "html", "xml", "plot", "output-formatting", "clipboard", "compression", "all"]
2828
fail-fast: false
2929
name: Install Extras - ${{ matrix.extra }}
3030
concurrency:

.github/workflows/unit-tests.yml

+7-38
Original file line numberDiff line numberDiff line change
@@ -57,45 +57,14 @@ jobs:
5757
# Also install zh_CN (its encoding is gb2312) but do not activate it.
5858
# It will be temporarily activated during tests with locale.setlocale
5959
extra_loc: "zh_CN"
60-
- name: "Copy-on-Write 3.9"
61-
env_file: actions-39.yaml
62-
pattern: "not slow and not network and not single_cpu"
63-
pandas_copy_on_write: "1"
64-
- name: "Copy-on-Write 3.10"
65-
env_file: actions-310.yaml
66-
pattern: "not slow and not network and not single_cpu"
67-
pandas_copy_on_write: "1"
68-
- name: "Copy-on-Write 3.11"
69-
env_file: actions-311.yaml
70-
pattern: "not slow and not network and not single_cpu"
71-
pandas_copy_on_write: "1"
72-
- name: "Copy-on-Write 3.12"
73-
env_file: actions-312.yaml
74-
pattern: "not slow and not network and not single_cpu"
75-
pandas_copy_on_write: "1"
76-
- name: "Copy-on-Write 3.11 (warnings)"
77-
env_file: actions-311.yaml
78-
pattern: "not slow and not network and not single_cpu"
79-
pandas_copy_on_write: "warn"
80-
- name: "Copy-on-Write 3.10 (warnings)"
81-
env_file: actions-310.yaml
82-
pattern: "not slow and not network and not single_cpu"
83-
pandas_copy_on_write: "warn"
84-
- name: "Copy-on-Write 3.9 (warnings)"
85-
env_file: actions-39.yaml
86-
pattern: "not slow and not network and not single_cpu"
87-
pandas_copy_on_write: "warn"
8860
- name: "Pypy"
8961
env_file: actions-pypy-39.yaml
9062
pattern: "not slow and not network and not single_cpu"
9163
test_args: "--max-worker-restart 0"
9264
- name: "Numpy Dev"
9365
env_file: actions-311-numpydev.yaml
9466
pattern: "not slow and not network and not single_cpu"
95-
# Currently restricted the warnings that error to Deprecation Warnings from numpy
96-
# done since pyarrow isn't compatible with numpydev always
97-
# TODO: work with pyarrow to revert this?
98-
test_args: "-W error::DeprecationWarning:numpy -W error::FutureWarning:numpy"
67+
test_args: "-W error::DeprecationWarning -W error::FutureWarning"
9968
- name: "Pyarrow Nightly"
10069
env_file: actions-311-pyarrownightly.yaml
10170
pattern: "not slow and not network and not single_cpu"
@@ -113,7 +82,6 @@ jobs:
11382
PATTERN: ${{ matrix.pattern }}
11483
LANG: ${{ matrix.lang || 'C.UTF-8' }}
11584
LC_ALL: ${{ matrix.lc_all || '' }}
116-
PANDAS_COPY_ON_WRITE: ${{ matrix.pandas_copy_on_write || '0' }}
11785
PANDAS_CI: ${{ matrix.pandas_ci || '1' }}
11886
TEST_ARGS: ${{ matrix.test_args || '' }}
11987
PYTEST_WORKERS: ${{ matrix.pytest_workers || 'auto' }}
@@ -123,7 +91,7 @@ jobs:
12391
QT_QPA_PLATFORM: offscreen
12492
concurrency:
12593
# https://github.community/t/concurrecy-not-work-for-push/183068/7
126-
group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-${{ matrix.env_file }}-${{ matrix.pattern }}-${{ matrix.extra_apt || '' }}-${{ matrix.pandas_copy_on_write || '' }}
94+
group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-${{ matrix.env_file }}-${{ matrix.pattern }}-${{ matrix.extra_apt || '' }}}
12795
cancel-in-progress: true
12896

12997
services:
@@ -214,7 +182,8 @@ jobs:
214182
timeout-minutes: 90
215183
strategy:
216184
matrix:
217-
os: [macos-latest, windows-latest]
185+
# Note: Don't use macOS latest since macos 14 appears to be arm64 only
186+
os: [macos-13, macos-14, windows-latest]
218187
env_file: [actions-39.yaml, actions-310.yaml, actions-311.yaml, actions-312.yaml]
219188
fail-fast: false
220189
runs-on: ${{ matrix.os }}
@@ -227,8 +196,7 @@ jobs:
227196
PANDAS_CI: 1
228197
PYTEST_TARGET: pandas
229198
PATTERN: "not slow and not db and not network and not single_cpu"
230-
# GH 47443: PYTEST_WORKERS > 0 crashes Windows builds with memory related errors
231-
PYTEST_WORKERS: ${{ matrix.os == 'macos-latest' && 'auto' || '0' }}
199+
PYTEST_WORKERS: 'auto'
232200

233201
steps:
234202
- name: Checkout
@@ -354,7 +322,8 @@ jobs:
354322
strategy:
355323
fail-fast: false
356324
matrix:
357-
os: [ubuntu-22.04, macOS-latest, windows-latest]
325+
# Separate out macOS 13 and 14, since macOS 14 is arm64 only
326+
os: [ubuntu-22.04, macOS-13, macOS-14, windows-latest]
358327

359328
timeout-minutes: 90
360329

.github/workflows/wheels.yml

+10-8
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,9 @@ jobs:
9494
buildplat:
9595
- [ubuntu-22.04, manylinux_x86_64]
9696
- [ubuntu-22.04, musllinux_x86_64]
97-
- [macos-12, macosx_*]
97+
- [macos-12, macosx_x86_64]
98+
# Note: M1 images on Github Actions start from macOS 14
99+
- [macos-14, macosx_arm64]
98100
- [windows-2022, win_amd64]
99101
# TODO: support PyPy?
100102
python: [["cp39", "3.9"], ["cp310", "3.10"], ["cp311", "3.11"], ["cp312", "3.12"]]
@@ -128,7 +130,7 @@ jobs:
128130
# Python version used to build sdist doesn't matter
129131
# wheel will be built from sdist with the correct version
130132
- name: Unzip sdist (macOS)
131-
if: ${{ matrix.buildplat[1] == 'macosx_*' }}
133+
if: ${{ startsWith(matrix.buildplat[1], 'macosx') }}
132134
run: |
133135
tar -xzf ./dist/${{ env.sdist_name }} -C ./dist
134136
@@ -139,18 +141,18 @@ jobs:
139141

140142
- name: Build normal wheels
141143
if: ${{ (env.IS_SCHEDULE_DISPATCH != 'true' || env.IS_PUSH == 'true') }}
142-
uses: pypa/cibuildwheel@v2.16.4
144+
uses: pypa/cibuildwheel@v2.16.5
143145
with:
144-
package-dir: ./dist/${{ matrix.buildplat[1] == 'macosx_*' && env.sdist_name || needs.build_sdist.outputs.sdist_file }}
146+
package-dir: ./dist/${{ startsWith(matrix.buildplat[1], 'macosx') && env.sdist_name || needs.build_sdist.outputs.sdist_file }}
145147
env:
146148
CIBW_PRERELEASE_PYTHONS: True
147149
CIBW_BUILD: ${{ matrix.python[0] }}-${{ matrix.buildplat[1] }}
148150

149151
- name: Build nightly wheels (with NumPy pre-release)
150152
if: ${{ (env.IS_SCHEDULE_DISPATCH == 'true' && env.IS_PUSH != 'true') }}
151-
uses: pypa/cibuildwheel@v2.16.4
153+
uses: pypa/cibuildwheel@v2.16.5
152154
with:
153-
package-dir: ./dist/${{ matrix.buildplat[1] == 'macosx_*' && env.sdist_name || needs.build_sdist.outputs.sdist_file }}
155+
package-dir: ./dist/${{ startsWith(matrix.buildplat[1], 'macosx') && env.sdist_name || needs.build_sdist.outputs.sdist_file }}
154156
env:
155157
# The nightly wheels should be built with the NumPy 2.0 pre-releases
156158
# which requires the additional URL.
@@ -183,15 +185,15 @@ jobs:
183185
$TST_CMD = @"
184186
python -m pip install hypothesis>=6.46.1 pytest>=7.3.2 pytest-xdist>=2.2.0;
185187
python -m pip install `$(Get-Item pandas\wheelhouse\*.whl);
186-
python -c `'import pandas as pd; pd.test(extra_args=[\"`\"--no-strict-data-files`\"\", \"`\"-m not clipboard and not single_cpu and not slow and not network and not db`\"\"])`';
188+
python -c `'import pandas as pd; pd.test(extra_args=[`\"--no-strict-data-files`\", `\"-m not clipboard and not single_cpu and not slow and not network and not db`\"])`';
187189
"@
188190
# add rc to the end of the image name if the Python version is unreleased
189191
docker pull python:${{ matrix.python[1] == '3.12' && '3.12-rc' || format('{0}-windowsservercore', matrix.python[1]) }}
190192
docker run --env PANDAS_CI='1' -v ${PWD}:C:\pandas python:${{ matrix.python[1] == '3.12' && '3.12-rc' || format('{0}-windowsservercore', matrix.python[1]) }} powershell -Command $TST_CMD
191193
192194
- uses: actions/upload-artifact@v4
193195
with:
194-
name: ${{ matrix.python[0] }}-${{ startsWith(matrix.buildplat[1], 'macosx') && 'macosx' || matrix.buildplat[1] }}
196+
name: ${{ matrix.python[0] }}-${{ matrix.buildplat[1] }}
195197
path: ./wheelhouse/*.whl
196198

197199
- name: Upload wheels & sdist

.pre-commit-config.yaml

+6-40
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ ci:
1919
skip: [pylint, pyright, mypy]
2020
repos:
2121
- repo: https://github.com/astral-sh/ruff-pre-commit
22-
rev: v0.1.6
22+
rev: v0.1.13
2323
hooks:
2424
- id: ruff
2525
args: [--exit-non-zero-on-fix]
@@ -31,8 +31,7 @@ repos:
3131
exclude: ^pandas/tests
3232
args: [--select, "ANN001,ANN2", --fix-only, --exit-non-zero-on-fix]
3333
- id: ruff-format
34-
# TODO: "." not needed in ruff 0.1.8
35-
args: ["."]
34+
exclude: ^scripts
3635
- repo: https://github.com/jendrikseipp/vulture
3736
rev: 'v2.10'
3837
hooks:
@@ -54,13 +53,11 @@ repos:
5453
- repo: https://github.com/pre-commit/pre-commit-hooks
5554
rev: v4.5.0
5655
hooks:
57-
- id: check-ast
5856
- id: check-case-conflict
5957
- id: check-toml
6058
- id: check-xml
6159
- id: check-yaml
6260
exclude: ^ci/meta.yaml$
63-
- id: debug-statements
6461
- id: end-of-file-fixer
6562
exclude: \.txt$
6663
- id: mixed-line-ending
@@ -101,8 +98,6 @@ repos:
10198
- repo: https://github.com/pre-commit/pygrep-hooks
10299
rev: v1.10.0
103100
hooks:
104-
- id: python-check-blanket-noqa
105-
- id: python-check-blanket-type-ignore
106101
- id: rst-backticks
107102
- id: rst-directive-colons
108103
types: [text] # overwrite types: [rst]
@@ -132,7 +127,7 @@ repos:
132127
types: [python]
133128
stages: [manual]
134129
additional_dependencies: &pyright_dependencies
135-
130+
136131
- id: pyright
137132
# note: assumes python env is setup and activated
138133
name: pyright reportGeneralTypeIssues
@@ -211,39 +206,22 @@ repos:
211206
language: pygrep
212207
entry: |
213208
(?x)
214-
# pytest.xfail instead of pytest.mark.xfail
215-
pytest\.xfail
216-
217209
# imports from pandas._testing instead of `import pandas._testing as tm`
218-
|from\ pandas\._testing\ import
210+
from\ pandas\._testing\ import
219211
|from\ pandas\ import\ _testing\ as\ tm
220212
221-
# No direct imports from conftest
222-
|conftest\ import
223-
|import\ conftest
224-
225213
# pandas.testing instead of tm
226214
|pd\.testing\.
227215
228216
# pd.api.types instead of from pandas.api.types import ...
229217
|(pd|pandas)\.api\.types\.
230218
231-
# np.testing, np.array_equal
232-
|(numpy|np)(\.testing|\.array_equal)
233-
234-
# unittest.mock (use pytest builtin monkeypatch fixture instead)
235-
|(unittest(\.| import )mock|mock\.Mock\(\)|mock\.patch)
219+
# np.array_equal
220+
|(numpy|np)\.array_equal
236221
237222
# pytest raises without context
238223
|\s\ pytest.raises
239224
240-
# TODO
241-
# pytest.warns (use tm.assert_produces_warning instead)
242-
# |pytest\.warns
243-
244-
# os.remove
245-
|os\.remove
246-
247225
# Unseeded numpy default_rng
248226
|default_rng\(\)
249227
files: ^pandas/tests/
@@ -301,18 +279,6 @@ repos:
301279
files: ^pandas/core/
302280
exclude: ^pandas/core/api\.py$
303281
types: [python]
304-
- id: use-io-common-urlopen
305-
name: Use pandas.io.common.urlopen instead of urllib.request.urlopen
306-
language: python
307-
entry: python scripts/use_io_common_urlopen.py
308-
files: ^pandas/
309-
exclude: ^pandas/tests/
310-
types: [python]
311-
- id: no-bool-in-core-generic
312-
name: Use bool_t instead of bool in pandas/core/generic.py
313-
entry: python scripts/no_bool_in_generic.py
314-
language: python
315-
files: ^pandas/core/generic\.py$
316282
- id: no-return-exception
317283
name: Use raise instead of return for exceptions
318284
language: pygrep

README.md

+4-3
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
1-
<div align="center">
2-
<img src="https://pandas.pydata.org/static/img/pandas.svg"><br>
3-
</div>
1+
<picture align="center">
2+
<source media="(prefers-color-scheme: dark)" srcset="https://pandas.pydata.org/static/img/pandas_white.svg">
3+
<img alt="Pandas Logo" src="https://pandas.pydata.org/static/img/pandas.svg">
4+
</picture>
45

56
-----------------
67

asv_bench/asv.conf.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@
4141
// pip (with all the conda available packages installed first,
4242
// followed by the pip installed packages).
4343
"matrix": {
44-
"Cython": ["3.0.5"],
44+
"Cython": ["3.0"],
4545
"matplotlib": [],
4646
"sqlalchemy": [],
4747
"scipy": [],

asv_bench/benchmarks/algos/isin.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,8 @@ def setup(self, dtype):
5959
elif dtype in ["str", "string[python]", "string[pyarrow]"]:
6060
try:
6161
self.series = Series(
62-
Index([f"i-{i}" for i in range(N)], dtype=object), dtype=dtype
62+
Index([f"i-{i}" for i in range(N)], dtype=object)._values,
63+
dtype=dtype,
6364
)
6465
except ImportError as err:
6566
raise NotImplementedError from err

asv_bench/benchmarks/frame_methods.py

-8
Original file line numberDiff line numberDiff line change
@@ -159,12 +159,6 @@ def setup(self):
159159

160160
def time_items(self):
161161
# (monitor no-copying behaviour)
162-
if hasattr(self.df, "_item_cache"):
163-
self.df._item_cache.clear()
164-
for name, col in self.df.items():
165-
pass
166-
167-
def time_items_cached(self):
168162
for name, col in self.df.items():
169163
pass
170164

@@ -593,8 +587,6 @@ def setup(self):
593587
N = 10000
594588
# this is the worst case, where every column has NaNs.
595589
arr = np.random.randn(N, 100)
596-
# NB: we need to set values in array, not in df.values, otherwise
597-
# the benchmark will be misleading for ArrayManager
598590
arr[::2] = np.nan
599591

600592
self.df = DataFrame(arr)

asv_bench/benchmarks/index_object.py

+4
Original file line numberDiff line numberDiff line change
@@ -136,10 +136,14 @@ def setup(self):
136136
self.int_idxs.append(i_idx)
137137
o_idx = i_idx.astype(str)
138138
self.object_idxs.append(o_idx)
139+
self.same_range_idx = [self.range_idx] * N
139140

140141
def time_append_range_list(self):
141142
self.range_idx.append(self.range_idxs)
142143

144+
def time_append_range_list_same(self):
145+
self.range_idx.append(self.same_range_idx)
146+
143147
def time_append_int_list(self):
144148
self.int_idx.append(self.int_idxs)
145149

0 commit comments

Comments
 (0)