pandas-dev
diff --git a/Diff for: ‎.circleci/setup_env.sh
+1-2 b/Diff for: ‎.circleci/setup_env.sh
+1-2
diff --git a/Diff for: ‎.github/actions/build_pandas/action.yml
+2-4 b/Diff for: ‎.github/actions/build_pandas/action.yml
+2-4
diff --git a/Diff for: ‎.github/actions/setup-conda/action.yml
+1-1 b/Diff for: ‎.github/actions/setup-conda/action.yml
+1-1
diff --git a/Diff for: ‎.github/dependabot.yml
+9 b/Diff for: ‎.github/dependabot.yml
+9
diff --git a/Diff for: ‎.github/workflows/32-bit-linux.yml
+1-1 b/Diff for: ‎.github/workflows/32-bit-linux.yml
+1-1
diff --git a/Diff for: ‎.github/workflows/code-checks.yml
+2-2 b/Diff for: ‎.github/workflows/code-checks.yml
+2-2
diff --git a/Diff for: ‎.github/workflows/python-dev.yml
+1-2 b/Diff for: ‎.github/workflows/python-dev.yml
+1-2
diff --git a/Diff for: ‎.github/workflows/stale-pr.yml
+1-1 b/Diff for: ‎.github/workflows/stale-pr.yml
+1-1
diff --git a/Diff for: ‎.github/workflows/ubuntu.yml
+19-6 b/Diff for: ‎.github/workflows/ubuntu.yml
+19-6
diff --git a/Diff for: ‎.gitignore
+3 b/Diff for: ‎.gitignore
+3
diff --git a/Diff for: ‎.pre-commit-config.yaml
+3-11 b/Diff for: ‎.pre-commit-config.yaml
+3-11
diff --git a/Diff for: ‎MANIFEST.in
+2 b/Diff for: ‎MANIFEST.in
+2
diff --git a/Diff for: ‎asv_bench/benchmarks/arithmetic.py
+4 b/Diff for: ‎asv_bench/benchmarks/arithmetic.py
+4
diff --git a/Diff for: ‎asv_bench/benchmarks/strings.py
-7 b/Diff for: ‎asv_bench/benchmarks/strings.py
-7
diff --git a/Diff for: ‎ci/code_checks.sh
-3 b/Diff for: ‎ci/code_checks.sh
-3
diff --git a/Diff for: ‎doc/source/_static/reshaping_pivot.png
5.17 KB b/Diff for: ‎doc/source/_static/reshaping_pivot.png
5.17 KB
diff --git a/Diff for: ‎doc/source/development/contributing_codebase.rst
+2-1 b/Diff for: ‎doc/source/development/contributing_codebase.rst
+2-1
diff --git a/Diff for: ‎doc/source/development/internals.rst
+3-25 b/Diff for: ‎doc/source/development/internals.rst
+3-25
diff --git a/Diff for: ‎doc/source/getting_started/index.rst
+1-1 b/Diff for: ‎doc/source/getting_started/index.rst
+1-1
diff --git a/Diff for: ‎doc/source/getting_started/tutorials.rst
+1-1 b/Diff for: ‎doc/source/getting_started/tutorials.rst
+1-1
diff --git a/Diff for: ‎doc/source/reference/arrays.rst
+4-3 b/Diff for: ‎doc/source/reference/arrays.rst
+4-3
diff --git a/Diff for: ‎doc/source/user_guide/advanced.rst
+1-1 b/Diff for: ‎doc/source/user_guide/advanced.rst
+1-1
@@ -55,8 +55,7 @@ if pip list | grep -q ^pandas; then
 fi
 
 echo "Build extensions"
-# GH 47305: Parallel build can causes flaky ImportError from pandas/_libs/tslibs
-python setup.py build_ext -q -j1
+python setup.py build_ext -q -j4
 
 echo "Install pandas"
 python -m pip install --no-build-isolation --no-use-pep517 -e .
 
@@ -16,7 +16,5 @@ runs:
         python -m pip install -e . --no-build-isolation --no-use-pep517 --no-index
       shell: bash -el {0}
       env:
-        # Cannot use parallel compilation on Windows, see https://github.com/pandas-dev/pandas/issues/30873
-        # GH 47305: Parallel build causes flaky ImportError: /home/runner/work/pandas/pandas/pandas/_libs/tslibs/timestamps.cpython-38-x86_64-linux-gnu.so: undefined symbol: pandas_datetime_to_datetimestruct
-        N_JOBS: 1
-        #N_JOBS: ${{ runner.os == 'Windows' && 1 || 2 }}
+        # https://docs.github.com/en/actions/using-github-hosted-runners/about-github-hosted-runners#supported-runners-and-hardware-resources
+        N_JOBS: ${{ runner.os == 'macOS' && 3 || 2 }}
@@ -30,7 +30,7 @@ runs:
         environment-name: ${{ inputs.environment-name }}
         extra-specs: ${{ inputs.extra-specs }}
         channels: conda-forge
-        channel-priority: ${{ runner.os == 'macOS' && 'flexible' || 'strict' }}
+        channel-priority: 'strict'
         condarc-file: ci/condarc.yml
         cache-env: true
         cache-downloads: true
@@ -0,0 +1,9 @@
+version: 2
+updates:
+  - package-ecosystem: github-actions
+    directory: /
+    schedule:
+      interval: weekly
+    labels:
+      - "CI"
+      - "Dependencies"
@@ -40,7 +40,7 @@ jobs:
           python -m pip install --no-deps -U pip wheel 'setuptools<60.0.0' && \
           python -m pip install versioneer[toml] && \
           python -m pip install cython numpy python-dateutil pytz pytest>=7.0.0 pytest-xdist>=2.2.0 pytest-asyncio>=0.17 hypothesis>=6.34.2 && \
-          python setup.py build_ext -q -j1 && \
+          python setup.py build_ext -q -j$(nproc) && \
           python -m pip install --no-build-isolation --no-use-pep517 -e . && \
           python -m pip list && \
           export PANDAS_CI=1 && \
 
@@ -35,7 +35,7 @@ jobs:
         python-version: '3.9'
 
     - name: Run pre-commit
-      uses: pre-commit/action@v2.0.3
+      uses: pre-commit/action@v3.0.0
       with:
         extra_args: --verbose --all-files
 
@@ -93,7 +93,7 @@ jobs:
       if: ${{ steps.build.outcome == 'success' && always() }}
 
     - name: Typing + pylint
-      uses: pre-commit/action@v2.0.3
+      uses: pre-commit/action@v3.0.0
       with:
         extra_args: --verbose --hook-stage manual --all-files
       if: ${{ steps.build.outcome == 'success' && always() }}
 
@@ -82,10 +82,9 @@ jobs:
         python -m pip install python-dateutil pytz cython hypothesis>=6.34.2 pytest>=7.0.0 pytest-xdist>=2.2.0 pytest-cov pytest-asyncio>=0.17
         python -m pip list
 
-    # GH 47305: Parallel build can cause flaky ImportError from pandas/_libs/tslibs
     - name: Build Pandas
       run: |
-        python setup.py build_ext -q -j1
+        python setup.py build_ext -q -j4
         python -m pip install -e . --no-build-isolation --no-use-pep517 --no-index
 
     - name: Build Version
 
@@ -13,7 +13,7 @@ jobs:
       pull-requests: write
     runs-on: ubuntu-22.04
     steps:
-    - uses: actions/stale@v4
+    - uses: actions/stale@v8
       with:
         repo-token: ${{ secrets.GITHUB_TOKEN }}
         stale-pr-message: "This pull request is stale because it has been open for thirty days with no activity. Please [update](https://pandas.pydata.org/pandas-docs/stable/development/contributing.html#updating-your-pull-request) and respond to this comment if you're still interested in working on this."
 
@@ -26,9 +26,9 @@ jobs:
     strategy:
       matrix:
         env_file: [actions-38.yaml, actions-39.yaml, actions-310.yaml, actions-311.yaml]
-        pattern: ["not single_cpu", "single_cpu"]
+        # Prevent the include jobs from overriding other jobs
+        pattern: [""]
         pyarrow_version: ["8", "9", "10"]
-        pandas_ci: [1]
         include:
           - name: "Downstream Compat"
             env_file: actions-38-downstream_compat.yaml
@@ -75,7 +75,7 @@ jobs:
             test_args: "-W error::DeprecationWarning -W error::FutureWarning"
             # TODO(cython3): Re-enable once next-beta(after beta 1) comes out
             # There are some warnings failing the build with -werror
-            pandas_ci: 0
+            pandas_ci: "0"
         exclude:
           - env_file: actions-38.yaml
             pyarrow_version: "8"
@@ -99,9 +99,9 @@ jobs:
       LC_ALL: ${{ matrix.lc_all || '' }}
       PANDAS_DATA_MANAGER: ${{ matrix.pandas_data_manager || 'block' }}
       PANDAS_COPY_ON_WRITE: ${{ matrix.pandas_copy_on_write || '0' }}
-      PANDAS_CI: ${{ matrix.pandas_ci }}
+      PANDAS_CI: ${{ matrix.pandas_ci || '1' }}
       TEST_ARGS: ${{ matrix.test_args || '' }}
-      PYTEST_WORKERS: ${{ contains(matrix.pattern, 'not single_cpu') && 'auto' || '1' }}
+      PYTEST_WORKERS: 'auto'
       PYTEST_TARGET: ${{ matrix.pytest_target || 'pandas' }}
       IS_PYPY: ${{ contains(matrix.env_file, 'pypy') }}
       # TODO: re-enable coverage on pypy, its slow
@@ -170,9 +170,22 @@ jobs:
         pyarrow-version: ${{ matrix.pyarrow_version }}
 
     - name: Build Pandas
+      id: build
       uses: ./.github/actions/build_pandas
 
-    - name: Test
+    - name: Test (not single_cpu)
       uses: ./.github/actions/run-tests
       # TODO: Don't continue on error for PyPy
       continue-on-error: ${{ env.IS_PYPY == 'true' }}
+      env:
+        # Set pattern to not single_cpu if not already set
+        PATTERN: ${{ env.PATTERN == '' && 'not single_cpu' || matrix.pattern }}
+
+    - name: Test (single_cpu)
+      uses: ./.github/actions/run-tests
+      # TODO: Don't continue on error for PyPy
+      continue-on-error: ${{ env.IS_PYPY == 'true' }}
+      env:
+        PATTERN: 'single_cpu'
+        PYTEST_WORKERS: 1
+      if: ${{ matrix.pattern == '' && (always() && steps.build.outcome == 'success')}}
@@ -53,6 +53,9 @@ dist
 # type checkers
 pandas/py.typed
 
+# pyenv
+.python-version
+
 # tox testing tool
 .tox
 # rope
 
@@ -28,7 +28,7 @@ repos:
         types_or: [python, pyi]
         additional_dependencies: [black==23.1.0]
 -   repo: https://github.com/charliermarsh/ruff-pre-commit
-    rev: v0.0.255
+    rev: v0.0.259
     hooks:
     -   id: ruff
         args: [--exit-non-zero-on-fix]
@@ -392,14 +392,6 @@ repos:
         files: ^pandas/
         exclude: ^(pandas/_libs/|pandas/tests/|pandas/errors/__init__.py$|pandas/_version.py)
         types: [python]
-    -   id: flake8-pyi
-        name: flake8-pyi
-        entry: flake8 --extend-ignore=E301,E302,E305,E701,E704
-        types: [pyi]
-        language: python
-        additional_dependencies:
-        - flake8==5.0.4
-        - flake8-pyi==22.8.1
     -   id: future-annotations
         name: import annotations from __future__
         entry: 'from __future__ import annotations'
@@ -421,8 +413,8 @@ repos:
         language: python
         stages: [manual]
         additional_dependencies:
-        - autotyping==22.9.0
-        - libcst==0.4.7
+        - autotyping==23.3.0
+        - libcst==0.4.9
     -   id: check-test-naming
         name: check that test names start with 'test'
         entry: python -m scripts.check_test_naming
 
@@ -58,3 +58,5 @@ prune pandas/tests/io/parser/data
 # Selectively re-add *.cxx files that were excluded above
 graft pandas/_libs/src
 graft pandas/_libs/tslibs/src
+include pandas/_libs/pd_parser.h
+include pandas/_libs/pd_parser.c
@@ -266,10 +266,14 @@ def setup(self, tz):
         self.ts = self.s[halfway]
 
         self.s2 = Series(date_range("20010101", periods=N, freq="s", tz=tz))
+        self.ts_different_reso = Timestamp("2001-01-02", tz=tz)
 
     def time_series_timestamp_compare(self, tz):
         self.s <= self.ts
 
+    def time_series_timestamp_different_reso_compare(self, tz):
+        self.s <= self.ts_different_reso
+
     def time_timestamp_series_compare(self, tz):
         self.ts >= self.s
 
 
@@ -34,7 +34,6 @@ def setup(self, dtype):
 
         # GH37371. Testing construction of string series/frames from ExtensionArrays
         self.series_cat_arr = Categorical(self.series_arr)
-        self.frame_cat_arr = Categorical(self.frame_arr)
 
     def time_series_construction(self, dtype):
         Series(self.series_arr, dtype=dtype)
@@ -54,12 +53,6 @@ def time_cat_series_construction(self, dtype):
     def peakmem_cat_series_construction(self, dtype):
         Series(self.series_cat_arr, dtype=dtype)
 
-    def time_cat_frame_construction(self, dtype):
-        DataFrame(self.frame_cat_arr, dtype=dtype)
-
-    def peakmem_cat_frame_construction(self, dtype):
-        DataFrame(self.frame_cat_arr, dtype=dtype)
-
 
 class Methods(Dtypes):
     def time_center(self, dtype):
 
@@ -86,8 +86,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
     MSG='Partially validate docstrings (EX01)' ;  echo $MSG
     $BASE_DIR/scripts/validate_docstrings.py --format=actions --errors=EX01 --ignore_functions \
         pandas.Series.index \
-        pandas.Series.hasnans \
-        pandas.Series.to_list \
         pandas.Series.__iter__ \
         pandas.Series.keys \
         pandas.Series.item \
@@ -309,7 +307,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         pandas_object \
         pandas.api.interchange.from_dataframe \
         pandas.Index.values \
-        pandas.Index.hasnans \
         pandas.Index.dtype \
         pandas.Index.inferred_type \
         pandas.Index.shape \
 
@@ -812,7 +812,8 @@ install pandas) by typing::
     your installation is probably fine and you can start contributing!
 
 Often it is worth running only a subset of tests first around your changes before running the
-entire suite.
+entire suite (tip: you can use the `pandas-coverage app <https://pandas-coverage.herokuapp.com/>`_
+to find out which tests hit the lines of code you've modified, and then run only those).
 
 The easiest way to do this is with::
 
 
@@ -31,31 +31,9 @@ There are functions that make the creation of a regular index easy:
 * :func:`period_range`: fixed frequency date range generated from a time rule or
   DateOffset. An ndarray of :class:`Period` objects, representing timespans
 
-The motivation for having an ``Index`` class in the first place was to enable
-different implementations of indexing. This means that it's possible for you,
-the user, to implement a custom ``Index`` subclass that may be better suited to
-a particular application than the ones provided in pandas.
-
-From an internal implementation point of view, the relevant methods that an
-``Index`` must define are one or more of the following (depending on how
-incompatible the new object internals are with the ``Index`` functions):
-
-* :meth:`~Index.get_loc`: returns an "indexer" (an integer, or in some cases a
-  slice object) for a label
-* :meth:`~Index.slice_locs`: returns the "range" to slice between two labels
-* :meth:`~Index.get_indexer`: Computes the indexing vector for reindexing / data
-  alignment purposes. See the source / docstrings for more on this
-* :meth:`~Index.get_indexer_non_unique`: Computes the indexing vector for reindexing / data
-  alignment purposes when the index is non-unique. See the source / docstrings
-  for more on this
-* :meth:`~Index.reindex`: Does any pre-conversion of the input index then calls
-  ``get_indexer``
-* :meth:`~Index.union`, :meth:`~Index.intersection`: computes the union or intersection of two
-  Index objects
-* :meth:`~Index.insert`: Inserts a new label into an Index, yielding a new object
-* :meth:`~Index.delete`: Delete a label, yielding a new object
-* :meth:`~Index.drop`: Deletes a set of labels
-* :meth:`~Index.take`: Analogous to ndarray.take
+.. warning::
+
+   Custom :class:`Index` subclasses are not supported; custom behavior should be implemented using the :class:`ExtensionArray` interface instead.
 
 MultiIndex
 ~~~~~~~~~~
 
@@ -533,7 +533,7 @@ Data sets do not only contain numerical data. pandas provides a wide range of fu
 Coming from...
 --------------
 
-Are you familiar with other software for manipulating tablular data? Learn
+Are you familiar with other software for manipulating tabular data? Learn
 the pandas-equivalent operations compared to software you already know:
 
 .. panels::
 
@@ -113,7 +113,7 @@ Various tutorials
 * `Wes McKinney's (pandas BDFL) blog <https://wesmckinney.com/archives.html>`_
 * `Statistical analysis made easy in Python with SciPy and pandas DataFrames, by Randal Olson <http://www.randalolson.com/2012/08/06/statistical-analysis-made-easy-in-python/>`_
 * `Statistical Data Analysis in Python, tutorial videos, by Christopher Fonnesbeck from SciPy 2013 <https://conference.scipy.org/scipy2013/tutorial_detail.php?id=109>`_
-* `Financial analysis in Python, by Thomas Wiecki <https://nbviewer.ipython.org/github/twiecki/financial-analysis-python-tutorial/blob/master/1.%20Pandas%20Basics.ipynb>`_
+* `Financial analysis in Python, by Thomas Wiecki <https://nbviewer.org/github/twiecki/financial-analysis-python-tutorial/blob/master/1.%20Pandas%20Basics.ipynb>`_
 * `Intro to pandas data structures, by Greg Reda <http://www.gregreda.com/2013/10/26/intro-to-pandas-data-structures/>`_
 * `Pandas and Python: Top 10, by Manish Amde <https://manishamde.github.io/blog/2013/03/07/pandas-and-python-top-10/>`_
 * `Pandas DataFrames Tutorial, by Karlijn Willems <https://www.datacamp.com/community/tutorials/pandas-tutorial-dataframe-python>`_
 
@@ -93,9 +93,10 @@ PyArrow type                                    pandas extension type      NumPy
 
 .. note::
 
-    For string types (``pyarrow.string()``, ``string[pyarrow]``), PyArrow support is still facilitated
-    by :class:`arrays.ArrowStringArray` and ``StringDtype("pyarrow")``. See the :ref:`string section <api.arrays.string>`
-    below.
+    PyArrow-backed string support is provided by both ``pd.StringDtype("pyarrow")`` and ``pd.ArrowDtype(pa.string())``.
+    ``pd.StringDtype("pyarrow")`` is described below in the :ref:`string section <api.arrays.string>`
+    and will be returned if the string alias ``"string[pyarrow]"`` is specified. ``pd.ArrowDtype(pa.string())``
+    generally has better interoperability with :class:`ArrowDtype` of different types.
 
 While individual values in an :class:`arrays.ArrowExtensionArray` are stored as a PyArrow objects, scalars are **returned**
 as Python scalars corresponding to the data type, e.g. a PyArrow int64 will be returned as Python int, or :class:`NA` for missing
 
@@ -322,7 +322,7 @@ As usual, **both sides** of the slicers are included as this is label indexing.
 .. warning::
 
    You should specify all axes in the ``.loc`` specifier, meaning the indexer for the **index** and
-   for the **columns**. There are some ambiguous cases where the passed indexer could be mis-interpreted
+   for the **columns**. There are some ambiguous cases where the passed indexer could be misinterpreted
    as indexing *both* axes, rather than into say the ``MultiIndex`` for the rows.
 
    You should do this: