
Commit b4bb5b3

Merge remote-tracking branch 'upstream/master' into pr/arw2019/to_datetime-inconsistent-parsing

2 parents: 07834ed + 8b90070

File tree

225 files changed: +5867 / -4631 lines


.github/ISSUE_TEMPLATE/submit_question.md (+43/-24)

The old 24-line Markdown template was deleted; a 43-line issue form takes its place:

@@ -0,0 +1,43 @@
+name: Submit Question
+description: Ask a general question about pandas
+title: "QST: "
+labels: [Usage Question, Needs Triage]
+
+body:
+  - type: markdown
+    attributes:
+      value: >
+        Since [StackOverflow](https://stackoverflow.com) is better suited towards answering
+        usage questions, we ask that all usage questions are first asked on StackOverflow.
+  - type: checkboxes
+    attributes:
+      options:
+        - label: >
+            I have searched the [[pandas] tag](https://stackoverflow.com/questions/tagged/pandas)
+            on StackOverflow for similar questions.
+          required: true
+        - label: >
+            I have asked my usage related question on [StackOverflow](https://stackoverflow.com).
+          required: true
+  - type: input
+    id: question-link
+    attributes:
+      label: Link to question on StackOverflow
+    validations:
+      required: true
+  - type: markdown
+    attributes:
+      value: ---
+  - type: textarea
+    id: question
+    attributes:
+      label: Question about pandas
+      description: >
+        **Note**: If you'd still like to submit a question, please read [this guide](
+        https://matthewrocklin.com/blog/work/2018/02/28/minimal-bug-reports) detailing
+        how to provide the necessary information for us to reproduce your question.
+      placeholder: |
+        ```python
+        # Your code here, if applicable
+
+        ```
.github/workflows/ci.yml (-8)

@@ -32,10 +32,6 @@ jobs:
       with:
         fetch-depth: 0

-    - name: Looking for unwanted patterns
-      run: ci/code_checks.sh patterns
-      if: always()
-
     - name: Cache conda
       uses: actions/cache@v2
       with:
@@ -52,10 +48,6 @@ jobs:
     - name: Build Pandas
      uses: ./.github/actions/build_pandas

-    - name: Linting
-      run: ci/code_checks.sh lint
-      if: always()
-
     - name: Checks on imported code
       run: ci/code_checks.sh code
       if: always()

.github/workflows/python-dev.yml (+1/-1)

@@ -41,7 +41,7 @@ jobs:
     - name: Install dependencies
       run: |
         python -m pip install --upgrade pip setuptools wheel
-        pip install git+https://github.com/numpy/numpy.git
+        pip install -i https://pypi.anaconda.org/scipy-wheels-nightly/simple numpy
         pip install git+https://github.com/pytest-dev/pytest.git
         pip install git+https://github.com/nedbat/coveragepy.git
         pip install cython python-dateutil pytz hypothesis pytest-xdist pytest-cov

.pre-commit-config.yaml (+36/-3)

@@ -9,7 +9,7 @@ repos:
   - id: absolufy-imports
     files: ^pandas/
 - repo: https://github.com/python/black
-  rev: 21.6b0
+  rev: 21.7b0
   hooks:
   - id: black
 - repo: https://github.com/codespell-project/codespell
@@ -44,6 +44,7 @@ repos:
     - flake8-bugbear==21.3.2
     - pandas-dev-flaker==0.2.0
   - id: flake8
+    alias: flake8-cython
     name: flake8 (cython)
     types: [cython]
     args: [--append-config=flake8/cython.cfg]
@@ -53,11 +54,11 @@ repos:
     types: [text]
     args: [--append-config=flake8/cython-template.cfg]
 - repo: https://github.com/PyCQA/isort
-  rev: 5.9.2
+  rev: 5.9.3
   hooks:
   - id: isort
 - repo: https://github.com/asottile/pyupgrade
-  rev: v2.21.0
+  rev: v2.23.3
   hooks:
   - id: pyupgrade
     args: [--py38-plus]
@@ -102,7 +103,34 @@ repos:
         # Incorrect code-block / IPython directives
         |\.\.\ code-block\ ::
         |\.\.\ ipython\ ::
+
+        # Check for deprecated messages without sphinx directive
+        |(DEPRECATED|DEPRECATE|Deprecated)(:|,|\.)
     types_or: [python, cython, rst]
+  - id: cython-casting
+    name: Check Cython casting is `<type>obj`, not `<type> obj`
+    language: pygrep
+    entry: '[a-zA-Z0-9*]> '
+    files: (\.pyx|\.pxi.in)$
+  - id: incorrect-backticks
+    name: Check for backticks incorrectly rendering because of missing spaces
+    language: pygrep
+    entry: '[a-zA-Z0-9]\`\`?[a-zA-Z0-9]'
+    types: [rst]
+    files: ^doc/source/
+  - id: seed-check-asv
+    name: Check for unnecessary random seeds in asv benchmarks
+    language: pygrep
+    entry: 'np\.random\.seed'
+    files: ^asv_bench/benchmarks
+    exclude: ^asv_bench/benchmarks/pandas_vb_common\.py
+  - id: invalid-ea-testing
+    name: Check for invalid EA testing
+    language: pygrep
+    entry: 'tm\.assert_(series|frame)_equal'
+    files: ^pandas/tests/extension/base
+    types: [python]
+    exclude: ^pandas/tests/extension/base/base\.py
   - id: pip-to-conda
     name: Generate pip dependency from conda
     description: This hook checks if the conda environment.yml and requirements-dev.txt are equal
@@ -136,3 +164,8 @@ repos:
     entry: python scripts/no_bool_in_generic.py
     language: python
     files: ^pandas/core/generic\.py$
+  - id: pandas-errors-documented
+    name: Ensure pandas errors are documented in doc/source/reference/general_utility_functions.rst
+    entry: python scripts/pandas_errors_documented.py
+    language: python
+    files: ^pandas/errors/__init__.py$

asv_bench/asv.conf.json (+1/-4)

@@ -46,17 +46,14 @@
     "numba": [],
     "numexpr": [],
     "pytables": [null, ""], // platform dependent, see excludes below
+    "pyarrow": [],
     "tables": [null, ""],
     "openpyxl": [],
     "xlsxwriter": [],
     "xlrd": [],
     "xlwt": [],
     "odfpy": [],
-    "pytest": [],
     "jinja2": [],
-    // If using Windows with python 2.7 and want to build using the
-    // mingw toolchain (rather than MSVC), uncomment the following line.
-    // "libpython": [],
 },
 "conda_channels": ["defaults", "conda-forge"],
 // Combinations of libraries/python versions can be excluded/included

asv_bench/benchmarks/dtypes.py (+3/-3)

@@ -51,9 +51,9 @@ def time_pandas_dtype_invalid(self, dtype):
 class SelectDtypes:

     params = [
-        tm.ALL_INT_DTYPES
-        + tm.ALL_EA_INT_DTYPES
-        + tm.FLOAT_DTYPES
+        tm.ALL_INT_NUMPY_DTYPES
+        + tm.ALL_INT_EA_DTYPES
+        + tm.FLOAT_NUMPY_DTYPES
         + tm.COMPLEX_DTYPES
         + tm.DATETIME64_DTYPES
         + tm.TIMEDELTA64_DTYPES
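The renamed `tm.*` constants are internal `pandas._testing` dtype lists; the benchmark itself times `DataFrame.select_dtypes` over those dtypes. A minimal sketch of the operation being exercised (the small frame and dtype choices here are illustrative, not from the benchmark):

```python
import numpy as np
import pandas as pd

# select_dtypes filters a frame's columns by dtype, which is what the
# SelectDtypes benchmark parametrizes over the tm.* dtype lists.
df = pd.DataFrame(
    {
        "i": np.array([1, 2, 3], dtype="int64"),
        "f": np.array([1.0, 2.0, 3.0], dtype="float64"),
        "s": ["a", "b", "c"],
    }
)

ints = df.select_dtypes(include=["int64"])
floats = df.select_dtypes(include=["float64"])

assert list(ints.columns) == ["i"]
assert list(floats.columns) == ["f"]
```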

asv_bench/benchmarks/frame_ctor.py (+8)

@@ -2,6 +2,7 @@

 import pandas as pd
 from pandas import (
+    Categorical,
     DataFrame,
     MultiIndex,
     Series,
@@ -31,6 +32,9 @@ def setup(self):
         self.dict_list = frame.to_dict(orient="records")
         self.data2 = {i: {j: float(j) for j in range(100)} for i in range(2000)}

+        # arrays which we wont consolidate
+        self.dict_of_categoricals = {i: Categorical(np.arange(N)) for i in range(K)}
+
     def time_list_of_dict(self):
         DataFrame(self.dict_list)

@@ -50,6 +54,10 @@ def time_nested_dict_int64(self):
         # nested dict, integer indexes, regression described in #621
         DataFrame(self.data2)

+    def time_dict_of_categoricals(self):
+        # dict of arrays that we wont consolidate
+        DataFrame(self.dict_of_categoricals)
+

 class FromSeries:
     def setup(self):
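The new case builds a DataFrame from a dict of `Categorical` arrays. Categoricals are extension arrays, so each becomes its own column block rather than being consolidated into one 2-D block, which is the construction path this benchmark isolates. A small standalone sketch (sizes here are stand-ins for the benchmark's `N` and `K`):

```python
import numpy as np
import pandas as pd

# Construct a frame from a dict of Categorical arrays; each column stays
# a separate (non-consolidated) extension-array block.
N, K = 10, 5
data = {i: pd.Categorical(np.arange(N)) for i in range(K)}
df = pd.DataFrame(data)

assert df.shape == (N, K)
assert all(str(dtype) == "category" for dtype in df.dtypes)
```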

asv_bench/benchmarks/frame_methods.py (+6/-2)

@@ -538,8 +538,12 @@ class Interpolate:
     def setup(self, downcast):
         N = 10000
         # this is the worst case, where every column has NaNs.
-        self.df = DataFrame(np.random.randn(N, 100))
-        self.df.values[::2] = np.nan
+        arr = np.random.randn(N, 100)
+        # NB: we need to set values in array, not in df.values, otherwise
+        # the benchmark will be misleading for ArrayManager
+        arr[::2] = np.nan
+
+        self.df = DataFrame(arr)

         self.df2 = DataFrame(
             {
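The point of this change: `df.values` can be a freshly materialized copy of the frame's data (under ArrayManager it always is), so writing NaNs into it would leave the frame itself untouched and the interpolation benchmark would time an all-valid frame. Mutating the source array before construction guarantees the NaNs land in the frame. A sketch of the pattern (smaller sizes than the benchmark):

```python
import numpy as np
import pandas as pd

# Write NaNs into the ndarray *before* building the DataFrame, so the
# frame is guaranteed to contain them regardless of how it stores data.
N = 100
arr = np.random.randn(N, 10)
arr[::2] = np.nan  # every other row is all-NaN
df = pd.DataFrame(arr)

assert df.isna().any(axis=1).sum() == N // 2

# interpolate() fills the NaN rows from their neighbors (row 0 has no
# earlier neighbor, so it stays NaN under the default forward direction).
interpolated = df.interpolate()
assert interpolated.iloc[1:].notna().all().all()
```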

asv_bench/benchmarks/groupby.py (+24/-11)

@@ -403,7 +403,7 @@ def time_srs_bfill(self):

 class GroupByMethods:

-    param_names = ["dtype", "method", "application"]
+    param_names = ["dtype", "method", "application", "ncols"]
     params = [
         ["int", "float", "object", "datetime", "uint"],
         [
@@ -443,15 +443,23 @@ class GroupByMethods:
             "var",
         ],
         ["direct", "transformation"],
+        [1, 2, 5, 10],
     ]

-    def setup(self, dtype, method, application):
+    def setup(self, dtype, method, application, ncols):
         if method in method_blocklist.get(dtype, {}):
             raise NotImplementedError  # skip benchmark
+
+        if ncols != 1 and method in ["value_counts", "unique"]:
+            # DataFrameGroupBy doesn't have these methods
+            raise NotImplementedError
+
         ngroups = 1000
         size = ngroups * 2
-        rng = np.arange(ngroups)
-        values = rng.take(np.random.randint(0, ngroups, size=size))
+        rng = np.arange(ngroups).reshape(-1, 1)
+        rng = np.broadcast_to(rng, (len(rng), ncols))
+        taker = np.random.randint(0, ngroups, size=size)
+        values = rng.take(taker, axis=0)
         if dtype == "int":
             key = np.random.randint(0, size, size=size)
         elif dtype == "uint":
@@ -465,22 +473,27 @@ def setup(self, dtype, method, application):
         elif dtype == "datetime":
             key = date_range("1/1/2011", periods=size, freq="s")

-        df = DataFrame({"values": values, "key": key})
+        cols = [f"values{n}" for n in range(ncols)]
+        df = DataFrame(values, columns=cols)
+        df["key"] = key
+
+        if len(cols) == 1:
+            cols = cols[0]

         if application == "transform":
             if method == "describe":
                 raise NotImplementedError

-            self.as_group_method = lambda: df.groupby("key")["values"].transform(method)
-            self.as_field_method = lambda: df.groupby("values")["key"].transform(method)
+            self.as_group_method = lambda: df.groupby("key")[cols].transform(method)
+            self.as_field_method = lambda: df.groupby(cols)["key"].transform(method)
         else:
-            self.as_group_method = getattr(df.groupby("key")["values"], method)
-            self.as_field_method = getattr(df.groupby("values")["key"], method)
+            self.as_group_method = getattr(df.groupby("key")[cols], method)
+            self.as_field_method = getattr(df.groupby(cols)["key"], method)

-    def time_dtype_as_group(self, dtype, method, application):
+    def time_dtype_as_group(self, dtype, method, application, ncols):
         self.as_group_method()

-    def time_dtype_as_field(self, dtype, method, application):
+    def time_dtype_as_field(self, dtype, method, application, ncols):
         self.as_field_method()
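The new `ncols` parameter matters because selecting a list of columns from a groupby yields a `DataFrameGroupBy`, while a single label yields a `SeriesGroupBy`, and those take different code paths. A small sketch of the two selections the benchmark now times (sizes shrunk from the benchmark's values):

```python
import numpy as np
import pandas as pd

# Mirror the benchmark's value construction at toy scale.
ngroups, size, ncols = 5, 10, 3
rng = np.broadcast_to(np.arange(ngroups).reshape(-1, 1), (ngroups, ncols))
taker = np.random.randint(0, ngroups, size=size)

cols = [f"values{n}" for n in range(ncols)]
df = pd.DataFrame(rng.take(taker, axis=0), columns=cols)
df["key"] = np.random.randint(0, size, size=size)

# A list selection gives a DataFrameGroupBy: transform returns a frame ...
wide = df.groupby("key")[cols].transform("mean")
# ... a single label gives a SeriesGroupBy: transform returns a series.
narrow = df.groupby("key")["values0"].transform("mean")

assert wide.shape == (size, ncols)
assert narrow.shape == (size,)
```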

asv_bench/benchmarks/indexing.py (+9)

@@ -366,11 +366,20 @@ class InsertColumns:
     def setup(self):
         self.N = 10 ** 3
         self.df = DataFrame(index=range(self.N))
+        self.df2 = DataFrame(np.random.randn(self.N, 2))

     def time_insert(self):
         for i in range(100):
             self.df.insert(0, i, np.random.randn(self.N), allow_duplicates=True)

+    def time_insert_middle(self):
+        # same as time_insert but inserting to a middle column rather than
+        # front or back (which have fast-paths)
+        for i in range(100):
+            self.df2.insert(
+                1, "colname", np.random.randn(self.N), allow_duplicates=True
+            )
+
     def time_assign_with_setitem(self):
         for i in range(100):
             self.df[i] = np.random.randn(self.N)
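`DataFrame.insert` at the front or back hits fast paths; inserting at a middle position has to shift the existing columns, which is what the new `time_insert_middle` case measures. A minimal standalone illustration of the middle insert:

```python
import numpy as np
import pandas as pd

# Insert a column at position 1 of a two-column frame: the second original
# column must move over to make room.
N = 1000
df = pd.DataFrame(np.random.randn(N, 2))
df.insert(1, "colname", np.random.randn(N), allow_duplicates=True)

assert list(df.columns) == [0, "colname", 1]
assert df.shape == (N, 3)
```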

asv_bench/benchmarks/io/json.py (+10/-6)

@@ -172,15 +172,19 @@ def time_to_json(self, orient, frame):
     def peakmem_to_json(self, orient, frame):
         getattr(self, frame).to_json(self.fname, orient=orient)

-    def time_to_json_wide(self, orient, frame):
+
+class ToJSONWide(ToJSON):
+    def setup(self, orient, frame):
+        super().setup(orient, frame)
         base_df = getattr(self, frame).copy()
-        df = concat([base_df.iloc[:100]] * 1000, ignore_index=True, axis=1)
-        df.to_json(self.fname, orient=orient)
+        df_wide = concat([base_df.iloc[:100]] * 1000, ignore_index=True, axis=1)
+        self.df_wide = df_wide
+
+    def time_to_json_wide(self, orient, frame):
+        self.df_wide.to_json(self.fname, orient=orient)

     def peakmem_to_json_wide(self, orient, frame):
-        base_df = getattr(self, frame).copy()
-        df = concat([base_df.iloc[:100]] * 1000, ignore_index=True, axis=1)
-        df.to_json(self.fname, orient=orient)
+        self.df_wide.to_json(self.fname, orient=orient)


 class ToJSONISO(BaseIO):
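The refactor moves the expensive wide-frame construction out of the timed functions into `setup` of a new `ToJSONWide` subclass, so the timings measure only `to_json` itself. A sketch of the wide-frame construction with a small base frame and fewer repeats (the benchmark tiles 100 rows 1000 times; sizes here are scaled down):

```python
import pandas as pd

# Tile the base frame side-by-side (axis=1) to produce a very wide frame;
# ignore_index=True gives the result a fresh RangeIndex of columns.
base_df = pd.DataFrame({"a": range(100), "b": range(100)})
df_wide = pd.concat([base_df.iloc[:100]] * 10, ignore_index=True, axis=1)

assert df_wide.shape == (100, 20)  # 2 columns * 10 repeats

json_str = df_wide.to_json(orient="records")
assert json_str.startswith("[")
```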

asv_bench/benchmarks/io/style.py (+2/-2)

@@ -36,11 +36,11 @@ def peakmem_classes_render(self, cols, rows):

     def time_format_render(self, cols, rows):
         self._style_format()
-        self.st.render()
+        self.st._render_html(True, True)

     def peakmem_format_render(self, cols, rows):
         self._style_format()
-        self.st.render()
+        self.st._render_html(True, True)

     def _style_apply(self):
         def _apply_func(s):
