From a4057e5649e9d5488f1bc5d2ce8a22e342af6ae2 Mon Sep 17 00:00:00 2001
From: stevenae <stevenalonzoellis@gmail.com>
Date: Wed, 26 Mar 2025 15:11:43 -0400
Subject: [PATCH 01/19] dev setup

---
 dev_attempts.py      | 39 +++++++++++++++++++++++++++++++
 pandas/core/frame.py | 55 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 94 insertions(+)
 create mode 100644 dev_attempts.py

diff --git a/dev_attempts.py b/dev_attempts.py
new file mode 100644
index 0000000000000..b33a08cd82c63
--- /dev/null
+++ b/dev_attempts.py
@@ -0,0 +1,39 @@
+import pandas as pd
+import numpy as np
+import timeit
+np.random.seed(43)
+# also tested for n = 1000, 10_000, 100_000
+n=1_000_000
+cols = list('abcdef')
+df = pd.DataFrame(np.random.randint(0, 10, size=(n,len(cols))), columns=cols)
+df['col'] = np.random.choice(cols, n)
+idx = df['col'].index.to_numpy()
+cols = df['col'].to_numpy()
+
+def og_lookup(idx, cols):
+	return df.lookup(idx, cols,'og')
+
+# def melt_lookup():
+# 	melt = df.melt('col')
+# 	melt = melt.loc[lambda x: x['col']==x['variable'], 'value']
+# 	melt = melt.reset_index(drop=True)
+# 	return melt
+
+# def quan_lookup(idx,cols):
+# 	return df.reindex(cols,axis=1).to_numpy()[np.arange(df.shape[0]), idx]
+
+# def quan_lookup2(idx,cols):
+# 	return df.reindex(cols,axis=1).to_numpy()[np.arange(df.shape[0]), idx]
+
+# def marco_lookup():
+# 	return df.melt('col', ignore_index=False).query('col==variable')['value'].reindex(df.index).to_numpy()
+
+
+timeit.timeit(lambda: og_lookup(idx,cols),number=10)
+# timeit.timeit(lambda: melt_lookup(idx,cols),number=10)
+# timeit.timeit(lambda: quan_lookup(idx,cols),number=10)
+# timeit.timeit(lambda: quan_lookup2(idx,cols),number=10)
+# timeit.timeit(lambda: marco_lookup(idx,cols),number=10)
+
+# idx, cols = pd.factorize(df['col'])
+# df.reindex(cols, axis=1).to_numpy()[np.arange(len(df)), idx]
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 8f65277f660f7..11e7be46e1ee9 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -101,6 +101,7 @@
     is_integer_dtype,
     is_iterator,
     is_list_like,
+    is_object_dtype,
     is_scalar,
     is_sequence,
     needs_i8_conversion,
@@ -5135,6 +5136,60 @@ def _series(self):
     # ----------------------------------------------------------------------
     # Reindexing and alignment
 
+    def lookup(self, row_labels, col_labels, dev_version) -> np.ndarray:
+        """
+        Label-based "fancy indexing" function for DataFrame.
+
+
+        Given equal-length arrays of row and column labels, return an
+        array of the values corresponding to each (row, col) pair.
+
+
+        Parameters
+        ----------
+        row_labels : sequence
+            The row labels to use for lookup.
+        col_labels : sequence
+            The column labels to use for lookup.
+
+
+        Returns
+        -------
+        numpy.ndarray
+            The found values.
+        """
+        n = len(row_labels)
+        if n != len(col_labels):
+            raise ValueError("Row labels must have same size as column labels")
+        if not (self.index.is_unique and self.columns.is_unique):
+            # GH#33041
+            raise ValueError("DataFrame.lookup requires unique index and columns")
+
+
+        thresh = 1000
+        if not self._is_mixed_type or n > thresh:
+            values = self.values
+            ridx = self.index.get_indexer(row_labels)
+            cidx = self.columns.get_indexer(col_labels)
+            if (ridx == -1).any():
+                raise KeyError("One or more row labels was not found")
+            if (cidx == -1).any():
+                raise KeyError("One or more column labels was not found")
+            flat_index = ridx * len(self.columns) + cidx
+            result = values.flat[flat_index]
+        else:
+            if dev_version=='og':
+                result = np.empty(n, dtype="O")
+                for i, (r, c) in enumerate(zip(row_labels, col_labels)):
+                    result[i] = self._get_value(r, c)
+
+
+        if is_object_dtype(result):
+            result = lib.maybe_convert_objects(result)
+
+
+        return result
+
     def _reindex_multi(self, axes: dict[str, Index], fill_value) -> DataFrame:
         """
         We are guaranteed non-Nones in the axes.

From 0f5ad86223425eff63e73cd89c7c66155fe67a45 Mon Sep 17 00:00:00 2001
From: stevenae <stevenalonzoellis@gmail.com>
Date: Wed, 26 Mar 2025 16:32:41 -0400
Subject: [PATCH 02/19] Update dev_attempts.py

---
 dev_attempts.py | 54 +++++++++++++++++--------------------------------
 1 file changed, 19 insertions(+), 35 deletions(-)

diff --git a/dev_attempts.py b/dev_attempts.py
index b33a08cd82c63..833cfa5542271 100644
--- a/dev_attempts.py
+++ b/dev_attempts.py
@@ -2,38 +2,22 @@
 import numpy as np
 import timeit
 np.random.seed(43)
-# also tested for n = 1000, 10_000, 100_000
-n=1_000_000
-cols = list('abcdef')
-df = pd.DataFrame(np.random.randint(0, 10, size=(n,len(cols))), columns=cols)
-df['col'] = np.random.choice(cols, n)
-idx = df['col'].index.to_numpy()
-cols = df['col'].to_numpy()
-
-def og_lookup(idx, cols):
-	return df.lookup(idx, cols,'og')
-
-# def melt_lookup():
-# 	melt = df.melt('col')
-# 	melt = melt.loc[lambda x: x['col']==x['variable'], 'value']
-# 	melt = melt.reset_index(drop=True)
-# 	return melt
-
-# def quan_lookup(idx,cols):
-# 	return df.reindex(cols,axis=1).to_numpy()[np.arange(df.shape[0]), idx]
-
-# def quan_lookup2(idx,cols):
-# 	return df.reindex(cols,axis=1).to_numpy()[np.arange(df.shape[0]), idx]
-
-# def marco_lookup():
-# 	return df.melt('col', ignore_index=False).query('col==variable')['value'].reindex(df.index).to_numpy()
-
-
-timeit.timeit(lambda: og_lookup(idx,cols),number=10)
-# timeit.timeit(lambda: melt_lookup(idx,cols),number=10)
-# timeit.timeit(lambda: quan_lookup(idx,cols),number=10)
-# timeit.timeit(lambda: quan_lookup2(idx,cols),number=10)
-# timeit.timeit(lambda: marco_lookup(idx,cols),number=10)
-
-# idx, cols = pd.factorize(df['col'])
-# df.reindex(cols, axis=1).to_numpy()[np.arange(len(df)), idx]
+for n in [100,100_000]:
+	cols = list('abcdef')
+	df = pd.DataFrame(np.random.randint(0, 10, size=(n,len(cols))), columns=cols)
+	df['col'] = np.random.choice(cols, n)
+	idx = df['col'].index.to_numpy()
+	cols = df['col'].to_numpy()
+	timeit.timeit(lambda: df.lookup(idx, cols,'og'),number=10)
+	timeit.timeit(lambda: df.lookup(idx, cols,'a'),number=10)
+	timeit.timeit(lambda: df.lookup(idx, cols,'b'),number=10)
+	timeit.timeit(lambda: df.lookup(idx, cols,'c'),number=10)
+	df['a'] = df['a'].astype(str)
+	df['a'] = 'a'
+	print('mixed')
+	timeit.timeit(lambda: df.lookup(idx, cols,'og'),number=10)
+	timeit.timeit(lambda: df.lookup(idx, cols,'a'),number=10)
+	timeit.timeit(lambda: df.lookup(idx, cols,'b'),number=10)
+	timeit.timeit(lambda: df.lookup(idx, cols,'c'),number=10)
+	df.lookup(idx, cols,'b')
+	print('\n')
\ No newline at end of file

From 7e301815952e843ab53d32a1bc8fa458e863b243 Mon Sep 17 00:00:00 2001
From: stevenae <stevenalonzoellis@gmail.com>
Date: Wed, 26 Mar 2025 16:33:04 -0400
Subject: [PATCH 03/19] removed mixed type and threshold

---
 pandas/core/frame.py | 28 +++++++++-------------------
 1 file changed, 9 insertions(+), 19 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 11e7be46e1ee9..e248a87b2a183 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -5165,29 +5165,19 @@ def lookup(self, row_labels, col_labels, dev_version) -> np.ndarray:
             # GH#33041
             raise ValueError("DataFrame.lookup requires unique index and columns")
 
-
-        thresh = 1000
-        if not self._is_mixed_type or n > thresh:
-            values = self.values
-            ridx = self.index.get_indexer(row_labels)
-            cidx = self.columns.get_indexer(col_labels)
-            if (ridx == -1).any():
-                raise KeyError("One or more row labels was not found")
-            if (cidx == -1).any():
-                raise KeyError("One or more column labels was not found")
-            flat_index = ridx * len(self.columns) + cidx
-            result = values.flat[flat_index]
-        else:
-            if dev_version=='og':
-                result = np.empty(n, dtype="O")
-                for i, (r, c) in enumerate(zip(row_labels, col_labels)):
-                    result[i] = self._get_value(r, c)
-
+        values = self.to_numpy()
+        ridx = self.index.get_indexer(row_labels)
+        cidx = self.columns.get_indexer(col_labels)
+        if (ridx == -1).any():
+            raise KeyError("One or more row labels was not found")
+        if (cidx == -1).any():
+            raise KeyError("One or more column labels was not found")
+        flat_index = ridx * len(self.columns) + cidx
+        result = values.flat[flat_index]
 
         if is_object_dtype(result):
             result = lib.maybe_convert_objects(result)
 
-
         return result
 
     def _reindex_multi(self, axes: dict[str, Index], fill_value) -> DataFrame:

From 6fed58dc1cb83a9fdd89fc518d9eaf84fdc40c01 Mon Sep 17 00:00:00 2001
From: stevenae <stevenalonzoellis@gmail.com>
Date: Wed, 26 Mar 2025 16:41:19 -0400
Subject: [PATCH 04/19] Delete dev_attempts.py

---
 dev_attempts.py | 23 -----------------------
 1 file changed, 23 deletions(-)
 delete mode 100644 dev_attempts.py

diff --git a/dev_attempts.py b/dev_attempts.py
deleted file mode 100644
index 833cfa5542271..0000000000000
--- a/dev_attempts.py
+++ /dev/null
@@ -1,23 +0,0 @@
-import pandas as pd
-import numpy as np
-import timeit
-np.random.seed(43)
-for n in [100,100_000]:
-	cols = list('abcdef')
-	df = pd.DataFrame(np.random.randint(0, 10, size=(n,len(cols))), columns=cols)
-	df['col'] = np.random.choice(cols, n)
-	idx = df['col'].index.to_numpy()
-	cols = df['col'].to_numpy()
-	timeit.timeit(lambda: df.lookup(idx, cols,'og'),number=10)
-	timeit.timeit(lambda: df.lookup(idx, cols,'a'),number=10)
-	timeit.timeit(lambda: df.lookup(idx, cols,'b'),number=10)
-	timeit.timeit(lambda: df.lookup(idx, cols,'c'),number=10)
-	df['a'] = df['a'].astype(str)
-	df['a'] = 'a'
-	print('mixed')
-	timeit.timeit(lambda: df.lookup(idx, cols,'og'),number=10)
-	timeit.timeit(lambda: df.lookup(idx, cols,'a'),number=10)
-	timeit.timeit(lambda: df.lookup(idx, cols,'b'),number=10)
-	timeit.timeit(lambda: df.lookup(idx, cols,'c'),number=10)
-	df.lookup(idx, cols,'b')
-	print('\n')
\ No newline at end of file

From 8156c42b6b02b65ef8f65b3cad5f820533ca0af6 Mon Sep 17 00:00:00 2001
From: stevenae <stevenalonzoellis@gmail.com>
Date: Wed, 26 Mar 2025 16:57:42 -0400
Subject: [PATCH 05/19] Update indexing.rst

---
 doc/source/user_guide/indexing.rst | 23 +++++++----------------
 1 file changed, 7 insertions(+), 16 deletions(-)

diff --git a/doc/source/user_guide/indexing.rst b/doc/source/user_guide/indexing.rst
index ed5c7806b2e23..1a28589621acd 100644
--- a/doc/source/user_guide/indexing.rst
+++ b/doc/source/user_guide/indexing.rst
@@ -1456,24 +1456,15 @@ default value.
 
 .. _indexing.lookup:
 
-Looking up values by index/column labels
+The :meth:`~pandas.DataFrame.lookup` method
 ----------------------------------------
+ Sometimes you want to extract a set of values given a sequence of row labels
+ and column labels, and the ``lookup`` method allows for this and returns a
+ NumPy array.  For instance:
 
-Sometimes you want to extract a set of values given a sequence of row labels
-and column labels, this can be achieved by ``pandas.factorize``  and NumPy indexing.
-For instance:
-
-.. ipython:: python
-
-    df = pd.DataFrame({'col': ["A", "A", "B", "B"],
-                       'A': [80, 23, np.nan, 22],
-                       'B': [80, 55, 76, 67]})
-    df
-    idx, cols = pd.factorize(df['col'])
-    df.reindex(cols, axis=1).to_numpy()[np.arange(len(df)), idx]
-
-Formerly this could be achieved with the dedicated ``DataFrame.lookup`` method
-which was deprecated in version 1.2.0 and removed in version 2.0.0.
+ .. ipython:: python
+   dflookup = pd.DataFrame(np.random.rand(20, 4), columns = ['A', 'B', 'C', 'D'])
+   dflookup.lookup(list(range(0, 10, 2)), ['B', 'C', 'A', 'B', 'D'])
 
 .. _indexing.class:
 

From c17a020f494a4595f39b2f96e11a315a77a44b28 Mon Sep 17 00:00:00 2001
From: stevenae <stevenalonzoellis@gmail.com>
Date: Wed, 26 Mar 2025 17:09:48 -0400
Subject: [PATCH 06/19] bringing tests back from 1.1.x

---
 pandas/tests/frame/indexing/test_indexing.py | 66 ++++++++++++++++++++
 1 file changed, 66 insertions(+)

diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py
index 0c99b08cb30c4..89bb60f3cc114 100644
--- a/pandas/tests/frame/indexing/test_indexing.py
+++ b/pandas/tests/frame/indexing/test_indexing.py
@@ -1414,6 +1414,72 @@ def test_loc_named_tuple_for_midx(self):
         )
         tm.assert_frame_equal(result, expected)
 
+    def test_lookup_float(self, float_frame):
+        df = float_frame
+        rows = list(df.index) * len(df.columns)
+        cols = list(df.columns) * len(df.index)
+        result = df.lookup(rows, cols)
+
+        expected = np.array([df.loc[r, c] for r, c in zip(rows, cols)])
+        tm.assert_numpy_array_equal(result, expected)
+
+    def test_lookup_mixed(self, float_string_frame):
+        df = float_string_frame
+        rows = list(df.index) * len(df.columns)
+        cols = list(df.columns) * len(df.index)
+        result = df.lookup(rows, cols)
+
+        expected = np.array(
+            [df.loc[r, c] for r, c in zip(rows, cols)], dtype=np.object_
+        )
+        tm.assert_almost_equal(result, expected)
+
+    def test_lookup_bool(self):
+        df = DataFrame(
+            {
+                "label": ["a", "b", "a", "c"],
+                "mask_a": [True, True, False, True],
+                "mask_b": [True, False, False, False],
+                "mask_c": [False, True, False, True],
+            }
+        )
+        df["mask"] = df.lookup(df.index, "mask_" + df["label"])
+
+        exp_mask = np.array(
+            [df.loc[r, c] for r, c in zip(df.index, "mask_" + df["label"])]
+        )
+
+        tm.assert_series_equal(df["mask"], Series(exp_mask, name="mask"))
+        assert df["mask"].dtype == np.bool_
+
+    def test_lookup_raises(self, float_frame):
+        with pytest.raises(KeyError, match="'One or more row labels was not found'"):
+            float_frame.lookup(["xyz"], ["A"])
+
+        with pytest.raises(KeyError, match="'One or more column labels was not found'"):
+            float_frame.lookup([float_frame.index[0]], ["xyz"])
+
+        with pytest.raises(ValueError, match="same size"):
+            float_frame.lookup(["a", "b", "c"], ["a"])
+
+    def test_lookup_requires_unique_axes(self):
+        # GH#33041 raise with a helpful error message
+        df = DataFrame(np.random.Generator(6).reshape(3, 2), columns=["A", "A"])
+
+        rows = [0, 1]
+        cols = ["A", "A"]
+
+        # homogeneous-dtype case
+        with pytest.raises(ValueError, match="requires unique index and columns"):
+            df.lookup(rows, cols)
+        with pytest.raises(ValueError, match="requires unique index and columns"):
+            df.T.lookup(cols, rows)
+
+        # heterogeneous dtype
+        df["B"] = 0
+        with pytest.raises(ValueError, match="requires unique index and columns"):
+            df.lookup(rows, cols)
+
     @pytest.mark.parametrize("indexer", [["a"], "a"])
     @pytest.mark.parametrize("col", [{}, {"b": 1}])
     def test_set_2d_casting_date_to_int(self, col, indexer):

From 4a0b85605d8e5e808caba5b805c5fe2333b3dd5d Mon Sep 17 00:00:00 2001
From: stevenae <stevenalonzoellis@gmail.com>
Date: Wed, 26 Mar 2025 17:30:57 -0400
Subject: [PATCH 07/19] extend underline

---
 doc/source/user_guide/indexing.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/user_guide/indexing.rst b/doc/source/user_guide/indexing.rst
index 1a28589621acd..a3f7e03f692b4 100644
--- a/doc/source/user_guide/indexing.rst
+++ b/doc/source/user_guide/indexing.rst
@@ -1457,7 +1457,7 @@ default value.
 .. _indexing.lookup:
 
 The :meth:`~pandas.DataFrame.lookup` method
-----------------------------------------
+-------------------------------------------
  Sometimes you want to extract a set of values given a sequence of row labels
  and column labels, and the ``lookup`` method allows for this and returns a
  NumPy array.  For instance:

From e0b0b57ffcc5fe2031777d7b2054e9bb0c146246 Mon Sep 17 00:00:00 2001
From: stevenae <stevenalonzoellis@gmail.com>
Date: Wed, 26 Mar 2025 17:34:20 -0400
Subject: [PATCH 08/19] spacing

---
 doc/source/user_guide/indexing.rst | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/doc/source/user_guide/indexing.rst b/doc/source/user_guide/indexing.rst
index a3f7e03f692b4..be49f95fed26a 100644
--- a/doc/source/user_guide/indexing.rst
+++ b/doc/source/user_guide/indexing.rst
@@ -1458,11 +1458,13 @@ default value.
 
 The :meth:`~pandas.DataFrame.lookup` method
 -------------------------------------------
- Sometimes you want to extract a set of values given a sequence of row labels
- and column labels, and the ``lookup`` method allows for this and returns a
- NumPy array.  For instance:
 
- .. ipython:: python
+Sometimes you want to extract a set of values given a sequence of row labels
+and column labels, and the ``lookup`` method allows for this and returns a
+NumPy array.  For instance:
+
+.. ipython:: python
+
    dflookup = pd.DataFrame(np.random.rand(20, 4), columns = ['A', 'B', 'C', 'D'])
    dflookup.lookup(list(range(0, 10, 2)), ['B', 'C', 'A', 'B', 'D'])
 

From 2a6dfaef0178feb176f3a0ca6a87f9126ec378b1 Mon Sep 17 00:00:00 2001
From: stevenae <stevenalonzoellis@gmail.com>
Date: Wed, 26 Mar 2025 17:48:03 -0400
Subject: [PATCH 09/19] remove dev_version

---
 pandas/core/frame.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index e248a87b2a183..e5820a8b533e8 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -5136,7 +5136,7 @@ def _series(self):
     # ----------------------------------------------------------------------
     # Reindexing and alignment
 
-    def lookup(self, row_labels, col_labels, dev_version) -> np.ndarray:
+    def lookup(self, row_labels, col_labels) -> np.ndarray:
         """
         Label-based "fancy indexing" function for DataFrame.
 

From 21280ed6df9a18f4bde74d7350af8c0d713a0756 Mon Sep 17 00:00:00 2001
From: stevenae <stevenalonzoellis@gmail.com>
Date: Wed, 26 Mar 2025 18:13:29 -0400
Subject: [PATCH 10/19] fixed test_lookup_requires_unique_axes

Build the test frame with np.random.default_rng(2).standard_normal((3, 2)) instead of np.random.Generator(6).reshape(3, 2).
---
 pandas/tests/frame/indexing/test_indexing.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py
index 89bb60f3cc114..f79475b770302 100644
--- a/pandas/tests/frame/indexing/test_indexing.py
+++ b/pandas/tests/frame/indexing/test_indexing.py
@@ -1464,7 +1464,9 @@ def test_lookup_raises(self, float_frame):
 
     def test_lookup_requires_unique_axes(self):
         # GH#33041 raise with a helpful error message
-        df = DataFrame(np.random.Generator(6).reshape(3, 2), columns=["A", "A"])
+        df = DataFrame(
+            np.random.default_rng(2).standard_normal((3, 2)), columns=["A", "A"]
+        )
 
         rows = [0, 1]
         cols = ["A", "A"]

From 48f1cde41abe811bf7b88a993cbeea30b237e34a Mon Sep 17 00:00:00 2001
From: stevenae <stevenalonzoellis@gmail.com>
Date: Thu, 27 Mar 2025 12:19:02 -0400
Subject: [PATCH 11/19] Reduce columns to those in lookup

---
 pandas/core/frame.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index e5820a8b533e8..7c864b36fea61 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -5165,14 +5165,20 @@ def lookup(self, row_labels, col_labels) -> np.ndarray:
             # GH#33041
             raise ValueError("DataFrame.lookup requires unique index and columns")
 
-        values = self.to_numpy()
         ridx = self.index.get_indexer(row_labels)
         cidx = self.columns.get_indexer(col_labels)
         if (ridx == -1).any():
             raise KeyError("One or more row labels was not found")
         if (cidx == -1).any():
             raise KeyError("One or more column labels was not found")
-        flat_index = ridx * len(self.columns) + cidx
+        if len(set(col_labels)) < len(self.columns):
+            sub = self.take(np.unique(cidx), axis=1)
+            values = sub.to_numpy()
+            cidx = sub.columns.get_indexer(col_labels)
+            flat_index = ridx * len(sub.columns) + cidx
+        else:
+            values = self.to_numpy()
+            flat_index = ridx * len(self.columns) + cidx
         result = values.flat[flat_index]
 
         if is_object_dtype(result):

From 9c060a8d2b0f585991bc0957fe2c30756b90aa28 Mon Sep 17 00:00:00 2001
From: stevenae <stevenalonzoellis@gmail.com>
Date: Thu, 27 Mar 2025 14:39:09 -0400
Subject: [PATCH 12/19] Update frame.py

---
 pandas/core/frame.py | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index e5820a8b533e8..a59bdfbf1a76e 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -5165,14 +5165,22 @@ def lookup(self, row_labels, col_labels) -> np.ndarray:
             # GH#33041
             raise ValueError("DataFrame.lookup requires unique index and columns")
 
-        values = self.to_numpy()
+        # values = self.to_numpy()
         ridx = self.index.get_indexer(row_labels)
         cidx = self.columns.get_indexer(col_labels)
         if (ridx == -1).any():
             raise KeyError("One or more row labels was not found")
         if (cidx == -1).any():
             raise KeyError("One or more column labels was not found")
-        flat_index = ridx * len(self.columns) + cidx
+
+        sub = self.take(np.unique(cidx), axis=1)
+        if sub._is_mixed_type:
+            sub = sub.take(np.unique(ridx), axis=0)
+            ridx = sub.index.get_indexer(row_labels)
+        values = sub.to_numpy()
+        cidx = sub.columns.get_indexer(col_labels)
+        flat_index = ridx * len(sub.columns) + cidx
+
         result = values.flat[flat_index]
 
         if is_object_dtype(result):

From 4e0c17ff5681be4c64de7fa76b338b36ddd5b3a8 Mon Sep 17 00:00:00 2001
From: stevenae <stevenalonzoellis@gmail.com>
Date: Fri, 28 Mar 2025 15:46:29 -0400
Subject: [PATCH 13/19] one line to separate sections

---
 pandas/core/frame.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index fd8479906796b..a6f4990defa50 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -5140,11 +5140,9 @@ def lookup(self, row_labels, col_labels) -> np.ndarray:
         """
         Label-based "fancy indexing" function for DataFrame.
 
-
         Given equal-length arrays of row and column labels, return an
         array of the values corresponding to each (row, col) pair.
 
-
         Parameters
         ----------
         row_labels : sequence
@@ -5152,7 +5150,6 @@ def lookup(self, row_labels, col_labels) -> np.ndarray:
         col_labels : sequence
             The column labels to use for lookup.
 
-
         Returns
         -------
         numpy.ndarray

From a5e379b958beeff26cb89f5a4b2e6365008f7c47 Mon Sep 17 00:00:00 2001
From: stevenae <stevenalonzoellis@gmail.com>
Date: Fri, 28 Mar 2025 15:49:19 -0400
Subject: [PATCH 14/19] Update v3.0.0.rst

Add a whatsnew entry for pandas.DataFrame.lookup.
---
 doc/source/whatsnew/v3.0.0.rst | 1 +
 1 file changed, 1 insertion(+)

diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index da2a9bdada469..cb25079cee917 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -30,6 +30,7 @@ Other enhancements
 ^^^^^^^^^^^^^^^^^^
 - :class:`pandas.api.typing.FrozenList` is available for typing the outputs of :attr:`MultiIndex.names`, :attr:`MultiIndex.codes` and :attr:`MultiIndex.levels` (:issue:`58237`)
 - :class:`pandas.api.typing.SASReader` is available for typing the output of :func:`read_sas` (:issue:`55689`)
+- :meth:`pandas.DataFrame.lookup` returns with optimizations for looking up values by list of row/column pairs  (:issue:`40140`)
 - :meth:`pandas.api.interchange.from_dataframe` now uses the `PyCapsule Interface <https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html>`_ if available, only falling back to the Dataframe Interchange Protocol if that fails (:issue:`60739`)
 - Added :meth:`.Styler.to_typst` to write Styler objects to file, buffer or string in Typst format (:issue:`57617`)
 - Added missing :meth:`pandas.Series.info` to API reference (:issue:`60926`)

From 47e0b1bcb66262aadfb0661ea066e3ba7372cda3 Mon Sep 17 00:00:00 2001
From: stevenae <stevenalonzoellis@gmail.com>
Date: Fri, 28 Mar 2025 17:05:39 -0400
Subject: [PATCH 15/19] Adding an example

---
 pandas/core/frame.py | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index a6f4990defa50..56545368a3800 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -5154,6 +5154,23 @@ def lookup(self, row_labels, col_labels) -> np.ndarray:
         -------
         numpy.ndarray
             The found values.
+
+        Examples
+        --------
+        >>> df = pd.DataFrame(
+        ...     {
+        ...         "Math_Sem1": [85, 92, 78, 88, 95],
+        ...         "Math_Sem2": [88, 90, 82, 85, 93],
+        ...         "Science_Sem1": [90, 85, 92, 79, 87],
+        ...         "Science_Sem2": [92, 87, 90, 83, 89],
+        ...         "English_Sem1": [95, 80, 85, 90, 82],
+        ...         "English_Sem2": [93, 82, 87, 88, 80],
+        ...     },
+        ...     index=["Alice", "Bob", "Charlie", "David", "Eve"],
+        ... )
+        >>> student_top = df.rank(1).idxmax(1)  # Column name for student's top score
+        >>> df.lookup(df.index, student_top)
+        array([95, 92, 92, 90, 95])
         """
         n = len(row_labels)
         if n != len(col_labels):

From 0c04e97d6b49f9c0e0dc0db6019c3d501f1e178b Mon Sep 17 00:00:00 2001
From: stevenae <stevenalonzoellis@gmail.com>
Date: Fri, 28 Mar 2025 17:16:40 -0400
Subject: [PATCH 16/19] Update frame.py

Expand the docstring example: look up each student's feedback text for the subject with their top grade.
---
 pandas/core/frame.py | 57 ++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 53 insertions(+), 4 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 56545368a3800..1a4acf1e95efd 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -5157,7 +5157,7 @@ def lookup(self, row_labels, col_labels) -> np.ndarray:
 
         Examples
         --------
-        >>> df = pd.DataFrame(
+        >>> grades = pd.DataFrame(
         ...     {
         ...         "Math_Sem1": [85, 92, 78, 88, 95],
         ...         "Math_Sem2": [88, 90, 82, 85, 93],
@@ -5168,9 +5168,58 @@ def lookup(self, row_labels, col_labels) -> np.ndarray:
         ...     },
         ...     index=["Alice", "Bob", "Charlie", "David", "Eve"],
         ... )
-        >>> student_top = df.rank(1).idxmax(1)  # Column name for student's top score
-        >>> df.lookup(df.index, student_top)
-        array([95, 92, 92, 90, 95])
+        >>> feedback = pd.DataFrame(
+        ...     {
+        ...         "Math_Sem1": [
+        ...             "Strong analytical skills",
+        ...             "Excellent problem-solving",
+        ...             "Needs more practice",
+        ...             "Solid understanding",
+        ...             "Exceptional reasoning",
+        ...         ],
+        ...         "Math_Sem2": [
+        ...             "Improved advanced techniques",
+        ...             "Consistent high performance",
+        ...             "Significant progress",
+        ...             "Steady improvement",
+        ...             "Consistently exceptional",
+        ...         ],
+        ...         "Science_Sem1": [
+        ...             "Excellent inquiry skills",
+        ...             "Good theoretical concepts",
+        ...             "Strong methodological interest",
+        ...             "Needs focus",
+        ...             "Outstanding curiosity",
+        ...         ],
+        ...         "Science_Sem2": [
+        ...             "Advanced scientific principles",
+        ...             "Improved practical skills",
+        ...             "Growing scientific reasoning",
+        ...             "Better lab engagement",
+        ...             "Continued excellence",
+        ...         ],
+        ...         "English_Sem1": [
+        ...             "Exceptional writing",
+        ...             "Strong language use",
+        ...             "Needs confident expression",
+        ...             "Solid literary analysis",
+        ...             "Creative insights",
+        ...         ],
+        ...         "English_Sem2": [
+        ...             "Refined writing techniques",
+        ...             "Improved expression",
+        ...             "More confident analysis",
+        ...             "Developing writing style",
+        ...             "Maintained high-level writing",
+        ...         ],
+        ...     },
+        ...     index=["Alice", "Bob", "Charlie", "David", "Eve"],
+        ... )
+        >>> student_top = grades.rank(1).idxmax(1)  #  student's top score
+        >>> feedback.lookup(student_top.index, student_top)
+        array(['Exceptional writing', 'Excellent problem-solving',
+               'Strong methodological interest', 'Solid literary analysis',
+               'Exceptional reasoning'], dtype=object)
         """
         n = len(row_labels)
         if n != len(col_labels):

From 7d6dea53e63574fbc5a96530fdee1c53956939ca Mon Sep 17 00:00:00 2001
From: stevenae <stevenalonzoellis@gmail.com>
Date: Mon, 31 Mar 2025 13:03:09 -0400
Subject: [PATCH 17/19] shorter example

---
 pandas/core/frame.py | 44 ++++++--------------------------------------
 1 file changed, 6 insertions(+), 38 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 1a4acf1e95efd..cda775258322a 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -5159,66 +5159,34 @@ def lookup(self, row_labels, col_labels) -> np.ndarray:
         --------
         >>> grades = pd.DataFrame(
         ...     {
-        ...         "Math_Sem1": [85, 92, 78, 88, 95],
-        ...         "Math_Sem2": [88, 90, 82, 85, 93],
-        ...         "Science_Sem1": [90, 85, 92, 79, 87],
-        ...         "Science_Sem2": [92, 87, 90, 83, 89],
-        ...         "English_Sem1": [95, 80, 85, 90, 82],
-        ...         "English_Sem2": [93, 82, 87, 88, 80],
+        ...         "Math": [85, 92, 78, 88, 95],
+        ...         "Science": [90, 85, 92, 79, 87],
         ...     },
         ...     index=["Alice", "Bob", "Charlie", "David", "Eve"],
         ... )
         >>> feedback = pd.DataFrame(
         ...     {
-        ...         "Math_Sem1": [
+        ...         "Math": [
         ...             "Strong analytical skills",
         ...             "Excellent problem-solving",
         ...             "Needs more practice",
         ...             "Solid understanding",
         ...             "Exceptional reasoning",
         ...         ],
-        ...         "Math_Sem2": [
-        ...             "Improved advanced techniques",
-        ...             "Consistent high performance",
-        ...             "Significant progress",
-        ...             "Steady improvement",
-        ...             "Consistently exceptional",
-        ...         ],
-        ...         "Science_Sem1": [
+        ...         "Science": [
         ...             "Excellent inquiry skills",
         ...             "Good theoretical concepts",
         ...             "Strong methodological interest",
         ...             "Needs focus",
         ...             "Outstanding curiosity",
         ...         ],
-        ...         "Science_Sem2": [
-        ...             "Advanced scientific principles",
-        ...             "Improved practical skills",
-        ...             "Growing scientific reasoning",
-        ...             "Better lab engagement",
-        ...             "Continued excellence",
-        ...         ],
-        ...         "English_Sem1": [
-        ...             "Exceptional writing",
-        ...             "Strong language use",
-        ...             "Needs confident expression",
-        ...             "Solid literary analysis",
-        ...             "Creative insights",
-        ...         ],
-        ...         "English_Sem2": [
-        ...             "Refined writing techniques",
-        ...             "Improved expression",
-        ...             "More confident analysis",
-        ...             "Developing writing style",
-        ...             "Maintained high-level writing",
-        ...         ],
         ...     },
         ...     index=["Alice", "Bob", "Charlie", "David", "Eve"],
         ... )
         >>> student_top = grades.rank(1).idxmax(1)  #  student's top score
         >>> feedback.lookup(student_top.index, student_top)
-        array(['Exceptional writing', 'Excellent problem-solving',
-               'Strong methodological interest', 'Solid literary analysis',
+        array(['Excellent inquiry skills', 'Excellent problem-solving',
+               'Strong methodological interest', 'Solid understanding',
                'Exceptional reasoning'], dtype=object)
         """
         n = len(row_labels)

From 50183653f7f08092934a9949629ec02758deeb7e Mon Sep 17 00:00:00 2001
From: stevenae <stevenalonzoellis@gmail.com>
Date: Mon, 31 Mar 2025 15:20:41 -0400
Subject: [PATCH 18/19] Update frame.py

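Potential compromise: read the sub-frame through `sub.values` instead of
`sub.to_numpy()`, and widen the return annotation of `DataFrame.lookup`
from `np.ndarray` to `ExtensionArray | np.ndarray` to match.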
---
 pandas/core/frame.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index cda775258322a..38c6e2a7524ca 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -5136,7 +5136,7 @@ def _series(self):
     # ----------------------------------------------------------------------
     # Reindexing and alignment
 
-    def lookup(self, row_labels, col_labels) -> np.ndarray:
+    def lookup(self, row_labels, col_labels) -> ExtensionArray | np.ndarray:
         """
         Label-based "fancy indexing" function for DataFrame.
 
@@ -5207,7 +5207,7 @@ def lookup(self, row_labels, col_labels) -> np.ndarray:
         if sub._is_mixed_type:
             sub = sub.take(np.unique(ridx), axis=0)
             ridx = sub.index.get_indexer(row_labels)
-        values = sub.to_numpy()
+        values = sub.values
         cidx = sub.columns.get_indexer(col_labels)
         flat_index = ridx * len(sub.columns) + cidx
 

From 6aa621897ab9db000a3758cd46d52f4cb41ca80c Mon Sep 17 00:00:00 2001
From: stevenae <stevenalonzoellis@gmail.com>
Date: Mon, 31 Mar 2025 16:29:04 -0400
Subject: [PATCH 19/19] rewrite to preserve types

---
 pandas/core/frame.py                         | 15 +++++----------
 pandas/tests/frame/indexing/test_indexing.py | 20 ++++++++++----------
 2 files changed, 15 insertions(+), 20 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 38c6e2a7524ca..69106d70ccf1b 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -101,7 +101,6 @@
     is_integer_dtype,
     is_iterator,
     is_list_like,
-    is_object_dtype,
     is_scalar,
     is_sequence,
     needs_i8_conversion,
@@ -5204,17 +5203,13 @@ def lookup(self, row_labels, col_labels) -> ExtensionArray | np.ndarray:
             raise KeyError("One or more column labels was not found")
 
         sub = self.take(np.unique(cidx), axis=1)
-        if sub._is_mixed_type:
-            sub = sub.take(np.unique(ridx), axis=0)
-            ridx = sub.index.get_indexer(row_labels)
-        values = sub.values
+        sub = sub.take(np.unique(ridx), axis=0)
+        ridx = sub.index.get_indexer(row_labels)
+        values = sub.melt()["value"]
         cidx = sub.columns.get_indexer(col_labels)
-        flat_index = ridx * len(sub.columns) + cidx
+        flat_index = ridx + cidx * len(sub)
 
-        result = values.flat[flat_index]
-
-        if is_object_dtype(result):
-            result = lib.maybe_convert_objects(result)
+        result = values[flat_index]
 
         return result
 
diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py
index f79475b770302..bb924acb2bf4e 100644
--- a/pandas/tests/frame/indexing/test_indexing.py
+++ b/pandas/tests/frame/indexing/test_indexing.py
@@ -1420,8 +1420,8 @@ def test_lookup_float(self, float_frame):
         cols = list(df.columns) * len(df.index)
         result = df.lookup(rows, cols)
 
-        expected = np.array([df.loc[r, c] for r, c in zip(rows, cols)])
-        tm.assert_numpy_array_equal(result, expected)
+        expected = Series([df.loc[r, c] for r, c in zip(rows, cols)])
+        tm.assert_series_equal(result, expected, check_index=False, check_names=False)
 
     def test_lookup_mixed(self, float_string_frame):
         df = float_string_frame
@@ -1429,10 +1429,8 @@ def test_lookup_mixed(self, float_string_frame):
         cols = list(df.columns) * len(df.index)
         result = df.lookup(rows, cols)
 
-        expected = np.array(
-            [df.loc[r, c] for r, c in zip(rows, cols)], dtype=np.object_
-        )
-        tm.assert_almost_equal(result, expected)
+        expected = Series([df.loc[r, c] for r, c in zip(rows, cols)], dtype=np.object_)
+        tm.assert_series_equal(result, expected, check_index=False, check_names=False)
 
     def test_lookup_bool(self):
         df = DataFrame(
@@ -1443,14 +1441,16 @@ def test_lookup_bool(self):
                 "mask_c": [False, True, False, True],
             }
         )
-        df["mask"] = df.lookup(df.index, "mask_" + df["label"])
+        df_mask = df.lookup(df.index, "mask_" + df["label"])
 
-        exp_mask = np.array(
+        exp_mask = Series(
             [df.loc[r, c] for r, c in zip(df.index, "mask_" + df["label"])]
         )
 
-        tm.assert_series_equal(df["mask"], Series(exp_mask, name="mask"))
-        assert df["mask"].dtype == np.bool_
+        tm.assert_series_equal(
+            df_mask, Series(exp_mask, name="mask"), check_index=False, check_names=False
+        )
+        assert df_mask.dtype == np.bool_
 
     def test_lookup_raises(self, float_frame):
         with pytest.raises(KeyError, match="'One or more row labels was not found'"):