Return column as string if not parsable as numeric

matt-saenz · matt-saenz · commit ac69b50605aa · 2023-10-06T11:42:30.000-05:00
diff --git a/src/pycps/get_data.py b/src/pycps/get_data.py
@@ -142,11 +142,24 @@ def _get_data(url: str) -> pd.DataFrame:
 def _build_df(raw_data: list[list[str]]) -> pd.DataFrame:
     """Build DataFrame out of parsed response content."""
 
-    col_names = [col_name.lower() for col_name in raw_data[0]]
-    cols = raw_data[1:]
+    column_names = [column_name.lower() for column_name in raw_data[0]]
+    rows = raw_data[1:]
 
-    df = pd.DataFrame(data=cols, columns=col_names)
-    df = df.apply(pd.to_numeric)
+    df = pd.DataFrame(data=rows, columns=column_names)
+
+    def _to_numeric_or_keep(column: pd.Series) -> pd.Series:
+        """Parse column as numeric, returning it unchanged on failure."""
+        # Catch parse failures explicitly rather than passing
+        # errors="ignore", which is deprecated as of pandas 2.2
+        try:
+            return pd.to_numeric(column)
+        except (ValueError, TypeError):
+            return column
+
+    # If a column fails to parse as numeric, it will remain a string
+    # Originally flagged in:
+    # https://github.com/matt-saenz/PyCPS/pull/3
+    df = df.apply(_to_numeric_or_keep)
 
     return df
 
diff --git a/tests/test_get_data.py b/tests/test_get_data.py
@@ -58,20 +58,45 @@ def test_make_url() -> None:
     assert actual == expected
 
 
-def test_build_df() -> None:
-    raw_data = [
-        ["SOME_COLUMN", "ANOTHER_COLUMN"],
-        ["9", "7.3"],
-        ["5", "1.4"],
-    ]
-
-    expected_df = pd.DataFrame(
-        {
-            "some_column": [9, 5],
-            "another_column": [7.3, 1.4],
-        }
-    )
-
+@pytest.mark.parametrize(
+    "raw_data,expected_df",
+    [
+        (
+            # All columns parsable as numeric
+            [
+                ["SOME_COLUMN", "ANOTHER_COLUMN"],
+                ["9", "7.3"],
+                ["5", "1.4"],
+            ],
+            pd.DataFrame(
+                {
+                    "some_column": [9, 5],
+                    "another_column": [7.3, 1.4],
+                }
+            ),
+        ),
+        (
+            # One column not parsable as numeric
+            # Should remain a string
+            [
+                ["SOME_COLUMN", "ANOTHER_COLUMN", "STRING_COLUMN"],
+                ["9", "7.3", "a"],
+                ["5", "1.4", "b"],
+            ],
+            pd.DataFrame(
+                {
+                    "some_column": [9, 5],
+                    "another_column": [7.3, 1.4],
+                    "string_column": ["a", "b"],
+                }
+            ),
+        ),
+    ],
+)
+def test_build_df(
+    raw_data: list[list[str]],
+    expected_df: pd.DataFrame,
+) -> None:
     actual_df = get_data._build_df(raw_data)
 
     pd.testing.assert_frame_equal(actual_df, expected_df)