Skip to content

Commit ac69b50

Browse files
committed
Return column as string if not parsable as numeric
1 parent: bf322ab; commit: ac69b50

File tree

2 files changed

+48
-18
lines changed

2 files changed

+48
-18
lines changed

src/pycps/get_data.py

+9 -4
Original file line number | Diff line number | Diff line change
@@ -142,11 +142,16 @@ def _get_data(url: str) -> pd.DataFrame:
142142
def _build_df(raw_data: list[list[str]]) -> pd.DataFrame:
143143
"""Build DataFrame out of parsed response content."""
144144

145-
col_names = [col_name.lower() for col_name in raw_data[0]]
146-
cols = raw_data[1:]
145+
column_names = [column_name.lower() for column_name in raw_data[0]]
146+
rows = raw_data[1:]
147147

148-
df = pd.DataFrame(data=cols, columns=col_names)
149-
df = df.apply(pd.to_numeric)
148+
df = pd.DataFrame(data=rows, columns=column_names)
149+
150+
# Set errors to "ignore" so that if column fails to parse as
151+
# numeric, it will remain a string
152+
# Originally flagged in:
153+
# https://github.com/matt-saenz/PyCPS/pull/3
154+
df = df.apply(pd.to_numeric, errors="ignore")
150155

151156
return df
152157

tests/test_get_data.py

+39 -14
Original file line number | Diff line number | Diff line change
@@ -58,20 +58,45 @@ def test_make_url() -> None:
5858
assert actual == expected
5959

6060

61-
def test_build_df() -> None:
62-
raw_data = [
63-
["SOME_COLUMN", "ANOTHER_COLUMN"],
64-
["9", "7.3"],
65-
["5", "1.4"],
66-
]
67-
68-
expected_df = pd.DataFrame(
69-
{
70-
"some_column": [9, 5],
71-
"another_column": [7.3, 1.4],
72-
}
73-
)
74-
61+
@pytest.mark.parametrize(
62+
"raw_data,expected_df",
63+
[
64+
(
65+
# All columns parsable as numeric
66+
[
67+
["SOME_COLUMN", "ANOTHER_COLUMN"],
68+
["9", "7.3"],
69+
["5", "1.4"],
70+
],
71+
pd.DataFrame(
72+
{
73+
"some_column": [9, 5],
74+
"another_column": [7.3, 1.4],
75+
}
76+
),
77+
),
78+
(
79+
# One column not parsable as numeric
80+
# Should remain a string
81+
[
82+
["SOME_COLUMN", "ANOTHER_COLUMN", "STRING_COLUMN"],
83+
["9", "7.3", "a"],
84+
["5", "1.4", "b"],
85+
],
86+
pd.DataFrame(
87+
{
88+
"some_column": [9, 5],
89+
"another_column": [7.3, 1.4],
90+
"string_column": ["a", "b"],
91+
}
92+
),
93+
),
94+
],
95+
)
96+
def test_build_df(
97+
raw_data: list[list[str]],
98+
expected_df: pd.DataFrame,
99+
) -> None:
75100
actual_df = get_data._build_df(raw_data)
76101

77102
pd.testing.assert_frame_equal(actual_df, expected_df)

0 commit comments

Comments
 (0)