Skip to content

Commit f03ff7c

Browse files
committed
To_csv works, need to implement read_csv
1 parent 4cf2399 commit f03ff7c

File tree

5 files changed

+60
-27
lines changed

5 files changed

+60
-27
lines changed

Diff for: pandas/core/generic.py

+1
Original file line number | Diff line number | Diff line change
@@ -3939,6 +3939,7 @@ def to_csv(
39393939
doublequote=doublequote,
39403940
escapechar=escapechar,
39413941
storage_options=storage_options,
3942+
preserve_complex=preserve_complex,
39423943
)
39433944

39443945
# ----------------------------------------------------------------------

Diff for: pandas/io/formats/csvs.py

+11-12
Original file line number | Diff line number | Diff line change
@@ -100,24 +100,23 @@ def __init__(
100100
self.cols = self._initialize_columns(cols)
101101
self.chunksize = self._initialize_chunksize(chunksize)
102102

103+
print("output preserve var: ", self.preserve_complex)
103104
if self.preserve_complex:
105+
print("here1")
104106
import json
105107
import numpy as np
106108

107109
for col in self.obj.columns:
108110
if self.obj[col].dtype == "O":
109-
try:
110-
first_val = self.obj[col].iloc[0]
111-
if isinstance(first_val, (np.ndarray, list)):
112-
self.obj[col] = self.obj[col].apply(
113-
lambda x: json.dumps(x.tolist())
114-
if isinstance(x, np.ndarray)
115-
else json.dumps(x)
116-
if isinstance(x, list)
117-
else x
118-
)
119-
except Exception:
120-
continue
111+
first_val = self.obj[col].iloc[0]
112+
if isinstance(first_val, (np.ndarray, list)):
113+
self.obj[col] = self.obj[col].apply(
114+
lambda x: json.dumps(x.tolist())
115+
if isinstance(x, np.ndarray)
116+
else json.dumps(x) if isinstance(x, list)
117+
else x
118+
)
119+
121120

122121

123122
@property

Diff for: pandas/io/formats/format.py

+1
Original file line number | Diff line number | Diff line change
@@ -1000,6 +1000,7 @@ def to_csv(
10001000
doublequote=doublequote,
10011001
escapechar=escapechar,
10021002
storage_options=storage_options,
1003+
preserve_complex=preserve_complex,
10031004
formatter=self.fmt,
10041005
)
10051006
csv_formatter.save()

Diff for: scripts/tests/test_csv.py

+45-13
Original file line number | Diff line number | Diff line change
@@ -1,28 +1,60 @@
1+
# import pandas as pd
2+
# import numpy as np
3+
4+
# # Create a DataFrame with NumPy arrays
5+
# df = pd.DataFrame({
6+
# 'id': [1, 2],
7+
# 'embedding': [np.array([0.1, 0.2, 0.3]), np.array([0.4, 0.5, 0.6])]
8+
# })
9+
10+
# # Save to CSV
11+
# csv_file = "test_numpy_array.csv"
12+
# df.to_csv(csv_file, index=False, preserve_complex=True)
13+
# print(f"Saved CSV:\n{open(csv_file).read()}")
14+
15+
# # Read back the CSV
16+
# df_loaded = pd.read_csv(csv_file)
17+
18+
# # Print results
19+
# print("\nLoaded DataFrame:")
20+
# print(df_loaded)
21+
22+
# # ✅ **Make the test fail by checking if we correctly load NumPy arrays**
23+
# try:
24+
# assert isinstance(df_loaded["embedding"][0], np.ndarray), "Test Failed: Embeddings were not preserved as NumPy arrays!"
25+
# print("\nTest Passed: Embeddings were correctly preserved as NumPy arrays")
26+
# except AssertionError as e:
27+
# print("\nTest Failed: Pandas does not preserve NumPy arrays in CSV, needs improvement!")
28+
# raise e
29+
130
import pandas as pd
31+
print(pd.__file__)
32+
print(pd.__version__)
33+
234
import numpy as np
35+
import os
336

437
# Create a DataFrame with NumPy arrays
538
df = pd.DataFrame({
639
'id': [1, 2],
740
'embedding': [np.array([0.1, 0.2, 0.3]), np.array([0.4, 0.5, 0.6])]
841
})
942

10-
# Save to CSV
43+
# Save to CSV (where your custom preserve_complex logic resides)
1144
csv_file = "test_numpy_array.csv"
1245
df.to_csv(csv_file, index=False, preserve_complex=True)
13-
print(f"Saved CSV:\n{open(csv_file).read()}")
1446

15-
# Read back the CSV
16-
df_loaded = pd.read_csv(csv_file)
47+
# Read back the raw CSV content (as text only)
48+
with open(csv_file, "r") as f:
49+
csv_content = f.read()
1750

18-
# Print results
19-
print("\nLoaded DataFrame:")
20-
print(df_loaded)
51+
print(f"Saved CSV:\n{csv_content}")
2152

22-
# ✅ **Make the test fail by checking if we correctly load NumPy arrays**
53+
# Simple test: check that our JSON-ified arrays are present in the CSV text
2354
try:
24-
assert isinstance(df_loaded["embedding"][0], np.ndarray), "Test Failed: Embeddings were not preserved as NumPy arrays!"
25-
print("\nTest Passed: Embeddings were correctly preserved as NumPy arrays")
26-
except AssertionError as e:
27-
print("\nTest Failed: Pandas does not preserve NumPy arrays in CSV, needs improvement!")
28-
raise e
55+
assert "[0.1, 0.2, 0.3]" in csv_content
56+
assert "[0.4, 0.5, 0.6]" in csv_content
57+
print("\nTest Passed: The CSV output includes JSON-serialized arrays for 'embedding'.")
58+
except AssertionError:
59+
print("\nTest Failed: The CSV does not appear to have JSON-serialized arrays as expected!")
60+
raise

Diff for: test_numpy_array.csv

+2-2
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,3 @@
11
id,embedding
2-
1,[0.1 0.2 0.3]
3-
2,[0.4 0.5 0.6]
2+
1,"[0.1, 0.2, 0.3]"
3+
2,"[0.4, 0.5, 0.6]"

0 commit comments

Comments
 (0)