|
3 | 3 |
|
4 | 4 | from pandas import (
|
5 | 5 | DataFrame,
|
| 6 | + Index, |
6 | 7 | Series,
|
7 | 8 | concat,
|
8 | 9 | merge,
|
@@ -310,3 +311,86 @@ def test_merge_copy_keyword(using_copy_on_write, copy):
|
310 | 311 | else:
|
311 | 312 | assert not np.shares_memory(get_array(df, "a"), get_array(result, "a"))
|
312 | 313 | assert not np.shares_memory(get_array(df2, "b"), get_array(result, "b"))
|
| 314 | + |
| 315 | + |
def test_join_on_key(using_copy_on_write):
    """Check buffer sharing between ``join(..., on="key")`` inputs and result.

    Two single-column frames are built on separate deep copies of the same
    ``"key"``-named index and joined. When ``using_copy_on_write`` is truthy
    the result is expected to share the "a"/"b" column buffers with the
    inputs and the index buffer with the calling frame only; each shared
    column must stop sharing once the result is written to in that column.
    Otherwise no column buffer may be shared. In both modes the inputs must
    compare equal to the snapshots taken before the join.
    """
    key_index = Index(["a", "b", "c"], name="key")

    left = DataFrame({"a": [1, 2, 3]}, index=key_index.copy(deep=True))
    right = DataFrame({"b": [4, 5, 6]}, index=key_index.copy(deep=True))

    # Snapshots used at the end to prove the inputs were never modified.
    left_orig = left.copy()
    right_orig = right.copy()

    result = left.join(right, on="key")

    def shares_column(parent, column):
        # Does ``result[column]`` overlap in memory with ``parent[column]``?
        return np.shares_memory(get_array(result, column), get_array(parent, column))

    def shares_index(parent):
        # Does the result's index overlap in memory with ``parent``'s index?
        return np.shares_memory(get_array(result.index), get_array(parent.index))

    if not using_copy_on_write:
        assert not shares_column(left, "a")
        assert not shares_column(right, "b")
    else:
        assert shares_column(left, "a")
        assert shares_column(right, "b")
        assert shares_index(left)
        assert not shares_index(right)

    # Writing into column "a" must detach only that column.
    result.iloc[0, 0] = 0
    if using_copy_on_write:
        assert not shares_column(left, "a")
        assert shares_column(right, "b")

    # Writing into column "b" must detach the remaining shared column.
    result.iloc[0, 1] = 0
    if using_copy_on_write:
        assert not shares_column(right, "b")

    tm.assert_frame_equal(left, left_orig)
    tm.assert_frame_equal(right, right_orig)
| 347 | + |
| 348 | + |
def test_join_multiple_dataframes_on_key(using_copy_on_write):
    """Check buffer sharing when joining a frame with a list of frames.

    One frame ("a") is joined with a list of two frames ("b" and "c"), all
    built on separate deep copies of the same ``"key"``-named index. When
    ``using_copy_on_write`` is truthy the result is expected to share each
    column buffer with the frame it came from and the index buffer with the
    calling frame only; writing to the result column by column must detach
    exactly the written column while the others stay shared. Otherwise no
    column buffer may be shared. In both modes every input must compare equal
    to the snapshot taken before the join.
    """
    key_index = Index(["a", "b", "c"], name="key")

    left = DataFrame({"a": [1, 2, 3]}, index=key_index.copy(deep=True))
    others = [
        DataFrame({"b": [4, 5, 6]}, index=key_index.copy(deep=True)),
        DataFrame({"c": [7, 8, 9]}, index=key_index.copy(deep=True)),
    ]
    other_b, other_c = others

    # Snapshots used at the end to prove the inputs were never modified.
    left_orig = left.copy()
    others_orig = [frame.copy() for frame in others]

    result = left.join(others)

    def shares_column(parent, column):
        # Does ``result[column]`` overlap in memory with ``parent[column]``?
        return np.shares_memory(get_array(result, column), get_array(parent, column))

    def shares_index(parent):
        # Does the result's index overlap in memory with ``parent``'s index?
        return np.shares_memory(get_array(result.index), get_array(parent.index))

    if not using_copy_on_write:
        assert not shares_column(left, "a")
        assert not shares_column(other_b, "b")
        assert not shares_column(other_c, "c")
    else:
        assert shares_column(left, "a")
        assert shares_column(other_b, "b")
        assert shares_column(other_c, "c")
        assert shares_index(left)
        assert not shares_index(other_b)
        assert not shares_index(other_c)

    # Writing into column "a" must detach only that column.
    result.iloc[0, 0] = 0
    if using_copy_on_write:
        assert not shares_column(left, "a")
        assert shares_column(other_b, "b")
        assert shares_column(other_c, "c")

    # Writing into column "b" must leave "c" shared.
    result.iloc[0, 1] = 0
    if using_copy_on_write:
        assert not shares_column(other_b, "b")
        assert shares_column(other_c, "c")

    # Writing into column "c" detaches the last shared column.
    result.iloc[0, 2] = 0
    if using_copy_on_write:
        assert not shares_column(other_c, "c")

    tm.assert_frame_equal(left, left_orig)
    for frame, frame_orig in zip(others, others_orig):
        tm.assert_frame_equal(frame, frame_orig)
0 commit comments