|
1 | 1 | """Tests for the SegTransactionStats class."""
|
2 | 2 |
|
| 3 | +import ibis |
3 | 4 | import numpy as np
|
4 | 5 | import pandas as pd
|
5 | 6 | import pytest
|
6 | 7 |
|
7 |
| -from pyretailscience.analysis.segmentation import HMLSegmentation, SegTransactionStats, ThresholdSegmentation |
| 8 | +from pyretailscience.analysis.segmentation import ( |
| 9 | + HMLSegmentation, |
| 10 | + RFMSegmentation, |
| 11 | + SegTransactionStats, |
| 12 | + ThresholdSegmentation, |
| 13 | +) |
8 | 14 | from pyretailscience.options import ColumnHelper, get_option
|
9 | 15 |
|
10 | 16 | cols = ColumnHelper()
|
@@ -545,3 +551,151 @@ def test_alternate_value_col(self, base_df):
|
545 | 551 | assert result_df.loc[2, "segment_name"] == "Light"
|
546 | 552 | assert result_df.loc[4, "segment_name"] == "Medium"
|
547 | 553 | assert result_df.loc[5, "segment_name"] == "Light"
|
| 554 | + |
| 555 | + |
class TestRFMSegmentation:
    """Tests for the RFMSegmentation class."""

    @pytest.fixture
    def base_df(self):
        """Return five customers with exactly one transaction each, on distinct dates."""
        return pd.DataFrame(
            {
                cols.customer_id: [1, 2, 3, 4, 5],
                cols.transaction_id: [101, 102, 103, 104, 105],
                cols.unit_spend: [100.0, 200.0, 150.0, 300.0, 250.0],
                cols.transaction_date: [
                    "2025-03-01",
                    "2025-02-15",
                    "2025-01-30",
                    "2025-03-10",
                    "2025-02-20",
                ],
            },
        )

    @pytest.fixture
    def expected_df(self):
        """Return the expected segmentation of ``base_df`` without ``recency_days``.

        ``recency_days`` is omitted because its value depends on the reference date;
        tests that pin a reference date add the column themselves.
        """
        return pd.DataFrame(
            {
                "customer_id": [1, 2, 3, 4, 5],
                "frequency": [1, 1, 1, 1, 1],
                "monetary": [100.0, 200.0, 150.0, 300.0, 250.0],
                "r_score": [1, 3, 4, 0, 2],
                "f_score": [0, 1, 2, 3, 4],
                "m_score": [0, 2, 1, 4, 3],
                "rfm_segment": [100, 312, 421, 34, 243],
                "fm_segment": [0, 12, 21, 34, 43],
            },
        ).set_index("customer_id")

    def test_correct_rfm_segmentation(self, base_df, expected_df):
        """Test that the RFM segmentation correctly calculates the RFM scores and segments."""
        current_date = "2025-03-17"
        rfm_segmentation = RFMSegmentation(df=base_df, current_date=current_date)
        result_df = rfm_segmentation.df

        # Days between each customer's transaction date and the pinned current_date.
        expected_df["recency_days"] = [16, 30, 46, 7, 25]
        # Align the integer dtype with the result so only the values are compared.
        expected_df["recency_days"] = expected_df["recency_days"].astype(result_df["recency_days"].dtype)

        pd.testing.assert_frame_equal(
            result_df.sort_index(),
            expected_df.sort_index(),
            check_like=True,
        )

    def test_handles_dataframe_with_missing_columns(self):
        """Test that the method raises an error when required columns are missing."""
        # The transaction date column is deliberately left out.
        base_df = pd.DataFrame(
            {
                cols.customer_id: [1, 2, 3],
                cols.unit_spend: [100.0, 200.0, 150.0],
                cols.transaction_id: [101, 102, 103],
            },
        )

        with pytest.raises(ValueError):
            RFMSegmentation(df=base_df, current_date="2025-03-17")

    def test_single_customer(self):
        """Test that the method correctly calculates RFM segmentation for a single customer."""
        df_single_customer = pd.DataFrame(
            {
                cols.customer_id: [1],
                cols.transaction_id: [101],
                cols.unit_spend: [200.0],
                cols.transaction_date: ["2025-03-01"],
            },
        )
        current_date = "2025-03-17"
        rfm_segmentation = RFMSegmentation(df=df_single_customer, current_date=current_date)
        result_df = rfm_segmentation.df

        assert result_df.loc[1, "rfm_segment"] == 0

    def test_multiple_transactions_per_customer(self):
        """Test that the method correctly handles multiple transactions for the same customer."""
        df_multiple_transactions = pd.DataFrame(
            {
                cols.customer_id: [1, 1, 1, 1, 1],
                cols.transaction_id: [101, 102, 103, 104, 105],
                cols.unit_spend: [120.0, 250.0, 180.0, 300.0, 220.0],
                cols.transaction_date: [
                    "2025-03-01",
                    "2025-02-15",
                    "2025-01-10",
                    "2025-03-10",
                    "2025-02-25",
                ],
            },
        )
        current_date = "2025-03-17"
        rfm_segmentation = RFMSegmentation(df=df_multiple_transactions, current_date=current_date)
        result_df = rfm_segmentation.df

        assert result_df.loc[1, "rfm_segment"] == 0

    def test_calculates_rfm_correctly_for_all_customers(self, base_df):
        """Test that the result has one row per customer and exposes the rfm_segment column."""
        current_date = "2025-03-17"
        expected_customer_count = 5
        rfm_segmentation = RFMSegmentation(df=base_df, current_date=current_date)
        result_df = rfm_segmentation.df

        assert len(result_df) == expected_customer_count
        assert "rfm_segment" in result_df.columns

    def test_rfm_segmentation_with_no_date(self, base_df, expected_df):
        """Test that omitting current_date still yields the expected scores and segments.

        Without an explicit current_date the absolute ``recency_days`` values change with
        the day the test runs, so they are not compared directly. Instead the test checks
        the date-independent columns and the recency gaps between customers, which are
        fixed by the transaction dates alone.
        """
        rfm_segmentation = RFMSegmentation(df=base_df)
        result_df = rfm_segmentation.df.sort_index()

        # Scores and segments depend only on the ordering of customers, not on the run date.
        pd.testing.assert_frame_equal(
            result_df.drop(columns="recency_days"),
            expected_df.sort_index(),
            check_like=True,
        )

        # Offsets from the most recent customer (id 4, 2025-03-10), in customer_id order.
        recency = result_df["recency_days"]
        assert (recency - recency.min()).tolist() == [9, 23, 39, 0, 18]

    def test_invalid_current_date_type(self, base_df):
        """Test that RFMSegmentation raises a TypeError when an invalid current_date is provided."""
        with pytest.raises(
            TypeError,
            match="current_date must be a string in 'YYYY-MM-DD' format, a datetime.date object, or None",
        ):
            RFMSegmentation(base_df, current_date=12345)

    def test_invalid_df_type(self):
        """Test that RFMSegmentation raises a TypeError when df is neither a DataFrame nor an Ibis Table."""
        invalid_df = "this is not a dataframe"

        with pytest.raises(TypeError, match="df must be either a pandas DataFrame or an Ibis Table"):
            RFMSegmentation(df=invalid_df, current_date="2025-03-17")

    def test_ibis_table_property(self, base_df):
        """Test that ibis_table property returns an Ibis Table."""
        segmentation = RFMSegmentation(df=base_df, current_date="2025-03-17")

        result = segmentation.ibis_table

        assert isinstance(result, ibis.Table), "Expected ibis.Table but got a different type"
0 commit comments