#UNIT_TEST

suryasandeepboda · suryasandeepboda · commit 9fb7418636c7 · 2025-03-13T18:14:05.000+05:30
Prompt Category: Unit Testing Prompt: extract_data.py Create unit test cases for the file extract_data.py. Also implement Pylint for this file such that code is optimised, code smells are detected. User Observation: $ python3 -m pytest test_extract_data.py -v ================================================ test session starts ================================================ platform darwin -- Python 3.9.6, pytest-8.3.5, pluggy-1.5.0 -- /Library/Developer/CommandLineTools/usr/bin/python3 cachedir: .pytest_cache rootdir: /Users/surya.sandeep.boda/Desktop/Marscode Zero to One 3 plugins: cov-6.0.0 collected 3 items test_extract_data.py::TestGoogleSheetExtraction::test_api_error PASSED [ 33%] test_extract_data.py::TestGoogleSheetExtraction::test_empty_sheet PASSED [ 66%] test_extract_data.py::TestGoogleSheetExtraction::test_successful_data_extraction PASSED [100%] ================================================= warnings summary ================================================== test_extract_data.py:3 /Users/surya.sandeep.boda/Desktop/Marscode Zero to One 3/test_extract_data.py:3: DeprecationWarning: Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0), (to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries) but was not found to be installed on your system. 
If this would cause problems for you, please provide us feedback at pandas-dev/pandas#54466 import pandas as pd -- Docs: https://docs.pytest.org/en/stable/how-to/capture-warnings.html =========================================== 3 passed, 1 warning in 0.39s ============================================ $ python3 -m pytest test_extract_data.py -v ================================================ test session starts ================================================ platform darwin -- Python 3.9.6, pytest-8.3.5, pluggy-1.5.0 -- /Library/Developer/CommandLineTools/usr/bin/python3 cachedir: .pytest_cache rootdir: /Users/surya.sandeep.boda/Desktop/Marscode Zero to One 3 plugins: cov-6.0.0 collected 3 items test_extract_data.py::TestGoogleSheetExtraction::test_api_error PASSED [ 33%] test_extract_data.py::TestGoogleSheetExtraction::test_empty_sheet PASSED [ 66%] test_extract_data.py::TestGoogleSheetExtraction::test_successful_data_extraction PASSED [100%] ================================================= warnings summary ================================================== test_extract_data.py:3 /Users/surya.sandeep.boda/Desktop/Marscode Zero to One 3/test_extract_data.py:3: DeprecationWarning: Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0), (to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries) but was not found to be installed on your system. 
If this would cause problems for you, please provide us feedback at pandas-dev/pandas#54466 import pandas as pd -- Docs: https://docs.pytest.org/en/stable/how-to/capture-warnings.html =========================================== 3 passed, 1 warning in 0.39s ============================================ $ python -m pytest test_extract_data.py --cov=extract_data zsh: command not found: python $ python3 -m pytest test_extract_data.py --cov=extract_data ================================================ test session starts ================================================ platform darwin -- Python 3.9.6, pytest-8.3.5, pluggy-1.5.0 rootdir: /Users/surya.sandeep.boda/Desktop/Marscode Zero to One 3 plugins: cov-6.0.0 collected 3 items test_extract_data.py ... [100%] ================================================= warnings summary ================================================== test_extract_data.py:3 /Users/surya.sandeep.boda/Desktop/Marscode Zero to One 3/test_extract_data.py:3: DeprecationWarning: Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0), (to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries) but was not found to be installed on your system. If this would cause problems for you, please provide us feedback at pandas-dev/pandas#54466 import pandas as pd -- Docs: https://docs.pytest.org/en/stable/how-to/capture-warnings.html ---------- coverage: platform darwin, python 3.9.6-final-0 ----------- Name Stmts Miss Cover ------------------------------------- extract_data.py 44 6 86% ------------------------------------- TOTAL 44 6 86% =========================================== 3 passed, 1 warning in 0.45s ============================================ Response ID: cdc11218-5355-4cb3-ab05-3e5de03e1ced
diff --git a/.gitignore b/.gitignore
@@ -1 +1,2 @@
-credentials.json
+credentials.json
+app.log
diff --git a/.pylintrc b/.pylintrc
@@ -0,0 +1,17 @@
+[MASTER]
+disable=
+    C0111, # missing-docstring
+    C0103, # invalid-name
+    W0621, # redefined-outer-name
+
+[FORMAT]
+max-line-length=100
+
+[MESSAGES CONTROL]
+disable=
+    logging-fstring-interpolation,
+    broad-except
+
+[REPORTS]
+output-format=text
+reports=yes
diff --git a/extract_data.py b/extract_data.py
@@ -1,3 +1,7 @@
+"""
+Module for extracting data from Google Sheets using Google Sheets API.
+"""
+
 from google.oauth2.service_account import Credentials
 from googleapiclient.discovery import build
 import pandas as pd
@@ -12,44 +16,50 @@
         logging.StreamHandler()
     ]
 )
-logger = logging.getLogger(__name__)
+LOGGER = logging.getLogger(__name__)
 
 def get_google_sheet_data():
+    """
+    Retrieves and processes data from a Google Sheet.
+    
+    Returns:
+        pandas.DataFrame: Processed data from the sheet, or None if an error occurs
+    """
     try:
         # Define the scope and credentials
-        SCOPES = ['https://www.googleapis.com/auth/spreadsheets.readonly']
-        creds = Credentials.from_service_account_file('credentials.json', scopes=SCOPES)
-        logger.info("Successfully loaded credentials")
+        scopes = ['https://www.googleapis.com/auth/spreadsheets.readonly']
+        creds = Credentials.from_service_account_file('credentials.json', scopes=scopes)
+        LOGGER.info("Successfully loaded credentials")
 
         # Create the service
         service = build('sheets', 'v4', credentials=creds)
-        logger.info("Successfully created Google Sheets service")
+        LOGGER.info("Successfully created Google Sheets service")
 
-        # Spreadsheet ID from the URL
-        SPREADSHEET_ID = '15FMeidgU2Dg7Q4JKPkLAdJmQ3IxWCWJXjhCo9UterCE'
-        RANGE_NAME = 'POD 5!A1:CE1000'
+        # Spreadsheet constants
+        spreadsheet_id = '15FMeidgU2Dg7Q4JKPkLAdJmQ3IxWCWJXjhCo9UterCE'
+        range_name = 'POD 5!A1:CE1000'
 
         # Call the Sheets API
         sheet = service.spreadsheets()
         result = sheet.values().get(
-            spreadsheetId=SPREADSHEET_ID,
-            range=RANGE_NAME
+            spreadsheetId=spreadsheet_id,
+            range=range_name
         ).execute()
 
         values = result.get('values', [])
         
         if not values:
-            logger.error('No data found in the sheet')
+            LOGGER.error('No data found in the sheet')
             return None
 
         # Convert to DataFrame
-        df = pd.DataFrame(values[1:], columns=values[0])
-        logger.info(f"Retrieved {len(df)} rows of data")
+        df_data = pd.DataFrame(values[1:], columns=values[0])
+        LOGGER.info("Retrieved %d rows of data", len(df_data))
 
         # Log available columns for debugging
-        logger.debug(f"Available columns in sheet: {list(df.columns)}")
+        LOGGER.debug("Available columns in sheet: %s", list(df_data.columns))
 
-        # Map the required columns to actual column names in the sheet
+        # Column mapping configuration
         column_mapping = {
             'Email Address': 'Email address',
             'Tool Used': 'Tool being used',
@@ -63,30 +73,30 @@ def get_google_sheet_data():
             'POD': 'Pod'
         }
 
-        # Select required columns with flexible naming
+        # Process columns
         required_columns = []
         for sheet_col, mapped_col in column_mapping.items():
-            if sheet_col in df.columns:
-                df[mapped_col] = df[sheet_col]
+            if sheet_col in df_data.columns:
+                df_data[mapped_col] = df_data[sheet_col]
                 required_columns.append(mapped_col)
             else:
-                logger.warning(f"Column '{sheet_col}' not found in sheet")
+                LOGGER.warning("Column '%s' not found in sheet", sheet_col)
 
-        # Filter only required columns
-        filtered_df = df[required_columns]
-        logger.info("Successfully filtered required columns")
+        # Filter columns
+        filtered_df = df_data[required_columns]
+        LOGGER.info("Successfully filtered required columns")
         
         return filtered_df
 
-    except Exception as e:
-        logger.error(f"An error occurred: {str(e)}", exc_info=True)
+    except Exception as error:
+        LOGGER.error("An error occurred: %s", str(error), exc_info=True)
         return None
 
 if __name__ == "__main__":
-    data = get_google_sheet_data()
-    if data is not None:
-        logger.info("Data retrieval successful")
+    result_data = get_google_sheet_data()
+    if result_data is not None:
+        LOGGER.info("Data retrieval successful")
         print("\nFirst 5 rows of retrieved data:")
-        print(data.head())
+        print(result_data.head())
     else:
-        logger.error("Failed to retrieve data")
+        LOGGER.error("Failed to retrieve data")
diff --git a/test_extract_data.py b/test_extract_data.py
@@ -0,0 +1,64 @@
+import unittest
+from unittest.mock import patch, MagicMock
+import pandas as pd
+from extract_data import get_google_sheet_data
+
+class TestGoogleSheetExtraction(unittest.TestCase):
+    """Test cases for Google Sheet data extraction functionality"""
+
+    @patch('extract_data.Credentials')
+    @patch('extract_data.build')
+    def test_successful_data_extraction(self, mock_build, mock_credentials):
+        # Mock data
+        mock_values = [
+            ['Email Address', 'Tool Used', 'Feature', 'Context Awareness Rating'],
+            ['test@email.com', 'Tool1', 'Feature1', '4'],
+        ]
+        
+        # Setup mock service
+        mock_service = MagicMock()
+        mock_build.return_value = mock_service
+        mock_service.spreadsheets().values().get().execute.return_value = {
+            'values': mock_values
+        }
+
+        # Execute function
+        result = get_google_sheet_data()
+
+        # Assertions
+        self.assertIsNotNone(result)
+        self.assertIsInstance(result, pd.DataFrame)
+        self.assertTrue(len(result) > 0)
+
+    @patch('extract_data.Credentials')
+    @patch('extract_data.build')
+    def test_empty_sheet(self, mock_build, mock_credentials):
+        # Mock empty response
+        mock_service = MagicMock()
+        mock_build.return_value = mock_service
+        mock_service.spreadsheets().values().get().execute.return_value = {
+            'values': []
+        }
+
+        # Execute function
+        result = get_google_sheet_data()
+
+        # Assertions
+        self.assertIsNone(result)
+
+    @patch('extract_data.Credentials')
+    @patch('extract_data.build')
+    def test_api_error(self, mock_build, mock_credentials):
+        # Mock API error
+        mock_service = MagicMock()
+        mock_build.return_value = mock_service
+        mock_service.spreadsheets().values().get().execute.side_effect = Exception("API Error")
+
+        # Execute function
+        result = get_google_sheet_data()
+
+        # Assertions
+        self.assertIsNone(result)
+
+if __name__ == '__main__':
+    unittest.main()

| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
| | | `@@ -1 +1,2 @@` |
| `1` | | `-credentials.json` |
| | `1` | `+credentials.json` |
| | `2` | `+app.log` |