Skip to content

Commit 9fb7418

Browse files
#UNIT_TEST
Prompt Category: Unit Testing
Prompt: extract_data.py — Create unit test cases for the file extract_data.py. Also implement Pylint for this file so that the code is optimised and code smells are detected.
User Observation:
$ python3 -m pytest test_extract_data.py -v
================================================ test session starts ================================================
platform darwin -- Python 3.9.6, pytest-8.3.5, pluggy-1.5.0 -- /Library/Developer/CommandLineTools/usr/bin/python3
cachedir: .pytest_cache
rootdir: /Users/surya.sandeep.boda/Desktop/Marscode Zero to One 3
plugins: cov-6.0.0
collected 3 items
test_extract_data.py::TestGoogleSheetExtraction::test_api_error PASSED [ 33%]
test_extract_data.py::TestGoogleSheetExtraction::test_empty_sheet PASSED [ 66%]
test_extract_data.py::TestGoogleSheetExtraction::test_successful_data_extraction PASSED [100%]
================================================= warnings summary ==================================================
test_extract_data.py:3
/Users/surya.sandeep.boda/Desktop/Marscode Zero to One 3/test_extract_data.py:3: DeprecationWarning: Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0), (to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries) but was not found to be installed on your system.
If this would cause problems for you, please provide us feedback at pandas-dev/pandas#54466 import pandas as pd -- Docs: https://docs.pytest.org/en/stable/how-to/capture-warnings.html =========================================== 3 passed, 1 warning in 0.39s ============================================ $ python3 -m pytest test_extract_data.py -v ================================================ test session starts ================================================ platform darwin -- Python 3.9.6, pytest-8.3.5, pluggy-1.5.0 -- /Library/Developer/CommandLineTools/usr/bin/python3 cachedir: .pytest_cache rootdir: /Users/surya.sandeep.boda/Desktop/Marscode Zero to One 3 plugins: cov-6.0.0 collected 3 items test_extract_data.py::TestGoogleSheetExtraction::test_api_error PASSED [ 33%] test_extract_data.py::TestGoogleSheetExtraction::test_empty_sheet PASSED [ 66%] test_extract_data.py::TestGoogleSheetExtraction::test_successful_data_extraction PASSED [100%] ================================================= warnings summary ================================================== test_extract_data.py:3 /Users/surya.sandeep.boda/Desktop/Marscode Zero to One 3/test_extract_data.py:3: DeprecationWarning: Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0), (to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries) but was not found to be installed on your system. 
If this would cause problems for you, please provide us feedback at pandas-dev/pandas#54466 import pandas as pd -- Docs: https://docs.pytest.org/en/stable/how-to/capture-warnings.html =========================================== 3 passed, 1 warning in 0.39s ============================================ $ python -m pytest test_extract_data.py --cov=extract_data zsh: command not found: python $ python3 -m pytest test_extract_data.py --cov=extract_data ================================================ test session starts ================================================ platform darwin -- Python 3.9.6, pytest-8.3.5, pluggy-1.5.0 rootdir: /Users/surya.sandeep.boda/Desktop/Marscode Zero to One 3 plugins: cov-6.0.0 collected 3 items test_extract_data.py ... [100%] ================================================= warnings summary ================================================== test_extract_data.py:3 /Users/surya.sandeep.boda/Desktop/Marscode Zero to One 3/test_extract_data.py:3: DeprecationWarning: Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0), (to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries) but was not found to be installed on your system. If this would cause problems for you, please provide us feedback at pandas-dev/pandas#54466 import pandas as pd -- Docs: https://docs.pytest.org/en/stable/how-to/capture-warnings.html ---------- coverage: platform darwin, python 3.9.6-final-0 ----------- Name Stmts Miss Cover ------------------------------------- extract_data.py 44 6 86% ------------------------------------- TOTAL 44 6 86% =========================================== 3 passed, 1 warning in 0.45s ============================================ Response ID: cdc11218-5355-4cb3-ab05-3e5de03e1ced
1 parent a96a844 commit 9fb7418

File tree

4 files changed

+122
-30
lines changed

4 files changed

+122
-30
lines changed

.gitignore

+2-1
Original file line numberDiff line numberDiff line change
@@ -1 +1,2 @@
1-
credentials.json
1+
credentials.json
2+
app.log

.pylintrc

+17
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
[MASTER]
2+
disable=
3+
C0111, # missing-docstring
4+
C0103, # invalid-name
5+
W0621, # redefined-outer-name
6+
7+
[FORMAT]
8+
max-line-length=100
9+
10+
[MESSAGES CONTROL]
11+
disable=
12+
logging-fstring-interpolation,
13+
broad-except
14+
15+
[REPORTS]
16+
output-format=text
17+
reports=yes

extract_data.py

+39-29
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
"""
2+
Module for extracting data from Google Sheets using Google Sheets API.
3+
"""
4+
15
from google.oauth2.service_account import Credentials
26
from googleapiclient.discovery import build
37
import pandas as pd
@@ -12,44 +16,50 @@
1216
logging.StreamHandler()
1317
]
1418
)
15-
logger = logging.getLogger(__name__)
19+
LOGGER = logging.getLogger(__name__)
1620

1721
def get_google_sheet_data():
22+
"""
23+
Retrieves and processes data from a Google Sheet.
24+
25+
Returns:
26+
pandas.DataFrame: Processed data from the sheet, or None if an error occurs
27+
"""
1828
try:
1929
# Define the scope and credentials
20-
SCOPES = ['https://www.googleapis.com/auth/spreadsheets.readonly']
21-
creds = Credentials.from_service_account_file('credentials.json', scopes=SCOPES)
22-
logger.info("Successfully loaded credentials")
30+
scopes = ['https://www.googleapis.com/auth/spreadsheets.readonly']
31+
creds = Credentials.from_service_account_file('credentials.json', scopes=scopes)
32+
LOGGER.info("Successfully loaded credentials")
2333

2434
# Create the service
2535
service = build('sheets', 'v4', credentials=creds)
26-
logger.info("Successfully created Google Sheets service")
36+
LOGGER.info("Successfully created Google Sheets service")
2737

28-
# Spreadsheet ID from the URL
29-
SPREADSHEET_ID = '15FMeidgU2Dg7Q4JKPkLAdJmQ3IxWCWJXjhCo9UterCE'
30-
RANGE_NAME = 'POD 5!A1:CE1000'
38+
# Spreadsheet constants
39+
spreadsheet_id = '15FMeidgU2Dg7Q4JKPkLAdJmQ3IxWCWJXjhCo9UterCE'
40+
range_name = 'POD 5!A1:CE1000'
3141

3242
# Call the Sheets API
3343
sheet = service.spreadsheets()
3444
result = sheet.values().get(
35-
spreadsheetId=SPREADSHEET_ID,
36-
range=RANGE_NAME
45+
spreadsheetId=spreadsheet_id,
46+
range=range_name
3747
).execute()
3848

3949
values = result.get('values', [])
4050

4151
if not values:
42-
logger.error('No data found in the sheet')
52+
LOGGER.error('No data found in the sheet')
4353
return None
4454

4555
# Convert to DataFrame
46-
df = pd.DataFrame(values[1:], columns=values[0])
47-
logger.info(f"Retrieved {len(df)} rows of data")
56+
df_data = pd.DataFrame(values[1:], columns=values[0])
57+
LOGGER.info("Retrieved %d rows of data", len(df_data))
4858

4959
# Log available columns for debugging
50-
logger.debug(f"Available columns in sheet: {list(df.columns)}")
60+
LOGGER.debug("Available columns in sheet: %s", list(df_data.columns))
5161

52-
# Map the required columns to actual column names in the sheet
62+
# Column mapping configuration
5363
column_mapping = {
5464
'Email Address': 'Email address',
5565
'Tool Used': 'Tool being used',
@@ -63,30 +73,30 @@ def get_google_sheet_data():
6373
'POD': 'Pod'
6474
}
6575

66-
# Select required columns with flexible naming
76+
# Process columns
6777
required_columns = []
6878
for sheet_col, mapped_col in column_mapping.items():
69-
if sheet_col in df.columns:
70-
df[mapped_col] = df[sheet_col]
79+
if sheet_col in df_data.columns:
80+
df_data[mapped_col] = df_data[sheet_col]
7181
required_columns.append(mapped_col)
7282
else:
73-
logger.warning(f"Column '{sheet_col}' not found in sheet")
83+
LOGGER.warning("Column '%s' not found in sheet", sheet_col)
7484

75-
# Filter only required columns
76-
filtered_df = df[required_columns]
77-
logger.info("Successfully filtered required columns")
85+
# Filter columns
86+
filtered_df = df_data[required_columns]
87+
LOGGER.info("Successfully filtered required columns")
7888

7989
return filtered_df
8090

81-
except Exception as e:
82-
logger.error(f"An error occurred: {str(e)}", exc_info=True)
91+
except Exception as error:
92+
LOGGER.error("An error occurred: %s", str(error), exc_info=True)
8393
return None
8494

8595
if __name__ == "__main__":
86-
data = get_google_sheet_data()
87-
if data is not None:
88-
logger.info("Data retrieval successful")
96+
result_data = get_google_sheet_data()
97+
if result_data is not None:
98+
LOGGER.info("Data retrieval successful")
8999
print("\nFirst 5 rows of retrieved data:")
90-
print(data.head())
100+
print(result_data.head())
91101
else:
92-
logger.error("Failed to retrieve data")
102+
LOGGER.error("Failed to retrieve data")

test_extract_data.py

+64
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
import unittest
2+
from unittest.mock import patch, MagicMock
3+
import pandas as pd
4+
from extract_data import get_google_sheet_data
5+
6+
class TestGoogleSheetExtraction(unittest.TestCase):
7+
"""Test cases for Google Sheet data extraction functionality"""
8+
9+
@patch('extract_data.Credentials')
10+
@patch('extract_data.build')
11+
def test_successful_data_extraction(self, mock_build, mock_credentials):
12+
# Mock data
13+
mock_values = [
14+
['Email Address', 'Tool Used', 'Feature', 'Context Awareness Rating'],
15+
['test@example.com', 'Tool1', 'Feature1', '4'],
16+
]
17+
18+
# Setup mock service
19+
mock_service = MagicMock()
20+
mock_build.return_value = mock_service
21+
mock_service.spreadsheets().values().get().execute.return_value = {
22+
'values': mock_values
23+
}
24+
25+
# Execute function
26+
result = get_google_sheet_data()
27+
28+
# Assertions
29+
self.assertIsNotNone(result)
30+
self.assertIsInstance(result, pd.DataFrame)
31+
self.assertTrue(len(result) > 0)
32+
33+
@patch('extract_data.Credentials')
34+
@patch('extract_data.build')
35+
def test_empty_sheet(self, mock_build, mock_credentials):
36+
# Mock empty response
37+
mock_service = MagicMock()
38+
mock_build.return_value = mock_service
39+
mock_service.spreadsheets().values().get().execute.return_value = {
40+
'values': []
41+
}
42+
43+
# Execute function
44+
result = get_google_sheet_data()
45+
46+
# Assertions
47+
self.assertIsNone(result)
48+
49+
@patch('extract_data.Credentials')
50+
@patch('extract_data.build')
51+
def test_api_error(self, mock_build, mock_credentials):
52+
# Mock API error
53+
mock_service = MagicMock()
54+
mock_build.return_value = mock_service
55+
mock_service.spreadsheets().values().get().execute.side_effect = Exception("API Error")
56+
57+
# Execute function
58+
result = get_google_sheet_data()
59+
60+
# Assertions
61+
self.assertIsNone(result)
62+
63+
if __name__ == '__main__':
64+
unittest.main()

0 commit comments

Comments
 (0)