Skip to content

Commit e08a8ab

Browse files
Analysis of the Titanic dataset
1 parent 6c5c16d commit e08a8ab

File tree

3 files changed

+1254
-0
lines changed

3 files changed

+1254
-0
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,181 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "markdown",
5+
"id": "2299008a",
6+
"metadata": {},
7+
"source": [
8+
"# Analysis of the Titanic dataset"
9+
]
10+
},
11+
{
12+
"cell_type": "code",
13+
"execution_count": 1,
14+
"id": "b84ff62c",
15+
"metadata": {},
16+
"outputs": [
17+
{
18+
"ename": "FileNotFoundError",
19+
"evalue": "[Errno 2] No such file or directory: 'titanic_Data_Train1.csv'",
20+
"output_type": "error",
21+
"traceback": [
22+
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
23+
"\u001b[1;31mFileNotFoundError\u001b[0m Traceback (most recent call last)",
24+
"Cell \u001b[1;32mIn[1], line 7\u001b[0m\n\u001b[0;32m 4\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mmatplotlib\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mpyplot\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01mplt\u001b[39;00m\n\u001b[0;32m 6\u001b[0m \u001b[38;5;66;03m# Load the dataset\u001b[39;00m\n\u001b[1;32m----> 7\u001b[0m df \u001b[38;5;241m=\u001b[39m pd\u001b[38;5;241m.\u001b[39mread_csv(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtitanic_Data_Train1.csv\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m 9\u001b[0m \u001b[38;5;66;03m# Display the first few rows of the dataframe\u001b[39;00m\n\u001b[0;32m 10\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mFirst few rows of the dataset:\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n",
25+
"File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\pandas\\util\\_decorators.py:211\u001b[0m, in \u001b[0;36mdeprecate_kwarg.<locals>._deprecate_kwarg.<locals>.wrapper\u001b[1;34m(*args, **kwargs)\u001b[0m\n\u001b[0;32m 209\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m 210\u001b[0m kwargs[new_arg_name] \u001b[38;5;241m=\u001b[39m new_arg_value\n\u001b[1;32m--> 211\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m func(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n",
26+
"File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\pandas\\util\\_decorators.py:331\u001b[0m, in \u001b[0;36mdeprecate_nonkeyword_arguments.<locals>.decorate.<locals>.wrapper\u001b[1;34m(*args, **kwargs)\u001b[0m\n\u001b[0;32m 325\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(args) \u001b[38;5;241m>\u001b[39m num_allow_args:\n\u001b[0;32m 326\u001b[0m warnings\u001b[38;5;241m.\u001b[39mwarn(\n\u001b[0;32m 327\u001b[0m msg\u001b[38;5;241m.\u001b[39mformat(arguments\u001b[38;5;241m=\u001b[39m_format_argument_list(allow_args)),\n\u001b[0;32m 328\u001b[0m \u001b[38;5;167;01mFutureWarning\u001b[39;00m,\n\u001b[0;32m 329\u001b[0m stacklevel\u001b[38;5;241m=\u001b[39mfind_stack_level(),\n\u001b[0;32m 330\u001b[0m )\n\u001b[1;32m--> 331\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m func(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n",
27+
"File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\pandas\\io\\parsers\\readers.py:950\u001b[0m, in \u001b[0;36mread_csv\u001b[1;34m(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, encoding_errors, dialect, error_bad_lines, warn_bad_lines, on_bad_lines, delim_whitespace, low_memory, memory_map, float_precision, storage_options)\u001b[0m\n\u001b[0;32m 935\u001b[0m kwds_defaults \u001b[38;5;241m=\u001b[39m _refine_defaults_read(\n\u001b[0;32m 936\u001b[0m dialect,\n\u001b[0;32m 937\u001b[0m delimiter,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 946\u001b[0m defaults\u001b[38;5;241m=\u001b[39m{\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdelimiter\u001b[39m\u001b[38;5;124m\"\u001b[39m: \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m,\u001b[39m\u001b[38;5;124m\"\u001b[39m},\n\u001b[0;32m 947\u001b[0m )\n\u001b[0;32m 948\u001b[0m kwds\u001b[38;5;241m.\u001b[39mupdate(kwds_defaults)\n\u001b[1;32m--> 950\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m _read(filepath_or_buffer, kwds)\n",
28+
"File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\pandas\\io\\parsers\\readers.py:605\u001b[0m, in \u001b[0;36m_read\u001b[1;34m(filepath_or_buffer, kwds)\u001b[0m\n\u001b[0;32m 602\u001b[0m _validate_names(kwds\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mnames\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m))\n\u001b[0;32m 604\u001b[0m \u001b[38;5;66;03m# Create the parser.\u001b[39;00m\n\u001b[1;32m--> 605\u001b[0m parser \u001b[38;5;241m=\u001b[39m TextFileReader(filepath_or_buffer, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwds)\n\u001b[0;32m 607\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m chunksize \u001b[38;5;129;01mor\u001b[39;00m iterator:\n\u001b[0;32m 608\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m parser\n",
29+
"File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\pandas\\io\\parsers\\readers.py:1442\u001b[0m, in \u001b[0;36mTextFileReader.__init__\u001b[1;34m(self, f, engine, **kwds)\u001b[0m\n\u001b[0;32m 1439\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39moptions[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mhas_index_names\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m kwds[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mhas_index_names\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n\u001b[0;32m 1441\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhandles: IOHandles \u001b[38;5;241m|\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m-> 1442\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_engine \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_make_engine(f, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mengine)\n",
30+
"File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\pandas\\io\\parsers\\readers.py:1735\u001b[0m, in \u001b[0;36mTextFileReader._make_engine\u001b[1;34m(self, f, engine)\u001b[0m\n\u001b[0;32m 1733\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mb\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m mode:\n\u001b[0;32m 1734\u001b[0m mode \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mb\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m-> 1735\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhandles \u001b[38;5;241m=\u001b[39m get_handle(\n\u001b[0;32m 1736\u001b[0m f,\n\u001b[0;32m 1737\u001b[0m mode,\n\u001b[0;32m 1738\u001b[0m encoding\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39moptions\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mencoding\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m),\n\u001b[0;32m 1739\u001b[0m compression\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39moptions\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcompression\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m),\n\u001b[0;32m 1740\u001b[0m memory_map\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39moptions\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmemory_map\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mFalse\u001b[39;00m),\n\u001b[0;32m 1741\u001b[0m is_text\u001b[38;5;241m=\u001b[39mis_text,\n\u001b[0;32m 1742\u001b[0m errors\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39moptions\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mencoding_errors\u001b[39m\u001b[38;5;124m\"\u001b[39m, 
\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mstrict\u001b[39m\u001b[38;5;124m\"\u001b[39m),\n\u001b[0;32m 1743\u001b[0m storage_options\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39moptions\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mstorage_options\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m),\n\u001b[0;32m 1744\u001b[0m )\n\u001b[0;32m 1745\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhandles \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[0;32m 1746\u001b[0m f \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhandles\u001b[38;5;241m.\u001b[39mhandle\n",
31+
"File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\pandas\\io\\common.py:856\u001b[0m, in \u001b[0;36mget_handle\u001b[1;34m(path_or_buf, mode, encoding, compression, memory_map, is_text, errors, storage_options)\u001b[0m\n\u001b[0;32m 851\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(handle, \u001b[38;5;28mstr\u001b[39m):\n\u001b[0;32m 852\u001b[0m \u001b[38;5;66;03m# Check whether the filename is to be opened in binary mode.\u001b[39;00m\n\u001b[0;32m 853\u001b[0m \u001b[38;5;66;03m# Binary mode does not support 'encoding' and 'newline'.\u001b[39;00m\n\u001b[0;32m 854\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m ioargs\u001b[38;5;241m.\u001b[39mencoding \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mb\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m ioargs\u001b[38;5;241m.\u001b[39mmode:\n\u001b[0;32m 855\u001b[0m \u001b[38;5;66;03m# Encoding\u001b[39;00m\n\u001b[1;32m--> 856\u001b[0m handle \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mopen\u001b[39m(\n\u001b[0;32m 857\u001b[0m handle,\n\u001b[0;32m 858\u001b[0m ioargs\u001b[38;5;241m.\u001b[39mmode,\n\u001b[0;32m 859\u001b[0m encoding\u001b[38;5;241m=\u001b[39mioargs\u001b[38;5;241m.\u001b[39mencoding,\n\u001b[0;32m 860\u001b[0m errors\u001b[38;5;241m=\u001b[39merrors,\n\u001b[0;32m 861\u001b[0m newline\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[0;32m 862\u001b[0m )\n\u001b[0;32m 863\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m 864\u001b[0m \u001b[38;5;66;03m# Binary mode\u001b[39;00m\n\u001b[0;32m 865\u001b[0m handle \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mopen\u001b[39m(handle, ioargs\u001b[38;5;241m.\u001b[39mmode)\n",
32+
"\u001b[1;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: 'titanic_Data_Train1.csv'"
33+
]
34+
}
35+
],
36+
"source": [
37+
"import pandas as pd\n",
38+
"import numpy as np\n",
39+
"import seaborn as sns\n",
40+
"import matplotlib.pyplot as plt\n",
41+
"\n",
42+
"# Read the Titanic CSV; the bare filename is resolved against the current working directory\n",
43+
"df = pd.read_csv(filepath_or_buffer=\"titanic_Data_Train1.csv\")\n",
44+
"\n",
45+
"# Print a heading, then the first five rows (the default size of df.head())\n",
46+
"print(\"First few rows of the dataset:\")\n",
47+
"print(df.head(5))\n"
48+
]
49+
},
50+
{
51+
"cell_type": "code",
52+
"execution_count": 2,
53+
"id": "1a3abb46",
54+
"metadata": {},
55+
"outputs": [
56+
{
57+
"ename": "NameError",
58+
"evalue": "name 'df' is not defined",
59+
"output_type": "error",
60+
"traceback": [
61+
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
62+
"\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)",
63+
"Cell \u001b[1;32mIn[2], line 2\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[38;5;66;03m# Drop the irrelevant columns\u001b[39;00m\n\u001b[1;32m----> 2\u001b[0m df\u001b[38;5;241m.\u001b[39mdrop([\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mPassengerId\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mName\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mSibSp\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mParch\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mTicket\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mCabin\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mEmbarked\u001b[39m\u001b[38;5;124m'\u001b[39m], axis\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mcolumns\u001b[39m\u001b[38;5;124m'\u001b[39m, inplace\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[0;32m 4\u001b[0m \u001b[38;5;66;03m# Display the first few rows of the dataframe after dropping columns\u001b[39;00m\n\u001b[0;32m 5\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124mDataset after dropping irrelevant columns:\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n",
64+
"\u001b[1;31mNameError\u001b[0m: name 'df' is not defined"
65+
]
66+
}
67+
],
68+
"source": [
69+
"\n",
70+
"# Remove the columns that the later cells of this notebook never read\n",
71+
"df.drop(columns=['PassengerId', 'Name', 'SibSp', 'Parch', 'Ticket', 'Cabin', 'Embarked'], inplace=True)\n",
72+
"\n",
73+
"# Print a heading, then the first five rows of the reduced frame\n",
74+
"print(\"\\nDataset after dropping irrelevant columns:\")\n",
75+
"print(df.head(5))\n"
76+
]
77+
},
78+
{
79+
"cell_type": "code",
80+
"execution_count": 3,
81+
"id": "13240344",
82+
"metadata": {},
83+
"outputs": [
84+
{
85+
"name": "stdout",
86+
"output_type": "stream",
87+
"text": [
88+
"\n",
89+
"Missing values in the dataset:\n"
90+
]
91+
},
92+
{
93+
"ename": "NameError",
94+
"evalue": "name 'df' is not defined",
95+
"output_type": "error",
96+
"traceback": [
97+
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
98+
"\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)",
99+
"Cell \u001b[1;32mIn[3], line 3\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[38;5;66;03m# Check for missing values\u001b[39;00m\n\u001b[0;32m 2\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124mMissing values in the dataset:\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m----> 3\u001b[0m \u001b[38;5;28mprint\u001b[39m(df\u001b[38;5;241m.\u001b[39misnull()\u001b[38;5;241m.\u001b[39msum())\n\u001b[0;32m 5\u001b[0m \u001b[38;5;66;03m# Handle missing values (for simplicity, fill missing Age values with the median and drop rows with missing Fare values)\u001b[39;00m\n\u001b[0;32m 6\u001b[0m df[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mAge\u001b[39m\u001b[38;5;124m'\u001b[39m]\u001b[38;5;241m.\u001b[39mfillna(df[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mAge\u001b[39m\u001b[38;5;124m'\u001b[39m]\u001b[38;5;241m.\u001b[39mmedian(), inplace\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n",
100+
"\u001b[1;31mNameError\u001b[0m: name 'df' is not defined"
101+
]
102+
}
103+
],
104+
"source": [
105+
"\n",
106+
"# Count missing values per column before any cleaning\n",
107+
"print(\"\\nMissing values in the dataset:\")\n",
108+
"print(df.isnull().sum())\n",
109+
"\n",
110+
"# Fill missing Age with the median and drop rows with missing Fare; results are assigned back because df['Age'].fillna(..., inplace=True) acts on an intermediate Series (warns in recent pandas 2.x, leaves df unchanged under Copy-on-Write)\n",
111+
"df['Age'] = df['Age'].fillna(df['Age'].median())\n",
112+
"df = df.dropna(subset=['Fare'])\n",
113+
"\n",
114+
"# Re-count missing values to confirm Age and Fare are now complete\n",
115+
"print(\"\\nMissing values after handling:\")\n",
116+
"print(df.isnull().sum())\n",
117+
"\n",
118+
"# Summary statistics of the cleaned dataset\n",
119+
"print(\"\\nSummary statistics:\")\n",
120+
"print(df.describe())\n",
121+
"\n"
122+
]
123+
},
124+
{
125+
"cell_type": "code",
126+
"execution_count": null,
127+
"id": "0e6a758a",
128+
"metadata": {},
129+
"outputs": [],
130+
"source": [
131+
"# Exploratory data analysis: distributions of the numerical features\n",
132+
"# One figure with two side-by-side panels, addressed through explicit Axes objects\n",
133+
"fig, (ax_age, ax_fare) = plt.subplots(1, 2, figsize=(12, 6))\n",
134+
"# Left panel: Age histogram with a KDE overlay\n",
135+
"sns.histplot(df['Age'], kde=True, ax=ax_age)\n",
136+
"ax_age.set_title('Distribution of Age')\n",
137+
"\n",
138+
"# Right panel: Fare histogram with a KDE overlay\n",
139+
"sns.histplot(df['Fare'], kde=True, ax=ax_fare)\n",
140+
"ax_fare.set_title('Distribution of Fare')\n",
141+
"\n",
142+
"fig.tight_layout()\n",
143+
"plt.show()\n",
144+
"\n",
145+
"# Plot passenger counts by Sex, split by Survived (countplot draws counts, not rates)\n",
146+
"plt.figure(figsize=(6, 4))\n",
147+
"sns.countplot(x='Sex', hue='Survived', data=df)\n",
148+
"plt.title('Survival Counts by Sex')\n",
149+
"plt.show()\n",
150+
"\n",
151+
"# Convert categorical variable 'Sex' into dummy/indicator variables; the result gets a new name so df keeps 'Sex' and this cell can be re-run\n",
152+
"df_encoded = pd.get_dummies(df, columns=['Sex'], drop_first=True)\n",
153+
"\n",
154+
"# Display the first few rows of the encoded dataframe\n",
155+
"print(\"\\nDataset after encoding categorical variables:\")\n",
156+
"print(df_encoded.head())\n"
157+
]
158+
}
159+
],
160+
"metadata": {
161+
"kernelspec": {
162+
"display_name": "Python 3 (ipykernel)",
163+
"language": "python",
164+
"name": "python3"
165+
},
166+
"language_info": {
167+
"codemirror_mode": {
168+
"name": "ipython",
169+
"version": 3
170+
},
171+
"file_extension": ".py",
172+
"mimetype": "text/x-python",
173+
"name": "python",
174+
"nbconvert_exporter": "python",
175+
"pygments_lexer": "ipython3",
176+
"version": "3.11.3"
177+
}
178+
},
179+
"nbformat": 4,
180+
"nbformat_minor": 5
181+
}

0 commit comments

Comments
 (0)