|
| 1 | +{ |
| 2 | + "cells": [ |
| 3 | + { |
| 4 | + "cell_type": "markdown", |
| 5 | + "id": "2299008a", |
| 6 | + "metadata": {}, |
| 7 | + "source": [ |
| 8 | + "# Analysis of Titanic_dataset" |
| 9 | + ] |
| 10 | + }, |
| 11 | + { |
| 12 | + "cell_type": "code", |
| 13 | + "execution_count": 1, |
| 14 | + "id": "b84ff62c", |
| 15 | + "metadata": {}, |
| 16 | + "outputs": [ |
| 17 | + { |
| 18 | + "ename": "FileNotFoundError", |
| 19 | + "evalue": "[Errno 2] No such file or directory: 'titanic_Data_Train1.csv'", |
| 20 | + "output_type": "error", |
| 21 | + "traceback": [ |
| 22 | + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", |
| 23 | + "\u001b[1;31mFileNotFoundError\u001b[0m Traceback (most recent call last)", |
| 24 | + "Cell \u001b[1;32mIn[1], line 7\u001b[0m\n\u001b[0;32m 4\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mmatplotlib\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mpyplot\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01mplt\u001b[39;00m\n\u001b[0;32m 6\u001b[0m \u001b[38;5;66;03m# Load the dataset\u001b[39;00m\n\u001b[1;32m----> 7\u001b[0m df \u001b[38;5;241m=\u001b[39m pd\u001b[38;5;241m.\u001b[39mread_csv(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtitanic_Data_Train1.csv\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m 9\u001b[0m \u001b[38;5;66;03m# Display the first few rows of the dataframe\u001b[39;00m\n\u001b[0;32m 10\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mFirst few rows of the dataset:\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n", |
| 25 | + "File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\pandas\\util\\_decorators.py:211\u001b[0m, in \u001b[0;36mdeprecate_kwarg.<locals>._deprecate_kwarg.<locals>.wrapper\u001b[1;34m(*args, **kwargs)\u001b[0m\n\u001b[0;32m 209\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m 210\u001b[0m kwargs[new_arg_name] \u001b[38;5;241m=\u001b[39m new_arg_value\n\u001b[1;32m--> 211\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m func(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n", |
| 26 | + "File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\pandas\\util\\_decorators.py:331\u001b[0m, in \u001b[0;36mdeprecate_nonkeyword_arguments.<locals>.decorate.<locals>.wrapper\u001b[1;34m(*args, **kwargs)\u001b[0m\n\u001b[0;32m 325\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(args) \u001b[38;5;241m>\u001b[39m num_allow_args:\n\u001b[0;32m 326\u001b[0m warnings\u001b[38;5;241m.\u001b[39mwarn(\n\u001b[0;32m 327\u001b[0m msg\u001b[38;5;241m.\u001b[39mformat(arguments\u001b[38;5;241m=\u001b[39m_format_argument_list(allow_args)),\n\u001b[0;32m 328\u001b[0m \u001b[38;5;167;01mFutureWarning\u001b[39;00m,\n\u001b[0;32m 329\u001b[0m stacklevel\u001b[38;5;241m=\u001b[39mfind_stack_level(),\n\u001b[0;32m 330\u001b[0m )\n\u001b[1;32m--> 331\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m func(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n", |
| 27 | + "File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\pandas\\io\\parsers\\readers.py:950\u001b[0m, in \u001b[0;36mread_csv\u001b[1;34m(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, encoding_errors, dialect, error_bad_lines, warn_bad_lines, on_bad_lines, delim_whitespace, low_memory, memory_map, float_precision, storage_options)\u001b[0m\n\u001b[0;32m 935\u001b[0m kwds_defaults \u001b[38;5;241m=\u001b[39m _refine_defaults_read(\n\u001b[0;32m 936\u001b[0m dialect,\n\u001b[0;32m 937\u001b[0m delimiter,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 946\u001b[0m defaults\u001b[38;5;241m=\u001b[39m{\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdelimiter\u001b[39m\u001b[38;5;124m\"\u001b[39m: \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m,\u001b[39m\u001b[38;5;124m\"\u001b[39m},\n\u001b[0;32m 947\u001b[0m )\n\u001b[0;32m 948\u001b[0m kwds\u001b[38;5;241m.\u001b[39mupdate(kwds_defaults)\n\u001b[1;32m--> 950\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m _read(filepath_or_buffer, kwds)\n", |
| 28 | + "File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\pandas\\io\\parsers\\readers.py:605\u001b[0m, in \u001b[0;36m_read\u001b[1;34m(filepath_or_buffer, kwds)\u001b[0m\n\u001b[0;32m 602\u001b[0m _validate_names(kwds\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mnames\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m))\n\u001b[0;32m 604\u001b[0m \u001b[38;5;66;03m# Create the parser.\u001b[39;00m\n\u001b[1;32m--> 605\u001b[0m parser \u001b[38;5;241m=\u001b[39m TextFileReader(filepath_or_buffer, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwds)\n\u001b[0;32m 607\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m chunksize \u001b[38;5;129;01mor\u001b[39;00m iterator:\n\u001b[0;32m 608\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m parser\n", |
| 29 | + "File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\pandas\\io\\parsers\\readers.py:1442\u001b[0m, in \u001b[0;36mTextFileReader.__init__\u001b[1;34m(self, f, engine, **kwds)\u001b[0m\n\u001b[0;32m 1439\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39moptions[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mhas_index_names\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m kwds[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mhas_index_names\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n\u001b[0;32m 1441\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhandles: IOHandles \u001b[38;5;241m|\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m-> 1442\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_engine \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_make_engine(f, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mengine)\n", |
| 30 | + "File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\pandas\\io\\parsers\\readers.py:1735\u001b[0m, in \u001b[0;36mTextFileReader._make_engine\u001b[1;34m(self, f, engine)\u001b[0m\n\u001b[0;32m 1733\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mb\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m mode:\n\u001b[0;32m 1734\u001b[0m mode \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mb\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m-> 1735\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhandles \u001b[38;5;241m=\u001b[39m get_handle(\n\u001b[0;32m 1736\u001b[0m f,\n\u001b[0;32m 1737\u001b[0m mode,\n\u001b[0;32m 1738\u001b[0m encoding\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39moptions\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mencoding\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m),\n\u001b[0;32m 1739\u001b[0m compression\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39moptions\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcompression\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m),\n\u001b[0;32m 1740\u001b[0m memory_map\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39moptions\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmemory_map\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mFalse\u001b[39;00m),\n\u001b[0;32m 1741\u001b[0m is_text\u001b[38;5;241m=\u001b[39mis_text,\n\u001b[0;32m 1742\u001b[0m errors\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39moptions\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mencoding_errors\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mstrict\u001b[39m\u001b[38;5;124m\"\u001b[39m),\n\u001b[0;32m 1743\u001b[0m storage_options\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39moptions\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mstorage_options\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m),\n\u001b[0;32m 1744\u001b[0m )\n\u001b[0;32m 1745\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhandles \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[0;32m 1746\u001b[0m f \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhandles\u001b[38;5;241m.\u001b[39mhandle\n", |
| 31 | + "File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\pandas\\io\\common.py:856\u001b[0m, in \u001b[0;36mget_handle\u001b[1;34m(path_or_buf, mode, encoding, compression, memory_map, is_text, errors, storage_options)\u001b[0m\n\u001b[0;32m 851\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(handle, \u001b[38;5;28mstr\u001b[39m):\n\u001b[0;32m 852\u001b[0m \u001b[38;5;66;03m# Check whether the filename is to be opened in binary mode.\u001b[39;00m\n\u001b[0;32m 853\u001b[0m \u001b[38;5;66;03m# Binary mode does not support 'encoding' and 'newline'.\u001b[39;00m\n\u001b[0;32m 854\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m ioargs\u001b[38;5;241m.\u001b[39mencoding \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mb\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m ioargs\u001b[38;5;241m.\u001b[39mmode:\n\u001b[0;32m 855\u001b[0m \u001b[38;5;66;03m# Encoding\u001b[39;00m\n\u001b[1;32m--> 856\u001b[0m handle \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mopen\u001b[39m(\n\u001b[0;32m 857\u001b[0m handle,\n\u001b[0;32m 858\u001b[0m ioargs\u001b[38;5;241m.\u001b[39mmode,\n\u001b[0;32m 859\u001b[0m encoding\u001b[38;5;241m=\u001b[39mioargs\u001b[38;5;241m.\u001b[39mencoding,\n\u001b[0;32m 860\u001b[0m errors\u001b[38;5;241m=\u001b[39merrors,\n\u001b[0;32m 861\u001b[0m newline\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[0;32m 862\u001b[0m )\n\u001b[0;32m 863\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m 864\u001b[0m \u001b[38;5;66;03m# Binary mode\u001b[39;00m\n\u001b[0;32m 865\u001b[0m handle \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mopen\u001b[39m(handle, ioargs\u001b[38;5;241m.\u001b[39mmode)\n", |
| 32 | + "\u001b[1;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: 'titanic_Data_Train1.csv'" |
| 33 | + ] |
| 34 | + } |
| 35 | + ], |
| 36 | + "source": [ |
| 37 | + "import pandas as pd\n", |
| 38 | + "import numpy as np\n", |
| 39 | + "import seaborn as sns\n", |
| 40 | + "import matplotlib.pyplot as plt\n", |
| 41 | + "\n", |
| 42 | + "# Load the dataset\n", |
| 43 | + "df = pd.read_csv(\"titanic_Data_Train1.csv\")\n", |
| 44 | + "\n", |
| 45 | + "# Display the first few rows of the dataframe\n", |
| 46 | + "print(\"First few rows of the dataset:\")\n", |
| 47 | + "print(df.head())\n" |
| 48 | + ] |
| 49 | + }, |
| 50 | + { |
| 51 | + "cell_type": "code", |
| 52 | + "execution_count": 2, |
| 53 | + "id": "1a3abb46", |
| 54 | + "metadata": {}, |
| 55 | + "outputs": [ |
| 56 | + { |
| 57 | + "ename": "NameError", |
| 58 | + "evalue": "name 'df' is not defined", |
| 59 | + "output_type": "error", |
| 60 | + "traceback": [ |
| 61 | + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", |
| 62 | + "\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)", |
| 63 | + "Cell \u001b[1;32mIn[2], line 2\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[38;5;66;03m# Drop the irrelevant columns\u001b[39;00m\n\u001b[1;32m----> 2\u001b[0m df\u001b[38;5;241m.\u001b[39mdrop([\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mPassengerId\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mName\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mSibSp\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mParch\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mTicket\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mCabin\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mEmbarked\u001b[39m\u001b[38;5;124m'\u001b[39m], axis\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mcolumns\u001b[39m\u001b[38;5;124m'\u001b[39m, inplace\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[0;32m 4\u001b[0m \u001b[38;5;66;03m# Display the first few rows of the dataframe after dropping columns\u001b[39;00m\n\u001b[0;32m 5\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124mDataset after dropping irrelevant columns:\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n", |
| 64 | + "\u001b[1;31mNameError\u001b[0m: name 'df' is not defined" |
| 65 | + ] |
| 66 | + } |
| 67 | + ], |
| 68 | + "source": [ |
| 69 | + "\n", |
| 70 | + "# Drop the irrelevant columns\n", |
| 71 | + "df.drop(['PassengerId', 'Name', 'SibSp', 'Parch', 'Ticket', 'Cabin', 'Embarked'], axis='columns', inplace=True)\n", |
| 72 | + "\n", |
| 73 | + "# Display the first few rows of the dataframe after dropping columns\n", |
| 74 | + "print(\"\\nDataset after dropping irrelevant columns:\")\n", |
| 75 | + "print(df.head())\n" |
| 76 | + ] |
| 77 | + }, |
| 78 | + { |
| 79 | + "cell_type": "code", |
| 80 | + "execution_count": 3, |
| 81 | + "id": "13240344", |
| 82 | + "metadata": {}, |
| 83 | + "outputs": [ |
| 84 | + { |
| 85 | + "name": "stdout", |
| 86 | + "output_type": "stream", |
| 87 | + "text": [ |
| 88 | + "\n", |
| 89 | + "Missing values in the dataset:\n" |
| 90 | + ] |
| 91 | + }, |
| 92 | + { |
| 93 | + "ename": "NameError", |
| 94 | + "evalue": "name 'df' is not defined", |
| 95 | + "output_type": "error", |
| 96 | + "traceback": [ |
| 97 | + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", |
| 98 | + "\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)", |
| 99 | + "Cell \u001b[1;32mIn[3], line 3\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[38;5;66;03m# Check for missing values\u001b[39;00m\n\u001b[0;32m 2\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124mMissing values in the dataset:\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m----> 3\u001b[0m \u001b[38;5;28mprint\u001b[39m(df\u001b[38;5;241m.\u001b[39misnull()\u001b[38;5;241m.\u001b[39msum())\n\u001b[0;32m 5\u001b[0m \u001b[38;5;66;03m# Handle missing values (for simplicity, fill missing Age values with the median and drop rows with missing Fare values)\u001b[39;00m\n\u001b[0;32m 6\u001b[0m df[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mAge\u001b[39m\u001b[38;5;124m'\u001b[39m]\u001b[38;5;241m.\u001b[39mfillna(df[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mAge\u001b[39m\u001b[38;5;124m'\u001b[39m]\u001b[38;5;241m.\u001b[39mmedian(), inplace\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n", |
| 100 | + "\u001b[1;31mNameError\u001b[0m: name 'df' is not defined" |
| 101 | + ] |
| 102 | + } |
| 103 | + ], |
| 104 | + "source": [ |
| 105 | + "\n", |
| 106 | + "# Check for missing values\n", |
| 107 | + "print(\"\\nMissing values in the dataset:\")\n", |
| 108 | + "print(df.isnull().sum())\n", |
| 109 | + "\n", |
| 110 | + "# Handle missing values (for simplicity, fill missing Age values with the median and drop rows with missing Fare values)\n", |
| 111 | + "df['Age'].fillna(df['Age'].median(), inplace=True)\n", |
| 112 | + "df.dropna(subset=['Fare'], inplace=True)\n", |
| 113 | + "\n", |
| 114 | + "# Verify that there are no more missing values\n", |
| 115 | + "print(\"\\nMissing values after handling:\")\n", |
| 116 | + "print(df.isnull().sum())\n", |
| 117 | + "\n", |
| 118 | + "# Summary statistics of the dataset\n", |
| 119 | + "print(\"\\nSummary statistics:\")\n", |
| 120 | + "print(df.describe())\n", |
| 121 | + "\n" |
| 122 | + ] |
| 123 | + }, |
| 124 | + { |
| 125 | + "cell_type": "code", |
| 126 | + "execution_count": null, |
| 127 | + "id": "0e6a758a", |
| 128 | + "metadata": {}, |
| 129 | + "outputs": [], |
| 130 | + "source": [ |
| 131 | + "# Exploratory Data Analysis\n", |
| 132 | + "# Plot the distribution of numerical features\n", |
| 133 | + "plt.figure(figsize=(12, 6))\n", |
| 134 | + "plt.subplot(1, 2, 1)\n", |
| 135 | + "sns.histplot(df['Age'], kde=True)\n", |
| 136 | + "plt.title('Distribution of Age')\n", |
| 137 | + "\n", |
| 138 | + "plt.subplot(1, 2, 2)\n", |
| 139 | + "sns.histplot(df['Fare'], kde=True)\n", |
| 140 | + "plt.title('Distribution of Fare')\n", |
| 141 | + "\n", |
| 142 | + "plt.tight_layout()\n", |
| 143 | + "plt.show()\n", |
| 144 | + "\n", |
| 145 | + "# Plot the survival rate based on Sex\n", |
| 146 | + "plt.figure(figsize=(6, 4))\n", |
| 147 | + "sns.countplot(x='Sex', hue='Survived', data=df)\n", |
| 148 | + "plt.title('Survival Rate by Sex')\n", |
| 149 | + "plt.show()\n", |
| 150 | + "\n", |
| 151 | + "# Convert categorical variable 'Sex' into dummy/indicator variables\n", |
| 152 | + "df = pd.get_dummies(df, columns=['Sex'], drop_first=True)\n", |
| 153 | + "\n", |
| 154 | + "# Display the first few rows of the dataframe after encoding\n", |
| 155 | + "print(\"\\nDataset after encoding categorical variables:\")\n", |
| 156 | + "print(df.head())\n" |
| 157 | + ] |
| 158 | + } |
| 159 | + ], |
| 160 | + "metadata": { |
| 161 | + "kernelspec": { |
| 162 | + "display_name": "Python 3 (ipykernel)", |
| 163 | + "language": "python", |
| 164 | + "name": "python3" |
| 165 | + }, |
| 166 | + "language_info": { |
| 167 | + "codemirror_mode": { |
| 168 | + "name": "ipython", |
| 169 | + "version": 3 |
| 170 | + }, |
| 171 | + "file_extension": ".py", |
| 172 | + "mimetype": "text/x-python", |
| 173 | + "name": "python", |
| 174 | + "nbconvert_exporter": "python", |
| 175 | + "pygments_lexer": "ipython3", |
| 176 | + "version": "3.11.3" |
| 177 | + } |
| 178 | + }, |
| 179 | + "nbformat": 4, |
| 180 | + "nbformat_minor": 5 |
| 181 | +} |
0 commit comments