|
13 | 13 | },
|
14 | 14 | {
|
15 | 15 | "cell_type": "code",
|
16 |
| - "execution_count": 7, |
| 16 | + "execution_count": 2, |
17 | 17 | "metadata": {},
|
18 | 18 | "outputs": [],
|
19 | 19 | "source": [
|
|
5374 | 5374 | "(df.isnull().sum()/df.shape[0]).sort_values().plot.bar(ylim=(0,1),grid=True)"
|
5375 | 5375 | ]
|
5376 | 5376 | },
|
5377 |
| - { |
5378 |
| - "cell_type": "markdown", |
5379 |
| - "metadata": {}, |
5380 |
| - "source": [ |
5381 |
| - "## Data ex\n", |
5382 |
| - "\n", |
5383 |
| - "https://web.archive.org/web/20180506061559/http://lisp.vse.cz/pkdd99/Challenge/chall.htm" |
5384 |
| - ] |
5385 |
| - }, |
5386 |
| - { |
5387 |
| - "cell_type": "code", |
5388 |
| - "execution_count": 8, |
5389 |
| - "metadata": {}, |
5390 |
| - "outputs": [], |
5391 |
| - "source": [ |
5392 |
| - "import os" |
5393 |
| - ] |
5394 |
| - }, |
5395 |
| - { |
5396 |
| - "cell_type": "code", |
5397 |
| - "execution_count": 10, |
5398 |
| - "metadata": {}, |
5399 |
| - "outputs": [], |
5400 |
| - "source": [ |
5401 |
| - "files = os.listdir('auxiliary/data_ex')" |
5402 |
| - ] |
5403 |
| - }, |
5404 |
| - { |
5405 |
| - "cell_type": "code", |
5406 |
| - "execution_count": 11, |
5407 |
| - "metadata": {}, |
5408 |
| - "outputs": [ |
5409 |
| - { |
5410 |
| - "data": { |
5411 |
| - "text/plain": [ |
5412 |
| - "['account.asc',\n", |
5413 |
| - " 'card.asc',\n", |
5414 |
| - " 'client.asc',\n", |
5415 |
| - " 'disp.asc',\n", |
5416 |
| - " 'district.asc',\n", |
5417 |
| - " 'loan.asc',\n", |
5418 |
| - " 'order.asc',\n", |
5419 |
| - " 'trans.asc']" |
5420 |
| - ] |
5421 |
| - }, |
5422 |
| - "execution_count": 11, |
5423 |
| - "metadata": {}, |
5424 |
| - "output_type": "execute_result" |
5425 |
| - } |
5426 |
| - ], |
5427 |
| - "source": [ |
5428 |
| - "files" |
5429 |
| - ] |
5430 |
| - }, |
5431 |
| - { |
5432 |
| - "cell_type": "code", |
5433 |
| - "execution_count": 14, |
5434 |
| - "metadata": {}, |
5435 |
| - "outputs": [ |
5436 |
| - { |
5437 |
| - "name": "stderr", |
5438 |
| - "output_type": "stream", |
5439 |
| - "text": [ |
5440 |
| - "C:\\Users\\Martin Hronec\\AppData\\Local\\Temp\\ipykernel_22912\\2341300206.py:3: DtypeWarning: Columns (8) have mixed types. Specify dtype option on import or set low_memory=False.\n", |
5441 |
| - " data[file_name.split('.')[0]] = pd.read_table('auxiliary/data_ex/' + file_name, sep = ';')\n" |
5442 |
| - ] |
5443 |
| - } |
5444 |
| - ], |
5445 |
| - "source": [ |
5446 |
| - "data = {}\n", |
5447 |
| - "for file_name in files: \n", |
5448 |
| - " data[file_name.split('.')[0]] = pd.read_table('auxiliary/data_ex/' + file_name, sep = ';')\n", |
5449 |
| - " " |
5450 |
| - ] |
5451 |
| - }, |
5452 |
| - { |
5453 |
| - "cell_type": "code", |
5454 |
| - "execution_count": 16, |
5455 |
| - "metadata": {}, |
5456 |
| - "outputs": [ |
5457 |
| - { |
5458 |
| - "data": { |
5459 |
| - "text/html": [ |
5460 |
| - "<div>\n", |
5461 |
| - "<style scoped>\n", |
5462 |
| - " .dataframe tbody tr th:only-of-type {\n", |
5463 |
| - " vertical-align: middle;\n", |
5464 |
| - " }\n", |
5465 |
| - "\n", |
5466 |
| - " .dataframe tbody tr th {\n", |
5467 |
| - " vertical-align: top;\n", |
5468 |
| - " }\n", |
5469 |
| - "\n", |
5470 |
| - " .dataframe thead th {\n", |
5471 |
| - " text-align: right;\n", |
5472 |
| - " }\n", |
5473 |
| - "</style>\n", |
5474 |
| - "<table border=\"1\" class=\"dataframe\">\n", |
5475 |
| - " <thead>\n", |
5476 |
| - " <tr style=\"text-align: right;\">\n", |
5477 |
| - " <th></th>\n", |
5478 |
| - " <th>account_id</th>\n", |
5479 |
| - " <th>district_id</th>\n", |
5480 |
| - " <th>frequency</th>\n", |
5481 |
| - " <th>date</th>\n", |
5482 |
| - " </tr>\n", |
5483 |
| - " </thead>\n", |
5484 |
| - " <tbody>\n", |
5485 |
| - " <tr>\n", |
5486 |
| - " <th>0</th>\n", |
5487 |
| - " <td>576</td>\n", |
5488 |
| - " <td>55</td>\n", |
5489 |
| - " <td>POPLATEK MESICNE</td>\n", |
5490 |
| - " <td>930101</td>\n", |
5491 |
| - " </tr>\n", |
5492 |
| - " <tr>\n", |
5493 |
| - " <th>1</th>\n", |
5494 |
| - " <td>3818</td>\n", |
5495 |
| - " <td>74</td>\n", |
5496 |
| - " <td>POPLATEK MESICNE</td>\n", |
5497 |
| - " <td>930101</td>\n", |
5498 |
| - " </tr>\n", |
5499 |
| - " <tr>\n", |
5500 |
| - " <th>2</th>\n", |
5501 |
| - " <td>704</td>\n", |
5502 |
| - " <td>55</td>\n", |
5503 |
| - " <td>POPLATEK MESICNE</td>\n", |
5504 |
| - " <td>930101</td>\n", |
5505 |
| - " </tr>\n", |
5506 |
| - " <tr>\n", |
5507 |
| - " <th>3</th>\n", |
5508 |
| - " <td>2378</td>\n", |
5509 |
| - " <td>16</td>\n", |
5510 |
| - " <td>POPLATEK MESICNE</td>\n", |
5511 |
| - " <td>930101</td>\n", |
5512 |
| - " </tr>\n", |
5513 |
| - " <tr>\n", |
5514 |
| - " <th>4</th>\n", |
5515 |
| - " <td>2632</td>\n", |
5516 |
| - " <td>24</td>\n", |
5517 |
| - " <td>POPLATEK MESICNE</td>\n", |
5518 |
| - " <td>930102</td>\n", |
5519 |
| - " </tr>\n", |
5520 |
| - " <tr>\n", |
5521 |
| - " <th>...</th>\n", |
5522 |
| - " <td>...</td>\n", |
5523 |
| - " <td>...</td>\n", |
5524 |
| - " <td>...</td>\n", |
5525 |
| - " <td>...</td>\n", |
5526 |
| - " </tr>\n", |
5527 |
| - " <tr>\n", |
5528 |
| - " <th>4495</th>\n", |
5529 |
| - " <td>124</td>\n", |
5530 |
| - " <td>55</td>\n", |
5531 |
| - " <td>POPLATEK MESICNE</td>\n", |
5532 |
| - " <td>971228</td>\n", |
5533 |
| - " </tr>\n", |
5534 |
| - " <tr>\n", |
5535 |
| - " <th>4496</th>\n", |
5536 |
| - " <td>3958</td>\n", |
5537 |
| - " <td>59</td>\n", |
5538 |
| - " <td>POPLATEK MESICNE</td>\n", |
5539 |
| - " <td>971228</td>\n", |
5540 |
| - " </tr>\n", |
5541 |
| - " <tr>\n", |
5542 |
| - " <th>4497</th>\n", |
5543 |
| - " <td>777</td>\n", |
5544 |
| - " <td>30</td>\n", |
5545 |
| - " <td>POPLATEK MESICNE</td>\n", |
5546 |
| - " <td>971228</td>\n", |
5547 |
| - " </tr>\n", |
5548 |
| - " <tr>\n", |
5549 |
| - " <th>4498</th>\n", |
5550 |
| - " <td>1573</td>\n", |
5551 |
| - " <td>63</td>\n", |
5552 |
| - " <td>POPLATEK MESICNE</td>\n", |
5553 |
| - " <td>971229</td>\n", |
5554 |
| - " </tr>\n", |
5555 |
| - " <tr>\n", |
5556 |
| - " <th>4499</th>\n", |
5557 |
| - " <td>3276</td>\n", |
5558 |
| - " <td>1</td>\n", |
5559 |
| - " <td>POPLATEK MESICNE</td>\n", |
5560 |
| - " <td>971229</td>\n", |
5561 |
| - " </tr>\n", |
5562 |
| - " </tbody>\n", |
5563 |
| - "</table>\n", |
5564 |
| - "<p>4500 rows × 4 columns</p>\n", |
5565 |
| - "</div>" |
5566 |
| - ], |
5567 |
| - "text/plain": [ |
5568 |
| - " account_id district_id frequency date\n", |
5569 |
| - "0 576 55 POPLATEK MESICNE 930101\n", |
5570 |
| - "1 3818 74 POPLATEK MESICNE 930101\n", |
5571 |
| - "2 704 55 POPLATEK MESICNE 930101\n", |
5572 |
| - "3 2378 16 POPLATEK MESICNE 930101\n", |
5573 |
| - "4 2632 24 POPLATEK MESICNE 930102\n", |
5574 |
| - "... ... ... ... ...\n", |
5575 |
| - "4495 124 55 POPLATEK MESICNE 971228\n", |
5576 |
| - "4496 3958 59 POPLATEK MESICNE 971228\n", |
5577 |
| - "4497 777 30 POPLATEK MESICNE 971228\n", |
5578 |
| - "4498 1573 63 POPLATEK MESICNE 971229\n", |
5579 |
| - "4499 3276 1 POPLATEK MESICNE 971229\n", |
5580 |
| - "\n", |
5581 |
| - "[4500 rows x 4 columns]" |
5582 |
| - ] |
5583 |
| - }, |
5584 |
| - "execution_count": 16, |
5585 |
| - "metadata": {}, |
5586 |
| - "output_type": "execute_result" |
5587 |
| - } |
5588 |
| - ], |
5589 |
| - "source": [ |
5590 |
| - "data['account']" |
5591 |
| - ] |
5592 |
| - }, |
5593 | 5377 | {
|
5594 | 5378 | "cell_type": "markdown",
|
5595 | 5379 | "metadata": {
|
|
5810 | 5594 | "name": "python",
|
5811 | 5595 | "nbconvert_exporter": "python",
|
5812 | 5596 | "pygments_lexer": "ipython3",
|
5813 |
| - "version": "3.10.7" |
| 5597 | + "version": "0.0.0" |
5814 | 5598 | },
|
5815 | 5599 | "microsoft": {
|
5816 | 5600 | "ms_spell_check": {
|
|
0 commit comments