Skip to content

Commit f42afc2

Browse files
committed
few details and README Added
1 parent b80d240 commit f42afc2

File tree

5 files changed

+123
-80
lines changed

5 files changed

+123
-80
lines changed

README.md

+17-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# Data-Generator
2-
This code is to generate random dataset of Bangladeshi people's profile. A few attributes were considered like a person's NID,
2+
This code generates a random dataset of **Bangladeshi people's profiles**. A few attributes were considered, such as a person's NID,
33
name (first-name, middle name, last name), father's name (first-name, middle name, last name), mother's name (first-name, middle name,
44
last name), present address (house number, street, ward number, area, thana (Police Station), city, district, division) for people
55
living in cities, present address ( ward number, village, thana (Police Station), district, division) for people living in villages,
@@ -9,6 +9,7 @@ mobile, email if any, marital status, gender, place of birth, Passport number if
99
person (name as above, address as above, mobile and email), income, asset, tax, educational qualification (SSC, HSC, graduation,
1010
post graduation, PhD).
1111

12+
## Procedure
1213
First, a list of Bangladeshi people's names (in the names1.csv file) was collected from the internet. All the names were fragmented into
1314
single names. Then different tags were assigned to each of them based on whether the name is a male or female name (gender), whether it
1415
is a Muslim, or other religion's name and whether it is a first, middle or last name (Positioning). Tags were used to generate more
@@ -23,3 +24,18 @@ of the fields were given and then randomly picked from them. For educational qua
2324
passed S.S.C from), H.S.C result, college (where a person passed H.S.C from), under graduation result, university, post graduation
2425
result, university were generated. Other personal information like marital status, height and weight was also generated randomly.
2526

27+
### Package installation
28+
The scripts require some libraries to be installed before they can be executed. Running the following command installs all the required modules.
29+
```sh
30+
$ pip install -r requirements.txt
31+
```
32+
33+
### Run
34+
```sh
35+
$ python main.py
36+
```
37+
38+
The script first prompts for a single input: the number of rows to generate.
39+
40+
**Thank You**
41+

data_generator.py

+85-76
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,19 @@
11
import random
22
import string
3-
3+
import math
44

55
class Data_Generator():
66

7-
def __init__(self):
7+
"""
8+
Generates profile according to given names.
9+
Different attributes generated randomly.
10+
"""
11+
12+
def __init__(self, limit):
13+
14+
self.limit = limit
15+
self.mul = math.ceil(limit/350000)
16+
817
self.marital_status = {
918
1: 'Single',
1019
2: 'Married',
@@ -304,78 +313,78 @@ def __init__(self):
304313
self.new_generated_ec_mobile.append(self.generate_phone_no())
305314
self.new_generated_ec_email.append(self.generate_email(ec_fname, ec_lname))
306315

307-
def get_data(self, limit):
308-
Data = {'First name': self.new_generated_first_names[:limit],
309-
'Middle name': self.new_generated_middle_names[:limit],
310-
'Last name': self.new_generated_last_names[:limit],
311-
'NID': self.new_generated_NID[:limit],
312-
'Fathers First Name': self.new_generated_fathers_first_name[:limit],
313-
'Fathers Middle Name': self.new_generated_fathers_middle_name[:limit],
314-
'Fathers Last Name': self.new_generated_fathers_last_name[:limit],
315-
'Mothers First Name': self.new_generated_mothers_first_name[:limit],
316-
'Mothers Middle Name': self.new_generated_mothers_middle_name[:limit],
317-
'Mothers Last Name': self.new_generated_mothers_last_name[:limit],
318-
'Sex': self.new_generated_sex[:limit],
319-
'Religion': self.new_generated_religion[:limit],
320-
'Marital Status': self.new_generated_marital_status[:limit],
321-
'Height': self.new_generated_height[:limit],
322-
'Weight': self.new_generated_weight[:limit],
323-
'Profession': self.new_generated_profession[:limit],
324-
'Income': self.new_generated_salary[:limit],
325-
'Income Tax' : self.new_generated_income_tax[:limit],
326-
'TIN No': self.new_generated_tin_no[:limit],
327-
'Assets': self.new_generated_assets[:limit],
328-
'Date of Birth': self.new_generated_DOB[:limit],
329-
'Passport No': self.new_generated_passport_no[:limit],
330-
'Email Address': self.new_generated_emails[:limit],
331-
'Mobile No': self.new_generated_mobile_no[:limit],
332-
'Prsnt_addr_house_no': self.new_generated_present_addr_house_no[:limit],
333-
'Prsnt_addr_street_no': self.new_generated_present_addr_street_no[:limit],
334-
'Prsnt_addr_ward_no': self.new_generated_present_addr_word_no[:limit],
335-
'Prsnt_addr_village': self.new_generated_present_addr_village[:limit],
336-
'Prsnt_addr_thana': self.new_generated_present_addr_thana[:limit],
337-
'Prsnt_addr_city': self.new_generated_present_addr_city[:limit],
338-
'Prsnt_addr_district': self.new_generated_present_addr_district[:limit],
339-
'Prsnt_addr_division': self.new_generated_present_addr_division[:limit],
340-
'Prmntn_addr_house_no': self.new_generated_permanent_addr_house_no[:limit],
341-
'Prmntn_addr_street_no': self.new_generated_permanent_addr_street_no[:limit],
342-
'Prmntn_addr_ward_no': self.new_generated_permanent_addr_word_no[:limit],
343-
'Prmntn_addr_village': self.new_generated_permanent_addr_village[:limit],
344-
'Prmntn_addr_thana': self.new_generated_permanent_addr_thana[:limit],
345-
'Prmntn_addr_city': self.new_generated_permanent_addr_city[:limit],
346-
'Prmntn_addr_district': self.new_generated_permanent_addr_district[:limit],
347-
'Prmntn_addr_division': self.new_generated_permanent_addr_division[:limit],
348-
'SSC Passed From': self.new_generated_school[:limit],
349-
'SSC Result': self.new_generated_SSC_gpa[:limit],
350-
'HSC Passed From': self.new_generated_college[:limit],
351-
'HSC Result': self.new_generated_HSC_gpa[:limit],
352-
'Graduated From': self.new_generated_ug[:limit],
353-
'Undergrad Result': self.new_generated_ug_cgpa[:limit],
354-
'Post Graduated From': self.new_generated_grad[:limit],
355-
'Postgrad Result': self.new_generated_grad_cgpa[:limit],
356-
'Doctorate From': self.new_generated_phd[:limit],
357-
'Place of Birth': self.new_generated_place_of_birth[:limit],
358-
'EC First Name': self.new_generated_ec_firstname[:limit],
359-
'EC Middle Name': self.new_generated_ec_middlename[:limit],
360-
'EC Last Name': self.new_generated_ec_lastname[:limit],
361-
'EC_Prsnt_addr_house_no': self.new_generated_ec_present_addr_house_no[:limit],
362-
'EC_Prsnt_addr_street_no': self.new_generated_ec_present_addr_street_no[:limit],
363-
'EC_Prsnt_addr_ward_no': self.new_generated_ec_present_addr_word_no[:limit],
364-
'EC_Prsnt_addr_village': self.new_generated_ec_present_addr_village[:limit],
365-
'EC_Prsnt_addr_thana': self.new_generated_ec_present_addr_thana[:limit],
366-
'EC_Prsnt_addr_city': self.new_generated_ec_present_addr_city[:limit],
367-
'EC_Prsnt_addr_district': self.new_generated_ec_present_addr_district[:limit],
368-
'EC_Prsnt_addr_division': self.new_generated_ec_present_addr_division[:limit],
369-
'EC_Prmntn_addr_house_no': self.new_generated_ec_permanent_addr_house_no[:limit],
370-
'EC_Prmntn_addr_street_no': self.new_generated_ec_permanent_addr_street_no[:limit],
371-
'EC_Prmntn_addr_ward_no': self.new_generated_ec_permanent_addr_word_no[:limit],
372-
'EC_Prmntn_addr_village': self.new_generated_ec_permanent_addr_village[:limit],
373-
'EC_Prmntn_addr_thana': self.new_generated_ec_permanent_addr_thana[:limit],
374-
'EC_Prmntn_addr_city': self.new_generated_ec_permanent_addr_city[:limit],
375-
'EC_Prmntn_addr_district': self.new_generated_ec_permanent_addr_district[:limit],
376-
'EC_Prmntn_addr_division': self.new_generated_ec_permanent_addr_division[:limit],
377-
'EC Mobile No': self.new_generated_ec_mobile[:limit],
378-
'EC Email Address': self.new_generated_ec_email[:limit],
316+
def get_data(self):
317+
Data = {'First name': self.new_generated_first_names[:self.limit],
318+
'Middle name': self.new_generated_middle_names[:self.limit],
319+
'Last name': self.new_generated_last_names[:self.limit],
320+
'NID': self.new_generated_NID[:self.limit],
321+
'Fathers First Name': self.new_generated_fathers_first_name[:self.limit],
322+
'Fathers Middle Name': self.new_generated_fathers_middle_name[:self.limit],
323+
'Fathers Last Name': self.new_generated_fathers_last_name[:self.limit],
324+
'Mothers First Name': self.new_generated_mothers_first_name[:self.limit],
325+
'Mothers Middle Name': self.new_generated_mothers_middle_name[:self.limit],
326+
'Mothers Last Name': self.new_generated_mothers_last_name[:self.limit],
327+
'Sex': self.new_generated_sex[:self.limit],
328+
'Religion': self.new_generated_religion[:self.limit],
329+
'Marital Status': self.new_generated_marital_status[:self.limit],
330+
'Height': self.new_generated_height[:self.limit],
331+
'Weight': self.new_generated_weight[:self.limit],
332+
'Profession': self.new_generated_profession[:self.limit],
333+
'Income': self.new_generated_salary[:self.limit],
334+
'Income Tax' : self.new_generated_income_tax[:self.limit],
335+
'TIN No': self.new_generated_tin_no[:self.limit],
336+
'Assets': self.new_generated_assets[:self.limit],
337+
'Date of Birth': self.new_generated_DOB[:self.limit],
338+
'Passport No': self.new_generated_passport_no[:self.limit],
339+
'Email Address': self.new_generated_emails[:self.limit],
340+
'Mobile No': self.new_generated_mobile_no[:self.limit],
341+
'Prsnt_addr_house_no': self.new_generated_present_addr_house_no[:self.limit],
342+
'Prsnt_addr_street_no': self.new_generated_present_addr_street_no[:self.limit],
343+
'Prsnt_addr_ward_no': self.new_generated_present_addr_word_no[:self.limit],
344+
'Prsnt_addr_village': self.new_generated_present_addr_village[:self.limit],
345+
'Prsnt_addr_thana': self.new_generated_present_addr_thana[:self.limit],
346+
'Prsnt_addr_city': self.new_generated_present_addr_city[:self.limit],
347+
'Prsnt_addr_district': self.new_generated_present_addr_district[:self.limit],
348+
'Prsnt_addr_division': self.new_generated_present_addr_division[:self.limit],
349+
'Prmntn_addr_house_no': self.new_generated_permanent_addr_house_no[:self.limit],
350+
'Prmntn_addr_street_no': self.new_generated_permanent_addr_street_no[:self.limit],
351+
'Prmntn_addr_ward_no': self.new_generated_permanent_addr_word_no[:self.limit],
352+
'Prmntn_addr_village': self.new_generated_permanent_addr_village[:self.limit],
353+
'Prmntn_addr_thana': self.new_generated_permanent_addr_thana[:self.limit],
354+
'Prmntn_addr_city': self.new_generated_permanent_addr_city[:self.limit],
355+
'Prmntn_addr_district': self.new_generated_permanent_addr_district[:self.limit],
356+
'Prmntn_addr_division': self.new_generated_permanent_addr_division[:self.limit],
357+
'SSC Passed From': self.new_generated_school[:self.limit],
358+
'SSC Result': self.new_generated_SSC_gpa[:self.limit],
359+
'HSC Passed From': self.new_generated_college[:self.limit],
360+
'HSC Result': self.new_generated_HSC_gpa[:self.limit],
361+
'Graduated From': self.new_generated_ug[:self.limit],
362+
'Undergrad Result': self.new_generated_ug_cgpa[:self.limit],
363+
'Post Graduated From': self.new_generated_grad[:self.limit],
364+
'Postgrad Result': self.new_generated_grad_cgpa[:self.limit],
365+
'Doctorate From': self.new_generated_phd[:self.limit],
366+
'Place of Birth': self.new_generated_place_of_birth[:self.limit],
367+
'EC First Name': self.new_generated_ec_firstname[:self.limit],
368+
'EC Middle Name': self.new_generated_ec_middlename[:self.limit],
369+
'EC Last Name': self.new_generated_ec_lastname[:self.limit],
370+
'EC_Prsnt_addr_house_no': self.new_generated_ec_present_addr_house_no[:self.limit],
371+
'EC_Prsnt_addr_street_no': self.new_generated_ec_present_addr_street_no[:self.limit],
372+
'EC_Prsnt_addr_ward_no': self.new_generated_ec_present_addr_word_no[:self.limit],
373+
'EC_Prsnt_addr_village': self.new_generated_ec_present_addr_village[:self.limit],
374+
'EC_Prsnt_addr_thana': self.new_generated_ec_present_addr_thana[:self.limit],
375+
'EC_Prsnt_addr_city': self.new_generated_ec_present_addr_city[:self.limit],
376+
'EC_Prsnt_addr_district': self.new_generated_ec_present_addr_district[:self.limit],
377+
'EC_Prsnt_addr_division': self.new_generated_ec_present_addr_division[:self.limit],
378+
'EC_Prmntn_addr_house_no': self.new_generated_ec_permanent_addr_house_no[:self.limit],
379+
'EC_Prmntn_addr_street_no': self.new_generated_ec_permanent_addr_street_no[:self.limit],
380+
'EC_Prmntn_addr_ward_no': self.new_generated_ec_permanent_addr_word_no[:self.limit],
381+
'EC_Prmntn_addr_village': self.new_generated_ec_permanent_addr_village[:self.limit],
382+
'EC_Prmntn_addr_thana': self.new_generated_ec_permanent_addr_thana[:self.limit],
383+
'EC_Prmntn_addr_city': self.new_generated_ec_permanent_addr_city[:self.limit],
384+
'EC_Prmntn_addr_district': self.new_generated_ec_permanent_addr_district[:self.limit],
385+
'EC_Prmntn_addr_division': self.new_generated_ec_permanent_addr_division[:self.limit],
386+
'EC Mobile No': self.new_generated_ec_mobile[:self.limit],
387+
'EC Email Address': self.new_generated_ec_email[:self.limit],
379388
}
380389

381390
columns = ['First name', 'Middle name', 'Last name', 'NID', 'Fathers First Name', \
@@ -406,11 +415,11 @@ def generate_profile(self, first_names, middle_names, last_names, gender, religi
406415
for lastname in last_names:
407416
if middle_names != None:
408417
for middlename in middle_names:
409-
for i in range(2):
418+
for i in range(self.mul):
410419
self.generate_single_profile(first_names, last_names, firstname, middlename, lastname, gender, religion)
411420
#
412421
else:
413-
for i in range(2):
422+
for i in range(self.mul):
414423
self.generate_single_profile(first_names, last_names, firstname, None, lastname, gender, religion)
415424

416425
def generate_single_profile(self, first_names, last_names, firstname, middlename, lastname, gender, religion):

data_loader.py

+4
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,10 @@
33

44
class Data_Loader():
55

6+
"""
7+
Loads csv file into a Pandas DataFrame by shuffling all rows.
8+
Fragments names by positioning, religion and gender.
9+
"""
610

711
def __init__(self, Data=None):
812
self.dataframe = Data

main.py

+15-3
Original file line numberDiff line numberDiff line change
@@ -2,18 +2,26 @@
22
from data_generator import Data_Generator
33

44
if __name__ == "__main__":
5+
6+
# Enter how many rows to generate
7+
limit = int(input("Enter number of rows to generate: "))
8+
9+
# Initializing classes, Data_Loader(), Data_Generator
510
loader = Data_Loader()
6-
generator = Data_Generator()
11+
generator = Data_Generator(limit)
712

13+
# Loads single names from a csv file into a dataframe
814
filename = 'names1.csv'
915
df = loader.read_csvfile(filename)
1016

17+
# Generates firstnames, middlenames, lastnames for every religion and gender
1118
for religion in loader.religion:
1219
for gender in loader.gender:
1320
firstnames, middlenames, lastnames = loader.get_names_by_religion_gender(religion, gender)
1421

1522
for i in range(2):
1623
if i % 2 == 0:
24+
# Generates 2 word length names
1725
if gender == 'unisex':
1826
gender1 = 'male'
1927
firstnames1, middlenames1 , lastnames1 = loader.get_names_by_religion_gender(religion, gender1)
@@ -27,6 +35,7 @@
2735
last_names2 = lastnames + lastnames2
2836
generator.generate_profile(first_names2, None, last_names2, gender2, religion)
2937
else:
38+
# Generates 3 word length names
3039
if gender == 'unisex':
3140
gender3 = 'male'
3241
firstnames3, middlenames3 , lastnames3 = loader.get_names_by_religion_gender(religion, gender3)
@@ -41,8 +50,11 @@
4150
middle_names4 = middlenames + middlenames4
4251
last_names4 = lastnames + lastnames4
4352
generator.generate_profile(first_names4, middle_names4, last_names4, gender4, religion)
44-
53+
54+
# Gets all data already generated by data_generator class
4555
data, collumns = generator.get_data()
46-
filename = 'new.csv'
56+
57+
# Saves all rows and columns into a csv file
58+
filename = str(limit) + '_rows.csv'
4759
saved_data = loader.save_csvfile(filename, data, collumns)
4860
print(saved_data)

requirements.txt

+2
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
pandas==0.23.4
2+
scikit-learn==0.20.1

0 commit comments

Comments
 (0)