Skip to content

Commit 0bba187

Browse files
committed
Data Wrangling
1 parent 99962e1 commit 0bba187

File tree

1 file changed

+160
-0
lines changed

1 file changed

+160
-0
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,160 @@
1+
/**************** Data Wrangling / Data Munging *************/
2+
3+
-- List every department present in staff, one row each, in alphabetical order.
-- DISTINCT is a SELECT modifier rather than a function, so it takes no parentheses.
SELECT DISTINCT
    s.department
FROM staff AS s
ORDER BY s.department;
6+
7+
8+
/********* Reformatting Characters Data *********/
9+
10+
-- Department names converted to upper case, de-duplicated and sorted.
-- The sort key is spelled out instead of using a positional ORDER BY 1, so the
-- query keeps its meaning if the select list is ever reordered.
SELECT DISTINCT
    UPPER(s.department)
FROM staff AS s
ORDER BY UPPER(s.department);
13+
14+
15+
-- Department names converted to lower case, de-duplicated and sorted.
-- The sort key is spelled out instead of using a positional ORDER BY 1, so the
-- query keeps its meaning if the select list is ever reordered.
SELECT DISTINCT
    LOWER(s.department)
FROM staff AS s
ORDER BY LOWER(s.department);
18+
19+
20+
/*** Concatenation ***/
21+
-- Show each last name next to a combined "job title - department" label.
-- The || operator yields NULL when either operand is NULL.
SELECT
    s.last_name,
    s.job_title || ' - ' || s.department AS title_with_department
FROM staff AS s;
25+
26+
/*** Trim ***/
27+
-- With no characters specified, TRIM strips spaces from both ends of the string.
SELECT TRIM(BOTH FROM ' data sciece rocks ! ');
29+
30+
-- with trim is 19 characters
31+
-- Length after trimming: the leading and trailing spaces are no longer counted.
SELECT LENGTH(TRIM(BOTH FROM ' data sciece rocks ! '));
33+
34+
-- without trim the leading and trailing spaces are counted too (the literal as written here is 21 characters)
35+
-- Length of the untrimmed literal: surrounding spaces count as characters.
SELECT LENGTH(' data sciece rocks ! ');
37+
38+
39+
/* How many employees with Assistant roles */
40+
-- Count the staff rows whose job title contains "Assistant" anywhere in the text.
SELECT COUNT(*) AS employees_with_Assistant_role
FROM staff AS s
WHERE s.job_title LIKE '%Assistant%';
44+
45+
46+
/* What are those Assistant roles? */
47+
-- Distinct job titles that contain "Assistant", sorted alphabetically.
-- The sort column is named explicitly rather than referenced by position.
SELECT DISTINCT
    s.job_title
FROM staff AS s
WHERE s.job_title LIKE '%Assistant%'
ORDER BY s.job_title;
51+
52+
53+
/* let's check which roles are assistant roles and which are not */
54+
-- Every distinct job title with a boolean flag telling whether it is an Assistant role.
-- DISTINCT applies to the whole (job_title, is_assistant_role) row, not to job_title
-- alone. The flag is computed from job_title, so there is still one row per title.
SELECT DISTINCT
    s.job_title,
    (s.job_title LIKE '%Assistant%') AS is_assistant_role
FROM staff AS s
ORDER BY s.job_title;
59+
60+
--------------------------------------------------------------------------------------------
61+
62+
63+
/********* Extracting Strings from Characters *********/
64+
-- SUBSTRING('string' FROM position FOR how_many)
65+
66+
---------------------- SubString words ----------------------------------------------------
67+
-- Display a 12-character sample string (letters a through l) as test_string.
SELECT 'abcdefghijkl' AS test_string;
68+
69+
70+
-- SUBSTRING(string FROM position FOR how_many) uses 1-based positions, so this
-- returns the 3 characters starting at the 5th one: 'efg'.
-- The literal now includes the 'j' it was missing, so it is the same a-to-l
-- sample as the test_string query above.
SELECT SUBSTRING('abcdefghijkl' FROM 5 FOR 3) AS sub_string;
72+
73+
74+
-- Without a FOR clause, SUBSTRING returns everything from the given position to
-- the end of the string: 'efghijkl'.
-- The literal now includes the 'j' it was missing, so it is the same a-to-l
-- sample as the test_string query above.
SELECT SUBSTRING('abcdefghijkl' FROM 5) AS sub_string;
76+
77+
78+
-- Job titles that begin with "Assistant" (prefix match; one row per staff record).
SELECT s.job_title
FROM staff AS s
WHERE s.job_title LIKE 'Assistant%';
81+
82+
83+
/* We want to extract job category from the assistant position which starts with word Assistant */
84+
-- Extract the job category: the part of the title after the leading word "Assistant".
-- SUBSTRING starts at the character right after that 9-letter prefix. For a title
-- shaped like "Assistant Something" that character is the separating space, so the
-- result is trimmed to avoid categories with a leading blank.
SELECT
    TRIM(SUBSTRING(s.job_title FROM LENGTH('Assistant') + 1)) AS job_category,
    s.job_title
FROM staff AS s
WHERE s.job_title LIKE 'Assistant%';
89+
90+
91+
/* As there are several duplicated ones, we want to know only unique ones */
92+
-- Same extraction as a plain category listing, but with duplicate rows removed.
-- DISTINCT de-duplicates whole (job_category, job_title) rows; it is a SELECT
-- modifier, not a function wrapped around the first column.
-- The category is trimmed because the substring starts on the character right
-- after "Assistant", which is the separating space in "Assistant Something".
SELECT DISTINCT
    TRIM(SUBSTRING(s.job_title FROM LENGTH('Assistant') + 1)) AS job_category,
    s.job_title
FROM staff AS s
WHERE s.job_title LIKE 'Assistant%';
97+
98+
99+
---------------------- Replacing words ----------------------------------------------------
100+
101+
/* we want to replace word Assistant with Asst. */
102+
-- Abbreviate the leading "Assistant" to "Asst." by overwriting the first
-- LENGTH('Assistant') characters of every title that starts with that word.
SELECT
    OVERLAY(
        s.job_title
        PLACING 'Asst.'
        FROM 1
        FOR LENGTH('Assistant')
    ) AS shorten_job_title
FROM staff AS s
WHERE s.job_title LIKE 'Assistant%';
106+
107+
108+
--------------------------------------------------------------------------------------------
109+
110+
/********* Filtering with Regular Expressions *********/
111+
-- SIMILAR TO
112+
113+
/* We want to know job title with Assistant with Level 3 and 4 */
114+
-- we will put the desired words into group
115+
-- Pipe character | is for OR condition
116+
-- Titles that contain "Assistant" and end with level III or IV.
-- SIMILAR TO must match the whole string: % stands for any run of characters
-- and (III|IV) is an either/or group.
SELECT s.job_title
FROM staff AS s
WHERE s.job_title SIMILAR TO '%Assistant%(III|IV)';
120+
121+
122+
/* now we want to know job title with Assistant, starting with roman numeral I, followed by 1 character
123+
it can be II,IV, etc.. as long as it starts with character I
124+
125+
underscore _ : for one character */
126+
127+
-- Distinct titles ending in "Assistant I" plus exactly one more character
-- (the underscore matches any single character).
SELECT DISTINCT
    s.job_title
FROM staff AS s
WHERE s.job_title SIMILAR TO '%Assistant I_';
131+
132+
133+
/* job title starts with either E, P or S character , followed by any characters*/
134+
-- Titles whose first character is E, P or S; the bracket expression matches one
-- character from the set and % matches whatever follows.
SELECT s.job_title
FROM staff AS s
WHERE s.job_title SIMILAR TO '[EPS]%';
137+
138+
--------------------------------------------------------------------------------------------
139+
140+
/********* Reformatting Numerics Data *********/
141+
-- TRUNC() truncates values. Note: TRUNC only cuts off digits; it does not round the value.
142+
-- CEIL
143+
-- FLOOR
144+
-- ROUND
145+
146+
-- Per-department average salary, shown raw and through each numeric formatting
-- function so the differences can be compared side by side.
SELECT
    s.department,
    AVG(s.salary)           AS avg_salary,
    TRUNC(AVG(s.salary))    AS truncated_salary,            -- fractional part cut off
    TRUNC(AVG(s.salary), 2) AS truncated_salary_2_decimal,  -- cut off after 2 decimals, no rounding
    ROUND(AVG(s.salary), 2) AS rounded_salary,              -- rounded to 2 decimals
    CEIL(AVG(s.salary))     AS ceiling_salary,              -- next whole number up
    FLOOR(AVG(s.salary))    AS floor_salary                 -- next whole number down
FROM staff AS s
GROUP BY s.department;
156+
157+
158+
159+
160+

0 commit comments

Comments
 (0)