-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathetl-proces-chinook.sql
322 lines (288 loc) · 9.3 KB
/
etl-proces-chinook.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
CREATE DATABASE RACCOON_CHINOOK_DB;
USE DATABASE RACCOON_CHINOOK_DB;
CREATE SCHEMA RACCOON_CHINOOK_DB.STAGING;
USE SCHEMA RACCOON_CHINOOK_DB.STAGING;
CREATE OR REPLACE STAGE my_stage;
CREATE TABLE Artist_staging (
ArtistId INT PRIMARY KEY,
Name VARCHAR(120)
);
CREATE TABLE Album_staging (
AlbumId INT PRIMARY KEY,
Title VARCHAR(160),
ArtistId INT,
FOREIGN KEY (ArtistId) REFERENCES Artist_staging(ArtistId)
);
CREATE TABLE MediaType_staging (
MediaTypeId INT PRIMARY KEY,
Name VARCHAR(120)
);
CREATE TABLE Genre_staging (
GenreId INT PRIMARY KEY,
Name VARCHAR(120)
);
CREATE TABLE Track_staging (
TrackId INT PRIMARY KEY,
Name VARCHAR(200),
AlbumId INT,
MediaTypeId INT,
GenreId INT,
Composer VARCHAR(220),
Milliseconds INT,
Bytes INT,
UnitPrice DECIMAL(10, 2),
FOREIGN KEY (AlbumId) REFERENCES Album_staging(AlbumId),
FOREIGN KEY (MediaTypeId) REFERENCES MediaType_staging(MediaTypeId),
FOREIGN KEY (GenreId) REFERENCES Genre_staging(GenreId)
);
CREATE TABLE Playlist_staging (
PlaylistId INT PRIMARY KEY,
Name VARCHAR(120)
);
CREATE TABLE PlaylistTrack_staging (
PlaylistId INT,
TrackId INT,
PRIMARY KEY (PlaylistId, TrackId),
FOREIGN KEY (PlaylistId) REFERENCES Playlist_staging(PlaylistId),
FOREIGN KEY (TrackId) REFERENCES Track_staging(TrackId)
);
CREATE TABLE Employee_staging (
EmployeeId INT PRIMARY KEY,
FirstName VARCHAR(20),
LastName VARCHAR(20),
Title VARCHAR(30),
ReportsTo INT,
BirthDate DATETIME,
HireDate DATETIME,
Address VARCHAR(70),
City VARCHAR(40),
State VARCHAR(40),
Country VARCHAR(40),
PostalCode VARCHAR(10),
Phone VARCHAR(24),
Fax VARCHAR(24),
Email VARCHAR(60),
FOREIGN KEY (ReportsTo) REFERENCES Employee_staging(EmployeeId)
);
CREATE TABLE Customer_staging (
CustomerId INT PRIMARY KEY,
FirstName VARCHAR(40),
LastName VARCHAR(40),
Company VARCHAR(80),
Address VARCHAR(70),
City VARCHAR(40),
State VARCHAR(40),
Country VARCHAR(40),
PostalCode VARCHAR(10),
Phone VARCHAR(24),
Fax VARCHAR(24),
Email VARCHAR(60),
SupportRepId INT,
FOREIGN KEY (SupportRepId) REFERENCES Employee_staging(EmployeeId)
);
CREATE TABLE Invoice_staging (
InvoiceId INT PRIMARY KEY,
CustomerId INT,
InvoiceDate DATETIME,
BillingAddress VARCHAR(70),
BillingCity VARCHAR(40),
BillingState VARCHAR(40),
BillingCountry VARCHAR(40),
BillingPostalCode VARCHAR(10),
Total DECIMAL(10, 2),
FOREIGN KEY (CustomerId) REFERENCES Customer_staging(CustomerId)
);
CREATE TABLE InvoiceLine_staging (
InvoiceLineId INT PRIMARY KEY,
InvoiceId INT,
TrackId INT,
UnitPrice DECIMAL(10, 2),
Quantity INT,
FOREIGN KEY (InvoiceId) REFERENCES Invoice_staging(InvoiceId),
FOREIGN KEY (TrackId) REFERENCES Track_staging(TrackId)
);
COPY INTO Artist_staging
FROM @my_stage/artist.csv
FILE_FORMAT = (TYPE = 'CSV' FIELD_OPTIONALLY_ENCLOSED_BY = '"' SKIP_HEADER = 1);
COPY INTO Album_staging
FROM @my_stage/album.csv
FILE_FORMAT = (TYPE = 'CSV' FIELD_OPTIONALLY_ENCLOSED_BY = '"' SKIP_HEADER = 1);
COPY INTO MediaType_staging
FROM @my_stage/mediatype.csv
FILE_FORMAT = (TYPE = 'CSV' FIELD_OPTIONALLY_ENCLOSED_BY = '"' SKIP_HEADER = 1);
COPY INTO Genre_staging
FROM @my_stage/genre.csv
FILE_FORMAT = (TYPE = 'CSV' FIELD_OPTIONALLY_ENCLOSED_BY = '"' SKIP_HEADER = 1);
COPY INTO Track_staging
FROM @my_stage/track.csv
FILE_FORMAT = (TYPE = 'CSV' FIELD_OPTIONALLY_ENCLOSED_BY = '"' SKIP_HEADER = 1);
COPY INTO Playlist_staging
FROM @my_stage/playlist.csv
FILE_FORMAT = (TYPE = 'CSV' FIELD_OPTIONALLY_ENCLOSED_BY = '"' SKIP_HEADER = 1);
COPY INTO PlaylistTrack_staging
FROM @my_stage/playlisttrack.csv
FILE_FORMAT = (TYPE = 'CSV' FIELD_OPTIONALLY_ENCLOSED_BY = '"' SKIP_HEADER = 1);
COPY INTO Employee_staging
FROM @my_stage/employee.csv
FILE_FORMAT = (TYPE = 'CSV' FIELD_OPTIONALLY_ENCLOSED_BY = '"' SKIP_HEADER = 1)
ON_ERROR = 'CONTINUE';
COPY INTO Customer_staging
FROM @my_stage/customer.csv
FILE_FORMAT = (TYPE = 'CSV' FIELD_OPTIONALLY_ENCLOSED_BY = '"' SKIP_HEADER = 1);
COPY INTO Invoice_staging
FROM @my_stage/invoice.csv
FILE_FORMAT = (TYPE = 'CSV' FIELD_OPTIONALLY_ENCLOSED_BY = '"' SKIP_HEADER = 1);
COPY INTO InvoiceLine_staging
FROM @my_stage/invoiceline.csv
FILE_FORMAT = (TYPE = 'CSV' FIELD_OPTIONALLY_ENCLOSED_BY = '"' SKIP_HEADER = 1);
CREATE TABLE Date_Dim AS
SELECT DISTINCT
ROW_NUMBER() OVER (ORDER BY CAST(i.InvoiceDate AS DATE)) AS Dim_DateId,
CAST(i.InvoiceDate AS DATE) AS Date,
DATE_PART('day', i.InvoiceDate) AS Day,
DATE_PART('month', i.InvoiceDate) AS Month,
DATE_PART('year', i.InvoiceDate) AS Year,
CASE
WHEN DATE_PART('dow', i.InvoiceDate) = 0 THEN 'Sunday'
WHEN DATE_PART('dow', i.InvoiceDate) = 1 THEN 'Monday'
WHEN DATE_PART('dow', i.InvoiceDate) = 2 THEN 'Tuesday'
WHEN DATE_PART('dow', i.InvoiceDate) = 3 THEN 'Wednesday'
WHEN DATE_PART('dow', i.InvoiceDate) = 4 THEN 'Thursday'
WHEN DATE_PART('dow', i.InvoiceDate) = 5 THEN 'Friday'
WHEN DATE_PART('dow', i.InvoiceDate) = 6 THEN 'Saturday'
END AS Weekday,
DATE_PART('dow', i.InvoiceDate) + 1 AS Day_Week,
EXTRACT(WEEK FROM DATE_TRUNC('WEEK', i.InvoiceDate + INTERVAL '1 DAY')) AS Week,
CASE
WHEN DATE_PART('month', i.InvoiceDate) = 1 THEN 'January'
WHEN DATE_PART('month', i.InvoiceDate) = 2 THEN 'February'
WHEN DATE_PART('month', i.InvoiceDate) = 3 THEN 'March'
WHEN DATE_PART('month', i.InvoiceDate) = 4 THEN 'April'
WHEN DATE_PART('month', i.InvoiceDate) = 5 THEN 'May'
WHEN DATE_PART('month', i.InvoiceDate) = 6 THEN 'June'
WHEN DATE_PART('month', i.InvoiceDate) = 7 THEN 'July'
WHEN DATE_PART('month', i.InvoiceDate) = 8 THEN 'August'
WHEN DATE_PART('month', i.InvoiceDate) = 9 THEN 'September'
WHEN DATE_PART('month', i.InvoiceDate) = 10 THEN 'October'
WHEN DATE_PART('month', i.InvoiceDate) = 11 THEN 'November'
WHEN DATE_PART('month', i.InvoiceDate) = 12 THEN 'December'
END AS MonthString
FROM Invoice_staging i;
CREATE TABLE Track_Dim AS
SELECT
t.TrackId,
t.Name,
t.Composer,
t.Milliseconds,
t.Bytes,
t.UnitPrice
FROM Track_staging t;
CREATE TABLE Customer_Dim AS
SELECT
c.CustomerId,
c.FirstName,
c.LastName,
c.Company,
c.Address,
c.City,
c.State,
c.Country,
c.PostalCode,
c.Phone,
c.Fax,
c.Email,
c.SupportRepId
FROM Customer_staging c;
CREATE TABLE Employee_Dim AS
SELECT
e.EmployeeId,
e.FirstName,
e.LastName,
e.Title,
e.BirthDate,
e.HireDate,
e.Address,
e.City,
e.State,
e.Country,
e.Phone,
e.Email
FROM Employee_staging e;
CREATE TABLE Album_Dim AS
SELECT
a.AlbumId,
a.Title
FROM Album_staging a;
CREATE TABLE Artist_Dim AS
SELECT
ar.ArtistId,
ar.Name
FROM Artist_staging ar;
CREATE TABLE Genre_Dim AS
SELECT
g.GenreId,
g.Name
FROM Genre_staging g;
CREATE TABLE MediaType_Dim AS
SELECT
m.MediaTypeId,
m.Name
FROM MediaType_staging m;
CREATE TABLE Playlist_Dim AS
SELECT
p.PlaylistId,
p.Name
FROM Playlist_staging p;
CREATE TABLE Invoice_Dim AS
SELECT
i.InvoiceId,
i.CustomerId,
i.InvoiceDate,
i.BillingAddress,
i.BillingCity,
i.BillingState,
i.BillingCountry,
i.BillingPostalCode,
i.Total
FROM Invoice_staging i;
CREATE OR REPLACE TABLE Sales_Fact AS
SELECT
il.InvoiceLineId AS Sales_FactId,
il.UnitPrice AS UnitPrice,
il.Quantity AS Quantity,
di.InvoiceId AS InvoiceId,
dt.TrackId AS TrackId,
dd.DateId AS DateId,
de.EmployeeId AS EmployeeId,
dg.GenreId AS GenreId,
dp.PlaylistId AS PlaylistId,
da.ArtistId AS ArtistId,
a.AlbumId AS AlbumId,
inv.InvoiceId AS InvoiceDimId
FROM InvoiceLine_staging il
JOIN Track_Dim dt ON il.TrackId = dt.TrackId
JOIN Invoice_staging di ON il.InvoiceId = di.InvoiceId
JOIN Date_Dim dd ON CAST(di.InvoiceDate AS DATE) = dd.Date
JOIN Customer_Dim dc ON di.CustomerId = dc.CustomerId
LEFT JOIN Employee_Dim de ON dc.SupportRepId = de.EmployeeId
LEFT JOIN Track_staging tr ON il.TrackId = tr.TrackId
LEFT JOIN PlaylistTrack_staging pt ON il.TrackId = pt.TrackId
LEFT JOIN Genre_Dim dg ON tr.GenreId = dg.GenreId
LEFT JOIN Playlist_Dim dp ON pt.PlaylistId = dp.PlaylistId
LEFT JOIN Album_staging al ON tr.AlbumId = al.AlbumId
LEFT JOIN Artist_staging ar ON al.ArtistId = ar.ArtistId
LEFT JOIN Artist_Dim da ON ar.ArtistId = da.ArtistId
LEFT JOIN Album_Dim a ON al.AlbumId = a.AlbumId
JOIN MediaType_Dim dm ON tr.MediaTypeId = dm.MediaTypeId
JOIN Invoice_Dim inv ON di.InvoiceId = inv.InvoiceId;
DROP TABLE IF EXISTS InvoiceLine_staging;
DROP TABLE IF EXISTS Invoice_staging;
DROP TABLE IF EXISTS Customer_staging;
DROP TABLE IF EXISTS Employee_staging;
DROP TABLE IF EXISTS PlaylistTrack_staging;
DROP TABLE IF EXISTS Playlist_staging;
DROP TABLE IF EXISTS Track_staging;
DROP TABLE IF EXISTS Genre_staging;
DROP TABLE IF EXISTS MediaType_staging;
DROP TABLE IF EXISTS Album_staging;
DROP TABLE IF EXISTS Artist_staging;