diff --git a/hugo/content/posts/data-engineering/bootcamps/data-expert-io/dimensional-data-modelling/day1/lab.md b/hugo/content/posts/data-engineering/bootcamps/data-expert-io/dimensional-data-modelling/day1/lab.md new file mode 100644 index 0000000..0d0a71d --- /dev/null +++ b/hugo/content/posts/data-engineering/bootcamps/data-expert-io/dimensional-data-modelling/day1/lab.md @@ -0,0 +1,259 @@ +--- +date: 2024-11-16T17:52:23+01:00 +draft: false +author: Gabriel LOPEZ +title: (DataExpert.io) Bootcamp - Day 1 - Lab +--- + +> **Goal:** Create a *cumulative table design* + +## Problem overview +We have a table containing the stats for NBA players; there's one record for each player's season. + +``` +postgres=# \d player_seasons; +``` + +Table `public.player_seasons` + 
| Column | Type |
| ------------ | ------- |
| player_name | text |
| age | integer |
| height | text |
| weight | integer |
| college | text |
| country | text |
| draft_year | text |
| draft_round | text |
| draft_number | text |
| gp | real |
| pts | real |
| reb | real |
| ast | real |
| netrtg | real |
| oreb_pct | real |
| dreb_pct | real |
| usg_pct | real |
| ts_pct | real |
| ast_pct | real |
| season | integer |

**Indexes:**
`"player_seasons_pkey" PRIMARY KEY, btree (player_name, season)`

This table has a temporal data problem: joining it with another table shuffles the player records (the same player's statistics no longer follow each other), which makes **run-length encoding** compression less efficient.
## Run-Length Encoding

> **Run-Length Encoding** is a simple data compression algorithm that encodes consecutive repeated data elements (runs) as a single value plus a count of its repetitions.

Instead of storing the repeated data multiple times, it stores the data value and the number of times it appears consecutively.

We are going to transform the table to have one row per player, with a column holding an array of that player's seasons. 
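The run-length idea is easy to see in a few lines of code (an illustrative sketch; the `rle_encode` helper is ours, not part of the lab):

```python
from itertools import groupby

def rle_encode(values):
    # Encode consecutive runs as (value, run_length) pairs
    return [(v, len(list(g))) for v, g in groupby(values)]

# When a player's rows sit next to each other, runs are long:
print(rle_encode(["A", "A", "A", "B", "B", "C"]))  # [('A', 3), ('B', 2), ('C', 1)]

# After a shuffling join, every run has length 1 and RLE buys nothing:
print(rle_encode(["A", "B", "A", "C", "B", "A"]))
```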
+ +## Cumulative table design + +The cumulative design serves two distinct but complementary purposes: + +1. **Join/GroupBy Optimization**: + - By storing temporal data (seasons) together in arrays, we optimize for: + - Fewer rows to join + - Less data shuffling during grouping + - Better data locality + +2. **RLE Compression**: When we later explode/unnest the arrays, the data will naturally group temporal values together, making RLE more efficient. + + +## What things are part of a season and what things aren't? +We want to store the temporal component in its own data type. + +We create a `STRUCT` named `season_stats` with Postgres: +```sql +CREATE TYPE season_stats AS ( + season INTEGER, + gp INTEGER, + pts REAL, + reb REAL, + ast REAL +) +``` +We don't keep all the season statistics in this struct, only the ones we'll need. + +## Creating the cumulative table +Then we create the cumulative table schema using our new `STRUCT`: +```sql +CREATE TABLE players ( + player_name TEXT, + height TEXT, + college TEXT, + country TEXT, + draft_year TEXT, + draft_round TEXT, + season_stats season_stats[], + current_season INTEGER, + PRIMARY KEY(player_name, current_season) +) +``` + +We want to figure out what the first year in the table is: +```sql +SELECT MIN(season) FROM player_seasons; +``` + +It is `1996`. + +```sql +WITH yesterday AS ( + SELECT * FROM players + WHERE current_season = 1995 +), +today AS ( + SELECT * FROM player_seasons + WHERE season = 1996 +) + +SELECT * FROM today t FULL OUTER JOIN yesterday y + ON t.player_name = y.player_name +``` + +The query gives us `NULL` values on the `yesterday` (1995) side of the join, as that year doesn't exist in `players` yet. + +Now we want to `COALESCE()` the non-temporal values. 
+ +```sql +WITH yesterday AS ( + SELECT * FROM players + WHERE current_season = 1995 +), +today AS ( + SELECT * FROM player_seasons + WHERE season = 1996 +) +SELECT + COALESCE(t.player_name, y.player_name) AS player_name, + COALESCE(t.height, y.height) AS height, + COALESCE(t.college, y.college) AS college, + COALESCE(t.country, y.country) AS country, + COALESCE(t.draft_year, y.draft_year) AS draft_year, + COALESCE(t.draft_round, y.draft_round) AS draft_round, + COALESCE(t.draft_number, y.draft_number) AS draft_number +FROM today t FULL OUTER JOIN yesterday y + ON t.player_name = y.player_name +``` + + **Purpose of COALESCE here**: + - Handling data continuity between two time periods + - It ensures we keep the non-temporal data when a player exists in either period + +**What the query actually does**: +For each player: +- If the player exists only in 'today' (1996): use today's data +- If the player exists only in 'yesterday' (1995): use yesterday's data +- If the player exists in both: use today's data (COALESCE takes the first non-NULL value) + +```sql +SELECT * FROM player_seasons; + +DROP TABLE IF EXISTS players; + +CREATE TYPE scoring_class AS ENUM('star', 'good', 'average', 'bad'); + +CREATE TABLE players ( + player_name TEXT, + height TEXT, + college TEXT, + country TEXT, + draft_year TEXT, + draft_round TEXT, + draft_number TEXT, + season_stats season_stats[], + scoring_class scoring_class, + years_since_last_season INTEGER, + current_season INTEGER, + PRIMARY KEY(player_name, current_season) +); + +-- This is the SEED query for cumulation because the first year's yesterday is going to be NULL, +-- the FULL OUTER JOIN is just taking everything from today as yesterday doesn't exist. 
+INSERT INTO players +WITH yesterday AS ( + SELECT * FROM players + WHERE current_season = 2000 +), +today AS ( + SELECT * FROM player_seasons + WHERE season = 2001 +) + +SELECT + COALESCE(t.player_name, y.player_name) AS player_name, + COALESCE(t.height, y.height) AS height, + COALESCE(t.college, y.college) AS college, + COALESCE(t.country, y.country) AS country, + COALESCE(t.draft_year, y.draft_year) AS draft_year, + COALESCE(t.draft_round, y.draft_round) AS draft_round, + COALESCE(t.draft_number, y.draft_number) AS draft_number, + -- If yesterday is null we create the initial array + CASE WHEN y.season_stats IS NULL THEN + ARRAY[ROW( + t.season, + t.gp, + t.pts, + t.reb, + t.ast + )::season_stats] + -- If today is not null we create the new value by concatenating the array of previous values + -- with today's ones. + -- We don't want to keep adding values to the season_stats array if the player is retired + WHEN t.season IS NOT NULL THEN + y.season_stats || ARRAY[ROW( + t.season, + t.gp, + t.pts, + t.reb, + t.ast + )::season_stats] + -- Otherwise we carry the history forward without modifying it. 
+    ELSE y.season_stats + END AS season_stats, + -- Determine the scoring class of the player for the current season + CASE + WHEN t.season IS NOT NULL THEN + CASE WHEN t.pts > 20 THEN 'star' + WHEN t.pts > 15 THEN 'good' + WHEN t.pts > 10 THEN 'average' + ELSE 'bad' + END::scoring_class + ELSE y.scoring_class + END as scoring_class, + CASE WHEN t.season IS NOT NULL THEN 0 + ELSE y.years_since_last_season + 1 + END as years_since_last_season, + COALESCE(t.season, y.current_season + 1) as current_season +FROM today t FULL OUTER JOIN yesterday y + ON t.player_name = y.player_name; + +-- No GROUP BY = very fast; everything happens in the map step, so it can be parallelized +SELECT + player_name, + (season_stats[CARDINALITY(season_stats)]::season_stats).pts / + CASE WHEN (season_stats[1]::season_stats).pts = 0 THEN 1 ELSE ((season_stats[1]::season_stats).pts) END +FROM players +WHERE current_season = 2001 +AND scoring_class = 'star'; + +-- Going back to the original table +WITH unnested AS ( + SELECT player_name, + UNNEST(season_stats) AS season_stats + FROM players + WHERE current_season = 2001 +) + +SELECT player_name, + (season_stats::season_stats).* +FROM unnested; + +-- Here we keep player stats (temporal attributes) sorted through the JOIN +-- We can apply RLE compression efficiently +``` diff --git a/hugo/hugo.toml b/hugo/hugo.toml index 4d40dbd..b0b260e 100644 --- a/hugo/hugo.toml +++ b/hugo/hugo.toml @@ -1,3 +1,5 @@ baseURL="https://glopez.github.io/blog/" title="Gabriel Study Blog" theme="archie" + +enableEmoji = true diff --git a/hugo/public/index.html b/hugo/public/index.html index ea67a77..4e34c9a 100644 --- a/hugo/public/index.html +++ b/hugo/public/index.html @@ -93,6 +93,115 @@

Idempotency

Read more ⟶ +
+

(DataExpert.io) Bootcamp - Day 1 - Lab

+ +
+ +
+

Goal : Create a cumulative table design

+
+

Problem overview

+

We have a table containing the stats for the NBA players, there’s one record for each player’s season.

+
postgres=# \d player_seasons;
+

Table public.player_seasons

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ColumnType
player_nametext
ageinteger
heighttext
weightinteger
collegetext
countrytext
draft_yeartext
draft_roundtext
draft_numbertext
gpreal
ptsreal
rebreal
astreal
netrtgreal
oreb_pctreal
dreb_pctreal
usg_pctreal
ts_pctreal
ast_pctreal
seasoninteger
+

Indexes: +"player_seasons_pkey" PRIMARY KEY, btree (player_name, season)

… + +
+ Read more ⟶ +
+

(DataExpert.io) Bootcamp - Day 1 - Lecture

diff --git a/hugo/public/index.xml b/hugo/public/index.xml index 6cc2f11..9e509e4 100644 --- a/hugo/public/index.xml +++ b/hugo/public/index.xml @@ -22,6 +22,13 @@ http://localhost:1313/blog/posts/data-engineering/bootcamps/data-expert-io/dimensional-data-modelling/day2/lecture/ <p>Today&rsquo;s lecture deals with <strong>Slowly Changing Dimensions</strong> and <strong>Idempotency</strong>.</p> <blockquote> <p><strong>Slowly changing dimensions</strong> = An attribute that drifts over time</p> </blockquote> <p><em>Example:</em> Your favorite food</p> <h2 id="idempotency">Idempotency</h2> <p>You need to model slowly dimensions the right way because they impact idempotency.</p> <blockquote> <p><strong>Idempotent</strong> = Denoting an element of a set which is unchanged in value when multiplied or otherwise operated on by itself.</p> </blockquote> <blockquote> <p><strong>Idempotent pipeline</strong> = The ability for your data pipeline to produce the same results whether it&rsquo;s running in production or in backfill.</p> + + (DataExpert.io) Bootcamp - Day 1 - Lab + http://localhost:1313/blog/posts/data-engineering/bootcamps/data-expert-io/dimensional-data-modelling/day1/lab/ + Sat, 16 Nov 2024 17:52:23 +0100 + http://localhost:1313/blog/posts/data-engineering/bootcamps/data-expert-io/dimensional-data-modelling/day1/lab/ + <blockquote> <p><strong>Goal :</strong> Create a <em>cumulative table design</em></p> </blockquote> <h2 id="problem-overview">Problem overview</h2> <p>We have a table containing the stats for the NBA players, there&rsquo;s one record for each player&rsquo;s season.</p> <pre tabindex="0"><code>postgres=# \d player_seasons; </code></pre><p>Table <code>public.player_seasons</code></p> <table> <thead> <tr> <th>Column</th> <th>Type</th> </tr> </thead> <tbody> <tr> <td>player_name</td> <td>text</td> </tr> <tr> <td>age</td> <td>integer</td> </tr> <tr> <td>height</td> <td>text</td> </tr> <tr> <td>weight</td> <td>integer</td> </tr> <tr> <td>college</td> 
<td>text</td> </tr> <tr> <td>country</td> <td>text</td> </tr> <tr> <td>draft_year</td> <td>text</td> </tr> <tr> <td>draft_round</td> <td>text</td> </tr> <tr> <td>draft_number</td> <td>text</td> </tr> <tr> <td>gp</td> <td>real</td> </tr> <tr> <td>pts</td> <td>real</td> </tr> <tr> <td>reb</td> <td>real</td> </tr> <tr> <td>ast</td> <td>real</td> </tr> <tr> <td>netrtg</td> <td>real</td> </tr> <tr> <td>oreb_pct</td> <td>real</td> </tr> <tr> <td>dreb_pct</td> <td>real</td> </tr> <tr> <td>usg_pct</td> <td>real</td> </tr> <tr> <td>ts_pct</td> <td>real</td> </tr> <tr> <td>ast_pct</td> <td>real</td> </tr> <tr> <td>season</td> <td>integer</td> </tr> </tbody> </table> <p><strong>Indexes:</strong> <code>&quot;player_seasons_pkey&quot; PRIMARY KEY, btree (player_name, season)</code></p> + (DataExpert.io) Bootcamp - Day 1 - Lecture http://localhost:1313/blog/posts/data-engineering/bootcamps/data-expert-io/dimensional-data-modelling/day1/lecture/ diff --git a/hugo/public/posts/data-engineering/bootcamps/data-expert-io/dimensional-data-modelling/day1/RLE.svg b/hugo/public/posts/data-engineering/bootcamps/data-expert-io/dimensional-data-modelling/day1/RLE.svg new file mode 100644 index 0000000..824c092 --- /dev/null +++ b/hugo/public/posts/data-engineering/bootcamps/data-expert-io/dimensional-data-modelling/day1/RLE.svg @@ -0,0 +1,29 @@ + + + + + + + + + + + + + + + + + + + + 3x + + + 2x + + + 1x + + diff --git a/hugo/public/posts/data-engineering/bootcamps/data-expert-io/dimensional-data-modelling/day1/lab/index.html b/hugo/public/posts/data-engineering/bootcamps/data-expert-io/dimensional-data-modelling/day1/lab/index.html new file mode 100644 index 0000000..e018c76 --- /dev/null +++ b/hugo/public/posts/data-engineering/bootcamps/data-expert-io/dimensional-data-modelling/day1/lab/index.html @@ -0,0 +1,480 @@ + + + + (DataExpert.io) Bootcamp - Day 1 - Lab - Gabriel Study Blog + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+

(DataExpert.io) Bootcamp - Day 1 - Lab

+
Posted on Nov 16, 2024
+
+ + + + +
+
+

Goal : Create a cumulative table design

+
+

Problem overview

+

We have a table containing the stats for the NBA players, there’s one record for each player’s season.

+
postgres=# \d player_seasons;
+

Table public.player_seasons

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ColumnType
player_nametext
ageinteger
heighttext
weightinteger
collegetext
countrytext
draft_yeartext
draft_roundtext
draft_numbertext
gpreal
ptsreal
rebreal
astreal
netrtgreal
oreb_pctreal
dreb_pctreal
usg_pctreal
ts_pctreal
ast_pctreal
seasoninteger
+

Indexes: +"player_seasons_pkey" PRIMARY KEY, btree (player_name, season)

+

We have a temporal data problem with the table where joining the table with another would cause shuffling of the players records (same player statics won’t be following each other) making run-length encoding compression less efficient

+

Run-Length Encoding

+
+

Run-Length Encoding is a simple data compression algorithm that encodes consecutive repeated data elements (runs) as a single value plus a count of its repetitions.

+
+

Instead of storing the repeated data multiple times, it stores the data value and the number of times it appears consecutively.

+

We are going to transform the table to have one row per player with a column of arrays of the player seasons.

+

Cumulative table design

+

The cumulative design serves two distinct but complementary purposes:

+
    +
  1. +

    Join/GroupBy Optimization:

    +
      +
    • By storing temporal data (seasons) together in arrays, we optimize for: +
        +
      • Fewer rows to join
      • +
      • Less data shuffling during grouping
      • +
      • Better data locality
      • +
      +
    • +
    +
  2. +
  3. +

    RLE Compression: When we later explode/unnest the arrays, the data will naturally group temporal values together, making RLE more efficient.

    +
  4. +
+

What things are part of a season and what things aren’t ?

+

We want to store the temporal component in it’s own data type.

+

We create a STRUCT named season_stats with Postges :

+
CREATE TYPE season_stats AS (  
+    season INTEGER,  
+    gp INTEGER,  
+    pts REAL,  
+    reb REAL,  
+    ast REAL  
+)
+

We don’t take all the season statistics in this struct as we won’t need all of them.

+

Creating the cumulative table

+

Then we create the cumulative table schema using our new STRUCT :

+
CREATE TABLE players (  
+    player_name TEXT,  
+    height TEXT,  
+    college TEXT,  
+    country TEXT,  
+	draft_year TEXT,
+    draft_round TEXT,  
+    season_stats season_stats[],  
+    current_season INTEGER,  
+    PRIMARY KEY(player_name, current_season)  
+)
+

We want to figure out what is the first year in the table is :

+
SELECT MIN(season) FROM player_seasons;
+

It is 1996

+
WITH yesterday AS (  
+    SELECT * FROM players  
+    WHERE current_season = 1995  
+),  
+today AS (  
+    SELECT * FROM player_seasons  
+    WHERE season = 1996  
+)  
+  
+SELECT * FROM today t FULL OUTER JOIN yesterday y  
+    ON t.player_name = y.player_name
+

The request give us <null> values for the left side of the join 1995 (yesterday) as it doesn’t exists.

+

Now we want to COALESCE() the non temporal values.

+
WITH yesterday AS (  
+    SELECT * FROM players  
+    WHERE current_season = 1995  
+),  
+today AS (  
+    SELECT * FROM player_seasons  
+    WHERE season = 1996  
+)  
+SELECT 
+	COALESCE(t.player_name, y.player_name) AS player_name,
+	COALESCE(t.height, y.height) AS height,
+	COALESCE(t.college, y.college) AS college,
+	COALESCE(t.country, y.country) AS country,
+	COALESCE(t.draft_year, y.draft_year) AS draft_year,
+	COALESCE(t.draft_round, y.draft_round) AS draft_round,
+	COALESCE(t.draft_number, y.draft_number) AS draft_number
+FROM today t FULL OUTER JOIN yesterday y  
+    ON t.player_name = y.player_name
+

Purpose of COALESCE here:

+
    +
  • Handling data continuity between two time periods
  • +
  • It ensures we keep the non-temporal data when a player exists in either period
  • +
+

What the query actually does: +For each player:

+
    +
  • If player exists in ’today’ (1996): use today’s data
  • +
  • If player only exists in ‘yesterday’ (1995): use yesterday’s data
  • +
  • If player exists in both: use today’s data (through COALESCE taking first non-NULL)`
  • +
+
SELECT * FROM player_seasons;  
+  
+DROP TABLE IF EXISTS players ;  
+  
+CREATE TYPE scoring_class AS ENUM('star', 'good', 'average', 'bad');  
+  
+CREATE TABLE players (  
+    player_name TEXT,  
+    height TEXT,  
+    college TEXT,  
+    country TEXT,  
+    draft_year TEXT,  
+    draft_round TEXT,  
+    draft_number TEXT,  
+    season_stats season_stats[],  
+    scoring_class scoring_class,  
+    years_since_last_season INTEGER,  
+    current_season INTEGER,  
+    PRIMARY KEY(player_name, current_season)  
+);  
+  
+-- This is the SEED query for cumulation because year 1995 is going to be <null>,  
+-- the FULL OUTER JOIN is just taking everything from today as yesterday doesn't exist.  
+INSERT INTO players  
+WITH yesterday AS (  
+    SELECT * FROM players  
+    WHERE current_season  = 2000  
+),  
+today AS (  
+    SELECT * FROM player_seasons  
+    WHERE season = 2001  
+)  
+  
+SELECT  
+    COALESCE(t.player_name, y.player_name) AS player_name,  
+    COALESCE(t.height, y.height) AS height,  
+    COALESCE(t.college, y.college) AS college,  
+    COALESCE(t.country, y.country) AS country,  
+    COALESCE(t.draft_year, y.draft_year) AS draft_year,  
+    COALESCE(t.draft_round, y.draft_round) AS draft_round,  
+    COALESCE(t.draft_number, y.draft_number) AS draft_number,  
+    -- If yesterday is null we create the initial array  
+    CASE WHEN y.season_stats IS NULL THEN  
+        ARRAY[ROW(  
+            t.season,  
+            t.gp,  
+            t.pts,  
+            t.reb,  
+            t.ast  
+        )::season_stats]  
+    -- If today is not null we create the new value by concatenating the array of previous values  
+    -- with today's ones.    
+    -- We don't want to keep adding values to the season_stats array if the player is retired 
+    WHEN t.season IS NOT NULL THEN  
+        y.season_stats || ARRAY[ROW(  
+            t.season,  
+            t.gp,  
+            t.pts,  
+            t.reb,  
+            t.ast  
+        )::season_stats]  
+    -- Otherwise we carry the history forward without modifying it.  
+    ELSE y.season_stats  
+    END AS season_stats,  
+    -- Determine the scoring class of the player for current season  
+    CASE  
+        WHEN t.season IS NOT NULL THEN  
+        CASE WHEN t.pts > 20 THEN 'star'  
+            WHEN t.pts >  15 THEN 'good'  
+            WHEN t.pts > 10 THEN 'average'  
+            ELSE 'bad'  
+        END::scoring_class  
+        ELSE y.scoring_class  
+    END as scoring_class,  
+    CASE WHEN t.season IS NOT NULL THEN 0  
+        ELSE y.years_since_last_season + 1  
+    END as years_since_last_season,  
+    COALESCE(t.season, y.current_season + 1) as current_season  
+FROM today t FULL OUTER JOIN yesterday y  
+    ON t.player_name = y.player_name;  
+  
+-- No GROUP BY = very fast; everything happens in the map step, so it can be parallelized
+SELECT  
+    player_name,  
+    (season_stats[CARDINALITY(season_stats)]::season_stats).pts /  
+    CASE WHEN (season_stats[1]::season_stats).pts = 0 THEN 1 ELSE ((season_stats[1]::season_stats).pts) END  
+FROM players  
+WHERE current_season = 2001  
+AND scoring_class = 'star';  
+  
+-- Going back to the original table  
+WITH unnested AS (  
+    SELECT player_name,  
+        UNNEST(season_stats) AS season_stats  
+    FROM players  
+    WHERE current_season = 2001  
+)  
+  
+SELECT player_name,  
+       (season_stats::season_stats).*  
+FROM unnested;  
+  
+-- Here we keep player stats (temporal attributes) sorted through the JOIN
+-- We can apply RLE compression efficiently
+
+
+ + +
+
+ +
+ + diff --git a/hugo/public/posts/index.html b/hugo/public/posts/index.html index 703e1bf..164f313 100644 --- a/hugo/public/posts/index.html +++ b/hugo/public/posts/index.html @@ -48,6 +48,8 @@

All articles

(DataExpert.io) Bootcamp - Day 3 - Lecture Nov 19, 2024
  • (DataExpert.io) Bootcamp - Day 2 - Lecture Nov 17, 2024 +
  • + (DataExpert.io) Bootcamp - Day 1 - Lab Nov 16, 2024
  • (DataExpert.io) Bootcamp - Day 1 - Lecture Nov 15, 2024
  • diff --git a/hugo/public/posts/index.xml b/hugo/public/posts/index.xml index e93f36a..3e5048e 100644 --- a/hugo/public/posts/index.xml +++ b/hugo/public/posts/index.xml @@ -13,7 +13,7 @@ http://localhost:1313/blog/posts/data-engineering/bootcamps/data-expert-io/dimensional-data-modelling/day3/lecture/ Tue, 19 Nov 2024 17:46:24 +0100 http://localhost:1313/blog/posts/data-engineering/bootcamps/data-expert-io/dimensional-data-modelling/day3/lecture/ - <p>How to build a data agnostic graph data model ?</p> <h2 id="index">Index</h2> <ul> <li>Additive VS non-additive dimensions</li> <li>The power of Enums</li> <li>When should you use flexible data types ?</li> <li>Graph data modeling</li> </ul> <h2 id="additive-vs-non-additive-dimensions">Additive vs Non-additive dimensions</h2> <h3 id="what-makes-a-dimension-additive-">What makes a dimension additive ?</h3> <p>Additivity refers to whether numerical facts (measures) in a fact table can be meaningfully aggregated across different dimensions.</p> <p>If you take all the sub-totals and sum them up you should have the total</p> + <p>Today&rsquo;s lecture is about dimensional additivity and how to build a flexible data model ready for graph database consumption</p> <h2 id="index">Index</h2> <ul> <li>Additive VS non-additive dimensions</li> <li>The power of Enums</li> <li>When should you use flexible data types ?</li> <li>Graph data modeling</li> </ul> <h2 id="additive-vs-non-additive-dimensions">Additive vs Non-additive dimensions</h2> <h3 id="what-makes-a-dimension-additive-">What makes a dimension additive ?</h3> <p>Additivity refers to whether numerical facts (measures) in a fact table can be meaningfully aggregated across different dimensions.</p> <p>If you take all the sub-totals and sum them up you should have the total</p>
    (DataExpert.io) Bootcamp - Day 2 - Lecture @@ -22,6 +22,13 @@ http://localhost:1313/blog/posts/data-engineering/bootcamps/data-expert-io/dimensional-data-modelling/day2/lecture/ <p>Today&rsquo;s lecture deals with <strong>Slowly Changing Dimensions</strong> and <strong>Idempotency</strong>.</p> <blockquote> <p><strong>Slowly changing dimensions</strong> = An attribute that drifts over time</p> </blockquote> <p><em>Example:</em> Your favorite food</p> <h2 id="idempotency">Idempotency</h2> <p>You need to model slowly dimensions the right way because they impact idempotency.</p> <blockquote> <p><strong>Idempotent</strong> = Denoting an element of a set which is unchanged in value when multiplied or otherwise operated on by itself.</p> </blockquote> <blockquote> <p><strong>Idempotent pipeline</strong> = The ability for your data pipeline to produce the same results whether it&rsquo;s running in production or in backfill.</p> + + (DataExpert.io) Bootcamp - Day 1 - Lab + http://localhost:1313/blog/posts/data-engineering/bootcamps/data-expert-io/dimensional-data-modelling/day1/lab/ + Sat, 16 Nov 2024 17:52:23 +0100 + http://localhost:1313/blog/posts/data-engineering/bootcamps/data-expert-io/dimensional-data-modelling/day1/lab/ + <blockquote> <p><strong>Goal :</strong> Create a <em>cumulative table design</em></p> </blockquote> <h2 id="problem-overview">Problem overview</h2> <p>We have a table containing the stats for the NBA players, there&rsquo;s one record for each player&rsquo;s season.</p> <pre tabindex="0"><code>postgres=# \d player_seasons; </code></pre><p>Table <code>public.player_seasons</code></p> <table> <thead> <tr> <th>Column</th> <th>Type</th> </tr> </thead> <tbody> <tr> <td>player_name</td> <td>text</td> </tr> <tr> <td>age</td> <td>integer</td> </tr> <tr> <td>height</td> <td>text</td> </tr> <tr> <td>weight</td> <td>integer</td> </tr> <tr> <td>college</td> <td>text</td> </tr> <tr> <td>country</td> <td>text</td> </tr> <tr> <td>draft_year</td> <td>text</td> 
</tr> <tr> <td>draft_round</td> <td>text</td> </tr> <tr> <td>draft_number</td> <td>text</td> </tr> <tr> <td>gp</td> <td>real</td> </tr> <tr> <td>pts</td> <td>real</td> </tr> <tr> <td>reb</td> <td>real</td> </tr> <tr> <td>ast</td> <td>real</td> </tr> <tr> <td>netrtg</td> <td>real</td> </tr> <tr> <td>oreb_pct</td> <td>real</td> </tr> <tr> <td>dreb_pct</td> <td>real</td> </tr> <tr> <td>usg_pct</td> <td>real</td> </tr> <tr> <td>ts_pct</td> <td>real</td> </tr> <tr> <td>ast_pct</td> <td>real</td> </tr> <tr> <td>season</td> <td>integer</td> </tr> </tbody> </table> <p><strong>Indexes:</strong> <code>&quot;player_seasons_pkey&quot; PRIMARY KEY, btree (player_name, season)</code></p> + (DataExpert.io) Bootcamp - Day 1 - Lecture http://localhost:1313/blog/posts/data-engineering/bootcamps/data-expert-io/dimensional-data-modelling/day1/lecture/ diff --git a/hugo/public/sitemap.xml b/hugo/public/sitemap.xml index 6df5fb5..7b730a4 100644 --- a/hugo/public/sitemap.xml +++ b/hugo/public/sitemap.xml @@ -13,6 +13,9 @@ http://localhost:1313/blog/posts/data-engineering/bootcamps/data-expert-io/dimensional-data-modelling/day2/lecture/ 2024-11-17T19:21:32+01:00 + + http://localhost:1313/blog/posts/data-engineering/bootcamps/data-expert-io/dimensional-data-modelling/day1/lab/ + 2024-11-16T17:52:23+01:00 http://localhost:1313/blog/posts/data-engineering/bootcamps/data-expert-io/dimensional-data-modelling/day1/lecture/ 2024-11-15T17:46:24+01:00 diff --git a/hugo/public/svg/RLE.svg b/hugo/public/svg/RLE.svg new file mode 100644 index 0000000..824c092 --- /dev/null +++ b/hugo/public/svg/RLE.svg @@ -0,0 +1,29 @@ + + + + + + + + + + + + + + + + + + + + 3x + + + 2x + + + 1x + + diff --git a/hugo/static/svg/RLE.svg b/hugo/static/svg/RLE.svg new file mode 100644 index 0000000..824c092 --- /dev/null +++ b/hugo/static/svg/RLE.svg @@ -0,0 +1,29 @@ + + + + + + + + + + + + + + + + + + + + 3x + + + 2x + + + 1x + +