Skip to content

Commit

Permalink
feat: transforms aggregated "progress" events into a new fact
Browse files Browse the repository at this point in the history
  • Loading branch information
pomegranited committed Jun 21, 2024
1 parent aa78d44 commit 1e64447
Show file tree
Hide file tree
Showing 15 changed files with 215 additions and 19 deletions.
6 changes: 4 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,4 @@
# Don't commit dbt logs
logs/*

target/
dbt_packages/
logs/
File renamed without changes.
File renamed without changes.
4 changes: 0 additions & 4 deletions dbt_completion_aggregator/.gitignore

This file was deleted.

Empty file.
Empty file.
16 changes: 3 additions & 13 deletions dbt_completion_aggregator/dbt_project.yml → dbt_project.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
# name or the intended use of these models
name: 'dbt_completion_aggregator'
version: '1.0.0'
config-version: 2

# This setting configures which "profile" dbt uses for this project.
profile: 'aspects'
Expand All @@ -18,19 +19,8 @@ seed-paths: ["seeds"]
macro-paths: ["macros"]
snapshot-paths: ["snapshots"]

target-path: "target" # directory which will store compiled SQL files

clean-targets: # directories to be removed by `dbt clean`
- "target"
- "dbt_packages"


# Configuring models
# Full documentation: https://docs.getdbt.com/docs/configuring-models

# In this example config, we tell dbt to build all models in the example/
# directory as views. These settings can be overridden in the individual model
# files using the `{{ config(...) }}` macro.
models:
example:
# Config indicated by + and applies to all files under models/example/
example:
+materialized: view
File renamed without changes.
30 changes: 30 additions & 0 deletions models/completion_aggregator/aggregated_completion_events.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
{#
    Materialized view over the parsed xAPI events that keeps only "progressed"
    statements and extracts the progress and completion values from the raw
    event JSON.
#}
{{
    config(
        materialized="materialized_view",
        schema=env_var("ASPECTS_XAPI_DATABASE", "xapi"),
        engine=aspects.get_engine("ReplacingMergeTree()"),
        primary_key="(org, course_key, verb_id)",
        order_by="(org, course_key, verb_id, emission_time, actor_id, object_id, event_id)",
        partition_by="(toYYYYMM(emission_time))",
        ttl=env_var("ASPECTS_DATA_TTL_EXPRESSION", ""),
    )
}}

select
    event_id,
    -- stored as a plain DateTime
    cast(emission_time as DateTime) as emission_time,
    actor_id,
    object_id,
    course_key,
    org,
    verb_id,
    -- JSON_VALUE yields text, so both extracted values below are strings
    JSON_VALUE(
        event,
        '$.result.extensions."https://w3id.org/xapi/cmi5/result/extensions/progress"'
    ) as progress_percent,
    JSON_VALUE(event, '$.result.completion') as completed
from {{ ref("xapi_events_all_parsed") }}
-- only aggregated progress statements belong in this view
where verb_id = 'http://adlnet.gov/expapi/verbs/progressed'
61 changes: 61 additions & 0 deletions models/completion_aggregator/fact_aggregated_completions.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
-- One row per aggregated "progressed" event, enriched with course, block and
-- learner details.
with
    completions as (
        select
            emission_time,
            org,
            course_key,
            actor_id,
            -- text value extracted from result.completion by the upstream view
            completed,
            -- object_id ends in /course/<id> or /xblock/<id>; keep the trailing id
            if(
                object_id like '%/course/%',
                splitByString('/course/', object_id)[-1],
                splitByString('/xblock/', object_id)[-1]
            ) as entity_id,
            -- progress_percent is a 0-100 string; scale it to a 0-1 ratio
            cast(progress_percent as Float) / 100 as scaled_progress
        from {{ ref("aggregated_completion_events") }}
    )

select
    completions.emission_time as emission_time,
    completions.org as org,
    completions.course_key as course_key,
    courses.course_name as course_name,
    courses.course_run as course_run,
    completions.entity_id as entity_id,
    -- fall back to the course name when the entity is not a known block
    if(blocks.block_name != '', blocks.block_name, courses.course_name) as entity_name,
    if(
        blocks.block_name != '', blocks.display_name_with_location, null
    ) as entity_name_with_location,
    completions.actor_id as actor_id,
    cast(completions.scaled_progress as Float) as scaled_progress,
    -- Bucket on the CTE's value (qualified), not the Float32 alias above:
    -- Float32 0.9 and 0.7 are slightly below the literals, which would push
    -- exactly 90% / 70% into the next bucket down. Branches are evaluated in
    -- order, so each one only needs its lower bound.
    case
        when completions.scaled_progress >= 0.9 then '90-100%'
        when completions.scaled_progress >= 0.8 then '80-89%'
        when completions.scaled_progress >= 0.7 then '70-79%'
        when completions.scaled_progress >= 0.6 then '60-69%'
        when completions.scaled_progress >= 0.5 then '50-59%'
        when completions.scaled_progress >= 0.4 then '40-49%'
        when completions.scaled_progress >= 0.3 then '30-39%'
        when completions.scaled_progress >= 0.2 then '20-29%'
        when completions.scaled_progress >= 0.1 then '10-19%'
        else '0-9%'
    end as completion_bucket,
    -- expose the completion flag documented for this model as a real Bool
    cast(completions.completed = 'true' as Bool) as completed,
    users.username as username,
    users.name as name,
    users.email as email
from completions
inner join
    {{ ref("course_names") }} courses on completions.course_key = courses.course_key
left join
    {{ ref("course_block_names") }} blocks on completions.entity_id = blocks.location
left join
    {{ ref("dim_user_pii") }} users
    on toUUID(completions.actor_id) = users.external_user_id
89 changes: 89 additions & 0 deletions models/completion_aggregator/schema.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
version: 2

models:
- name: fact_aggregated_completions
  database: "{{ env_var('DBT_PROFILE_TARGET_DATABASE', 'reporting') }}"
  description: "One record per aggregated completion event for a course or component"
  columns:
    - name: emission_time
      description: "Timestamp, to the second, of when this event was emitted"
      # aggregated_completion_events casts emission_time to DateTime
      data_type: DateTime
    - name: org
      data_type: String
      description: "The organization that the course belongs to"
    - name: course_key
      data_type: String
      description: "The course key for the course"
    - name: course_name
      data_type: String
      description: "The name of the course"
    - name: course_run
      data_type: String
      description: "The course run for the course"
    - name: entity_id
      description: "The block ID or course key of the entity the progress applies to"
      data_type: String
    - name: entity_name
      data_type: String
      description: "The name of the entity the progress applies to (course or block)"
    - name: entity_name_with_location
      data_type: Nullable(String)
      description: "The entity's display name with section, subsection, and unit prepended to the name. This provides additional context when looking at block names and can help data consumers understand which block they are analyzing"
    - name: actor_id
      data_type: String
      description: "The xAPI actor identifier"
    - name: scaled_progress
      description: "A ratio between 0 and 1, inclusive, of the learner's progress"
      data_type: Float32
    - name: completion_bucket
      description: "A displayable value of progress sorted into 10% buckets. Useful for grouping progress together to show high-level learner performance"
      data_type: String
    - name: completed
      description: "Flag indicating whether the object has been fully completed"
      data_type: Bool
      data_tests:
        - is_completed:
            # is_completed compares progress against 100, so rescale the
            # 0-1 ratio to a percentage before handing it to the test
            progress_field: "scaled_progress * 100"
    - name: username
      data_type: String
      description: "The username of the learner"
    - name: name
      data_type: String
      description: "The full name of the learner"
    - name: email
      data_type: String
      description: "The email address of the learner"

- name: aggregated_completion_events
  description: "A materialized view for xAPI events related to aggregated completions"
  columns:
    - name: event_id
      data_type: UUID
      description: "The unique identifier for the event"
    - name: emission_time
      data_type: DateTime
      description: "The time the event was emitted"
    - name: actor_id
      data_type: String
      description: "The xAPI actor identifier"
    - name: object_id
      data_type: String
      description: "The xAPI object identifier"
    - name: course_key
      data_type: String
      description: "The course identifier"
    - name: org
      data_type: String
      description: "The organization that the course belongs to"
    - name: verb_id
      data_type: String
      description: "The xAPI verb identifier"
    - name: progress_percent
      # extracted with JSON_VALUE, so the percentage is stored as text
      data_type: String
      description: "The percentage of the xAPI object completed"
    - name: completed
      description: "Flag indicating whether the object has been fully completed, as the text extracted from the event's result.completion"
      # extracted with JSON_VALUE, which returns a String rather than a Bool
      data_type: String
      data_tests:
        - is_completed:
            progress_field: "progress_percent"
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# requirements file is required by Aspects.
File renamed without changes.
File renamed without changes.
27 changes: 27 additions & 0 deletions tests/generic/test_is_completed.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
{% test is_completed(model, progress_field, column_name="completed") %}
{#
    Generic test: a row flagged as completed must report full progress.

    Arguments:
        model: the relation under test (supplied by dbt).
        progress_field: column or SQL expression holding progress as a
            percentage (0-100).
        column_name: the completion flag column. dbt supplies it when the test
            is attached to a column; defaults to "completed" otherwise.

    Returns the offending rows; the test passes when no rows are returned.
#}

with
    validation as (
        select
            -- go through text so Bool, 0/1 and 'true'/'false' String flags all work
            toString({{ column_name }}) in ('true', '1') as is_completed,
            -- progress may be stored as text; unparseable values become NULL
            toFloat64OrNull(toString({{ progress_field }})) as progress
        from {{ model }}
    ),

    validation_errors as (
        select
            is_completed,
            progress
        from validation
        -- if this is true, then progress isn't being captured correctly
        where is_completed and progress < 100
    )

select
    is_completed,
    progress
from validation_errors

{% endtest %}

0 comments on commit 1e64447

Please sign in to comment.