Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix Sample Scripts and Data #199

Merged
merged 5 commits into from
Feb 25, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion example/sample_data/sample_table_programmatic_source.csv
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
database,cluster,schema_name,name,description,tags,description_source
database,cluster,schema,name,description,tags,description_source
hive,gold,test_schema,test_table1,"**Size**: 50T\n\n**Monthly Cost**: $5000","expensive","s3_crawler"
dynamo,gold,test_schema,test_table2,"**Size**: 1T\n\n**Monthly Cost**: $50","cheap","s3_crawler"
hive,gold,test_schema,test_table1,"### Quality Report:\n --- \n Ipsus enom. Ipsus enom ipsus lorenum.\n ---\n[![Build Status](https://api.travis-ci.com/lyft/amundsendatabuilder.svg?branch=master)](https://travis-ci.com/lyft/amundsendatabuilder)","low_quality","quality_service"
4 changes: 2 additions & 2 deletions example/sample_data/sample_user.csv
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
email,first_name,last_name,name,github_username,team_name,employee_type,manager_email,slack_id
email,first_name,last_name,full_name,github_username,team_name,employee_type,manager_email,slack_id
[email protected],Roald,Amundsen,"Roald Amundsen",lyft,"Team Amundsen",sailor,"[email protected]",ramundzn
[email protected],Christopher,Columbus,"Christopher Columbus",ChristopherColumbusFAKE,"Team Amundsen",sailor,"[email protected]",chrisc
[email protected], Buzz, Aldrin,"Buzz Aldrin",BuzzAldrinFAKE,"Team Amundsen",astronaut,"[email protected]",buzz
[email protected],Buzz,Aldrin,"Buzz Aldrin",BuzzAldrinFAKE,"Team Amundsen",astronaut,"[email protected]",buzz
16 changes: 8 additions & 8 deletions example/scripts/sample_data_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,7 @@ def load_user_data_from_csv(file_name):
'(email VARCHAR(64) NOT NULL , '
'first_name VARCHAR(64) NOT NULL , '
'last_name VARCHAR(64) NOT NULL , '
'name VARCHAR(64) NOT NULL , '
'full_name VARCHAR(64) NOT NULL , '
'github_username VARCHAR(64) NOT NULL , '
'team_name VARCHAR(64) NOT NULL, '
'employee_type VARCHAR(64) NOT NULL,'
Expand All @@ -202,15 +202,15 @@ def load_user_data_from_csv(file_name):
to_db = [(i['email'],
i['first_name'],
i['last_name'],
i['name'],
i['full_name'],
i['github_username'],
i['team_name'],
i['employee_type'],
i['manager_email'],
i['slack_id']) for i in dr]

cur.executemany("INSERT INTO test_user_metadata ("
"email, first_name, last_name, name, github_username, "
"email, first_name, last_name, full_name, github_username, "
"team_name, employee_type, "
"manager_email, slack_id ) VALUES "
"(?, ?, ?, ?, ?, ?, ?, ?, ?);", to_db)
Expand Down Expand Up @@ -434,7 +434,7 @@ def create_last_updated_job():
'publisher.neo4j.{}'.format(neo4j_csv_publisher.NEO4J_PASSWORD):
neo4j_password,
'publisher.neo4j.{}'.format(neo4j_csv_publisher.JOB_PUBLISH_TAG):
'unique_lastupdated_tag', # should use unique tag here like {ds}
'unique_last_updated_tag', # should use unique tag here like {ds}
})

job = DefaultJob(conf=job_config,
Expand Down Expand Up @@ -543,7 +543,7 @@ def _load_csv(self):
for column_dict in self.columns:
db = column_dict['database']
cluster = column_dict['cluster']
schema = column_dict['schema_name']
schema = column_dict['schema']
table = column_dict['table_name']
id = self._get_key(db, cluster, schema, table)
column = ColumnMetadata(
Expand All @@ -562,15 +562,15 @@ def _load_csv(self):
for table_dict in tables:
db = table_dict['database']
cluster = table_dict['cluster']
schema = table_dict['schema_name']
schema = table_dict['schema']
table = table_dict['name']
id = self._get_key(db, cluster, schema, table)
columns = parsed_columns[id]
if columns is None:
columns = []
table = TableMetadata(database=table_dict['database'],
cluster=table_dict['cluster'],
schema_name=table_dict['schema_name'],
schema=table_dict['schema'],
name=table_dict['name'],
description=table_dict['description'],
columns=columns,
Expand Down Expand Up @@ -732,7 +732,7 @@ def create_table_column_job(table_path, column_path):
with user, a, b, c, read, own, follow, manager
where user.full_name is not null
return user.email as email, user.first_name as first_name, user.last_name as last_name,
user.full_name as name, user.github_username as github_username, user.team_name as team_name,
user.full_name as full_name, user.github_username as github_username, user.team_name as team_name,
user.employee_type as employee_type, manager.email as manager_email, user.slack_id as slack_id,
user.is_active as is_active,
REDUCE(sum_r = 0, r in COLLECT(DISTINCT read)| sum_r + r.read_count) AS total_read,
Expand Down