diff --git a/example/sample_data/sample_table_programmatic_source.csv b/example/sample_data/sample_table_programmatic_source.csv index eb0b09cb4..3d6f6551d 100644 --- a/example/sample_data/sample_table_programmatic_source.csv +++ b/example/sample_data/sample_table_programmatic_source.csv @@ -1,4 +1,4 @@ -database,cluster,schema_name,name,description,tags,description_source +database,cluster,schema,name,description,tags,description_source hive,gold,test_schema,test_table1,"**Size**: 50T\n\n**Monthly Cost**: $5000","expensive","s3_crawler" dynamo,gold,test_schema,test_table2,"**Size**: 1T\n\n**Monthly Cost**: $50","cheap","s3_crawler" hive,gold,test_schema,test_table1,"### Quality Report:\n --- \n Ipsus enom. Ipsus enom ipsus lorenum.\n ---\n[![Build Status](https://api.travis-ci.com/lyft/amundsendatabuilder.svg?branch=master)](https://travis-ci.com/lyft/amundsendatabuilder)","low_quality","quality_service" \ No newline at end of file diff --git a/example/sample_data/sample_user.csv b/example/sample_data/sample_user.csv index 1e8d4ec70..74b413a74 100644 --- a/example/sample_data/sample_user.csv +++ b/example/sample_data/sample_user.csv @@ -1,4 +1,4 @@ -email,first_name,last_name,name,github_username,team_name,employee_type,manager_email,slack_id +email,first_name,last_name,full_name,github_username,team_name,employee_type,manager_email,slack_id roald.amundsen@example.org,Roald,Amundsen,"Roald Amundsen",lyft,"Team Amundsen",sailor,"phboss@example.org",ramundzn chrisc@example.org,Christopher,Columbus,"Christopher Columbus",ChristopherColumbusFAKE,"Team Amundsen",sailor,"phboss@example.org",chrisc -buzz@example.org, Buzz, Aldrin,"Buzz Aldrin",BuzzAldrinFAKE,"Team Amundsen",astronaut,"phboss@example.org",buzz +buzz@example.org,Buzz,Aldrin,"Buzz Aldrin",BuzzAldrinFAKE,"Team Amundsen",astronaut,"phboss@example.org",buzz diff --git a/example/scripts/sample_data_loader.py b/example/scripts/sample_data_loader.py index 35c6cf3d5..c5c27663e 100644 --- a/example/scripts/sample_data_loader.py +++ b/example/scripts/sample_data_loader.py @@ -190,7 +190,7 @@ def load_user_data_from_csv(file_name): '(email VARCHAR(64) NOT NULL , ' 'first_name VARCHAR(64) NOT NULL , ' 'last_name VARCHAR(64) NOT NULL , ' - 'name VARCHAR(64) NOT NULL , ' + 'full_name VARCHAR(64) NOT NULL , ' 'github_username VARCHAR(64) NOT NULL , ' 'team_name VARCHAR(64) NOT NULL, ' 'employee_type VARCHAR(64) NOT NULL,' @@ -202,7 +202,7 @@ def load_user_data_from_csv(file_name): to_db = [(i['email'], i['first_name'], i['last_name'], - i['name'], + i['full_name'], i['github_username'], i['team_name'], i['employee_type'], @@ -210,7 +210,7 @@ def load_user_data_from_csv(file_name): i['slack_id']) for i in dr] cur.executemany("INSERT INTO test_user_metadata (" - "email, first_name, last_name, name, github_username, " + "email, first_name, last_name, full_name, github_username, " "team_name, employee_type, " "manager_email, slack_id ) VALUES " "(?, ?, ?, ?, ?, ?, ?, ?, ?);", to_db) @@ -434,7 +434,7 @@ def create_last_updated_job(): 'publisher.neo4j.{}'.format(neo4j_csv_publisher.NEO4J_PASSWORD): neo4j_password, 'publisher.neo4j.{}'.format(neo4j_csv_publisher.JOB_PUBLISH_TAG): - 'unique_lastupdated_tag', # should use unique tag here like {ds} + 'unique_last_updated_tag', # should use unique tag here like {ds} }) job = DefaultJob(conf=job_config, @@ -543,7 +543,7 @@ def _load_csv(self): for column_dict in self.columns: db = column_dict['database'] cluster = column_dict['cluster'] - schema = column_dict['schema_name'] + schema = column_dict['schema'] table = column_dict['table_name'] id = self._get_key(db, cluster, schema, table) column = ColumnMetadata( @@ -562,7 +562,7 @@ def _load_csv(self): for table_dict in tables: db = table_dict['database'] cluster = table_dict['cluster'] - schema = table_dict['schema_name'] + schema = table_dict['schema'] table = table_dict['name'] id = self._get_key(db, cluster, schema, table) columns = parsed_columns[id] @@ -570,7 +570,7 @@ def _load_csv(self): columns = [] table = TableMetadata(database=table_dict['database'], cluster=table_dict['cluster'], - schema_name=table_dict['schema_name'], + schema=table_dict['schema'], name=table_dict['name'], description=table_dict['description'], columns=columns, @@ -732,7 +732,7 @@ def create_table_column_job(table_path, column_path): with user, a, b, c, read, own, follow, manager where user.full_name is not null return user.email as email, user.first_name as first_name, user.last_name as last_name, - user.full_name as name, user.github_username as github_username, user.team_name as team_name, + user.full_name as full_name, user.github_username as github_username, user.team_name as team_name, user.employee_type as employee_type, manager.email as manager_email, user.slack_id as slack_id, user.is_active as is_active, REDUCE(sum_r = 0, r in COLLECT(DISTINCT read)| sum_r + r.read_count) AS total_read,