From 70de9ec7c2926578a3033260e3384be1f4a0b59a Mon Sep 17 00:00:00 2001 From: Kate Piette Date: Mon, 30 Oct 2017 13:08:58 -0400 Subject: [PATCH] Remove jobs that created fake data --- app/jobs/create_fake_org_categories.rb | 167 --------- .../create_real_addresses_for_fake_orgs.rb | 44 --- app/jobs/parse_data_to_csvs.rb | 330 ------------------ lib/tasks/create_fake_org_categories.rake | 3 - .../create_real_addresses_for_fake_orgs.rake | 3 - lib/tasks/parse_data_to_csvs.rake | 3 - 6 files changed, 550 deletions(-) delete mode 100644 app/jobs/create_fake_org_categories.rb delete mode 100644 app/jobs/create_real_addresses_for_fake_orgs.rb delete mode 100644 app/jobs/parse_data_to_csvs.rb delete mode 100644 lib/tasks/create_fake_org_categories.rake delete mode 100644 lib/tasks/create_real_addresses_for_fake_orgs.rake delete mode 100644 lib/tasks/parse_data_to_csvs.rake diff --git a/app/jobs/create_fake_org_categories.rb b/app/jobs/create_fake_org_categories.rb deleted file mode 100644 index e5660dd29..000000000 --- a/app/jobs/create_fake_org_categories.rb +++ /dev/null @@ -1,167 +0,0 @@ -require 'csv' -require 'json' -require 'net/http' - -class CreateFakeOrgCategories - - def initialize() - @services_map = [] - end - - def create_orgs() - file = File.read("/tmp/ohana-api/data/oe.json") - json = JSON.parse(file) - random_num = random_num(100) - 500.times.each do |n| - categories_list = [] - finance_array = [] - capital_array = [] - procurement_array =[] - legal_array = [] - manufacturing_array = [] - marketing_array = [] - mentoring_array = [] - networking_array = [] - planning_array = [] - rd_array = [] - regulartory_array = [] - space_array = [] - hr_array = [] - industries_list = [] - communities_list = [] - categories_from_db = json['taxonomy']['top_level'][0] - if random_num < 15 - categories_list.push('Financial Managment') - add_subcategory_id(categories_from_db, 0, finance_array) - end - if random_num > 10 && random_num < 40 - categories_list.push('Capital') - add_subcategory_id(categories_from_db, 1, capital_array) - end - if random_num > 20 && random_num < 70 - categories_list.push('Legal Services') - add_subcategory_id(categories_from_db , 2, legal_array) - end - if random_num > 30 && random_num < 80 - categories_list.push('Marketing/Sales') - add_subcategory_id(categories_from_db, 3, marketing_array) - end - if random_num > 40 && random_num < 60 - categories_list.push('Networking') - add_subcategory_id(categories_from_db, 4, networking_array) - end - if random_num > 50 && random_num < 60 - categories_list.push('Manufacturing/Logistics') - add_subcategory_id(categories_from_db, 5, manufacturing_array) - end - if random_num > 60 && random_num < 70 - categories_list.push('Procurement') - add_subcategory_id(categories_from_db, 6, procurement_array) - end - if random_num > 70 && random_num < 80 - categories_list.push('Planning/Management') - add_subcategory_id(categories_from_db, 7, planning_array) - end - if random_num > 80 && random_num < 100 - categories_list.push('R&D/Commercialization') - add_subcategory_id(categories_from_db, 8, rd_array) - end - if random_num > 90 && random_num < 100 - categories_list.push('Regulatory Compliance') - add_subcategory_id(categories_from_db, 9, regulartory_array) - end - if random_num > 15 && random_num < 35 - categories_list.push('Physical Space') - add_subcategory_id(categories_from_db, 10, space_array) - end - if random_num > 13 && random_num < 50 - categories_list.push('Mentoring/Counseling') - add_subcategory_id(categories_from_db, 11, mentoring_array) - end - if random_num > 3 && random_num < 45 - categories_list.push('Human Resources & Workforce Development') - add_subcategory_id(categories_from_db, 12, hr_array) - end - # INDUSTRIES - industries_from_db = json['taxonomy']['top_level'][4] - add_other_tax_id(industries_from_db, industries_list) - #COMMUNITIES - communities_from_db = json['taxonomy']['top_level'][3] - add_other_tax_id(communities_from_db, communities_list) - @services_map.push(map_to_categories( - categories_list, - finance_array, - capital_array, - procurement_array, - legal_array, - manufacturing_array, - marketing_array, - mentoring_array, - networking_array, - planning_array, - rd_array, - regulartory_array, - space_array, - hr_array, - industries_list, - communities_list - )) - end - send_to_csv() - end - - def add_subcategory_id(categories_from_db, category_int, subcat_arr) - subcats_from_db = categories_from_db['second_level'][category_int]['third_level'] - subcats_from_db.each do |subcat_from_db| - num = random_num(1.00) - if num > 0.66 - subcat_arr.push(subcat_from_db["@title"]) - end - end - end - - def add_other_tax_id(data_from_db, array) - data_from_db['second_level'].each do |d| - num = random_num(1.00) - if num > 0.33 && num < 0.66 - array.push(d["@title"]) - end - end - end - - def map_to_categories(categories_list,finance_array,capital_array,procurement_array,legal_array,manufacturing_array,marketing_array,mentoring_array,networking_array,planning_array,rd_array,regulartory_array,space_array,hr_array,industries_list,communities_list) - { - categories_list: categories_list.join(','), - FinanceSub: finance_array ? finance_array.join(',') : nil, - CapitalSub: capital_array ? capital_array.join(',') : nil, - ProcurementSub: procurement_array ? procurement_array.join(',') : nil, - LegalSub: legal_array ? legal_array.join(',') : nil, - ManufacturingSub: manufacturing_array ? manufacturing_array.join(',') : nil, - MarketingSub: marketing_array ? marketing_array.join(',') : nil, - MentoringSub: mentoring_array ? mentoring_array.join(',') : nil, - NetworkingSub: networking_array ? networking_array.join(',') : nil, - PlanningSub: planning_array ? planning_array.join(',') : nil, - RDSub: rd_array ? rd_array.join(',') : nil, - RegulatorySub: regulartory_array ? regulartory_array.join(',') : nil, - SpaceSub: space_array ? space_array.join(',') : nil, - HRSub: hr_array ? hr_array.join(',') : nil, - industries_list: industries_list.join(','), - communities_list: communities_list.join(','), - } - end - - def send_to_csv() - CSV.open("/tmp/ohana-api/data/fake_org_categories.csv", "wb") do |csv| - csv << @services_map.first.keys - @services_map.each do |hash| - csv << hash.values - end - end - end - - private - - def random_num(num) - Random.new.rand(num) - end -end diff --git a/app/jobs/create_real_addresses_for_fake_orgs.rb b/app/jobs/create_real_addresses_for_fake_orgs.rb deleted file mode 100644 index 9bab09695..000000000 --- a/app/jobs/create_real_addresses_for_fake_orgs.rb +++ /dev/null @@ -1,44 +0,0 @@ -require 'csv' - -class CreateRealAddressesForFakeOrgs - - def initialize() - @NE_Lat = 38.79309367009685 - @NE_Lng = -121.16951677656246 - @SE_Lat = 38.32058063517934 - @NW_Lng = -121.77376482343746 - @locations_map = [] - end - - def magic() - 200.times.each do |n| - lat = random_num(@NE_Lat, @SE_Lat) - lng = random_num(@NE_Lng, @NW_Lng) - address = Geocoder.address([lat, lng]) - addressArr = address.split(', '); - address = { - street: addressArr[0], - city: addressArr[1], - state_zip: addressArr[2], - country: addressArr[3] - } - if (!@locations_map.include?(address)) - @locations_map.push(address) - end - end - send_to_csv() - end - - def random_num(x, y) - Random.new.rand(y..x) - end - - def send_to_csv() - CSV.open("/tmp/ohana-api/data/real_addresses.csv", "wb") do |csv| - csv << @locations_map.first.keys - @locations_map.each do |hash| - csv << hash.values - end - end - end -end diff --git a/app/jobs/parse_data_to_csvs.rb b/app/jobs/parse_data_to_csvs.rb deleted file mode 100644 index ae4a6268f..000000000 --- a/app/jobs/parse_data_to_csvs.rb +++ /dev/null @@ -1,330 +0,0 @@ -require 'csv' -require 'json' -require 'net/http' - -class ParseDataToCsvs - - def initialize() - @orgs_map = [] - @locations_map = [] - @addresses_map = [] - @mail_addresses_map = [] - @contacts_map = [] - @phones_map = [] - @services_map = [] - - @org_id = 0 - @location_id = 0 - @address_id = 0 - @mail_address_id = 0 - @contact_id = 0 - @phone_id = 0 - @service_id = 0 - end - - def parse_csv() - file = File.open("/tmp/ohana-api/data/city-of-sac-csv/fake_data.csv") - CSV.foreach(file, headers: true) do |row| - taxonomy_id_array = assign_taxonomies(row) - @orgs_map.push(map_to_organizations(row)) - @locations_map.push(map_to_locations(row, 'L1')) - @addresses_map.push(map_to_addresses(row, 'L1')) - if !row['M1Street1'].nil? - @mail_addresses_map.push(map_to_mail_addresses(row)) - end - @contacts_map.push(map_to_contacts(row)) - @phones_map.push(map_to_phones(row)) - @services_map.push(map_to_services(row, 'S1', taxonomy_id_array)) - $i = 2 - 4.times.each do |n| - location_key = 'L' + $i.to_s - service_key = 'S' + $i.to_s - if !row[location_key<<'LocName'].nil? - @locations_map.push(map_to_locations(row, 'L'<<$i.to_s)) - @addresses_map.push(map_to_addresses(row, 'L'<<$i.to_s)) - end - if !row[service_key<<'ServiceName'].nil? - @services_map.push(map_to_services(row, 'S'<<$i.to_s, taxonomy_id_array)) - end - $i += 1 - end - end - create_csvs() - end - - def create_csvs() - CSV.open("/tmp/ohana-api/data/organizations.csv", "wb") do |csv| - csv << @orgs_map.first.keys - @orgs_map.each do |hash| - csv << hash.values - end - end - CSV.open("/tmp/ohana-api/data/locations.csv", "wb") do |csv| - csv << @locations_map.first.keys - @locations_map.each do |hash| - csv << hash.values - end - end - CSV.open("/tmp/ohana-api/data/addresses.csv", "wb") do |csv| - csv << @addresses_map.first.keys - @addresses_map.each do |hash| - csv << hash.values - end - end - CSV.open("/tmp/ohana-api/data/mail_addresses.csv", "wb") do |csv| - csv << @mail_addresses_map.first.keys - @mail_addresses_map.each do |hash| - csv << hash.values - end - end - CSV.open("/tmp/ohana-api/data/contacts.csv", "wb") do |csv| - csv << @contacts_map.first.keys - @contacts_map.each do |hash| - csv << hash.values - end - end - CSV.open("/tmp/ohana-api/data/phones.csv", "wb") do |csv| - csv << @phones_map.first.keys - @phones_map.each do |hash| - csv << hash.values - end - end - CSV.open("/tmp/ohana-api/data/services.csv", "wb") do |csv| - csv << @services_map.first.keys - @services_map.each do |hash| - csv << hash.values - end - end - end - - def map_to_organizations(row) - @org_id += 1 - { - id: @org_id, - accreditations: nil, - alternate_name: row['B1AltName'], - date_incorporated: nil, - description: row['B1Description'], - email: row['B1Email'], - funding_sources: nil, - legal_status: nil, - licenses: nil, - name: row['B1OrgName'], - tax_id: nil, - tax_status: nil, - website: row['B1Website'], - twitter: row['B1Twitter'], - facebook: row['B1Facebook'], - linkedin: row['B1LinkedIn'], - } - end - - def map_to_locations(row, key) - @location_id += 1 - { - id: @location_id, - organization_id: @org_id, - accessibility: nil, - admin_emails: nil, - alternate_name: nil, - description: row[key + 'LocDesc'], - email: nil, - languages: nil, - latitude: nil, - longitude: nil, - name: row[key + 'LocName'], - transportation: nil, - virtual: nil, - website: nil, - } - end - - def map_to_addresses(row, key) - @address_id += 1 - { - id: @address_id, - location_id: @location_id, - address_1: row[key + 'Street1'], - address_2: row[key + 'Street2'], - city: row[key + 'City'], - state_province: row[key + 'State'], - postal_code: row[key + 'ZIP'], - country: 'US', - } - end - - def map_to_mail_addresses(row) - @mail_address_id += 1 - { - id: @mail_address_id, - location_id: @location_id, - attention: row['B1OrgName'], - address_1: row['M1Street1'], - address_2: row['M1Street2'], - city: row['M1City'], - state_province: row['M1State'], - postal_code: row['M1ZIP'], - country: 'US', - } - end - - def map_to_contacts(row) - @contact_id += 1 - { - id: @contact_id, - location_id: nil, - organization_id: @org_id, - service_id: nil, - name: row['A1Name'], - title: row['A1Title'], - email: row['A1Email'], - department: nil, - } - end - - def map_to_phones(row) - @phone_id += 1 - { - id: @phone_id, - contact_id: @contact_id, - location_id: nil, - organization_id: @org_id, - service_id: nil, - number: row['A1Phone'], - extension: nil, - department: nil, - number_type: 'voice', - vanity_number: nil, - country_prefix: nil, - } - end - - def map_to_services(row, key, taxonomy_array) - @service_id += 1 - { - id: @service_id, - location_id: @location_id, - program_id: nil, - accepted_payments: nil, - alternate_name: nil, - description: row[key + 'ServiceDesc'], - eligibility: nil, - email: nil, - fees: row[key + 'Fee'], - funding_sources: nil, - application_process: nil, - interpretation_sources: nil, - keywords: nil, - languages: nil, - name: row[key+ 'ServiceName'], - required_documents: nil, - service_areas: nil, - status: 'active', - wait_time: nil, - website: row[key + 'URL'], - taxonomy_ids: taxonomy_array.join(',') - } - end - - def add_subcategory_id(array, categories_from_db, category_int, column) - subcats_from_db = categories_from_db['second_level'][category_int]['third_level'] - if (!column.nil?) - subcats = column.split(', ') - subcats.each do |subcat| - subcats_from_db.each do |subcat_from_db| - if subcat === subcat_from_db["@title"] - array.push(subcat_from_db["@id"]) - end - end - end - end - end - - def add_other_tax_id(array, data_from_db, column) - focus = column.split(', ') - focus.each do |type| - data_from_db['second_level'].each do |d| - if type === d["@title"] - array.push(d["@id"]) - end - if type === "NOT TARGETED" - array.push("104") - end - end - end - end - - def assign_taxonomies(row) - taxonomy_id_array = [] - file = File.read("/tmp/ohana-api/data/oe.json") - json = JSON.parse(file) - - # CATEGORIES - categories_from_db = json['taxonomy']['top_level'][0] - categories = row['S1Categories'].split(',').map(&:strip) - categories.each do |category| - case category - when 'Financial Management' - taxonomy_id_array.push("101-01") - add_subcategory_id(taxonomy_id_array, categories_from_db, 0, row['S1FinanceSub']) - when 'Capital' - taxonomy_id_array.push("101-02") - add_subcategory_id(taxonomy_id_array, categories_from_db, 1, row['S1CapitalSub']) - when 'Legal Services' - taxonomy_id_array.push("101-03") - add_subcategory_id(taxonomy_id_array, categories_from_db, 2, row['S1LegalSub']) - when 'Marketing/Sales' - taxonomy_id_array.push("101-04") - add_subcategory_id(taxonomy_id_array, categories_from_db, 3, row['S1MarketingSub']) - when 'Networking' - taxonomy_id_array.push("101-05") - add_subcategory_id(taxonomy_id_array, categories_from_db, 4, row['S1NetworkingSub']) - when 'Manufacturing/Logistics' - taxonomy_id_array.push("101-06") - add_subcategory_id(taxonomy_id_array, categories_from_db, 5, row['S1ManufacturingSub']) - when 'Procurement' - taxonomy_id_array.push("101-07") - add_subcategory_id(taxonomy_id_array, categories_from_db, 6, row['S1ProcurementSub']) - when 'Planning/Management' - taxonomy_id_array.push("101-08") - add_subcategory_id(taxonomy_id_array, categories_from_db, 7, row['S1PlanningSub']) - when 'R&D/Commercialization' - taxonomy_id_array.push("101-09") - add_subcategory_id(taxonomy_id_array, categories_from_db, 8, row['S1RDSub']) - when 'Regulatory Compliance' - taxonomy_id_array.push("101-10") - add_subcategory_id(taxonomy_id_array, categories_from_db, 9, row['S1RegulatorySub']) - when 'Physical Space' - taxonomy_id_array.push("101-11") - add_subcategory_id(taxonomy_id_array, categories_from_db, 10, row['S1SpaceSub']) - when 'Mentoring/Counseling' - taxonomy_id_array.push("101-12") - add_subcategory_id(taxonomy_id_array, categories_from_db, 11, row['S1MentoringSub']) - when 'Human Resources & Workforce Development' - taxonomy_id_array.push("101-13") - add_subcategory_id(taxonomy_id_array, categories_from_db, 12, row['S1HRSub']) - end - end - # BUSINESS TYPES - if !(row['S1Type'].nil?) - business_types_from_db = json['taxonomy']['top_level'][1] - add_other_tax_id(taxonomy_id_array, business_types_from_db, row['S1Type']) - end - # BUSINESS STAGES - if !(row['S1Stage'].nil?) - business_stages_from_db = json['taxonomy']['top_level'][2] - add_other_tax_id(taxonomy_id_array, business_stages_from_db, row['S1Stage']) - end - # UNDERSERVED COMMUNITIES - if !(row['S1Community'].nil?) - communities_from_db = json['taxonomy']['top_level'][3] - add_other_tax_id(taxonomy_id_array, communities_from_db, row['S1Community']) - end - # INDUSTRIES - if !(row['S1Industry'].nil?) - industries_from_db = json['taxonomy']['top_level'][4] - add_other_tax_id(taxonomy_id_array, industries_from_db, row['S1Industry']) - end - taxonomy_id_array - end -end diff --git a/lib/tasks/create_fake_org_categories.rake b/lib/tasks/create_fake_org_categories.rake deleted file mode 100644 index 2f3aae8c9..000000000 --- a/lib/tasks/create_fake_org_categories.rake +++ /dev/null @@ -1,3 +0,0 @@ -task create_fake_org_categories: :environment do - CreateFakeOrgCategories.new.create_orgs() -end diff --git a/lib/tasks/create_real_addresses_for_fake_orgs.rake b/lib/tasks/create_real_addresses_for_fake_orgs.rake deleted file mode 100644 index b82bfb98f..000000000 --- a/lib/tasks/create_real_addresses_for_fake_orgs.rake +++ /dev/null @@ -1,3 +0,0 @@ -task create_real_addresses_for_fake_orgs: :environment do - CreateRealAddressesForFakeOrgs.new.magic() -end diff --git a/lib/tasks/parse_data_to_csvs.rake b/lib/tasks/parse_data_to_csvs.rake deleted file mode 100644 index 54b672403..000000000 --- a/lib/tasks/parse_data_to_csvs.rake +++ /dev/null @@ -1,3 +0,0 @@ -task parse_data_to_csvs: :environment do - ParseDataToCsvs.new.parse_csv() -end