Skip to content

Commit

Permalink
cronjob for user report scraping
Browse files Browse the repository at this point in the history
  • Loading branch information
rishisankar committed Dec 24, 2020
1 parent 8eccd2e commit cb1bdcf
Show file tree
Hide file tree
Showing 8 changed files with 117 additions and 89 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

# Django #
*.log
log/
*.pot
*.pyc
__pycache__
Expand Down
2 changes: 2 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,6 @@ WORKDIR /code
COPY requirements.txt /code/
RUN pip install -r requirements.txt
COPY . /code/
RUN chmod +x /code/setup.sh
RUN ./setup.sh
RUN chmod +x /code/start.sh
88 changes: 88 additions & 0 deletions knovigo/places/report_scraper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
# merge this with scraper.py later

import urllib.request
import csv
from datetime import datetime
from io import StringIO
import pytz
from dateutil import parser
import os

from .models import UserReport

# Google Form answer text -> integer rating stored on UserReport.
_PERCENT_BUCKETS = {'0 - 25%': 0, '25 - 50%': 1, '50 - 75%': 2, '75 - 100%': 3}

# (UserReport field, label searched for in the form's feature column).
_CHECKBOX_LABELS = (
    ("masks_required_checkbox", "Masks required"),
    ("staff_masks_checkbox", "Staff wears masks"),
    ("plexiglass_checkbox", "Plexiglass at cashier"),
    ("line_outside_checkbox", "Line outside"),
    ("capacity_checkbox", "Limited capacity"),
    ("takeout_checkbox", "Takeout"),
    ("dine_in_checkbox", "Dine-in"),
    ("outdoor_seating_checkbox", "Outdoor seating"),
    ("social_distancing_checkbox", "Social distancing enforced"),
)

# Fields that are always stored as 2, whatever the feature column contains.
_ALWAYS_UNKNOWN_CHECKBOXES = ("bathroom_checkbox", "wifi_checkbox", "outlets_checkbox")


def _checkbox_fields(features):
    """Translate the form's feature column into UserReport checkbox kwargs."""
    fields = {}
    for field_name, label in _CHECKBOX_LABELS:
        if features == '':
            # Row was submitted before this data was collected.
            fields[field_name] = 2
        else:
            fields[field_name] = 1 if label in features else 0
    for field_name in _ALWAYS_UNKNOWN_CHECKBOXES:
        fields[field_name] = 2
    return fields


def scrape_user_report_data():
    """Import new Google Form responses into ``UserReport``.

    Downloads the response spreadsheet as CSV and creates one ``UserReport``
    (with ``from_google_form=True``) for every row whose timestamp is
    strictly newer than the most recent form-sourced report already stored.
    A one-line summary is printed to stdout.

    Returns:
        int: the number of reports created by this call.

    Raises:
        KeyError: a rating column holds text that is not a known bucket.
        ValueError: the covid-protocol column is not an integer.
        IndexError: a non-empty row has fewer than nine columns.
    """
    # One ordered query replaces the original len(...) == 0 check (which
    # loaded every row) followed by a second query for the newest report.
    latest = (UserReport.objects.filter(from_google_form=True)
              .order_by("-created").first())
    if latest is None:
        timefilter = parser.parse("01/01/2000 00:00:00 -0800")
    else:
        timefilter = latest.created

    with urllib.request.urlopen('https://docs.google.com/spreadsheets/d/1fJ4hGyMX1wqMs6G9tA5wcogKWj29QY73iQ-r5SG74V4/gviz/tq?tqx=out:csv') as f:
        raw_csv = f.read().decode('utf-8')
    reader = csv.reader(StringIO(raw_csv), delimiter=',')
    # Skip the header row; the default keeps an empty response from raising
    # StopIteration.
    next(reader, None)

    count = 0

    for row in reader:
        if not row:
            # csv.reader yields [] for blank lines.
            continue

        # NOTE(review): every time is parsed with a fixed -0800 offset, so
        # daylight saving time is ignored -- confirm this is intended.
        timestamp = parser.parse(row[0] + " -0800")
        if timestamp <= timefilter:
            continue

        # The form gives start/end as times only; anchor them to the
        # submission date.
        daystring = timestamp.strftime("%m/%d/%Y ")
        start = parser.parse(daystring + row[2] + " -0800")
        end = parser.parse(daystring + row[3] + " -0800")
        social_distancing = _PERCENT_BUCKETS[row[4]]
        mask_wearing = _PERCENT_BUCKETS[row[5]]
        crowded = _PERCENT_BUCKETS[row[6]]
        covid_protocol = int(row[8])

        # Treat a missing feature column like an empty one.
        features = row[11] if len(row) > 11 else ''

        UserReport.objects.create(
            user_id=None, place_id=None, geohash_id=None, from_google_form=True,
            created=timestamp, start=start, end=end,
            density_rating=crowded,
            social_distancing_rating=social_distancing,
            mask_rating=mask_wearing,
            covid_rating=covid_protocol,
            **_checkbox_fields(features)
        )

        count += 1

    print(datetime.now().strftime("[%m/%d/%Y %H:%M:%S] ") + "Collected " + str(count) + " new user reports from Google Forms.")

    return count




90 changes: 3 additions & 87 deletions knovigo/places/views.py
Original file line number Diff line number Diff line change
@@ -1,92 +1,8 @@
from django.shortcuts import render
from django.http import JsonResponse

# Create your views here.


## user report stuff (temporary, move to scraper.py once cronjobs are setup)

import urllib.request
import csv
from datetime import datetime
from io import StringIO
import pytz
from dateutil import parser

from .models import UserReport

# manually trigger user report scrape (can probably delete later)
from .report_scraper import scrape_user_report_data
def get_user_report_data(request):
    """Manually trigger a Google Forms user-report scrape.

    Delegates to ``scrape_user_report_data`` so the scraping logic lives in
    one place instead of being repeated inline here.

    Args:
        request: the incoming request; it is not inspected.

    Returns:
        JsonResponse: ``{"count": <number of reports created>}``.
    """
    count = scrape_user_report_data()
    return JsonResponse({'count': count})




9 changes: 8 additions & 1 deletion knovigo/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
"""

from pathlib import Path
import os

# Build paths inside the project like this: BASE_DIR / 'subdir'.
BASE_DIR = Path(__file__).resolve().parent.parent
Expand All @@ -37,7 +38,13 @@
'django.contrib.sessions',
'django.contrib.messages',
'django.contrib.staticfiles',
'knovigo.places'
'knovigo.places',
'django_crontab'
]

# use https://crontab.guru/ for cronjob syntax
CRONJOBS = [
('0 * * * *', 'knovigo.places.report_scraper.scrape_user_report_data', '>> ' + os.path.join(BASE_DIR,'log/places_cron.log') + ' 2>&1')
]

MIDDLEWARE = [
Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@ python-dateutil>=2.8.1
python-geohash>=0.8.5
requests
git+https://github.com/m-wrzr/populartimes
django-crontab>=0.7.1
6 changes: 6 additions & 0 deletions setup.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#!/bin/sh
# Installs the cron daemon and the Python requirements.

# Abort on the first failing command; without this the script's exit status
# is only that of the final pip install, hiding apt-get failures.
set -e

# Kept as separate commands: under `set -e`, a failure of a non-final
# element of an `&&` list does not stop the script.
apt-get -y update
apt-get -y upgrade
apt-get -y install cron

pip install -r requirements.txt
9 changes: 8 additions & 1 deletion start.sh
Original file line number Diff line number Diff line change
@@ -1,8 +1,15 @@
#!/bin/sh
# Starts cron, resets the cron log, applies migrations, registers the
# cron jobs and runs the Django development server.

#pip install -r requirements.txt
#./setup.sh

service cron start
# -p: succeed quietly when the directory already exists.
mkdir -p log
# Start each run with a fresh cron log.
rm -f log/places_cron.log

python manage.py makemigrations places
python manage.py migrate

# One call is enough; the original ran this command twice.
python manage.py crontab add

python manage.py runserver 0.0.0.0:8000

0 comments on commit cb1bdcf

Please sign in to comment.