add data and date #17

Workflow file for this run

name: All-In-One Workflow
on:
  push:
    branches: [ main, github-actions ]
  schedule:
    - cron: '0 0 * * *' # Runs every day at midnight (UTC)
env:
  DB_FILENAME: "production.db"
  CREATE_DB: true
  DATA_PATH: './data'
  CURRENT_SEASON: 2023
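# The env vars above are visible as plain shell variables inside run: blocks;
# action inputs under with: are not shell-expanded and need the
# ${{ env.NAME }} expression syntax instead.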
jobs:
  setup:
    runs-on: ubuntu-latest
    services:
      selenium:
        image: selenium/standalone-firefox
        env:
          SE_NODE_MAX_SESSIONS: 3  # up to three concurrent browser sessions
        options: >-
          --shm-size="2g"
        ports:
          - 4444:4444  # WebDriver endpoint
          - 7900:7900  # noVNC, for watching the browser while debugging
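    # Ports published by the service container are mapped onto the runner
    # host, so job steps reach Selenium at localhost:4444, the address the
    # crawler step below passes via --server.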
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          fetch-depth: 1
          clean: true
      - name: Download database
        id: download_artifact
        uses: actions/download-artifact@v4
        # continue-on-error keeps the job out of the failure state when the
        # artifact does not exist yet, so later steps can branch on this
        # step's outcome instead of the whole job failing.
        continue-on-error: true
        with:
          # with: inputs are not shell-expanded, so use the expression syntax
          name: ${{ env.DB_FILENAME }}
          path: .  # extract production.db into the workspace root
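      # Note: download-artifact@v4 only sees artifacts uploaded by the
      # current workflow run unless the run-id and github-token inputs are
      # set, and artifacts expire (90 days by default), so this download is
      # expected to fail on a fresh run; the fallback step below rebuilds it.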
      - name: Create DB
        # With continue-on-error above, a failed download reports
        # outcome == 'failure' while its conclusion stays 'success'.
        if: ${{ steps.download_artifact.outcome == 'failure' }}
        run: |
          pip install -r requirements.txt
          python scripts/create_database.py --filename $DB_FILENAME
      - name: Run Selenium crawler script
        id: crawler
        if: ${{ always() }}
        # if: ${{ hashFiles(env.DB_FILENAME) != '' }}
        run: |
          pip install -r requirements.txt
          mkdir -p "$DATA_PATH"
          echo "$(date -d '2 days ago' +%Y-%m-%d)"  # print the reference date
          python scripts/crawl_concurrently.py --path "$DATA_PATH" --season $CURRENT_SEASON --server http://localhost:4444 --data 2024-05-03
          python scripts/insert_data.py -db $DB_FILENAME --data $DATA_PATH/*.json
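      # Assumption: crawl_concurrently.py writes one JSON file per crawled
      # page into $DATA_PATH and insert_data.py loads those files into the
      # SQLite database; this is inferred from the flags, not verified here.
      # If the crawl produces no JSON, the unmatched glob is passed through
      # literally and the step fails, so the upload below is skipped.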
      - name: Upload database as artifact
        if: ${{ steps.crawler.conclusion == 'success' }}
        uses: actions/upload-artifact@v4
        with:
          name: ${{ env.DB_FILENAME }}
          path: ${{ env.DB_FILENAME }}
          overwrite: true