Skip to content

Bundle Data

Bundle Data #2238

Workflow file for this run

# Workflow that bundles the repository's release JSON files into a sqlite
# database and publishes the result as a GitHub release asset.
name: Bundle Data

on:
  # Allow manual runs.
  workflow_dispatch:
  # Scheduled rebuild: minute 0 of every sixth hour.
  schedule:
    - cron: '0 */6 * * *'

# All runs for the same ref share a single concurrency group.
concurrency: ci-bundle-${{ github.ref }}

permissions:
  # Write access to repository contents; the Publish step creates/updates a
  # release and uploads an asset to it.
  contents: write
jobs:
  # Single job: stream every release JSON document out of the repository
  # archive, hand the result to scripts/create_sqlite.sh, and publish
  # pypi-data.sqlite.gz on the "latest" release.
  bundle-data:
    name: Bundle Data
    runs-on: ubuntu-latest
    # Never run on forks; only the canonical repository publishes bundles.
    if: github.repository == 'pypi-data/pypi-json-data'
    env:
      # sqlite.org serves release tarballs from a per-year directory, so the
      # year and the version must be bumped together.
      sqlite_year: "2023"
      sqlite_version: "3440000"
    steps:
      - name: Install tools
        shell: bash
        run: |
          set -euo pipefail
          IFS=$'\n\t'
          # Refresh the package index first; a stale index on the runner
          # image can make the install fail with 404s.
          sudo apt-get update
          sudo apt-get install -y \
            jq sqlite3 curl tar pv wget build-essential libdeflate-tools

      # Reuse the self-built sqlite3 binary across runs, keyed on its version.
      - name: Cache Sqlite
        id: cache-sqlite
        uses: actions/cache@v4
        with:
          path: /usr/local/bin/sqlite3
          key: ${{ runner.os }}-sqlite-${{ env.sqlite_version }}

      # Build sqlite from the autoconf tarball, only when the cache missed.
      - name: Setup sqlite
        if: ${{ steps.cache-sqlite.outputs.cache-hit != 'true' }}
        shell: bash
        run: |
          set -euo pipefail
          wget "https://www.sqlite.org/${sqlite_year}/sqlite-autoconf-${sqlite_version}.tar.gz" -O sqlite.tar.gz
          mkdir -p sqlite/
          tar xf sqlite.tar.gz --strip-components=1 --directory=sqlite/
          cd sqlite/
          ./configure
          make -j"$(nproc)"
          sudo make install

      # Stream the repository archive for this commit, print every
      # release_data JSON file to stdout, emit one compact JSON value per
      # line via jq, and gzip the stream into all_data.txt.gz. pv reports a
      # running line count every 5 seconds.
      - name: Create
        shell: bash
        run: |
          set -euo pipefail
          # --fail: abort on an HTTP error instead of piping an error page
          # into tar.
          curl --fail -L -sS "https://github.com/pypi-data/pypi-json-data/archive/${GITHUB_SHA}.tar.gz" |
            tar -xOzf - --wildcards '*/release_data/*/*.json' |
            jq -rc 'values[]' | pv -c -f -i5 -l | gzip -1 > all_data.txt.gz

      # Fetch the schema and the build script at the same commit and run it.
      # NOTE(review): create_sqlite.sh is not visible here; presumably it
      # reads all_data.txt.gz and writes pypi-data.sqlite.gz - confirm.
      - name: Run create_sqlite.sh
        shell: bash
        run: |
          set -euo pipefail
          raw_base="https://raw.githubusercontent.com/pypi-data/pypi-json-data/${GITHUB_SHA}"
          mkdir -p scripts/
          wget -nv "${raw_base}/scripts/schema.sql" -O scripts/schema.sql
          wget -nv "${raw_base}/scripts/create_sqlite.sh"
          chmod +x create_sqlite.sh
          ./create_sqlite.sh

      # Expose today's UTC date (YYYY-MM-DD) as the `formattedTime` output,
      # using coreutils `date` instead of a third-party action.
      - name: Get current time
        id: current-time
        shell: bash
        run: |
          echo "formattedTime=$(date -u +%Y-%m-%d)" >> "$GITHUB_OUTPUT"

      # Create or update the "latest" release and attach the database.
      # NOTE(review): action-gh-release@v1 targets an older Node runtime;
      # consider moving to v2 after verifying it against this release setup.
      - name: Publish
        uses: softprops/action-gh-release@v1
        with:
          draft: false
          name: "Latest Release"
          tag_name: "latest"
          # NOTE(review): presumably a branch named "empty" that the tag is
          # created from - confirm it exists in the repository.
          target_commitish: "empty"
          files: 'pypi-data.sqlite.gz'
          # Fail the run if the database file was not produced, rather than
          # publishing a release without its asset.
          fail_on_unmatched_files: true
          body: |
            Automated bundling of release data into a sqlite database.
            Updated on ${{ steps.current-time.outputs.formattedTime }}
            See the readme for more information.