diff --git a/.gitignore b/.gitignore index 68bc17f9..22124d82 100644 --- a/.gitignore +++ b/.gitignore @@ -158,3 +158,8 @@ cython_debug/ # and can be added to the global gitignore or merged into this file. For a more nuclear # option (not recommended) you can uncomment the following to ignore the entire idea folder. #.idea/ + + +# Folder where training outputs are stored +bertmesh_outs/ +wandb/ diff --git a/README.md b/README.md index 3c624990..7535dde7 100644 --- a/README.md +++ b/README.md @@ -1,9 +1,9 @@ # Grants Tagger Light ๐Ÿ”– -Light weight repository for grant tagger model deployment and inference. +Lightweight repository for grant tagger model deployment and inference. Adapted from [the original repository](https://github.com/wellcometrust/grants_tagger) Grants tagger is a machine learning powered tool that -assigns biomedically related tags to grants proposals. +assigns biomedical-related tags to grant proposals. Those tags can be custom to the organisation or based upon a preexisting ontology like MeSH. @@ -12,16 +12,16 @@ Wellcome Trust for internal use but both the models and the code will be made available in a reusable manner. This work started as a means to automate the tags of one -funding division within Wellcome but currently it has expanded +funding division within Wellcome, but currently it has expanded into the development and automation of a complete set of tags that can cover past and future directions for the organisation. Science tags refer to the custom tags for the Science funding -division. These tags are higly specific to the research Wellcome -funds so it is not advisable to use them. +division. These tags are highly specific to the research Wellcome +funds, so it is not advisable to use them. MeSH tags are subset of tags from the MeSH ontology that aim to -tags grants according to: +tag grants according to: - diseases - themes of research Those tags are generic enough to be used by other biomedical funders @@ -41,82 +41,150 @@ For GPU-support: `poetry install --with gpu` For training the model, we recommend installing the version of this package with GPU support. -For infenrece, CPU-support should suffice. +For inference, CPU-support should suffice. ## 2. Activate the environment `poetry shell` You now have access to the `grants-tagger` command line interface! +## OPTIONAL: 3. 
Install MantisNLP `remote` to connect to a remote AWS instance
+`pip install git+https://github.com/ivyleavedtoadflax/remote.py.git`
+Then add your instance
+`remote config add [instance_name]`
+And then connect and attach to your machine with a tunnel
+`remote connect -p 1234:localhost:1234 -v`

 # ⌨️ Commands

-| Commands        |                                                               | needs dev |
-| --------------- | ------------------------------------------------------------- | --------- |
-| ⚙️ preprocess    | preprocess data to use for training                           | False     |
-| 🔥 train         | trains a new model                                            | True      |
-| 📈 evaluate      | evaluate performance of pretrained model                      | True      |
-| 🔖 predict       | predict tags given a grant abstract using a pretrained model  | False     |
-| 🎛 tune          | tune params and threshold                                     | True      |
-| ⬇️ download      | download data from EPMC                                       | False     |
+| Commands        | Description                                                   | Needs dev |
+|-----------------|---------------------------------------------------------------|-----------|
+| 🔥 train         | preprocesses the data and trains a new model                  | True      |
+| ⚙ preprocess     | (Optional) preprocess and save the data outside training      | False     |
+| 📈 evaluate      | evaluate performance of pretrained model                      | True      |
+| 🔖 predict       | predict tags given a grant abstract using a pretrained model  | False     |
+| 🎛 tune          | tune params and threshold                                     | True      |
+| ⬇ download       | download data from EPMC                                       | False     |
+
 in square brackets the commands that are not implemented yet

-## ⚙️ Preprocess
+## ⚙️ Preprocess
+
+This step is optional, since it can be handled directly by the `train` command.
+- If you run it manually, the data will be stored locally first, which can help if you need to fine-tune,
+rerun, etc. in the future.
+- If you don't run it, the `train` step will preprocess and then train on the fly, without any extra I/O operations
+on disk, which may otherwise add latency depending on the infrastructure.
+
+It requires data in `jsonl` format for parallelization purposes. In `data/raw` you can find `allMeSH_2021.jsonl`
+already prepared for the preprocessing step.
+
+If your data is in `json` format, transform it to `jsonl` with tools such as `jq`, or using Python.
+You can find an example of converting `allMeSH_2021.json` to `jsonl` in `scripts/mesh_json_to_jsonl.py`:
+
+```bash
+python scripts/mesh_json_to_jsonl.py --input_path data/raw/allMeSH_2021.json --output_path data/raw/test.jsonl --filter_years 2020,2021
+```
+
+Each dataset needs its own preprocessing, so the current preprocess command works with `allMeSH_2021.jsonl`.

-Preprocess creates a JSONL datafile with `text`, `tags` and `meta` as keys.
-Text and tags are used for training whereas meta can be useful during annotation
-or to analyse predictions and performance. Each dataset needs its own
-preprocessing so the current preprocess works with the bioasq-mesh one.
 If you want to use a different dataset see section on bringing your own data
 under development.
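+
+For reference, here is a minimal, illustrative sketch of the `json` to `jsonl` conversion mentioned above (it is
+not the exact contents of `scripts/mesh_json_to_jsonl.py`; it assumes the `allMeSH_2021.json` layout used elsewhere
+in this repo, where the first line is `{"articles":[` and each following line holds one article object plus a
+trailing comma):
+
+```python
+import json
+
+
+def mesh_json_to_jsonl(input_path: str, output_path: str, filter_years: str = None) -> None:
+    """Stream the one-article-per-line allMeSH dump into plain JSONL, optionally keeping only some years."""
+    years = filter_years.split(",") if filter_years else None
+    with open(input_path, encoding="latin-1") as f_in, open(output_path, "w") as f_out:
+        f_in.readline()  # skip the opening '{"articles":[' line, which is not valid JSON on its own
+        for line in f_in:
+            try:
+                item = json.loads(line.strip().rstrip(","))  # drop the trailing separator comma
+            except json.JSONDecodeError:
+                continue  # closing bracket line (or any other non-article line)
+            if years and str(item.get("year")) not in years:
+                continue
+            f_out.write(json.dumps(item) + "\n")
+
+
+mesh_json_to_jsonl("data/raw/allMeSH_2021.json", "data/raw/test.jsonl", filter_years="2020,2021")
+```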
-#### bioasq-mesh -``` +### Preprocessing bertmesh - Usage: grants-tagger preprocess bioasq-mesh [OPTIONS] [INPUT_PATH] - [TRAIN_OUTPUT_PATH] - [LABEL_BINARIZER_PATH] - -โ•ญโ”€ Arguments โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ฎ -โ”‚ input_path [INPUT_PATH] path to BioASQ JSON data [default: None] โ”‚ -โ”‚ train_output_path [TRAIN_OUTPUT_PATH] path to JSONL output file that will be generated for the train set [default: None] โ”‚ -โ”‚ label_binarizer_path [LABEL_BINARIZER_PATH] path to pickle file that will contain the label binarizer [default: None] โ”‚ -โ•ฐโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ฏ -โ•ญโ”€ Options โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ฎ -โ”‚ --test-output-path TEXT path to JSONL output file that will be generated for the test set [default: None] โ”‚ -โ”‚ --mesh-tags-path TEXT path to mesh tags to filter [default: None] โ”‚ -โ”‚ --test-split FLOAT split percentage for test data. if None no split. [default: 0.01] โ”‚ -โ”‚ --filter-years TEXT years to keep in form min_year,max_year with both inclusive [default: None] โ”‚ -โ”‚ --config PATH path to config files that defines arguments [default: None] โ”‚ -โ”‚ --n-max INTEGER Maximum limit on the number of datapoints in the set (including training and test) [default: None] โ”‚ -โ”‚ --help Show this message and exit. 
โ”‚ -โ•ฐโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ฏ +``` + Usage: grants-tagger preprocess mesh [OPTIONS] DATA_PATH SAVE_TO_PATH + MODEL_KEY + +โ•ญโ”€ Arguments โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ฎ +โ”‚ * data_path TEXT Path to mesh.jsonl [default: None] [required] โ”‚ +โ”‚ * save_to_path TEXT Path to save the serialized PyArrow dataset after preprocessing [default: None] [required] โ”‚ +โ”‚ * model_key TEXT Key to use when loading tokenizer and label2id. Leave blank if training from scratch [default: None] [required] โ”‚ +โ•ฐโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ฏ +โ•ญโ”€ Options โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ฎ +โ”‚ --test-size FLOAT Fraction of data to use for testing in (0,1] or number of rows [default: None] โ”‚ +โ”‚ --num-proc INTEGER Number of processes to use for preprocessing [default: 8] โ”‚ +โ”‚ --max-samples INTEGER Maximum number of samples to use for preprocessing [default: -1] โ”‚ +โ”‚ --batch-size INTEGER Size of the preprocessing batch [default: 256] โ”‚ +โ”‚ --tags TEXT Comma-separated tags you want to include in the dataset (the rest will be discarded) [default: None] โ”‚ +โ”‚ --train-years TEXT Comma-separated years you want to include in the training dataset [default: None] โ”‚ +โ”‚ --test-years TEXT Comma-separated years you want to include in the test dataset [default: None] โ”‚ +โ”‚ --help Show this message and exit. 
โ”‚ +โ•ฐโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ฏ ``` ## ๐Ÿ”ฅ Train -Train acts as the entry point command for training all models. Currently we only support -the BertMesh model. The command will train a model and save it to the specified path. +The command will train a model and save it to the specified path. Currently we support on BertMesh. ### bertmesh ``` - - Usage: grants-tagger train bertmesh [OPTIONS] MODEL_KEY DATA_PATH - MODEL_SAVE_PATH - -โ•ญโ”€ Arguments โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ฎ -โ”‚ * model_key TEXT Pretrained model key. Local path or HF location [default: None] [required] โ”‚ -โ”‚ * data_path TEXT Path to data in jsonl format. Must contain text and tags field [default: None] [required] โ”‚ -โ”‚ * model_save_path TEXT Path to save model to [default: None] [required] โ”‚ -โ•ฐโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ฏ -โ•ญโ”€ Options โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ฎ -โ”‚ --help Show this message and exit. 
โ”‚ -โ•ฐโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ฏ - + Usage: grants-tagger train bertmesh [OPTIONS] MODEL_KEY DATA_PATH + +โ•ญโ”€ Arguments โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ฎ +โ”‚ * model_key TEXT Pretrained model key. Local path or HF location [default: None] [required] โ”‚ +โ”‚ * data_path TEXT Path to allMeSH_2021.jsonl (or similar) or to a folder after preprocessing and saving to disk [default: None] [required] โ”‚ +โ•ฐโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ฏ +โ•ญโ”€ Options โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ฎ +โ”‚ --test-size FLOAT Fraction of data to use for testing (0,1] or number of rows [default: None] โ”‚ +โ”‚ --num-proc INTEGER Number of processes to use for preprocessing [default: 8] โ”‚ +โ”‚ --max-samples INTEGER Maximum number of samples to use from the json [default: -1] โ”‚ +โ”‚ --shards INTEGER Number os shards to divide training IterativeDataset to (improves performance) [default: 8] โ”‚ +โ”‚ --from-checkpoint TEXT Name of the checkpoint to resume training [default: None] โ”‚ +โ”‚ --tags TEXT Comma-separated tags you want to include in the dataset (the rest will be discarded) [default: None] โ”‚ +โ”‚ --train-years TEXT Comma-separated years you want to include in the training dataset [default: None] โ”‚ +โ”‚ --test-years TEXT Comma-separated years you want to include in the test dataset 
[default: None] โ”‚ +โ”‚ --help Show this message and exit. โ”‚ +โ•ฐโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ฏ ``` +#### About `model_key` +`model_key` possible values are: +- A HF location for a pretrained / finetuned model +- "" to load a model by default and train from scratch (`microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract`) + +#### About `sharding` +`sharding` was proposed by [Hugging Face](https://github.com/huggingface/datasets/issues/2252#issuecomment-825596467) +to improve performance on big datasets. To enable it: +- set shards to something bigger than 1 (Recommended: same number as cpu cores) + +#### Other arguments +Besides those arguments, feel free to add any other TrainingArgument from Hugging Face or Wand DB. +This is the example used to train reaching a ~0.6 F1, also available at `examples/train_by_epochs.sh` +```commandline +grants-tagger train bertmesh \ + "" \ + [YOUR_PREPROCESSED_FOLDER] \ + --output_dir [YOUR_OUTPUT_FOLDER] \ + --per_device_train_batch_size 16 \ + --per_device_eval_batch_size 1 \ + --multilabel_attention True \ + --freeze_backbone unfreeze \ + --num_train_epochs 7 \ + --learning_rate 5e-5 \ + --dropout 0.1 \ + --hidden_size 1024 \ + --warmup_steps 5000 \ + --max_grad_norm 2.0 \ + --scheduler_type cosine_hard_restart \ + --weight_decay 0.2 \ + --correct_bias True \ + --threshold 0.25 \ + --prune_labels_in_evaluation True \ + --hidden_dropout_prob 0.2 \ + --attention_probs_dropout_prob 0.2 \ + --fp16 \ + --torch_compile \ + --evaluation_strategy epoch \ + --eval_accumulation_steps 20 \ + --save_strategy epoch \ + --wandb_project wellcome-mesh \ + --wandb_name test-train-all \ + --wandb_api_key ${WANDB_API_KEY} +``` ## ๐Ÿ“ˆ Evaluate @@ -137,7 +205,6 @@ not made into production this is the way to evaluate. The plan is to extend evaluate to all models when train starts training explicit model approaches. ``` - Usage: grants-tagger evaluate model [OPTIONS] MODEL_PATH DATA_PATH โ•ญโ”€ Arguments โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ฎ @@ -157,7 +224,6 @@ evaluate to all models when train starts training explicit model approaches. ### grants Evaluate an xlinear model on grants data. ``` - Usage: grants-tagger evaluate grants [OPTIONS] MODEL_PATH DATA_PATH LABEL_BINARIZER_PATH @@ -182,7 +248,6 @@ Predict assigns tags on a given abstract text that you can pass as argument. 
```
-
 Usage: grants-tagger predict [OPTIONS] TEXT MODEL_PATH

╭─ Arguments ─────────────────────────────────────────────────────────────────────────────────╮
@@ -195,7 +260,6 @@ Predict assigns tags on a given abstract text that you can pass as argument.
 │ --threshold        FLOAT  [default: 0.5]                                                    │
 │ --help                    Show this message and exit.                                       │
 ╰─────────────────────────────────────────────────────────────────────────────────────────────╯
-
 ```

## 🎛 Tune
Optimise the threshold used for tag decisions.

### threshold
```
-
 Usage: grants-tagger tune threshold [OPTIONS] DATA_PATH MODEL_PATH
                                     LABEL_BINARIZER_PATH THRESHOLDS_PATH

@@ -224,12 +287,14 @@ Optimise the threshold used for tag decisions.

 ## ⬇️ Download

-This commands enables you to download mesh data from EPMC
+The project references big files tracked with `dvc`. You can just run `dvc pull` to retrieve them,
+including `allMeSH_2021.json` and `allMeSH_2021.jsonl`, which are used to train `bertmesh`.
+
+Also, this command enables you to download MeSH data from EPMC

 ### epmc-mesh

 ```
-
 Usage: grants-tagger download epmc-mesh [OPTIONS] DOWNLOAD_PATH

╭─ Arguments ─────────────────────────────────────────────────────────────────────────────────╮
@@ -248,12 +313,18 @@ Install development dependencies via:

 ## 📋 Env variables

+Variable              | Required for | Description
+--------------------- |--------------| ----------
+WANDB_API_KEY         | train        | key used to log results to Weights & Biases
+AWS_ACCESS_KEY_ID     | train        | access key to pull data from dvc on S3
+AWS_SECRET_ACCESS_KEY | train        | secret key to pull data from dvc on S3
+
 If you want to participate to BIOASQ competition you need to set some variables.

 Variable              | Required for       | Description
 --------------------- | ------------------ | ----------
 BIOASQ_USERNAME       | bioasq             | username with which registered in BioASQ
-BIOASQ_PASSWORD       | bioasq             | password --//--
+BIOASQ_PASSWORD       | bioasq             | password

 If you use [direnv](https://direnv.net) then you can use it to populate
 your `.envrc` which will export the variables automatically, otherwise
@@ -294,6 +365,9 @@ and you would be able to run `grants_tagger preprocess epmc_mesh ...`

 ## 🚦 Test

+To run the tests you need to have the `dev` dependencies installed first.
+This is done by running `poetry install --with dev` after you are in the shell (`poetry shell`)
+
 Run tests with `pytest`. If you want to write some additional tests,
 they should go in the subfolder `tests/`
diff --git a/data/grants_comparison/.gitignore b/data/grants_comparison/.gitignore
index 1357bfdf..1fd228cc 100644
--- a/data/grants_comparison/.gitignore
+++ b/data/grants_comparison/.gitignore
@@ -1,2 +1,2 @@
-/comparison.csv
 /meshterms_list.txt
+/comparison.csv
diff --git a/data/grants_comparison/comparison.csv.dvc b/data/grants_comparison/comparison.csv.dvc
deleted file mode 100644
index 96a38344..00000000
--- a/data/grants_comparison/comparison.csv.dvc
+++ /dev/null
@@ -1,4 +0,0 @@
-outs:
-- md5: a445d8b6396364ef601638e57d6c9a46
-  size: 845271
-  path: comparison.csv
diff --git a/data/grants_comparison/mesh_tree_letters_list.txt b/data/grants_comparison/mesh_tree_letters_list.txt
new file mode 100644
index 00000000..270dd6a8
--- /dev/null
+++ b/data/grants_comparison/mesh_tree_letters_list.txt
@@ -0,0 +1,4 @@
+Information Sources: L
+Phenomena and Processes: G
+Geographicals: Z
+Diseases: C
diff --git a/data/raw/.gitignore b/data/raw/.gitignore
index e17ccf76..b0254cca 100644
--- a/data/raw/.gitignore
+++ b/data/raw/.gitignore
@@ -1,3 +1,5 @@
 /allMeSH_2021.json
+/allMeSH_2021.jsonl
 /desc2021.xml
 /disease_tags_validation_grants.xlsx
+/active_grants_last_5_years.csv
diff --git a/data/raw/active_grants_last_5_years.csv.dvc b/data/raw/active_grants_last_5_years.csv.dvc
new file mode 100644
index 00000000..19717091
--- /dev/null
+++ b/data/raw/active_grants_last_5_years.csv.dvc
@@ -0,0 +1,4 @@
+outs:
+- md5: d664be2a9000d44bb0325f364ec20e27
+  size: 4953477
+  path: active_grants_last_5_years.csv
diff --git a/data/raw/allMeSH_2021.jsonl.dvc b/data/raw/allMeSH_2021.jsonl.dvc
new file mode 100644
index 00000000..f2f43ce8
--- /dev/null
+++ b/data/raw/allMeSH_2021.jsonl.dvc
@@ -0,0 +1,4 @@
+outs:
+- md5: 94f18c3918b180728a553123edb2ee32
+  size: 27914288461
+  path: allMeSH_2021.jsonl
diff --git a/examples/augment.sh b/examples/augment.sh
new file mode 100644
index 00000000..9ad482b1
--- /dev/null
+++ b/examples/augment.sh
@@ -0,0 +1,3 @@
+grants-tagger augment mesh [FOLDER_AFTER_PREPROCESSING] [SET_YOUR_OUTPUT_FOLDER_HERE] \
+    --min-examples 25 \
+    --concurrent-calls 25
\ No newline at end of file
diff --git a/examples/augment_specific_tags.sh b/examples/augment_specific_tags.sh
new file mode 100644
index 00000000..3ce920c8
--- /dev/null
+++ b/examples/augment_specific_tags.sh
@@ -0,0 +1,5 @@
+# Augments data using a file with 1 label per line and years
+grants-tagger augment mesh [FOLDER_AFTER_PREPROCESSING] [SET_YOUR_OUTPUT_FOLDER_HERE] \
+    --tags-file-path tags_to_augment.txt \
+    --examples 25 \
+    --concurrent-calls 25
\ No newline at end of file
diff --git a/examples/preprocess_and_train_by_epochs.sh b/examples/preprocess_and_train_by_epochs.sh
new file mode 100644
index 00000000..0e2f2f29
--- /dev/null
+++ b/examples/preprocess_and_train_by_epochs.sh
@@ -0,0 +1,37 @@
+# Run on g5.12xlarge instance
+
+# Without saving (on-the-fly)
+SOURCE="data/raw/allMeSH_2021.jsonl"
+
+grants-tagger train bertmesh \
+    "" \
+    $SOURCE \
+    --test-size 25000 \
+    --train-years 2016,2017,2018,2019 \
+    --test-years 2020,2021 \
+    --output_dir bertmesh_outs/pipeline_test/ \
+    --per_device_train_batch_size 16 \
+    --per_device_eval_batch_size 1 \
+    --multilabel_attention True \
+    --freeze_backbone unfreeze \
+    --num_train_epochs 7 \
+    --learning_rate 5e-5 \
+    --dropout 0.1 \
+    --hidden_size 1024 \
+    --warmup_steps 5000 \
+    --max_grad_norm 2.0 \
+    --scheduler_type cosine_hard_restart \
+    --weight_decay 0.2 \
+    --correct_bias True \
+    --threshold 0.25 \
+    --prune_labels_in_evaluation True \
+    --hidden_dropout_prob 0.2 \
+    --attention_probs_dropout_prob 0.2 \
+    --fp16 \
+    --torch_compile \
+    --evaluation_strategy epoch \
+    --eval_accumulation_steps 20 \
+    --save_strategy epoch \
+    --wandb_project wellcome-mesh \
+    --wandb_name test-train-all \
+    --wandb_api_key ${WANDB_API_KEY}
diff --git a/examples/preprocess_and_train_by_steps.sh b/examples/preprocess_and_train_by_steps.sh
new file mode 100644
index 00000000..83ec7478
--- /dev/null
+++ b/examples/preprocess_and_train_by_steps.sh
@@ -0,0 +1,39 @@
+# Run on g5.12xlarge instance
+
+# Without saving (on-the-fly)
+SOURCE="data/raw/allMeSH_2021.jsonl"
+
+grants-tagger train bertmesh \
+    "" \
+    $SOURCE \
+    --test-size 25000 \
+    --train-years 2016,2017,2018,2019 \
+    --test-years 2020,2021 \
+    --output_dir bertmesh_outs/pipeline_test/ \
+    --per_device_train_batch_size 16 \
+    --per_device_eval_batch_size 1 \
+    --multilabel_attention True \
+    --freeze_backbone unfreeze \
+    --num_train_epochs 7 \
+    --learning_rate 5e-5 \
+    --dropout 0.1 \
+    --hidden_size 1024 \
+    --warmup_steps 5000 \
+    --max_grad_norm 2.0 \
+    --scheduler_type cosine_hard_restart \
+    --weight_decay 0.2 \
+    --correct_bias True \
+    --threshold 0.25 \
+    --prune_labels_in_evaluation True \
+    --hidden_dropout_prob 0.2 \
+    --attention_probs_dropout_prob 0.2 \
+    --fp16 \
+    --torch_compile \
+    --evaluation_strategy steps \
+    --eval_steps 50000 \
+    --eval_accumulation_steps 20 \
+    --save_strategy steps \
+    --save_steps 50000 \
+    --wandb_project wellcome-mesh \
+    --wandb_name test-train-all \
+    --wandb_api_key ${WANDB_API_KEY}
diff --git a/examples/preprocess_splitting_by_fract.sh b/examples/preprocess_splitting_by_fract.sh
new file mode 100644
index 00000000..526133f2
--- /dev/null
+++ b/examples/preprocess_splitting_by_fract.sh
@@ -0,0 +1,2 @@
+grants-tagger preprocess mesh data/raw/allMeSH_2021.jsonl [SET_YOUR_OUTPUT_FOLDER_HERE] '' \
+    --test-size 0.05
\ No newline at end of file
diff --git a/examples/preprocess_splitting_by_rows.sh b/examples/preprocess_splitting_by_rows.sh
new file mode 100644
index 00000000..42ba15a4
--- /dev/null
+++ b/examples/preprocess_splitting_by_rows.sh
@@ -0,0 +1,2 @@
+grants-tagger preprocess mesh data/raw/allMeSH_2021.jsonl [SET_YOUR_OUTPUT_FOLDER_HERE] '' \
+    --test-size 25000
\ No newline at end of file
diff --git a/examples/preprocess_splitting_by_years.sh b/examples/preprocess_splitting_by_years.sh
new file mode 100644
index 00000000..629e74fa
--- /dev/null
+++ b/examples/preprocess_splitting_by_years.sh
@@ -0,0 +1,4 @@
+grants-tagger preprocess mesh data/raw/allMeSH_2021.jsonl [SET_YOUR_OUTPUT_FOLDER_HERE] '' \
+    --test-size 25000 \
+    --train-years 2016,2017,2018,2019 \
+    --test-years 2020,2021
\ No newline at end of file
diff --git a/examples/resume_train_by_epoch.sh b/examples/resume_train_by_epoch.sh
new file mode 100644
index 00000000..38b520b4
--- /dev/null
+++ b/examples/resume_train_by_epoch.sh
@@ -0,0 +1,37 @@
+# Run on g5.12xlarge instance
+
+# After preprocessing
+SOURCE="[SET_YOUR_PREPROCESSING_FOLDER_HERE]"
+
+# Checkpoint
+CHECKPOINT="checkpoint-100000"
+
+grants-tagger train bertmesh \
+    bertmesh_outs/pipeline_test/$CHECKPOINT \
+    $SOURCE \
+    --output_dir bertmesh_outs/pipeline_test/ \
+    --per_device_train_batch_size 16 \
+    --per_device_eval_batch_size 1 \
+    --multilabel_attention True \
+    --freeze_backbone unfreeze \
+    --num_train_epochs 3 \
+
--learning_rate 5e-5 \ + --dropout 0.1 \ + --hidden_size 1024 \ + --warmup_steps 0 \ + --max_grad_norm 2.0 \ + --scheduler_type cosine_hard_restart \ + --weight_decay 0.2 \ + --correct_bias True \ + --threshold 0.25 \ + --prune_labels_in_evaluation True \ + --hidden_dropout_prob 0.2 \ + --attention_probs_dropout_prob 0.2 \ + --fp16 \ + --torch_compile \ + --evaluation_strategy epoch \ + --eval_accumulation_steps 20 \ + --save_strategy epoch \ + --wandb_project wellcome-mesh \ + --wandb_name test-train-all \ + --wandb_api_key ${WANDB_API_KEY} \ No newline at end of file diff --git a/examples/resume_train_by_steps.sh b/examples/resume_train_by_steps.sh new file mode 100644 index 00000000..3c251cf3 --- /dev/null +++ b/examples/resume_train_by_steps.sh @@ -0,0 +1,39 @@ +# Run on g5.12xlarge instance + +# After preprocessing +SOURCE="[SET_YOUR_PREPROCESSING_FOLDER_HERE]" + +# Checkpoint +CHECKPOINT="checkpoint-100000" + +grants-tagger train bertmesh \ + bertmesh_outs/pipeline_test/$CHECKPOINT \ + $SOURCE \ + --output_dir bertmesh_outs/pipeline_test/ \ + --per_device_train_batch_size 16 \ + --per_device_eval_batch_size 1 \ + --multilabel_attention True \ + --freeze_backbone unfreeze \ + --num_train_epochs 3 \ + --learning_rate 5e-5 \ + --dropout 0.1 \ + --hidden_size 1024 \ + --warmup_steps 0 \ + --max_grad_norm 2.0 \ + --scheduler_type cosine_hard_restart \ + --weight_decay 0.2 \ + --correct_bias True \ + --threshold 0.25 \ + --prune_labels_in_evaluation True \ + --hidden_dropout_prob 0.2 \ + --attention_probs_dropout_prob 0.2 \ + --fp16 \ + --torch_compile \ + --evaluation_strategy steps \ + --eval_steps 10000 \ + --eval_accumulation_steps 20 \ + --save_strategy steps \ + --save_steps 10000 \ + --wandb_project wellcome-mesh \ + --wandb_name test-train-all \ + --wandb_api_key ${WANDB_API_KEY} \ No newline at end of file diff --git a/examples/train_by_epochs.sh b/examples/train_by_epochs.sh new file mode 100644 index 00000000..bf06afac --- /dev/null +++ b/examples/train_by_epochs.sh @@ -0,0 +1,34 @@ +# Run on g5.12xlarge instance + +# After preprocessing +SOURCE="[SET_YOUR_PREPROCESSING_FOLDER_HERE]" + +grants-tagger train bertmesh \ + "" \ + $SOURCE \ + --output_dir bertmesh_outs/pipeline_test/ \ + --per_device_train_batch_size 16 \ + --per_device_eval_batch_size 1 \ + --multilabel_attention True \ + --freeze_backbone unfreeze \ + --num_train_epochs 7 \ + --learning_rate 5e-5 \ + --dropout 0.1 \ + --hidden_size 1024 \ + --warmup_steps 5000 \ + --max_grad_norm 2.0 \ + --scheduler_type cosine_hard_restart \ + --weight_decay 0.2 \ + --correct_bias True \ + --threshold 0.25 \ + --prune_labels_in_evaluation True \ + --hidden_dropout_prob 0.2 \ + --attention_probs_dropout_prob 0.2 \ + --fp16 \ + --torch_compile \ + --evaluation_strategy epoch \ + --eval_accumulation_steps 20 \ + --save_strategy epoch \ + --wandb_project wellcome-mesh \ + --wandb_name test-train-all \ + --wandb_api_key ${WANDB_API_KEY} diff --git a/examples/train_by_steps.sh b/examples/train_by_steps.sh new file mode 100644 index 00000000..67cabcd6 --- /dev/null +++ b/examples/train_by_steps.sh @@ -0,0 +1,36 @@ +# Run on g5.12xlarge instance + +# After preprocessing +SOURCE="[SET_YOUR_PREPROCESSING_FOLDER_HERE]" + +grants-tagger train bertmesh \ + "" \ + $SOURCE \ + --output_dir bertmesh_outs/pipeline_test/ \ + --per_device_train_batch_size 16 \ + --per_device_eval_batch_size 1 \ + --multilabel_attention True \ + --freeze_backbone unfreeze \ + --num_train_epochs 7 \ + --learning_rate 5e-5 \ + --dropout 0.1 \ + --hidden_size 1024 \ + 
--warmup_steps 5000 \
+    --max_grad_norm 2.0 \
+    --scheduler_type cosine_hard_restart \
+    --weight_decay 0.2 \
+    --correct_bias True \
+    --threshold 0.25 \
+    --prune_labels_in_evaluation True \
+    --hidden_dropout_prob 0.2 \
+    --attention_probs_dropout_prob 0.2 \
+    --fp16 \
+    --torch_compile \
+    --evaluation_strategy steps \
+    --eval_steps 10000 \
+    --eval_accumulation_steps 20 \
+    --save_strategy steps \
+    --save_steps 10000 \
+    --wandb_project wellcome-mesh \
+    --wandb_name test-train-all \
+    --wandb_api_key ${WANDB_API_KEY}
diff --git a/grants_tagger_light/augmentation/JsonParser.py b/grants_tagger_light/augmentation/JsonParser.py
new file mode 100644
index 00000000..fe9a2e97
--- /dev/null
+++ b/grants_tagger_light/augmentation/JsonParser.py
@@ -0,0 +1,67 @@
+"""
+From langchain: https://raw.githubusercontent.com/langchain-ai/langchain/master/libs/langchain/langchain/output_parsers/json.py
+"""
+
+import json
+import re
+
+
+class JsonParser:
+    def __init__(self):
+        """Class to parse json produced by LLMs. Inspiration taken from langchain.
+        It fixes quotes, it escapes separators, etc."""
+        pass
+
+    @staticmethod
+    def _replace_new_line(match: re.Match[str]) -> str:
+        value = match.group(2)
+        value = re.sub(r"\n", r"\\n", value)
+        value = re.sub(r"\r", r"\\r", value)
+        value = re.sub(r"\t", r"\\t", value)
+        value = re.sub('"', r"\"", value)
+
+        return match.group(1) + value + match.group(3)
+
+    @staticmethod
+    def _custom_parser(multiline_string: str) -> str:
+        """
+        The LLM response for `action_input` may be a multiline
+        string containing unescaped newlines, tabs or quotes. This function
+        replaces those characters with their escaped counterparts.
+        (newlines in JSON must be double-escaped: `\\n`)
+        """
+        if isinstance(multiline_string, (bytes, bytearray)):
+            multiline_string = multiline_string.decode()
+
+        multiline_string = re.sub(
+            r'("action_input"\:\s*")(.*)(")',
+            JsonParser._replace_new_line,
+            multiline_string,
+            flags=re.DOTALL,
+        )
+
+        return multiline_string
+
+    @staticmethod
+    def parse_json(json_string: str) -> dict:
+        """
+        Parse a JSON string from LLM response
+
+        Args:
+            json_string: The Markdown string.
+
+        Returns:
+            The parsed JSON object as a Python dictionary.
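+
+        Example (illustrative):
+            >>> JsonParser.parse_json('{"abstract": "some text", "title": "a title"}')
+            {'abstract': 'some text', 'title': 'a title'}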
+ """ + json_str = json_string + + # Strip whitespace and newlines from the start and end + json_str = json_str.strip() + + # handle newlines and other special characters inside the returned value + json_str = JsonParser._custom_parser(json_str) + + # Parse the JSON string into a Python dictionary + parsed = json.loads(json_str) + + return parsed diff --git a/grants_tagger_light/augmentation/__init__.py b/grants_tagger_light/augmentation/__init__.py new file mode 100644 index 00000000..81ed5b19 --- /dev/null +++ b/grants_tagger_light/augmentation/__init__.py @@ -0,0 +1,8 @@ +import typer +from .augment import augment_cli + +augment_app = typer.Typer() +augment_app.command( + "mesh", + context_settings={"allow_extra_args": True, "ignore_unknown_options": True}, +)(augment_cli) diff --git a/grants_tagger_light/augmentation/augment.py b/grants_tagger_light/augmentation/augment.py new file mode 100644 index 00000000..05a38cda --- /dev/null +++ b/grants_tagger_light/augmentation/augment.py @@ -0,0 +1,232 @@ +import json +import multiprocessing +import os + +import typer +from loguru import logger +import numpy as np + + +from grants_tagger_light.augmentation.augment_openai import AugmentOpenAI + +from datasets import load_from_disk + +from grants_tagger_light.augmentation.parallel_augment_openai import ( + ParallelAugmentOpenAI, +) + +augment_app = typer.Typer() + + +def _count_elements_in_sublist(sublist): + element_count = {} + for element in sublist: + if element in element_count: + element_count[element] += 1 + else: + element_count[element] = 1 + return element_count + + +def _merge_dicts(dict_list): + merged_dict = {} + for d in dict_list: + for key, value in d.items(): + if key in merged_dict: + merged_dict[key] += value + else: + merged_dict[key] = value + return merged_dict + + +def augment( + data_path: str, + save_to_path: str, + model_key: str = "gpt-3.5-turbo", + num_proc: int = os.cpu_count(), + batch_size: int = 64, + min_examples: int = None, + examples: int = 25, + prompt_template: str = "grants_tagger_light/augmentation/prompt.template", + concurrent_calls: int = os.cpu_count() * 2, + temperature: float = 1.5, + tags_file_path: str = None, +): + if model_key.strip().lower() not in ["gpt-3.5-turbo", "text-davinci", "gpt-4"]: + raise NotImplementedError( + f"{model_key} not implemented as an augmentation framework" + ) + + dset = load_from_disk(os.path.join(data_path, "dataset")) + if "train" in dset: + dset = dset["train"] + logger.info("Obtaining count values from the labels...") + pool = multiprocessing.Pool(processes=num_proc) + element_counts_list = pool.map(_count_elements_in_sublist, dset["meshMajor"]) + pool.close() + pool.join() + + merged_element_counts = _merge_dicts(element_counts_list) + sorted_merged_element_counts = sorted( + merged_element_counts.items(), key=lambda x: x[1], reverse=True + ) + sorted_merged_element_counts_dict = dict(sorted_merged_element_counts) + if tags_file_path is not None: + with open(tags_file_path, "r") as f: + tags = f.read().split("\n") + logger.info( + f"Tags file path found. 
Filtering {len(tags)} tags "
+            f"(examples found: {tags[:15]}...)"
+        )
+        sorted_merged_element_counts_dict = {
+            k: v for k, v in sorted_merged_element_counts_dict.items() if k in tags
+        }
+
+    if min_examples is not None:
+        sorted_merged_element_counts_dict = {
+            k: v
+            for k, v in sorted_merged_element_counts_dict.items()
+            if v < min_examples
+        }
+
+    with open(f"{save_to_path}.count", "w") as f:
+        f.write(json.dumps(sorted_merged_element_counts_dict, indent=2))
+
+    tags_to_augment = list(sorted_merged_element_counts_dict.keys())
+
+    biggest_tags_to_augment = [
+        f"{k}({sorted_merged_element_counts_dict[k]})" for k in tags_to_augment[:5]
+    ]
+    smallest_tags_to_augment = [
+        f"{k}({sorted_merged_element_counts_dict[k]})" for k in tags_to_augment[-5:]
+    ]
+
+    logger.info(
+        f"Augmenting a total of {len(tags_to_augment)} tags, "
+        f"from {biggest_tags_to_augment} to {smallest_tags_to_augment}"
+    )
+
+    logger.info("Collecting existing examples of those tags to send in the prompt")
+
+    dset = dset.filter(
+        lambda x: any(np.isin(tags_to_augment, x["meshMajor"])), num_proc=num_proc
+    )
+
+    dset = dset.map(
+        lambda _, y: {"idx": y},
+        with_indices=True,
+        batched=True,
+        batch_size=batch_size,
+        desc="Creating idx",
+        num_proc=num_proc,
+    )
+
+    if concurrent_calls == 1:
+        openai = AugmentOpenAI
+    else:
+        openai = ParallelAugmentOpenAI
+
+    collect_concurrent_calls = []
+
+    for t in tags_to_augment:
+        # Append first, then flush once the batch is full, so the tag that
+        # fills the batch is not dropped when the batch is emptied
+        collect_concurrent_calls.append((t, examples))
+        if len(collect_concurrent_calls) >= concurrent_calls:
+            openai(prompt_template_path=prompt_template, model_key=model_key).generate(
+                collect_concurrent_calls,
+                dset,
+                save_to_path,
+                model_key,
+                temperature=temperature,
+                num_proc=num_proc,
+            )
+            collect_concurrent_calls = []
+
+    # Remaining rows of the last batch
+    if len(collect_concurrent_calls) > 0:
+        openai(prompt_template_path=prompt_template, model_key=model_key).generate(
+            collect_concurrent_calls,
+            dset,
+            save_to_path,
+            model_key,
+            temperature=temperature,
+            num_proc=num_proc,
+        )
+
+
+@augment_app.command()
+def augment_cli(
+    data_path: str = typer.Argument(..., help="Path to mesh.jsonl"),
+    save_to_path: str = typer.Argument(
+        ..., help="Path to save the serialized PyArrow dataset after preprocessing"
+    ),
+    model_key: str = typer.Option(
+        "gpt-3.5-turbo",
+        help="LLM to use data augmentation. By now, only `openai` is supported",
+    ),
+    num_proc: int = typer.Option(
+        os.cpu_count(), help="Number of processes to use for data augmentation"
+    ),
+    batch_size: int = typer.Option(
+        64, help="Preprocessing batch size (for dataset, filter, map, ...)"
+    ),
+    min_examples: int = typer.Option(
+        None,
+        help="Minimum number of examples to require. "
+        "Less than that will trigger data augmentation.",
+    ),
+    examples: int = typer.Option(25, help="Examples to generate for each tag."),
+    prompt_template: str = typer.Option(
+        "grants_tagger_light/augmentation/prompt.template",
+        help="File to use as a prompt. "
+        "Make sure to ask the LLM to return a dict with two fields: "
+        "`abstract` and `tags`",
+    ),
+    concurrent_calls: int = typer.Option(
+        os.cpu_count() * 2,
+        min=1,
+        help="Concurrent calls with 1 tag each to the different model",
+    ),
+    temperature: float = typer.Option(
+        1.5,
+        min=0,
+        max=2,
+        help="A value between 0 and 2. The bigger - the more creative.",
+    ),
+    tags_file_path: str = typer.Option(
+        None,
+        help="Text file containing one line per tag to be considered.
" + "The rest will be discarded.", + ), +): + if not os.path.isdir(data_path): + logger.error( + "The data path should be a folder with saved data from " + "`preprocessing` step." + ) + exit(-1) + + if tags_file_path is None and min_examples is None: + logger.error( + "To understand which tags need to be augmented, " + "set either --min-examples or --tags-file-path" + ) + exit(-1) + + if float(temperature) > 2.0 or float(temperature) < -2.0: + logger.error("Temperature should be in the range [-2, 2]") + exit(-1) + + augment( + data_path, + save_to_path, + model_key=model_key, + num_proc=num_proc, + batch_size=batch_size, + min_examples=min_examples, + examples=examples, + prompt_template=prompt_template, + concurrent_calls=concurrent_calls, + temperature=temperature, + tags_file_path=tags_file_path, + ) diff --git a/grants_tagger_light/augmentation/augment_openai.py b/grants_tagger_light/augmentation/augment_openai.py new file mode 100644 index 00000000..22c6dcd4 --- /dev/null +++ b/grants_tagger_light/augmentation/augment_openai.py @@ -0,0 +1,183 @@ +import datetime +import json +import math +import os +import uuid + +import openai + +from loguru import logger +import numpy as np + +from grants_tagger_light.augmentation.JsonParser import JsonParser + + +class AugmentOpenAI: + def __init__(self, prompt_template_path, model_key="gpt-3.5-turbo"): + if "OPENAI_API_KEY" not in os.environ: + logger.error( + "OPENAI_API_KEY not found in env vars. " + "Please define it before running this program." + ) + with open(prompt_template_path, "r") as f: + self.prompt_template = f.read() + self.model_key = model_key + + def _create_message(self, abstract, tag): + prompt = self.prompt_template.replace("{TOPIC}", tag) + prompt = prompt.replace("{ABSTRACT}", abstract) + + return [{"role": "user", "content": prompt}] + + @staticmethod + def _parse_response(answer, metadata): + print(json.dumps(answer, indent=2)) + with open(metadata["save_to_path"], "a") as f: + try: + json_response = JsonParser.parse_json(answer) + except Exception as e: + logger.info(f"Error processing output: {e}. 
Skipping...") + return + + res = { + "journal": metadata["model_key"], + "meshMajor": metadata["tags"], + "year": metadata["year"], + "abstractText": json_response["abstract"] + .replace("'", "") + .replace('"', ""), + "pmid": uuid.uuid4().hex, + "title": json_response["title"].replace("'", "").replace('"', ""), + "existing_example": metadata["existing_example"] + .replace("'", "") + .replace('"', ""), + "required_examples": metadata["required_examples"], + "featured_tag": metadata["featured_tag"], + } + + f.write(json.dumps(res)) + f.write("\n") + f.flush() + + logger.info(f"Data received successfully for {metadata['featured_tag']}") + + @staticmethod + def process_choices(choices, metadata): + for c in choices: + if "message" in c: + if "content" in c["message"]: + AugmentOpenAI._parse_response(c["message"]["content"], metadata) + + @staticmethod + def _process_response(result): + AugmentOpenAI.process_choices(result.choices, result.metadata) + + def _prepare_request( + self, + tag, + missing_num, + dset, + temperature, + top_p, + presence_penalty, + num_proc, + model_key, + save_to_path, + ): + year = datetime.date.today().year + + logger.info(f"Augmenting {tag} with {missing_num} examples") + # RAG: I select similar articles to provide them to the LLM + + tmp_dset = dset.filter( + lambda x: any(np.isin([tag], x["meshMajor"])), num_proc=num_proc + ) + + # abstracts_num = [i for i in range(len(tmp_dset))] + # random.shuffle(abstracts_num) + + required_examples = missing_num + existing_examples = min(required_examples, len(tmp_dset)) + + n_per_example = math.ceil(required_examples / existing_examples) + logger.info( + f"Augmenting {tag} with {required_examples} examples, " + f"using {existing_examples} in RAG mode" + ) + + for i in range(existing_examples): + abstract = tmp_dset["abstractText"][i] + tags = tmp_dset["meshMajor"][i] + data = { + "model": self.model_key, + "n": n_per_example, + "temperature": temperature, + "top_p": top_p, + "presence_penalty": presence_penalty, + "messages": self._create_message(abstract, tag), + } + + metadata = { + "featured_tag": tag, + "tags": tags, + "required_examples": missing_num, + "existing_example": abstract, + "year": year, + "model_key": model_key, + "save_to_path": save_to_path, + } + + yield data, metadata + + def _make_requests( + self, + collect_concurrent_calls, + dset, + temperature, + top_p, + presence_penalty, + num_proc, + model_key, + save_to_path, + ): + for num in range(len(collect_concurrent_calls)): + tag = collect_concurrent_calls[num][0] + missing_num = collect_concurrent_calls[num][1] + + for data, metadata in self._prepare_request( + tag, + missing_num, + dset, + temperature, + top_p, + presence_penalty, + num_proc, + model_key, + save_to_path, + ): + chat_completion = openai.ChatCompletion.create(**data) + chat_completion.metadata = metadata + + self._process_response(chat_completion) + + def generate( + self, + collect_concurrent_calls, + dset, + save_to_path, + model_key, + temperature=1.5, + top_p=1, + presence_penalty=0, + num_proc=os.cpu_count(), + ): + self._make_requests( + collect_concurrent_calls=collect_concurrent_calls, + dset=dset, + temperature=temperature, + top_p=top_p, + presence_penalty=presence_penalty, + num_proc=num_proc, + model_key=model_key, + save_to_path=save_to_path, + ) diff --git a/grants_tagger_light/augmentation/parallel_augment_openai.py b/grants_tagger_light/augmentation/parallel_augment_openai.py new file mode 100644 index 00000000..4d9088d3 --- /dev/null +++ 
b/grants_tagger_light/augmentation/parallel_augment_openai.py @@ -0,0 +1,79 @@ +import os + +from loguru import logger +from openai_multi_client import OpenAIMultiClient + +from grants_tagger_light.augmentation.augment_openai import AugmentOpenAI + + +class ParallelAugmentOpenAI(AugmentOpenAI): + def __init__(self, prompt_template_path, model_key="gpt-3.5-turbo"): + super().__init__(prompt_template_path, model_key) + self.api = OpenAIMultiClient( + endpoint="chats", data_template={"model": self.model_key} + ) + + @staticmethod + def _process_response(result): + if result.failed: + logger.warning( + f"Failed to get augmentation for {result.metadata['featured_tag']}" + ) + return + choices = result.response["choices"] + AugmentOpenAI.process_choices(choices, result.metadata) + + def _make_requests( + self, + collect_concurrent_calls, + dset, + temperature, + top_p, + presence_penalty, + num_proc, + model_key, + save_to_path, + ): + for num in range(len(collect_concurrent_calls)): + tag = collect_concurrent_calls[num][0] + missing_num = collect_concurrent_calls[num][1] + + for data, metadata in self._prepare_request( + tag, + missing_num, + dset, + temperature, + top_p, + presence_penalty, + num_proc, + model_key, + save_to_path, + ): + self.api.request( + data=data, metadata=metadata, callback=self._process_response + ) + + def generate( + self, + collect_concurrent_calls, + dset, + save_to_path, + model_key, + temperature=1.5, + top_p=1, + presence_penalty=0, + num_proc=os.cpu_count(), + ): + self.api.run_request_function( + self._make_requests, + collect_concurrent_calls=collect_concurrent_calls, + dset=dset, + temperature=temperature, + top_p=top_p, + presence_penalty=presence_penalty, + num_proc=num_proc, + model_key=model_key, + save_to_path=save_to_path, + ) + + self.api.pull_all() diff --git a/grants_tagger_light/augmentation/prompt.template b/grants_tagger_light/augmentation/prompt.template new file mode 100644 index 00000000..45eb75d3 --- /dev/null +++ b/grants_tagger_light/augmentation/prompt.template @@ -0,0 +1,12 @@ +You will act as a Data Augmentation engine. I will provide an ABSTRACT and a TOPIC and you will create a json with two fields: +1. 'abstract': a new abstract, created using Data Augmentation, using the ABSTRACT I've sent to you as an inspiration. Make sure the abstract you generate is quite different to the ABSTRACT I've sent to you but keeping the TOPIC. +2. 'title': a sentence summarizing the abstract you have created. + +Make sure the json is well formed. 
+======================= + +ABSTRACT: +{ABSTRACT} + +TOPIC: +{TOPIC} \ No newline at end of file diff --git a/grants_tagger_light/cli.py b/grants_tagger_light/cli.py index 2a9585a9..62b9cf2a 100644 --- a/grants_tagger_light/cli.py +++ b/grants_tagger_light/cli.py @@ -2,6 +2,7 @@ import typer +from grants_tagger_light.augmentation import augment_app from grants_tagger_light.download_epmc import download_epmc_cli from grants_tagger_light.evaluation import evaluate_app from grants_tagger_light.predict import predict_cli @@ -12,11 +13,13 @@ logger = logging.getLogger(__name__) -app = typer.Typer() +app = typer.Typer(pretty_exceptions_enable=False) app.add_typer(preprocess_app, name="preprocess") +app.add_typer(augment_app, name="augment") app.add_typer(evaluate_app, name="evaluate") + app.command("predict")(predict_cli) tune_app = typer.Typer() diff --git a/grants_tagger_light/evaluation/evaluate_model.py b/grants_tagger_light/evaluation/evaluate_model.py index 4f7c8352..eb43b720 100644 --- a/grants_tagger_light/evaluation/evaluate_model.py +++ b/grants_tagger_light/evaluation/evaluate_model.py @@ -56,7 +56,7 @@ def evaluate_model( Y_pred_proba = sp.csr_matrix(Y_pred_proba) - if type(threshold) != list: + if not isinstance(threshold, list): threshold = [threshold] widths = (12, 5, 5, 5) diff --git a/grants_tagger_light/models/bert_mesh/model.py b/grants_tagger_light/models/bert_mesh/model.py index 48613074..bc14d850 100644 --- a/grants_tagger_light/models/bert_mesh/model.py +++ b/grants_tagger_light/models/bert_mesh/model.py @@ -3,6 +3,8 @@ import torch import torch.nn.functional as F +# from loguru import logger + class MultiLabelAttention(torch.nn.Module): def __init__(self, D_in, num_labels): @@ -28,22 +30,42 @@ def __init__( self.config.auto_map = {"AutoModel": "model.BertMesh"} self.pretrained_model = self.config.pretrained_model self.num_labels = self.config.num_labels + self.hidden_size = getattr(self.config, "hidden_size", 512) + self.dropout = getattr(self.config, "dropout", 0.1) + self.multilabel_attention = getattr(self.config, "multilabel_attention", False) + self.id2label = self.config.id2label self.bert = AutoModel.from_pretrained(self.pretrained_model) # 768 self.multilabel_attention_layer = MultiLabelAttention( 768, self.num_labels ) # num_labels, 768 - self.linear_1 = torch.nn.Linear(768, self.hidden_size) # num_labels, 512 - self.linear_2 = torch.nn.Linear(self.hidden_size, 1) # num_labels, 1 + self.linear_1 = torch.nn.Linear(768, self.hidden_size) # 768, 1024 + self.linear_2 = torch.nn.Linear(self.hidden_size, 1) # 1024, 1 self.linear_out = torch.nn.Linear(self.hidden_size, self.num_labels) self.dropout_layer = torch.nn.Dropout(self.dropout) + def freeze_backbone(self): + for param in self.bert.parameters(): + param.requires_grad = False + + def unfreeze_backbone(self, only_bias=False): + for name, param in self.bert.named_parameters(): + if only_bias: + if "bias" in name.lower(): + # logger.info(f"Unfreezing {name}") + param.requires_grad = True + else: + param.requires_grad = False + else: + # logger.info(f"Unfreezing {name}") + param.requires_grad = True + def forward(self, input_ids, labels=None, **kwargs): - if type(input_ids) is list: + if isinstance(input_ids, list): # coming from tokenizer input_ids = torch.tensor(input_ids) @@ -52,19 +74,18 @@ def forward(self, input_ids, labels=None, **kwargs): attention_outs = self.multilabel_attention_layer(hidden_states) outs = torch.nn.functional.relu(self.linear_1(attention_outs)) outs = self.dropout_layer(outs) - outs = 
torch.sigmoid(self.linear_2(outs)) + outs = self.linear_2(outs) outs = torch.flatten(outs, start_dim=1) else: cls = self.bert(input_ids=input_ids)[1] outs = torch.nn.functional.relu(self.linear_1(cls)) outs = self.dropout_layer(outs) - outs = torch.sigmoid(self.linear_out(outs)) + outs = self.linear_out(outs) if labels is not None: - loss = F.binary_cross_entropy(outs, labels.float()) + loss = F.binary_cross_entropy_with_logits(outs, labels.float()) else: loss = -1 - return SequenceClassifierOutput( loss=loss, logits=outs, diff --git a/grants_tagger_light/preprocessing/__init__.py b/grants_tagger_light/preprocessing/__init__.py index 5b66021a..3a7024d1 100644 --- a/grants_tagger_light/preprocessing/__init__.py +++ b/grants_tagger_light/preprocessing/__init__.py @@ -1,8 +1,8 @@ import typer - from .preprocess_mesh import preprocess_mesh_cli preprocess_app = typer.Typer() -preprocess_app.command("bioasq-mesh")(preprocess_mesh_cli) - -__all__ = ["preprocess_app"] +preprocess_app.command( + "mesh", + context_settings={"allow_extra_args": True, "ignore_unknown_options": True}, +)(preprocess_mesh_cli) diff --git a/grants_tagger_light/preprocessing/preprocess_mesh.py b/grants_tagger_light/preprocessing/preprocess_mesh.py index 5957d235..ef1ca963 100644 --- a/grants_tagger_light/preprocessing/preprocess_mesh.py +++ b/grants_tagger_light/preprocessing/preprocess_mesh.py @@ -1,135 +1,289 @@ -""" -Preprocess JSON Mesh data from BioASQ to JSONL -""" import json -from pathlib import Path -from typing import Optional +import tempfile -import pandas as pd import typer +import time +from transformers import AutoTokenizer +from datasets import load_dataset +from grants_tagger_light.models.bert_mesh import BertMesh +import os +from loguru import logger from tqdm import tqdm +import numpy as np +from datasets.dataset_dict import DatasetDict -from grants_tagger_light.utils import ( - write_jsonl, -) +from grants_tagger_light.utils.years_tags_parser import parse_tags, parse_years +preprocess_app = typer.Typer() -def yield_raw_data(input_path): - with open(input_path, encoding="latin-1") as f_i: - f_i.readline() # skip first line ({"articles":[) which is not valid JSON - for i, line in enumerate(f_i): - item = json.loads(line[:-2]) - yield item + +def _tokenize(batch, tokenizer: AutoTokenizer, x_col: str): + return tokenizer( + batch[x_col], + padding="max_length", + truncation=True, + max_length=512, + ) + + +def _map_label_to_ids(labels, label2id): + return [label2id[label] for label in labels] -def process_data(item, filter_tags=None, filter_years=None): - text = item["abstractText"] - tags = item["meshMajor"] - journal = item["journal"] - year = item["year"] - if filter_tags: - tags = list(set(tags).intersection(filter_tags)) - if not tags: - return - if filter_years: - min_year, max_year = filter_years.split(",") - if year > int(max_year): - return - if year < int(min_year): - return - data = {"text": text, "tags": tags, "meta": {"journal": journal, "year": year}} - return data +def _encode_labels(sample, label2id): + return {"label_ids": [_map_label_to_ids(x, label2id) for x in sample["meshMajor"]]} -def yield_data(input_path, filter_tags, filter_years, buffer_size=10_000): - data_batch = [] - for item in tqdm(yield_raw_data(input_path), total=15_000_000): # approx 15M docs - processed_item = process_data(item, filter_tags, filter_years) +def _filter_rows_by_years(sample, years): + return [x in years for x in sample["year"]] - if processed_item: - data_batch.append(processed_item) - if len(data_batch) >= 
buffer_size: - yield data_batch - data_batch = [] +def create_sample_file(jsonl_file, lines): + with open(jsonl_file, "r") as input_file: + with tempfile.NamedTemporaryFile(mode="w", delete=False) as tmp_file: + for _ in range(lines): + line = input_file.readline() + if not line: + break + tmp_file.write(line) - if data_batch: - yield data_batch + return tmp_file.name def preprocess_mesh( - raw_data_path, - processed_data_path, - mesh_tags_path=None, - filter_years=None, - n_max=None, - buffer_size=10_000, + data_path: str, + model_key: str, + save_to_path: str = None, + test_size: float = None, + num_proc: int = os.cpu_count(), + max_samples: int = -1, + batch_size: int = 256, + tags: list = None, + train_years: list = None, + test_years: list = None, ): - if mesh_tags_path: - filter_tags_data = pd.read_csv(mesh_tags_path) - filter_tags = filter_tags_data["DescriptorName"].tolist() - filter_tags = set(filter_tags) + if max_samples != -1: + logger.info(f"Filtering examples to {max_samples}") + data_path = create_sample_file(data_path, max_samples) + + if not model_key: + label2id = None + id2label = None + tokenizer = AutoTokenizer.from_pretrained( + "microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract" + ) else: - filter_tags = None + # Load the model to get its label2id + tokenizer = AutoTokenizer.from_pretrained(model_key) + model = BertMesh.from_pretrained(model_key, trust_remote_code=True) + + id2label = model.id2label + label2id = {v: k for k, v in id2label.items()} + + # We only have 1 file, so no sharding is available https://huggingface.co/docs/datasets/loading#multiprocessing + dset = load_dataset("json", data_files=data_path, num_proc=1) + # By default, any dataset loaded is set to 'train' using the previous command + if "train" in dset: + dset = dset["train"] + + years = list() + if train_years is not None and len(train_years) > 0: + years.extend(train_years) + if test_years is not None and len(test_years) > 0: + years.extend(test_years) + + if len(years) > 0: + logger.info(f"Removing all years which are not in {years}") + dset = dset.filter( + lambda x: any(np.isin(years, [str(x["year"])])), num_proc=num_proc + ) + + if tags is None: + tags = [] + + if len(tags) > 0: + logger.info(f"Removing all tags which are not in {tags}") + dset = dset.filter( + lambda x: any(np.isin(tags, x["meshMajor"])), num_proc=num_proc + ) + + # Remove unused columns to save space & time + dset = dset.remove_columns(["journal", "pmid", "title"]) + + t1 = time.time() + dset = dset.map( + _tokenize, + batched=True, + batch_size=batch_size, + num_proc=num_proc, + desc="Tokenizing", + fn_kwargs={"tokenizer": tokenizer, "x_col": "abstractText"}, + load_from_cache_file=False, + ) + logger.info("Time taken to tokenize: {}".format(time.time() - t1)) + + # Generate label2id if None + if label2id is None: + logger.info("Getting the labels...") + dset = dset.map( + lambda x: {"meshMajor": x["meshMajor"]}, + batched=True, + batch_size=batch_size, + num_proc=num_proc, + desc="Getting labels", + ) + + # Most efficient way to do dedup of labels + unique_labels_set = set() - if filter_years: - min_year, max_year = filter_years.split(",") - filter_years = [int(min_year), int(max_year)] + logger.info("Obtaining unique values from the labels...") + # Iterate through the lists and add elements to the set + for arr in tqdm(dset["meshMajor"]): + unique_labels_set.update(arr) - # If only using a tiny set of data, need to reduce buffer size - if n_max is not None and n_max < buffer_size: - buffer_size = n_max + # Most 
efficient way to do dictionary creation
+ logger.info("Creating label2id dictionary...")
+ label2id = dict()
+ id2label = dict()
+ for idx, label in enumerate(tqdm(unique_labels_set)):
+ label2id.update({label: idx})
+ id2label.update({idx: label})
- with open(processed_data_path, "w") as f:
- for i, data_batch in enumerate(
- yield_data(
- raw_data_path, filter_tags, filter_years, buffer_size=buffer_size
+ t1 = time.time()
+ dset = dset.map(
+ _encode_labels,
+ batched=True,
+ batch_size=batch_size,
+ desc="Encoding labels",
+ num_proc=num_proc,
+ fn_kwargs={"label2id": label2id},
+ )
+ logger.info("Time taken to encode labels: {}".format(time.time() - t1))
+
+ logger.info("Preparing train/test split...")
+ # Split into train and test
+ t1 = time.time()
+ if len(years) > 0:
+ logger.info("Splitting the dataset by training and test years")
+ train_dset = dset.filter(
+ _filter_rows_by_years,
+ batched=True,
+ batch_size=batch_size,
+ desc=f"Creating training dataset with years {train_years}",
+ num_proc=num_proc,
+ fn_kwargs={"years": train_years},
+ )
+ test_dset = dset.filter(
+ _filter_rows_by_years,
+ batched=True,
+ batch_size=batch_size,
+ desc=f"Creating test dataset with years {test_years}",
+ num_proc=num_proc,
+ fn_kwargs={"years": test_years},
+ )
+
+ if test_size is None or test_size == 1.0:
+ test_size = len(test_dset)
+ logger.info(f"Using the whole dataset of {test_size} rows")
+ dset = DatasetDict({"train": train_dset, "test": test_dset})
+ else:
+ if test_size > 1.0:
+ test_size = int(test_size)
+ logger.info(f"Using a test_size fraction or number of rows of {test_size}")
+ dset = DatasetDict(
+ {
+ "train": train_dset,
+ "test": test_dset.train_test_split(test_size)["test"],
+ }
+ )
+
+ else:
+ if test_size is None:
+ test_size = 0.05
+ logger.info(
+ f"Test size not found. 
" + f"Setting it to a frac of the whole dataset equal to " + f"{test_size}" ) - ): - write_jsonl(f, data_batch) - if n_max and (i + 1) * buffer_size >= n_max: - break + elif test_size > 1.0: + test_size = int(test_size) + dset = dset.train_test_split(test_size=test_size) + logger.info("Time taken to split into train and test: {}".format(time.time() - t1)) -preprocess_mesh_app = typer.Typer() + # If running from Training, by default it will be None so that we don't spend time + # on serializing the data # to disk if we are going to load it afterwards + if save_to_path is not None: + logger.info("Saving to disk...") + dset.save_to_disk(os.path.join(save_to_path, "dataset"), num_proc=num_proc) + with open(os.path.join(save_to_path, "label2id"), "w") as f: + json.dump(label2id, f) + with open(os.path.join(save_to_path, "id2label"), "w") as f: + json.dump(id2label, f) + return dset, label2id, id2label -@preprocess_mesh_app.command() + +@preprocess_app.command() def preprocess_mesh_cli( - input_path: Optional[str] = typer.Argument(None, help="path to BioASQ JSON data"), - train_output_path: Optional[str] = typer.Argument( - None, help="path to JSONL output file that will be generated for the train set" - ), - label_binarizer_path: Optional[Path] = typer.Argument( - None, help="path to pickle file that will contain the label binarizer" + data_path: str = typer.Argument(..., help="Path to mesh.jsonl"), + save_to_path: str = typer.Argument( + ..., help="Path to save the serialized PyArrow dataset after preprocessing" ), - test_output_path: Optional[str] = typer.Option( - None, help="path to JSONL output file that will be generated for the test set" + model_key: str = typer.Argument( + ..., + help="Key to use when loading tokenizer and label2id. " + "Leave blank if training from scratch", # noqa ), - mesh_tags_path: Optional[str] = typer.Option( - None, help="path to mesh tags to filter" + test_size: float = typer.Option( + None, help="Fraction of data to use for testing in (0,1] or number of rows" ), - test_split: Optional[float] = typer.Option( - 0.01, help="split percentage for test data. if None no split." + num_proc: int = typer.Option( + os.cpu_count(), help="Number of processes to use for preprocessing" ), - filter_years: Optional[str] = typer.Option( - None, help="years to keep in form min_year,max_year with both inclusive" + max_samples: int = typer.Option( + -1, + help="Maximum number of samples to use for preprocessing", ), - n_max: Optional[int] = typer.Option( + batch_size: int = typer.Option(256, help="Size of the preprocessing batch"), + tags: str = typer.Option( None, - help=""" - Maximum limit on the number of datapoints in the set - (including training and test)""", + help="Comma-separated tags you want to include in the dataset " + "(the rest will be discarded)", + ), + train_years: str = typer.Option( + None, help="Comma-separated years you want to include in the training dataset" + ), + test_years: str = typer.Option( + None, help="Comma-separated years you want to include in the test dataset" ), ): - preprocess_mesh( - input_path, - train_output_path, - mesh_tags_path=mesh_tags_path, - filter_years=filter_years, - n_max=n_max, - ) + if not data_path.endswith("jsonl"): + logger.error( + "It seems your input MeSH data is not in `jsonl` format. 
" + "Please, run first `scripts/mesh_json_to_jsonlpy.`" + ) + exit(-1) + if train_years is not None: + if test_years is None: + logger.error("--train-years require --test-years") + exit(-1) -if __name__ == "__main__": - typer.run(preprocess_mesh) + if test_years is not None: + if train_years is None: + logger.error("--test-years require --train-years") + exit(-1) + + preprocess_mesh( + data_path=data_path, + model_key=model_key, + test_size=test_size, + num_proc=num_proc, + max_samples=max_samples, + batch_size=batch_size, + save_to_path=save_to_path, + tags=parse_tags(tags), + train_years=parse_years(train_years), + test_years=parse_years(test_years), + ) diff --git a/grants_tagger_light/training/__init__.py b/grants_tagger_light/training/__init__.py index b56c3c2a..e47c1bd0 100644 --- a/grants_tagger_light/training/__init__.py +++ b/grants_tagger_light/training/__init__.py @@ -1,6 +1,8 @@ import typer - from .train import train_bertmesh_cli train_app = typer.Typer() -train_app.command("bertmesh")(train_bertmesh_cli) +train_app.command( + "bertmesh", + context_settings={"allow_extra_args": True, "ignore_unknown_options": True}, +)(train_bertmesh_cli) diff --git a/grants_tagger_light/training/cli_args/__init__.py b/grants_tagger_light/training/cli_args/__init__.py new file mode 100644 index 00000000..30ffb4a7 --- /dev/null +++ b/grants_tagger_light/training/cli_args/__init__.py @@ -0,0 +1,9 @@ +from .train_args import BertMeshTrainingArguments +from .wandb_args import WandbArguments +from .bertmesh_args import BertMeshModelArguments + +__all__ = [ + "BertMeshTrainingArguments", + "WandbArguments", + "BertMeshModelArguments", +] diff --git a/grants_tagger_light/training/cli_args/bertmesh_args.py b/grants_tagger_light/training/cli_args/bertmesh_args.py new file mode 100644 index 00000000..c59a3364 --- /dev/null +++ b/grants_tagger_light/training/cli_args/bertmesh_args.py @@ -0,0 +1,14 @@ +from dataclasses import dataclass, field + + +@dataclass +class BertMeshModelArguments: + pretrained_model_key: str = field( + default="microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract" + ) + hidden_size: int = field(default=1024) + dropout: float = field(default=0.1) + multilabel_attention: bool = field(default=True) + freeze_backbone: str = field(default="unfreeze") # unfreeze, unfreeze_bias, freeze + hidden_dropout_prob: float = field(default=0.2) + attention_probs_dropout_prob: float = field(default=0.2) diff --git a/grants_tagger_light/training/cli_args/train_args.py b/grants_tagger_light/training/cli_args/train_args.py new file mode 100644 index 00000000..e8bc1b4d --- /dev/null +++ b/grants_tagger_light/training/cli_args/train_args.py @@ -0,0 +1,79 @@ +from transformers import TrainingArguments +from dataclasses import dataclass, field +import torch + + +@dataclass +class BertMeshTrainingArguments(TrainingArguments): + """ + This class inherits from transformers.TrainingArguments + and implements some better defaults for convenience. 
+ """ + + output_dir: str = field(default="bertmesh_outs/default") + overwrite_output_dir: bool = field(default=True) + + evaluation_strategy: str = field(default="epoch") # no | epoch | steps + # eval_steps: int = 1 + + save_strategy: str = field(default="epoch") # no | epoch | steps + # save_steps: int = 1 + save_total_limit: int = field(default=5) + + metric_for_best_model: str = field( + default="eval_loss" + ) # can switch later to micro-f1 + greater_is_better: bool = field(default=False) + load_best_model_at_end: bool = field(default=True) + + per_device_train_batch_size: int = field( + default=8 + ) # set to 256 in grants-tagger repo + per_device_eval_batch_size: int = field(default=8) + gradient_accumulation_steps: int = field(default=1) + group_by_length: bool = field(default=False) # TODO test this + + # Learning rate & num train epochs are taken from grants_tagger repo + # (BertMeSH paper does not specify these hparams) + num_train_epochs: int = field(default=5) + learning_rate: float = field(default=1e-4) + + seed: int = field(default=42) + data_seed: int = field(default=42) + + report_to: str = field(default="wandb") + + optim: str = field( + default="adamw_torch_fused" + ) # TODO add support for adamw_apex_fused; use adamw_anyprecision if using bf16 + + fp16: bool = field(default=False) # TODO test if micro-f1 is maintained + + dataloader_num_workers: int = field(default=8) + dataloader_pin_memory: bool = field(default=True) + + gradient_checkpointing: bool = field(default=False) + + auto_find_batch_size: bool = field(default=False) # TODO test this + + torch_compile: bool = field(default=False) # TODO make compilation + # torch_compile_backend: str = field(default="inductor") + # torch_compile_mode: str = field( + # default="default" + # ) # default | reduce-overhead | max-autotune + + correct_bias: bool = field(default=True) + weight_decay: float = field(default=0.1) + prune_labels_in_evaluation: bool = field(default=False) + threshold: float = field(default=0.5) + scheduler_type: str = field(default="cosine_hard_restart") + save_steps: int = field(default=500) + eval_steps: int = field(default=None) + max_steps: int = field(default=-1) + no_cuda: bool = field(default=False) + warmup_steps: int = field(default=0) + + def __post_init__(self): + super().__post_init__() + if "fused" in self.optim and not torch.cuda.is_available(): + self.optim = "adamw_torch" diff --git a/grants_tagger_light/training/cli_args/wandb_args.py b/grants_tagger_light/training/cli_args/wandb_args.py new file mode 100644 index 00000000..b004353f --- /dev/null +++ b/grants_tagger_light/training/cli_args/wandb_args.py @@ -0,0 +1,50 @@ +import os +from dataclasses import dataclass, field, fields + + +@dataclass +class WandbArguments: + """ + Wandb arguments for training. Will set according env variables if not set. + Each field is a lowercase version of the env variable name. + For all wandb envs, see: https://docs.wandb.ai/guides/track/environment-variables + """ + + wandb_api_key: str = field( + default=None, + metadata={"help": "Wandb API key"}, + ) + + wandb_project: str = field( + default=None, + metadata={"help": "Wandb project name"}, + ) + + wandb_name: str = field( + default=None, + metadata={"help": "Wandb run name"}, + ) + + wandb_notes: str = field( + default=None, + metadata={"help": "Wandb run notes. Markdown allowed."}, + ) + + wandb_tags: list[str] = field( + default_factory=list, + metadata={"help": "Wandb run tags. 
Comma separated."}, + ) + + def __post_init__(self): + if len(self.wandb_tags) == 0: + self.wandb_tags = None + + # Check if env variables are set, and if not set them to this class' values + for field_ in fields(self): + env_var_name = field_.name.upper() + env_var = os.environ.get(env_var_name) + if not env_var: + if isinstance(getattr(self, field_.name), list): + os.environ[env_var_name] = ",".join(getattr(self, field_.name)) + else: + os.environ[env_var_name] = str(getattr(self, field_.name)) diff --git a/grants_tagger_light/training/dataloaders/__init__.py b/grants_tagger_light/training/dataloaders/__init__.py new file mode 100644 index 00000000..9fef3884 --- /dev/null +++ b/grants_tagger_light/training/dataloaders/__init__.py @@ -0,0 +1,3 @@ +from .multilabel_collator import MultilabelDataCollator + +__all__ = ["MultilabelDataCollator"] diff --git a/grants_tagger_light/training/dataloaders/multilabel_collator.py b/grants_tagger_light/training/dataloaders/multilabel_collator.py new file mode 100644 index 00000000..132d38e6 --- /dev/null +++ b/grants_tagger_light/training/dataloaders/multilabel_collator.py @@ -0,0 +1,49 @@ +from typing import List, Any, Mapping +from sklearn.preprocessing import MultiLabelBinarizer +import numpy as np +import torch + + +class MultilabelDataCollator: + def __init__(self, label2id: dict): + self.mlb = MultiLabelBinarizer(classes=list(label2id.values())) + self.mlb.fit([list(label2id.values())]) + + def __call__(self, features: List[Any]): + """ + Andrei: Inspired from implementation in + https://github.com/huggingface/transformers/blob/v4.30.0/src/transformers/data/data_collator.py#L105 + """ + + if not isinstance(features[0], Mapping): + features = [vars(f) for f in features] + first = features[0] + batch = {} + + # Special handling for labels. + # Ensure that tensor is created with the correct type + # (it should be automatically the case, but let's make sure of it.) + + labels_as_np = np.array( + [self.mlb.transform([f["label_ids"]]) for f in features] + ) + + batch["labels"] = torch.tensor(labels_as_np).squeeze(1) + + # Handling of all other possible keys. + # Again, we will use the first element to figure out which + # key/values are not None for this model. 
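# Editor's note, for intuition: MultiLabelBinarizer maps each sparse list of
# label ids onto a fixed-size 0/1 vector. With classes [0, 1, 2, 3], for
# example, mlb.transform([[0, 2]]) returns array([[1, 0, 1, 0]]); after the
# squeeze above, batch["labels"] therefore has shape (batch_size, num_labels).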
+ for k, v in first.items():
+ if (
+ k not in ("label", "label_ids")
+ and v is not None
+ and not isinstance(v, str)
+ ):
+ if isinstance(v, torch.Tensor):
+ batch[k] = torch.stack([f[k] for f in features])
+ elif isinstance(v, np.ndarray):
+ batch[k] = torch.tensor(np.stack([f[k] for f in features]))
+ else:
+ batch[k] = torch.tensor([f[k] for f in features])
+
+ return batch
diff --git a/grants_tagger_light/training/train.py b/grants_tagger_light/training/train.py
index f88d54c2..680e8fd6 100644
--- a/grants_tagger_light/training/train.py
+++ b/grants_tagger_light/training/train.py
@@ -1,112 +1,187 @@
-from transformers import AutoTokenizer, Trainer, TrainingArguments, EvalPrediction
-from datasets import Dataset
+from transformers import (
+ Trainer,
+ TrainingArguments,
+ EvalPrediction,
+ HfArgumentParser,
+ AutoConfig,
+ AdamW,
+ get_cosine_schedule_with_warmup,
+ get_constant_schedule_with_warmup,
+ get_cosine_with_hard_restarts_schedule_with_warmup,
+ get_linear_schedule_with_warmup,
+)
from grants_tagger_light.models.bert_mesh import BertMesh
-import json
+from grants_tagger_light.preprocessing.preprocess_mesh import preprocess_mesh
+from grants_tagger_light.training.cli_args import (
+ BertMeshTrainingArguments,
+ WandbArguments,
+ BertMeshModelArguments,
+)
+from grants_tagger_light.training.dataloaders import (
+ MultilabelDataCollator,
+)
+from sklearn.metrics import classification_report
+from loguru import logger
+from pprint import pformat
import typer
import numpy as np
-from sklearn.metrics import classification_report
+import os
+import transformers
+import json
+from datasets import load_from_disk
+from grants_tagger_light.utils.sharding import Sharding
+from grants_tagger_light.utils.years_tags_parser import parse_years, parse_tags
-def load_data(
+transformers.set_seed(42)
+
+
+def train_bertmesh(
+ model_key: str,
data_path: str,
- tokenizer: AutoTokenizer,
- label2id: dict,
- test_size: float = 0.1,
- num_proc: int = 8,
+ training_args: TrainingArguments,
+ model_args: BertMeshModelArguments = None,
+ max_samples: int = -1,
+ test_size: float = None,
+ num_proc: int = os.cpu_count(),
+ shards: int = os.cpu_count(),
+ from_checkpoint: str = None,
+ tags: list = None,
+ train_years: list = None,
+ test_years: list = None,
):
- def _datagen(data_path: str):
- """
- Loads the data from the given path. The data should be in jsonl format,
- with each line containing a text and tags field.
- The tags field should be a list of strings.
- """
- with open(data_path, "r") as f:
- for line in f:
- sample = json.loads(line)
- yield sample
-
- def _tokenize(batch):
- return tokenizer(
- batch["text"], padding="max_length", truncation=True, max_length=512
+ if not model_key:
+ assert isinstance(model_args, BertMeshModelArguments), (
+ "If model_key is not provided, "
+ "must provide model_args of type BertMeshModelArguments"
+ ) # noqa
+
+ logger.info(f"Preprocessing the dataset at {data_path}...")
+ if os.path.isdir(data_path):
+ logger.info(
+ "Train/test data found in a folder, which means you preprocessed and "
+ "saved the data before. Loading that split from disk..." 
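# Editor's note: the folder layout assumed here is the one preprocess_mesh
# writes when save_to_path is set (see the preprocessing diff above):
#   <data_path>/dataset/   -> HF dataset written with save_to_disk
#   <data_path>/label2id   -> label-to-id mapping dumped as JSON
#   <data_path>/id2label   -> the reverse mapping, also dumped as JSON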
+ ) + dset = load_from_disk(os.path.join(data_path, "dataset")) + with open(os.path.join(data_path, "label2id"), "r") as f: + label2id = json.load(f) + with open(os.path.join(data_path, "id2label"), "r") as f: + id2label = json.load(f) + else: + logger.info("Preprocessing the data on the fly...") + dset, label2id, id2label = preprocess_mesh( + data_path=data_path, + model_key=model_key, + test_size=test_size, + num_proc=num_proc, + max_samples=max_samples, + batch_size=training_args.per_device_train_batch_size, + tags=tags, + train_years=train_years, + test_years=test_years, ) - def _label_encode(batch): - batch["labels"] = [ - [label2id[tag] for tag in tags if tag in label2id] for tags in batch["tags"] - ] - return batch - - def _one_hot(batch): - batch["labels"] = [ - [1 if i in labels else 0 for i in range(len(label2id))] - for labels in batch["labels"] - ] - return batch - - dset = Dataset.from_generator(_datagen, gen_kwargs={"data_path": data_path}) - dset = dset.map( - _tokenize, batched=True, batch_size=32, num_proc=num_proc, desc="Tokenizing" - ) - - dset = dset.map( - _label_encode, - batched=True, - batch_size=32, - num_proc=num_proc, - desc="Encoding labels", - ) - - dset = dset.map( - _one_hot, - batched=True, - batch_size=32, - num_proc=num_proc, - desc="One-hot labels", - ) - - # Split into train and test - dset = dset.train_test_split(test_size=test_size) - - return dset["train"], dset["test"] + train_dset, val_dset = dset["train"], dset["test"] + + metric_labels = [] + for x in train_dset["label_ids"]: + metric_labels.extend(x) + + train_dset_size = len(train_dset) + logger.info(f"Training dataset size: {train_dset_size}") + if max_samples > 0: + train_dset_size = min(max_samples, train_dset_size) + logger.info(f"Training max samples: {train_dset_size}.") + train_dset.filter( + lambda example, idx: idx < train_dset_size, + with_indices=True, + num_proc=num_proc, + ) + else: + logger.info("Training with all data...") + if shards > 0: + logger.info("Sharding training dataset...") + train_dset = Sharding(num_shards=shards).shard(train_dset) -def train_bertmesh(model_key: str, data_path: str, **user_args): - model = BertMesh.from_pretrained(model_key, trust_remote_code=True) - tokenizer = AutoTokenizer.from_pretrained(model_key) + if not model_key: + logger.info( + f"Model key not found. 
" + f"Training from scratch {model_args.pretrained_model_key}" + ) - label2id = {v: k for k, v in model.id2label.items()} + # Instantiate model from scratch + logger.info(f"Loading `{model_args.pretrained_model_key}` tokenizer...") + config = AutoConfig.from_pretrained(model_args.pretrained_model_key) + + config.update( + { + "pretrained_model": model_args.pretrained_model_key, + "num_labels": len(label2id), + "hidden_size": model_args.hidden_size, + "dropout": model_args.dropout, + "multilabel_attention": model_args.multilabel_attention, + "label2id": label2id, + "id2label": id2label, + "freeze_backbone": model_args.freeze_backbone, + "hidden_dropout_prob": model_args.hidden_dropout_prob, + "attention_probs_dropout_prob": model_args.attention_probs_dropout_prob, + } + ) + logger.info(f"Hidden size: {config.hidden_size}") + logger.info(f"Dropout: {config.dropout}") + logger.info(f"Multilabel Attention: {config.multilabel_attention}") + logger.info(f"Freeze Backbone: {config.freeze_backbone}") + logger.info(f"Num labels: {config.num_labels}") + logger.info(f"hidden_dropout_prob: {config.hidden_dropout_prob}") + logger.info( + f"attention_probs_dropout_prob: {config.attention_probs_dropout_prob}" + ) - train_dset, val_dset = load_data(data_path, tokenizer, label2id=label2id) + model = BertMesh(config) - training_args = { - "output_dir": "model_output", - "overwrite_output_dir": True, - "num_train_epochs": 1, - "per_device_train_batch_size": 4, - "per_device_eval_batch_size": 4, - "warmup_steps": 500, - "weight_decay": 0.01, - "learning_rate": 1e-5, - "evaluation_strategy": "steps", - "eval_steps": 100, - "do_eval": True, - "label_names": ["labels"], - } + else: + logger.info(f"Training from pretrained key {model_key}") + model = BertMesh.from_pretrained(model_key, trust_remote_code=True) - training_args.update(user_args) + if model_args.freeze_backbone is None: + model_args.freeze_backbone = "freeze" - training_args = TrainingArguments(**training_args) + if model_args.freeze_backbone.lower().strip() == "unfreeze": + logger.info("Unfreezing weights&biases in the backbone") + model.unfreeze_backbone() + elif model_args.freeze_backbone.lower().strip() == "unfreeze_bias": + logger.info("Unfreezing only biases in the backbone") + model.unfreeze_backbone(only_bias=True) + elif model_args.freeze_backbone.lower().strip() == "freeze": + logger.info("Freezing backbone") + model.freeze_backbone() def sklearn_metrics(prediction: EvalPrediction): + # This is a batch, so it's an array (rows) of array (labels) + # Array of arrays with probas [[5.4e-5 1.3e-3...] [5.4e-5 1.3e-3...] ... ] y_pred = prediction.predictions + # Transformed to 0-1 if bigger than threshold [[0 1 0...] [0 0 1...] ... ] + y_pred = np.int64(y_pred > training_args.threshold) + + # Array of arrays with 0/1 [[0 0 1 ...] [0 1 0 ...] ... ] y_true = prediction.label_ids - # TODO make thresh configurable or return metrics for multiple thresholds - # e.g. 
0.5:0.95:0.05 + # report = classification_report(y_pred, y_true, output_dict=True) + + if training_args.prune_labels_in_evaluation: + mask = np.zeros(y_pred.shape, dtype=bool) + mask[np.arange(y_pred.shape[0])[:, np.newaxis], metric_labels] = True - y_pred = np.int64(y_pred > 0.5) + filtered_y_pred = y_pred[mask].reshape(y_pred.shape[0], -1) + filtered_y_true = y_true[mask].reshape(y_true.shape[0], -1) + else: + filtered_y_pred = y_pred + filtered_y_true = y_true - report = classification_report(y_true, y_pred, output_dict=True) + report = classification_report( + filtered_y_pred, filtered_y_true, output_dict=True + ) metric_dict = { "micro_avg": report["micro avg"], @@ -117,21 +192,92 @@ def sklearn_metrics(prediction: EvalPrediction): return metric_dict + logger.info("Collating labels...") + collator = MultilabelDataCollator(label2id=label2id) + + if shards > 0: + logger.info("Calculating max steps for IterableDatasets shards...") + max_steps = Sharding.calculate_max_steps(training_args, train_dset_size) + training_args.max_steps = max_steps + + optimizer = AdamW( + model.parameters(), + lr=training_args.learning_rate, + weight_decay=training_args.weight_decay, + correct_bias=training_args.correct_bias + if hasattr(training_args, "correct_bias") + else True, + ) + + if training_args.warmup_steps is None: + training_args.warmup_steps = 0 + + if training_args.scheduler_type.lower().strip() == "cosine": + scheduler = get_cosine_schedule_with_warmup( + optimizer, + num_warmup_steps=training_args.warmup_steps, + num_training_steps=training_args.max_steps, + ) + elif training_args.scheduler_type.lower().strip() == "constant": + scheduler = get_constant_schedule_with_warmup( + optimizer, num_warmup_steps=training_args.warmup_steps + ) + elif training_args.scheduler_type.lower().strip() == "cosine_hard_restart": + scheduler = get_cosine_with_hard_restarts_schedule_with_warmup( + optimizer, + num_warmup_steps=training_args.warmup_steps, + num_training_steps=training_args.max_steps, + num_cycles=training_args.num_train_epochs, + ) + elif training_args.scheduler_type.lower().strip() == "linear": + scheduler = get_linear_schedule_with_warmup( + optimizer, + num_warmup_steps=training_args.warmup_steps, + num_training_steps=training_args.max_steps, + ) + else: + logger.warning( + f"{training_args.scheduler_type}: not found or not valid. 
" + f"Falling back to `linear`" + ) + scheduler = get_linear_schedule_with_warmup( + optimizer, + num_warmup_steps=training_args.warmup_steps, + num_training_steps=training_args.max_steps, + ) + + logger.info(f"Optimizer: {optimizer}") + logger.info(f"Scheduler: {training_args.scheduler_type}") + + training_args.optim = optimizer + training_args.lr_scheduler_type = scheduler + trainer = Trainer( model=model, args=training_args, train_dataset=train_dset, eval_dataset=val_dset, + data_collator=collator, compute_metrics=sklearn_metrics, + optimizers=(optimizer, scheduler), ) + # logger.info(training_args) + if from_checkpoint is None: + logger.info("Training...") + else: + logger.info(f"Resuming training from checkpoint: {from_checkpoint}") trainer.train() - metrics = trainer.evaluate(eval_dataset=val_dset) + logger.info("Saving the model...") + trainer.save_model(os.path.join(training_args.output_dir, "best")) - print(metrics) + logger.info("Evaluating...") + metrics = trainer.evaluate(eval_dataset=val_dset) - trainer.save_model(training_args.output_dir) + logger.info(pformat(metrics)) + with open(os.path.join(training_args.output_dir, "metrics"), "w") as f: + f.write(pformat(metrics)) train_app = typer.Typer() @@ -139,17 +285,73 @@ def sklearn_metrics(prediction: EvalPrediction): @train_app.command() def train_bertmesh_cli( + ctx: typer.Context, model_key: str = typer.Argument( - ..., help="Pretrained model key. Local path or HF location" + ..., help="Pretrained model key. " "Local path or HF location" ), data_path: str = typer.Argument( ..., - help="Path to data in jsonl format. Must contain text and tags field", + help="Path to allMeSH_2021.jsonl (or similar) " + "or to a folder after preprocessing and saving to disk", + ), + test_size: float = typer.Option( + None, help="Fraction of data to use for testing (0,1] or number of rows" + ), + num_proc: int = typer.Option( + os.cpu_count(), help="Number of processes to use for preprocessing" + ), + max_samples: int = typer.Option( + -1, + help="Maximum number of samples to use from the json", + ), + shards: int = typer.Option( + os.cpu_count(), + help="Number os shards to divide training " + "IterativeDataset to (improves performance)", + ), + from_checkpoint: str = typer.Option( + None, help="Name of the checkpoint to resume training" + ), + tags: str = typer.Option( + None, + help="Comma-separated tags you want to include in the dataset " + "(the rest will be discarded)", + ), + train_years: str = typer.Option( + None, help="Comma-separated years you want to include in the training dataset" + ), + test_years: str = typer.Option( + None, help="Comma-separated years you want to include in the test dataset" ), - model_save_path: str = typer.Argument(..., help="Path to save model to"), ): - train_bertmesh(model_key, data_path, model_save_path) - - -if __name__ == "__main__": - train_app() + parser = HfArgumentParser( + ( + BertMeshTrainingArguments, + WandbArguments, + BertMeshModelArguments, + ) + ) + ( + training_args, + wandb_args, + model_args, + ) = parser.parse_args_into_dataclasses(ctx.args) + + logger.info("Model args: {}".format(pformat(model_args))) + logger.info("Training args: {}".format(pformat(training_args))) + logger.info("Wandb args: {}".format(pformat(wandb_args))) + + train_bertmesh( + model_key=model_key, + data_path=data_path, + training_args=training_args, + model_args=model_args, + max_samples=max_samples, + test_size=test_size, + num_proc=num_proc, + shards=shards, + from_checkpoint=from_checkpoint, + 
tags=parse_tags(tags), + train_years=parse_years(train_years), + test_years=parse_years(test_years), + ) diff --git a/grants_tagger_light/utils/sharding.py b/grants_tagger_light/utils/sharding.py new file mode 100644 index 00000000..c7250a3a --- /dev/null +++ b/grants_tagger_light/utils/sharding.py @@ -0,0 +1,73 @@ +from datasets import IterableDataset +from loguru import logger +import math + + +class Sharding: + def __init__(self, num_shards=100): + """ + Sharding to prevent processing time issues, suggested here https://github.com/huggingface/datasets/issues/2252 + Args: + num_shards: num of shards to split the train dataset into. + """ + self.num_shards = num_shards + + @classmethod + def gen_from_shards(cls, _shards): + for shard in _shards: + for example in shard: + yield example + + def shard(self, dataset): + shards = [ + dataset.shard(num_shards=self.num_shards, index=index, contiguous=True) + for index in range(self.num_shards) + ] + + return IterableDataset.from_generator( + self.gen_from_shards, gen_kwargs={"_shards": shards} + ) + + @staticmethod + def calculate_max_steps(training_args, train_dset_size): + """This is needed when using IterableDatasets, + as there is no __len__ in advance since the dataset is a + generator with yield, so it does not know when to end. + Source: https://discuss.huggingface.co/t/streaming-dataset-into-trainer-does-not-implement-len-max-steps-has-to-be-specified/32893/6 + + Example: allMeSH_2021.json has 15.559.157 rows, with 5% for test + > 15559157-0.05*15559157 = 14781199.15 training rows + let's suppose batch size is 8 + > 14781199.15 / 8 = 1847649.89375 + if accumulation_steps is 1, then + > 1847649.89375 / 1 = 1847649.89375 + """ # noqa + + # From https://huggingface.co/transformers/v4.2.2/_modules/transformers/trainer_tf.html + train_batch_size = training_args.per_device_train_batch_size + logger.info(f"train_dset_size: {train_dset_size}") + logger.info(f"train_batch_size: {train_batch_size}") + + num_update_steps_per_epoch = train_dset_size / train_batch_size + num_update_steps_per_epoch = math.ceil(num_update_steps_per_epoch) + logger.info(f"num_update_steps_per_epoch: {num_update_steps_per_epoch}") + + accumulation_steps = max(training_args.gradient_accumulation_steps, 1) + logger.info(f"accumulation steps: {accumulation_steps}") + + num_update_steps_per_epoch /= accumulation_steps + num_update_steps_per_epoch = max(num_update_steps_per_epoch, 1) + logger.info(f"num_update_steps_per_epoch: {num_update_steps_per_epoch}") + + epochs = max(training_args.num_train_epochs, 1) + logger.info(f"epochs: {epochs}") + + gpus = max(training_args._n_gpu, 1) + logger.info(f"gpus using HF: {gpus}") + + total_steps = num_update_steps_per_epoch * epochs + logger.info(f"total_steps: {total_steps}") + + total_steps_per_gpu = math.ceil(total_steps / gpus) + logger.info(f"total_steps_per_gpu: {total_steps_per_gpu}") + return total_steps_per_gpu diff --git a/grants_tagger_light/utils/utils.py b/grants_tagger_light/utils/utils.py index 05aa36ae..8264ffe2 100644 --- a/grants_tagger_light/utils/utils.py +++ b/grants_tagger_light/utils/utils.py @@ -155,7 +155,7 @@ def convert_dvc_to_sklearn_params(parameters): return {} # indication of sklearn pipeline - has_nested_params = any([v for v in parameters.values() if type(v) is dict]) + has_nested_params = any([v for v in parameters.values() if isinstance(v, dict)]) if has_nested_params: return { f"{pipeline_name}__{param_name}": param_value diff --git a/grants_tagger_light/utils/years_tags_parser.py 
b/grants_tagger_light/utils/years_tags_parser.py new file mode 100644 index 00000000..12c65894 --- /dev/null +++ b/grants_tagger_light/utils/years_tags_parser.py @@ -0,0 +1,19 @@ +from loguru import logger + + +def parse_tags(tags): + tags_list = [] + if tags is not None: + filter_tags_list = list(filter(lambda x: x.strip() != "", tags.split(","))) + tags_list = [str(y) for y in filter_tags_list] + logger.info(f"Tags to be considered: {tags_list}") + return tags_list + + +def parse_years(years): + years_list = [] + if years is not None: + filter_years_list = list(filter(lambda x: x.strip() != "", years.split(","))) + years_list = [str(y) for y in filter_years_list] + logger.info(f"Years to be considered: {years_list}") + return years_list diff --git a/models/.gitignore b/models/.gitignore index ff69c5ca..78b3fe46 100644 --- a/models/.gitignore +++ b/models/.gitignore @@ -1 +1,3 @@ /xlinear-0.2.5 +bertmesh/model/epoch-3 +/WellcomeBertMesh-fromhub-11-07-2023 diff --git a/pipelines/bertmesh/dvc.lock b/pipelines/bertmesh/dvc.lock new file mode 100644 index 00000000..85a33960 --- /dev/null +++ b/pipelines/bertmesh/dvc.lock @@ -0,0 +1,19 @@ +schema: '2.0' +stages: + train: + cmd: python ../../grants_tagger_light/training/train.py --model_key "" --data_path + ../../data/raw/allMeSH_2021.json --output_dir ../../bertmesh_outs/pipeline_test + --max_samples 10 --per_device_train_batch_size 4 --per_device_eval_batch_size + 4 + deps: + - path: ../../data/raw/allMeSH_2021.json + md5: e827a6b8062d1312664dcf075c12d89f + size: 27547042745 + params: + ../../grants_tagger_light/training/cli_args/train_args.py: + BertMeshTrainingArguments: {} + outs: + - path: ../../bertmesh_outs/pipeline_test/best + md5: b96ff54ecd600460dbb18b3e82d8b517.dir + size: 439905869 + nfiles: 3 diff --git a/pipelines/bertmesh/dvc.yaml b/pipelines/bertmesh/dvc.yaml index ded61706..f302c669 100644 --- a/pipelines/bertmesh/dvc.yaml +++ b/pipelines/bertmesh/dvc.yaml @@ -1,68 +1,22 @@ vars: - - data: "../../data/" - - models: "../../models/" - - results: "../../results" - - experiment_name: "transformers-bertmesh" - - scripts_folder: "../../grants_tagger/bertmesh" + - data_path: "../../data/raw/allMeSH_2021.json" + - script_loc: "../../grants_tagger_light/training" + - output_dir: "../../bertmesh_outs/pipeline_test" stages: - prepare_data: - cmd: | - python ${scripts_folder}/prepare_data.py ${data}/processed/train_mesh2021.jsonl ${data}/processed/bertmesh/X.npy ${data}/processed/bertmesh/Y.npz ${models}/bertmesh/label_binarizer.pkl --years ${prepare_data.years} --pretrained-model ${train.pretrained_model} - python ${scripts_folder}/prepare_data.py ${data}/processed/train_mesh2021.jsonl ${data}/processed/bertmesh/X_test.npy ${data}/processed/bertmesh/Y_test.npz ${models}/bertmesh/label_binarizer.pkl --years ${prepare_data.test_years} --pretrained-model ${train.pretrained_model} - deps: - - ${scripts_folder}/prepare_data.py - params: - - prepare_data.years - - train.pretrained_model - outs: - - ${data}/processed/bertmesh/X.npy - - ${data}/processed/bertmesh/Y.npz - - ${data}/processed/bertmesh/X_test.npy - - ${data}/processed/bertmesh/Y_test.npz - - ${models}/bertmesh/label_binarizer.pkl train: - cmd: python ${scripts_folder}/train_torch.py ${data}/processed/bertmesh/X.npy ${data}/processed/bertmesh/Y.npz ${models}/bertmesh/model/ ${models}/bertmesh/label_binarizer.pkl - --train-info ${results}/bertmesh_train_info.json - --learning-rate ${train.learning_rate} --batch-size ${train.batch_size} --epochs ${train.epochs} - --pretrained-model 
${train.pretrained_model} --multilabel-attention --hidden-size ${train.hidden_size} - --clip-norm ${train.clip_norm} --dropout ${train.dropout} --train-metrics-path train_metrics.json - --warmup-steps ${train.warmup_steps} --val-x-path ${data}/processed/bertmesh/X_test.npy - --val-y-path ${data}/processed/bertmesh/Y_test.npz --experiment-name ${experiment_name} + cmd: >- + python ${script_loc}/train.py + --model_key "" + --data_path ${data_path} + --output_dir ${output_dir} + --max_samples 10 + --per_device_train_batch_size 4 + --per_device_eval_batch_size 4 deps: - - ${scripts_folder}/train_torch.py - - ${scripts_folder}/model.py - - ${data}/processed/bertmesh/X.npy - - ${data}/processed/bertmesh/Y.npz - - ${models}/bertmesh/label_binarizer.pkl + - ${data_path} params: - - train.learning_rate - - train.epochs - - train.batch_size - - train.pretrained_model - - train.hidden_size - - train.clip_norm - - train.dropout - - train.warmup_steps + - ../../grants_tagger_light/training/cli_args/train_args.py: + - BertMeshTrainingArguments outs: - - ${models}/bertmesh/model/pytorch_model.bin - - ${models}/bertmesh/model/config.json - plots: - - train_metrics.json: - cache: false - evaluate: - cmd: python ${scripts_folder}/evaluate.py ${data}/processed/bertmesh/X_test.npy ${data}/processed/bertmesh/Y_test.npz ${models}/bertmesh/model/ --batch-size ${evaluate.batch_size} --results-path results.json --pr-curve-path pr_curve.json --experiment-name ${experiment_name} - deps: - - ${scripts_folder}/evaluate.py - - ${data}/processed/bertmesh/X_test.npy - - ${data}/processed/bertmesh/Y_test.npz - - ${models}/bertmesh/model/pytorch_model.bin - - ${models}/bertmesh/model/config.json - params: - - train.batch_size - metrics: - - results.json: - cache: false - plots: - - pr_curve.json: - cache: false + - ${output_dir}/best diff --git a/pipelines/bertmesh/params.yaml b/pipelines/bertmesh/params.yaml deleted file mode 100644 index a63dc724..00000000 --- a/pipelines/bertmesh/params.yaml +++ /dev/null @@ -1,16 +0,0 @@ -prepare_data: - years: 2016,2019 - test_years: 2020,2021 - -train: - learning_rate: 5e-5 - batch_size: 64 # with nn.DataParallel divide by 8 - epochs: 5 - pretrained_model: microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract - hidden_size: 1024 - clip_norm: 5 - dropout: 0.1 - warmup_steps: 1000 - -evaluate: - batch_size: 8 diff --git a/pipelines/generate_grants/.gitignore b/pipelines/generate_grants/.gitignore new file mode 100644 index 00000000..df930645 --- /dev/null +++ b/pipelines/generate_grants/.gitignore @@ -0,0 +1 @@ +/grants_sample.jsonl diff --git a/pipelines/generate_grants/dvc.lock b/pipelines/generate_grants/dvc.lock new file mode 100644 index 00000000..8c6a5d0c --- /dev/null +++ b/pipelines/generate_grants/dvc.lock @@ -0,0 +1,18 @@ +schema: '2.0' +stages: + generate: + cmd: python scripts/create_xlinear_bertmesh_comparison_csv.py --s3-url s3://datalabs-data/dimensions/grants/grants + --num-parquet-files-to-consider 10 --num-samples-per-cat 10 --mesh-metadata-path + data/raw/desc2021.xml --mesh-terms-list-path data/grants_comparison/meshterms_list.txt + --active-portfolio-path data/raw/active_grants_last_5_years.csv --bertmesh-path + Wellcome/WellcomeBertMesh --bertmesh-thresh 0.5 --pre-annotate-bertmesh --xlinear-path + models/xlinear-0.2.5/model --xlinear-label-binarizer-path models/xlinear-0.2.5/label_binarizer.pkl + --xlinear-thresh 0.2 --pre-annotate-xlinear --output-path data/grants_comparison/comparison.csv + deps: + - path: scripts/create_xlinear_bertmesh_comparison_csv.py + md5: 
0a91bf23be4068bdc7c4b7a32d80ff2d + size: 8214 + outs: + - path: data/grants_comparison/comparison.csv + md5: bc4fd9f4a670409dad07ffd03cf421f1 + size: 596654 diff --git a/pipelines/generate_grants/dvc.yaml b/pipelines/generate_grants/dvc.yaml index 8718757d..0a982435 100644 --- a/pipelines/generate_grants/dvc.yaml +++ b/pipelines/generate_grants/dvc.yaml @@ -1,13 +1,25 @@ vars: - s3-url: "s3://datalabs-data/dimensions/grants/grants" - - scripts_location: "../../scripts" - - argilla_project_name: "grants" stages: generate: - cmd: python ${scripts_location}/create_grants_sample.py --s3-url ${s3-url} --num-parquet-files-to-consider 10 --num-samples-per-cat 10 --pre-annotate True - outs: - - grants_sample.jsonl - upload: - cmd: python ${scripts_location}/upload_grants_data_to_argilla.py --path grants_sample.jsonl --project ${argilla_project_name} + cmd: >- + python scripts/create_xlinear_bertmesh_comparison_csv.py + --s3-url ${s3-url} + --num-parquet-files-to-consider 10 + --num-samples-per-cat 10 + --mesh-metadata-path data/raw/desc2021.xml + --mesh-terms-list-path data/grants_comparison/meshterms_list.txt + --active-portfolio-path data/raw/active_grants_last_5_years.csv + --bertmesh-path Wellcome/WellcomeBertMesh + --bertmesh-thresh 0.5 + --pre-annotate-bertmesh + --xlinear-path models/xlinear-0.2.5/model + --xlinear-label-binarizer-path models/xlinear-0.2.5/label_binarizer.pkl + --xlinear-thresh 0.2 + --pre-annotate-xlinear + --output-path data/grants_comparison/comparison.csv deps: - - grants_sample.jsonl + - scripts/create_xlinear_bertmesh_comparison_csv.py + wdir: "../.." + outs: + - data/grants_comparison/comparison.csv diff --git a/poetry.lock b/poetry.lock index c464b22f..a98613d6 100644 --- a/poetry.lock +++ b/poetry.lock @@ -30,25 +30,25 @@ testing = ["datasets", "deepspeed", "evaluate", "parameterized", "pytest", "pyte [[package]] name = "aiobotocore" -version = "2.5.0" +version = "2.5.2" description = "Async client for aws services using botocore and aiohttp" optional = false python-versions = ">=3.7" files = [ - {file = "aiobotocore-2.5.0-py3-none-any.whl", hash = "sha256:9a2a022d7b78ec9a2af0de589916d2721cddbf96264401b78d7a73c1a1435f3b"}, - {file = "aiobotocore-2.5.0.tar.gz", hash = "sha256:6a5b397cddd4f81026aa91a14c7dd2650727425740a5af8ba75127ff663faf67"}, + {file = "aiobotocore-2.5.2-py3-none-any.whl", hash = "sha256:337429ffd3cc367532572d40be809a84c7b5335f3f8eca2f23e09dfaa9a9ef90"}, + {file = "aiobotocore-2.5.2.tar.gz", hash = "sha256:e7399f21570db1c287f1c0c814dd3475dfe1c8166722e2c77ce67f172cbcfa89"}, ] [package.dependencies] -aiohttp = ">=3.3.1" -aioitertools = ">=0.5.1" -boto3 = {version = ">=1.26.76,<1.26.77", optional = true, markers = "extra == \"boto3\""} -botocore = ">=1.29.76,<1.29.77" -wrapt = ">=1.10.10" +aiohttp = ">=3.3.1,<4.0.0" +aioitertools = ">=0.5.1,<1.0.0" +boto3 = {version = ">=1.26.161,<1.26.162", optional = true, markers = "extra == \"boto3\""} +botocore = ">=1.29.161,<1.29.162" +wrapt = ">=1.10.10,<2.0.0" [package.extras] -awscli = ["awscli (>=1.27.76,<1.27.77)"] -boto3 = ["boto3 (>=1.26.76,<1.26.77)"] +awscli = ["awscli (>=1.27.161,<1.27.162)"] +boto3 = ["boto3 (>=1.26.161,<1.26.162)"] [[package]] name = "aiohttp" @@ -183,6 +183,20 @@ files = [ {file = "aioitertools-0.11.0.tar.gz", hash = "sha256:42c68b8dd3a69c2bf7f2233bf7df4bb58b557bca5252ac02ed5187bbc67d6831"}, ] +[[package]] +name = "aioprocessing" +version = "2.0.1" +description = "A Python 3.5+ library that integrates the multiprocessing module with asyncio." 
+optional = false +python-versions = ">=3.5" +files = [ + {file = "aioprocessing-2.0.1-py3-none-any.whl", hash = "sha256:8fcac4b0108b72eb9df76e06a9d7e05720ee1e8330829d3fd53fa059879be586"}, + {file = "aioprocessing-2.0.1.tar.gz", hash = "sha256:fe01c7b1a38c78168611d3040e73d93036c3b7c8a649d636dc9ed7a3bc9b1ba2"}, +] + +[package.extras] +dill = ["multiprocess"] + [[package]] name = "aiosignal" version = "1.3.1" @@ -223,13 +237,13 @@ files = [ [[package]] name = "anyio" -version = "3.7.0" +version = "3.7.1" description = "High level compatibility layer for multiple asynchronous event loop implementations" optional = false python-versions = ">=3.7" files = [ - {file = "anyio-3.7.0-py3-none-any.whl", hash = "sha256:eddca883c4175f14df8aedce21054bfca3adb70ffe76a9f607aef9d7fa2ea7f0"}, - {file = "anyio-3.7.0.tar.gz", hash = "sha256:275d9973793619a5374e1c89a4f4ad3f4b0a5510a2b5b939444bee8f4c4d37ce"}, + {file = "anyio-3.7.1-py3-none-any.whl", hash = "sha256:91dee416e570e92c64041bd18b900d1d6fa78dff7048769ce5ac5ddad004fbb5"}, + {file = "anyio-3.7.1.tar.gz", hash = "sha256:44a3c9aba0f5defa43261a8b3efb97891f2bd7d804e0e1f56419befa1adfc780"}, ] [package.dependencies] @@ -238,7 +252,7 @@ idna = ">=2.8" sniffio = ">=1.1" [package.extras] -doc = ["Sphinx (>=6.1.0)", "packaging", "sphinx-autodoc-typehints (>=1.2.0)", "sphinx-rtd-theme", "sphinxcontrib-jquery"] +doc = ["Sphinx", "packaging", "sphinx-autodoc-typehints (>=1.2.0)", "sphinx-rtd-theme (>=1.2.2)", "sphinxcontrib-jquery"] test = ["anyio[trio]", "coverage[toml] (>=4.5)", "hypothesis (>=4.0)", "mock (>=4)", "psutil (>=5.9)", "pytest (>=7.0)", "pytest-mock (>=3.6.1)", "trustme", "uvloop (>=0.17)"] trio = ["trio (<0.22)"] @@ -297,13 +311,13 @@ files = [ [[package]] name = "asyncssh" -version = "2.13.1" +version = "2.13.2" description = "AsyncSSH: Asynchronous SSHv2 client and server library" optional = false python-versions = ">= 3.6" files = [ - {file = "asyncssh-2.13.1-py3-none-any.whl", hash = "sha256:c90eb5e2b4f9a7cc6e6af01fd844563d722c0d667f8c5f51fe5b3c2a79fa0575"}, - {file = "asyncssh-2.13.1.tar.gz", hash = "sha256:ebbb83c05c0b45cf230de1ef2f06059e360f9afa5c3ddf60fc92faf7b94ff887"}, + {file = "asyncssh-2.13.2-py3-none-any.whl", hash = "sha256:c7dfe9085c0659acb2ef0d177fb12421e92a20d52b98ab83eed4a5916a1d60cc"}, + {file = "asyncssh-2.13.2.tar.gz", hash = "sha256:991e531c4bb7dbec62b754878d96a3246338aac11a28ce3c3e99018fb2f5828c"}, ] [package.dependencies] @@ -408,36 +422,33 @@ files = [ [[package]] name = "black" -version = "23.3.0" +version = "23.7.0" description = "The uncompromising code formatter." 
optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "black-23.3.0-cp310-cp310-macosx_10_16_arm64.whl", hash = "sha256:0945e13506be58bf7db93ee5853243eb368ace1c08a24c65ce108986eac65915"}, - {file = "black-23.3.0-cp310-cp310-macosx_10_16_universal2.whl", hash = "sha256:67de8d0c209eb5b330cce2469503de11bca4085880d62f1628bd9972cc3366b9"}, - {file = "black-23.3.0-cp310-cp310-macosx_10_16_x86_64.whl", hash = "sha256:7c3eb7cea23904399866c55826b31c1f55bbcd3890ce22ff70466b907b6775c2"}, - {file = "black-23.3.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:32daa9783106c28815d05b724238e30718f34155653d4d6e125dc7daec8e260c"}, - {file = "black-23.3.0-cp310-cp310-win_amd64.whl", hash = "sha256:35d1381d7a22cc5b2be2f72c7dfdae4072a3336060635718cc7e1ede24221d6c"}, - {file = "black-23.3.0-cp311-cp311-macosx_10_16_arm64.whl", hash = "sha256:a8a968125d0a6a404842fa1bf0b349a568634f856aa08ffaff40ae0dfa52e7c6"}, - {file = "black-23.3.0-cp311-cp311-macosx_10_16_universal2.whl", hash = "sha256:c7ab5790333c448903c4b721b59c0d80b11fe5e9803d8703e84dcb8da56fec1b"}, - {file = "black-23.3.0-cp311-cp311-macosx_10_16_x86_64.whl", hash = "sha256:a6f6886c9869d4daae2d1715ce34a19bbc4b95006d20ed785ca00fa03cba312d"}, - {file = "black-23.3.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6f3c333ea1dd6771b2d3777482429864f8e258899f6ff05826c3a4fcc5ce3f70"}, - {file = "black-23.3.0-cp311-cp311-win_amd64.whl", hash = "sha256:11c410f71b876f961d1de77b9699ad19f939094c3a677323f43d7a29855fe326"}, - {file = "black-23.3.0-cp37-cp37m-macosx_10_16_x86_64.whl", hash = "sha256:1d06691f1eb8de91cd1b322f21e3bfc9efe0c7ca1f0e1eb1db44ea367dff656b"}, - {file = "black-23.3.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:50cb33cac881766a5cd9913e10ff75b1e8eb71babf4c7104f2e9c52da1fb7de2"}, - {file = "black-23.3.0-cp37-cp37m-win_amd64.whl", hash = "sha256:e114420bf26b90d4b9daa597351337762b63039752bdf72bf361364c1aa05925"}, - {file = "black-23.3.0-cp38-cp38-macosx_10_16_arm64.whl", hash = "sha256:48f9d345675bb7fbc3dd85821b12487e1b9a75242028adad0333ce36ed2a6d27"}, - {file = "black-23.3.0-cp38-cp38-macosx_10_16_universal2.whl", hash = "sha256:714290490c18fb0126baa0fca0a54ee795f7502b44177e1ce7624ba1c00f2331"}, - {file = "black-23.3.0-cp38-cp38-macosx_10_16_x86_64.whl", hash = "sha256:064101748afa12ad2291c2b91c960be28b817c0c7eaa35bec09cc63aa56493c5"}, - {file = "black-23.3.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:562bd3a70495facf56814293149e51aa1be9931567474993c7942ff7d3533961"}, - {file = "black-23.3.0-cp38-cp38-win_amd64.whl", hash = "sha256:e198cf27888ad6f4ff331ca1c48ffc038848ea9f031a3b40ba36aced7e22f2c8"}, - {file = "black-23.3.0-cp39-cp39-macosx_10_16_arm64.whl", hash = "sha256:3238f2aacf827d18d26db07524e44741233ae09a584273aa059066d644ca7b30"}, - {file = "black-23.3.0-cp39-cp39-macosx_10_16_universal2.whl", hash = "sha256:f0bd2f4a58d6666500542b26354978218a9babcdc972722f4bf90779524515f3"}, - {file = "black-23.3.0-cp39-cp39-macosx_10_16_x86_64.whl", hash = "sha256:92c543f6854c28a3c7f39f4d9b7694f9a6eb9d3c5e2ece488c327b6e7ea9b266"}, - {file = "black-23.3.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3a150542a204124ed00683f0db1f5cf1c2aaaa9cc3495b7a3b5976fb136090ab"}, - {file = "black-23.3.0-cp39-cp39-win_amd64.whl", hash = "sha256:6b39abdfb402002b8a7d030ccc85cf5afff64ee90fa4c5aebc531e3ad0175ddb"}, - {file = "black-23.3.0-py3-none-any.whl", hash = 
"sha256:ec751418022185b0c1bb7d7736e6933d40bbb14c14a0abcf9123d1b159f98dd4"}, - {file = "black-23.3.0.tar.gz", hash = "sha256:1c7b8d606e728a41ea1ccbd7264677e494e87cf630e399262ced92d4a8dac940"}, + {file = "black-23.7.0-cp310-cp310-macosx_10_16_arm64.whl", hash = "sha256:5c4bc552ab52f6c1c506ccae05681fab58c3f72d59ae6e6639e8885e94fe2587"}, + {file = "black-23.7.0-cp310-cp310-macosx_10_16_universal2.whl", hash = "sha256:552513d5cd5694590d7ef6f46e1767a4df9af168d449ff767b13b084c020e63f"}, + {file = "black-23.7.0-cp310-cp310-macosx_10_16_x86_64.whl", hash = "sha256:86cee259349b4448adb4ef9b204bb4467aae74a386bce85d56ba4f5dc0da27be"}, + {file = "black-23.7.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:501387a9edcb75d7ae8a4412bb8749900386eaef258f1aefab18adddea1936bc"}, + {file = "black-23.7.0-cp310-cp310-win_amd64.whl", hash = "sha256:fb074d8b213749fa1d077d630db0d5f8cc3b2ae63587ad4116e8a436e9bbe995"}, + {file = "black-23.7.0-cp311-cp311-macosx_10_16_arm64.whl", hash = "sha256:b5b0ee6d96b345a8b420100b7d71ebfdd19fab5e8301aff48ec270042cd40ac2"}, + {file = "black-23.7.0-cp311-cp311-macosx_10_16_universal2.whl", hash = "sha256:893695a76b140881531062d48476ebe4a48f5d1e9388177e175d76234ca247cd"}, + {file = "black-23.7.0-cp311-cp311-macosx_10_16_x86_64.whl", hash = "sha256:c333286dc3ddca6fdff74670b911cccedacb4ef0a60b34e491b8a67c833b343a"}, + {file = "black-23.7.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:831d8f54c3a8c8cf55f64d0422ee875eecac26f5f649fb6c1df65316b67c8926"}, + {file = "black-23.7.0-cp311-cp311-win_amd64.whl", hash = "sha256:7f3bf2dec7d541b4619b8ce526bda74a6b0bffc480a163fed32eb8b3c9aed8ad"}, + {file = "black-23.7.0-cp38-cp38-macosx_10_16_arm64.whl", hash = "sha256:f9062af71c59c004cd519e2fb8f5d25d39e46d3af011b41ab43b9c74e27e236f"}, + {file = "black-23.7.0-cp38-cp38-macosx_10_16_universal2.whl", hash = "sha256:01ede61aac8c154b55f35301fac3e730baf0c9cf8120f65a9cd61a81cfb4a0c3"}, + {file = "black-23.7.0-cp38-cp38-macosx_10_16_x86_64.whl", hash = "sha256:327a8c2550ddc573b51e2c352adb88143464bb9d92c10416feb86b0f5aee5ff6"}, + {file = "black-23.7.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6d1c6022b86f83b632d06f2b02774134def5d4d4f1dac8bef16d90cda18ba28a"}, + {file = "black-23.7.0-cp38-cp38-win_amd64.whl", hash = "sha256:27eb7a0c71604d5de083757fbdb245b1a4fae60e9596514c6ec497eb63f95320"}, + {file = "black-23.7.0-cp39-cp39-macosx_10_16_arm64.whl", hash = "sha256:8417dbd2f57b5701492cd46edcecc4f9208dc75529bcf76c514864e48da867d9"}, + {file = "black-23.7.0-cp39-cp39-macosx_10_16_universal2.whl", hash = "sha256:47e56d83aad53ca140da0af87678fb38e44fd6bc0af71eebab2d1f59b1acf1d3"}, + {file = "black-23.7.0-cp39-cp39-macosx_10_16_x86_64.whl", hash = "sha256:25cc308838fe71f7065df53aedd20327969d05671bac95b38fdf37ebe70ac087"}, + {file = "black-23.7.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:642496b675095d423f9b8448243336f8ec71c9d4d57ec17bf795b67f08132a91"}, + {file = "black-23.7.0-cp39-cp39-win_amd64.whl", hash = "sha256:ad0014efc7acf0bd745792bd0d8857413652979200ab924fbf239062adc12491"}, + {file = "black-23.7.0-py3-none-any.whl", hash = "sha256:9fd59d418c60c0348505f2ddf9609c1e1de8e7493eab96198fc89d9f865e7a96"}, + {file = "black-23.7.0.tar.gz", hash = "sha256:022a582720b0d9480ed82576c920a8c1dde97cc38ff11d8d8859b3bd6ca9eedb"}, ] [package.dependencies] @@ -456,17 +467,17 @@ uvloop = ["uvloop (>=0.15.2)"] [[package]] name = "boto3" -version = "1.26.76" +version = "1.26.161" description = "The AWS SDK 
for Python" optional = false python-versions = ">= 3.7" files = [ - {file = "boto3-1.26.76-py3-none-any.whl", hash = "sha256:b4c2969b7677762914394b8273cc1905dfe5b71f250741c1a575487ae357e729"}, - {file = "boto3-1.26.76.tar.gz", hash = "sha256:30c7d967ed1c6b5a05643e42cae9d4d36c3f1cb6782637ddc7007a104cfd9027"}, + {file = "boto3-1.26.161-py3-none-any.whl", hash = "sha256:f66e5c9dbe7f34383bcf64fa6070771355c11a44dd75c7f1279f2f37e1c89183"}, + {file = "boto3-1.26.161.tar.gz", hash = "sha256:662731e464d14af1035f44fc6a46b0e3112ee011ac0a5ed416d205daa3e15f25"}, ] [package.dependencies] -botocore = ">=1.29.76,<1.30.0" +botocore = ">=1.29.161,<1.30.0" jmespath = ">=0.7.1,<2.0.0" s3transfer = ">=0.6.0,<0.7.0" @@ -475,13 +486,13 @@ crt = ["botocore[crt] (>=1.21.0,<2.0a0)"] [[package]] name = "botocore" -version = "1.29.76" +version = "1.29.161" description = "Low-level, data-driven core of boto 3." optional = false python-versions = ">= 3.7" files = [ - {file = "botocore-1.29.76-py3-none-any.whl", hash = "sha256:70735b00cd529f152992231ca6757e458e5ec25db43767b3526e9a35b2f143b7"}, - {file = "botocore-1.29.76.tar.gz", hash = "sha256:c2f67b6b3f8acf2968eafca06526f07b9fb0d27bac4c68a635d51abb675134a7"}, + {file = "botocore-1.29.161-py3-none-any.whl", hash = "sha256:b906999dd53dda2ef0ef6f7f55fcc81a4b06b9f1c8a9f65c546e0b981f959f5f"}, + {file = "botocore-1.29.161.tar.gz", hash = "sha256:a50edd715eb510343e27849f36483804aae4b871590db4d4996aa53368dcac40"}, ] [package.dependencies] @@ -647,97 +658,97 @@ files = [ [[package]] name = "charset-normalizer" -version = "3.1.0" +version = "3.2.0" description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." optional = false python-versions = ">=3.7.0" files = [ - {file = "charset-normalizer-3.1.0.tar.gz", hash = "sha256:34e0a2f9c370eb95597aae63bf85eb5e96826d81e3dcf88b8886012906f509b5"}, - {file = "charset_normalizer-3.1.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:e0ac8959c929593fee38da1c2b64ee9778733cdf03c482c9ff1d508b6b593b2b"}, - {file = "charset_normalizer-3.1.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d7fc3fca01da18fbabe4625d64bb612b533533ed10045a2ac3dd194bfa656b60"}, - {file = "charset_normalizer-3.1.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:04eefcee095f58eaabe6dc3cc2262f3bcd776d2c67005880894f447b3f2cb9c1"}, - {file = "charset_normalizer-3.1.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:20064ead0717cf9a73a6d1e779b23d149b53daf971169289ed2ed43a71e8d3b0"}, - {file = "charset_normalizer-3.1.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1435ae15108b1cb6fffbcea2af3d468683b7afed0169ad718451f8db5d1aff6f"}, - {file = "charset_normalizer-3.1.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c84132a54c750fda57729d1e2599bb598f5fa0344085dbde5003ba429a4798c0"}, - {file = "charset_normalizer-3.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:75f2568b4189dda1c567339b48cba4ac7384accb9c2a7ed655cd86b04055c795"}, - {file = "charset_normalizer-3.1.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:11d3bcb7be35e7b1bba2c23beedac81ee893ac9871d0ba79effc7fc01167db6c"}, - {file = "charset_normalizer-3.1.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:891cf9b48776b5c61c700b55a598621fdb7b1e301a550365571e9624f270c203"}, - {file = "charset_normalizer-3.1.0-cp310-cp310-musllinux_1_1_i686.whl", hash = 
"sha256:5f008525e02908b20e04707a4f704cd286d94718f48bb33edddc7d7b584dddc1"}, - {file = "charset_normalizer-3.1.0-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:b06f0d3bf045158d2fb8837c5785fe9ff9b8c93358be64461a1089f5da983137"}, - {file = "charset_normalizer-3.1.0-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:49919f8400b5e49e961f320c735388ee686a62327e773fa5b3ce6721f7e785ce"}, - {file = "charset_normalizer-3.1.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:22908891a380d50738e1f978667536f6c6b526a2064156203d418f4856d6e86a"}, - {file = "charset_normalizer-3.1.0-cp310-cp310-win32.whl", hash = "sha256:12d1a39aa6b8c6f6248bb54550efcc1c38ce0d8096a146638fd4738e42284448"}, - {file = "charset_normalizer-3.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:65ed923f84a6844de5fd29726b888e58c62820e0769b76565480e1fdc3d062f8"}, - {file = "charset_normalizer-3.1.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:9a3267620866c9d17b959a84dd0bd2d45719b817245e49371ead79ed4f710d19"}, - {file = "charset_normalizer-3.1.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6734e606355834f13445b6adc38b53c0fd45f1a56a9ba06c2058f86893ae8017"}, - {file = "charset_normalizer-3.1.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f8303414c7b03f794347ad062c0516cee0e15f7a612abd0ce1e25caf6ceb47df"}, - {file = "charset_normalizer-3.1.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:aaf53a6cebad0eae578f062c7d462155eada9c172bd8c4d250b8c1d8eb7f916a"}, - {file = "charset_normalizer-3.1.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3dc5b6a8ecfdc5748a7e429782598e4f17ef378e3e272eeb1340ea57c9109f41"}, - {file = "charset_normalizer-3.1.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e1b25e3ad6c909f398df8921780d6a3d120d8c09466720226fc621605b6f92b1"}, - {file = "charset_normalizer-3.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0ca564606d2caafb0abe6d1b5311c2649e8071eb241b2d64e75a0d0065107e62"}, - {file = "charset_normalizer-3.1.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b82fab78e0b1329e183a65260581de4375f619167478dddab510c6c6fb04d9b6"}, - {file = "charset_normalizer-3.1.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:bd7163182133c0c7701b25e604cf1611c0d87712e56e88e7ee5d72deab3e76b5"}, - {file = "charset_normalizer-3.1.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:11d117e6c63e8f495412d37e7dc2e2fff09c34b2d09dbe2bee3c6229577818be"}, - {file = "charset_normalizer-3.1.0-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:cf6511efa4801b9b38dc5546d7547d5b5c6ef4b081c60b23e4d941d0eba9cbeb"}, - {file = "charset_normalizer-3.1.0-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:abc1185d79f47c0a7aaf7e2412a0eb2c03b724581139193d2d82b3ad8cbb00ac"}, - {file = "charset_normalizer-3.1.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:cb7b2ab0188829593b9de646545175547a70d9a6e2b63bf2cd87a0a391599324"}, - {file = "charset_normalizer-3.1.0-cp311-cp311-win32.whl", hash = "sha256:c36bcbc0d5174a80d6cccf43a0ecaca44e81d25be4b7f90f0ed7bcfbb5a00909"}, - {file = "charset_normalizer-3.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:cca4def576f47a09a943666b8f829606bcb17e2bc2d5911a46c8f8da45f56755"}, - {file = "charset_normalizer-3.1.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:0c95f12b74681e9ae127728f7e5409cbbef9cd914d5896ef238cc779b8152373"}, - {file = 
"charset_normalizer-3.1.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fca62a8301b605b954ad2e9c3666f9d97f63872aa4efcae5492baca2056b74ab"}, - {file = "charset_normalizer-3.1.0-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ac0aa6cd53ab9a31d397f8303f92c42f534693528fafbdb997c82bae6e477ad9"}, - {file = "charset_normalizer-3.1.0-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c3af8e0f07399d3176b179f2e2634c3ce9c1301379a6b8c9c9aeecd481da494f"}, - {file = "charset_normalizer-3.1.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3a5fc78f9e3f501a1614a98f7c54d3969f3ad9bba8ba3d9b438c3bc5d047dd28"}, - {file = "charset_normalizer-3.1.0-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:628c985afb2c7d27a4800bfb609e03985aaecb42f955049957814e0491d4006d"}, - {file = "charset_normalizer-3.1.0-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:74db0052d985cf37fa111828d0dd230776ac99c740e1a758ad99094be4f1803d"}, - {file = "charset_normalizer-3.1.0-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:1e8fcdd8f672a1c4fc8d0bd3a2b576b152d2a349782d1eb0f6b8e52e9954731d"}, - {file = "charset_normalizer-3.1.0-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:04afa6387e2b282cf78ff3dbce20f0cc071c12dc8f685bd40960cc68644cfea6"}, - {file = "charset_normalizer-3.1.0-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:dd5653e67b149503c68c4018bf07e42eeed6b4e956b24c00ccdf93ac79cdff84"}, - {file = "charset_normalizer-3.1.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:d2686f91611f9e17f4548dbf050e75b079bbc2a82be565832bc8ea9047b61c8c"}, - {file = "charset_normalizer-3.1.0-cp37-cp37m-win32.whl", hash = "sha256:4155b51ae05ed47199dc5b2a4e62abccb274cee6b01da5b895099b61b1982974"}, - {file = "charset_normalizer-3.1.0-cp37-cp37m-win_amd64.whl", hash = "sha256:322102cdf1ab682ecc7d9b1c5eed4ec59657a65e1c146a0da342b78f4112db23"}, - {file = "charset_normalizer-3.1.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:e633940f28c1e913615fd624fcdd72fdba807bf53ea6925d6a588e84e1151531"}, - {file = "charset_normalizer-3.1.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:3a06f32c9634a8705f4ca9946d667609f52cf130d5548881401f1eb2c39b1e2c"}, - {file = "charset_normalizer-3.1.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:7381c66e0561c5757ffe616af869b916c8b4e42b367ab29fedc98481d1e74e14"}, - {file = "charset_normalizer-3.1.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3573d376454d956553c356df45bb824262c397c6e26ce43e8203c4c540ee0acb"}, - {file = "charset_normalizer-3.1.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e89df2958e5159b811af9ff0f92614dabf4ff617c03a4c1c6ff53bf1c399e0e1"}, - {file = "charset_normalizer-3.1.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:78cacd03e79d009d95635e7d6ff12c21eb89b894c354bd2b2ed0b4763373693b"}, - {file = "charset_normalizer-3.1.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:de5695a6f1d8340b12a5d6d4484290ee74d61e467c39ff03b39e30df62cf83a0"}, - {file = "charset_normalizer-3.1.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1c60b9c202d00052183c9be85e5eaf18a4ada0a47d188a83c8f5c5b23252f649"}, - {file = "charset_normalizer-3.1.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:f645caaf0008bacf349875a974220f1f1da349c5dbe7c4ec93048cdc785a3326"}, - {file = 
"charset_normalizer-3.1.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:ea9f9c6034ea2d93d9147818f17c2a0860d41b71c38b9ce4d55f21b6f9165a11"}, - {file = "charset_normalizer-3.1.0-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:80d1543d58bd3d6c271b66abf454d437a438dff01c3e62fdbcd68f2a11310d4b"}, - {file = "charset_normalizer-3.1.0-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:73dc03a6a7e30b7edc5b01b601e53e7fc924b04e1835e8e407c12c037e81adbd"}, - {file = "charset_normalizer-3.1.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:6f5c2e7bc8a4bf7c426599765b1bd33217ec84023033672c1e9a8b35eaeaaaf8"}, - {file = "charset_normalizer-3.1.0-cp38-cp38-win32.whl", hash = "sha256:12a2b561af122e3d94cdb97fe6fb2bb2b82cef0cdca131646fdb940a1eda04f0"}, - {file = "charset_normalizer-3.1.0-cp38-cp38-win_amd64.whl", hash = "sha256:3160a0fd9754aab7d47f95a6b63ab355388d890163eb03b2d2b87ab0a30cfa59"}, - {file = "charset_normalizer-3.1.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:38e812a197bf8e71a59fe55b757a84c1f946d0ac114acafaafaf21667a7e169e"}, - {file = "charset_normalizer-3.1.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:6baf0baf0d5d265fa7944feb9f7451cc316bfe30e8df1a61b1bb08577c554f31"}, - {file = "charset_normalizer-3.1.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:8f25e17ab3039b05f762b0a55ae0b3632b2e073d9c8fc88e89aca31a6198e88f"}, - {file = "charset_normalizer-3.1.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3747443b6a904001473370d7810aa19c3a180ccd52a7157aacc264a5ac79265e"}, - {file = "charset_normalizer-3.1.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b116502087ce8a6b7a5f1814568ccbd0e9f6cfd99948aa59b0e241dc57cf739f"}, - {file = "charset_normalizer-3.1.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d16fd5252f883eb074ca55cb622bc0bee49b979ae4e8639fff6ca3ff44f9f854"}, - {file = "charset_normalizer-3.1.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:21fa558996782fc226b529fdd2ed7866c2c6ec91cee82735c98a197fae39f706"}, - {file = "charset_normalizer-3.1.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6f6c7a8a57e9405cad7485f4c9d3172ae486cfef1344b5ddd8e5239582d7355e"}, - {file = "charset_normalizer-3.1.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:ac3775e3311661d4adace3697a52ac0bab17edd166087d493b52d4f4f553f9f0"}, - {file = "charset_normalizer-3.1.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:10c93628d7497c81686e8e5e557aafa78f230cd9e77dd0c40032ef90c18f2230"}, - {file = "charset_normalizer-3.1.0-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:6f4f4668e1831850ebcc2fd0b1cd11721947b6dc7c00bf1c6bd3c929ae14f2c7"}, - {file = "charset_normalizer-3.1.0-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:0be65ccf618c1e7ac9b849c315cc2e8a8751d9cfdaa43027d4f6624bd587ab7e"}, - {file = "charset_normalizer-3.1.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:53d0a3fa5f8af98a1e261de6a3943ca631c526635eb5817a87a59d9a57ebf48f"}, - {file = "charset_normalizer-3.1.0-cp39-cp39-win32.whl", hash = "sha256:a04f86f41a8916fe45ac5024ec477f41f886b3c435da2d4e3d2709b22ab02af1"}, - {file = "charset_normalizer-3.1.0-cp39-cp39-win_amd64.whl", hash = "sha256:830d2948a5ec37c386d3170c483063798d7879037492540f10a475e3fd6f244b"}, - {file = "charset_normalizer-3.1.0-py3-none-any.whl", hash = "sha256:3d9098b479e78c85080c98e1e35ff40b4a31d8953102bb0fd7d1b6f8a2111a3d"}, + {file = "charset-normalizer-3.2.0.tar.gz", hash = 
"sha256:3bb3d25a8e6c0aedd251753a79ae98a093c7e7b471faa3aa9a93a81431987ace"}, + {file = "charset_normalizer-3.2.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:0b87549028f680ca955556e3bd57013ab47474c3124dc069faa0b6545b6c9710"}, + {file = "charset_normalizer-3.2.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:7c70087bfee18a42b4040bb9ec1ca15a08242cf5867c58726530bdf3945672ed"}, + {file = "charset_normalizer-3.2.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:a103b3a7069b62f5d4890ae1b8f0597618f628b286b03d4bc9195230b154bfa9"}, + {file = "charset_normalizer-3.2.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:94aea8eff76ee6d1cdacb07dd2123a68283cb5569e0250feab1240058f53b623"}, + {file = "charset_normalizer-3.2.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:db901e2ac34c931d73054d9797383d0f8009991e723dab15109740a63e7f902a"}, + {file = "charset_normalizer-3.2.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b0dac0ff919ba34d4df1b6131f59ce95b08b9065233446be7e459f95554c0dc8"}, + {file = "charset_normalizer-3.2.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:193cbc708ea3aca45e7221ae58f0fd63f933753a9bfb498a3b474878f12caaad"}, + {file = "charset_normalizer-3.2.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:09393e1b2a9461950b1c9a45d5fd251dc7c6f228acab64da1c9c0165d9c7765c"}, + {file = "charset_normalizer-3.2.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:baacc6aee0b2ef6f3d308e197b5d7a81c0e70b06beae1f1fcacffdbd124fe0e3"}, + {file = "charset_normalizer-3.2.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:bf420121d4c8dce6b889f0e8e4ec0ca34b7f40186203f06a946fa0276ba54029"}, + {file = "charset_normalizer-3.2.0-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:c04a46716adde8d927adb9457bbe39cf473e1e2c2f5d0a16ceb837e5d841ad4f"}, + {file = "charset_normalizer-3.2.0-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:aaf63899c94de41fe3cf934601b0f7ccb6b428c6e4eeb80da72c58eab077b19a"}, + {file = "charset_normalizer-3.2.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:d62e51710986674142526ab9f78663ca2b0726066ae26b78b22e0f5e571238dd"}, + {file = "charset_normalizer-3.2.0-cp310-cp310-win32.whl", hash = "sha256:04e57ab9fbf9607b77f7d057974694b4f6b142da9ed4a199859d9d4d5c63fe96"}, + {file = "charset_normalizer-3.2.0-cp310-cp310-win_amd64.whl", hash = "sha256:48021783bdf96e3d6de03a6e39a1171ed5bd7e8bb93fc84cc649d11490f87cea"}, + {file = "charset_normalizer-3.2.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:4957669ef390f0e6719db3613ab3a7631e68424604a7b448f079bee145da6e09"}, + {file = "charset_normalizer-3.2.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:46fb8c61d794b78ec7134a715a3e564aafc8f6b5e338417cb19fe9f57a5a9bf2"}, + {file = "charset_normalizer-3.2.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f779d3ad205f108d14e99bb3859aa7dd8e9c68874617c72354d7ecaec2a054ac"}, + {file = "charset_normalizer-3.2.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f25c229a6ba38a35ae6e25ca1264621cc25d4d38dca2942a7fce0b67a4efe918"}, + {file = "charset_normalizer-3.2.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2efb1bd13885392adfda4614c33d3b68dee4921fd0ac1d3988f8cbb7d589e72a"}, + {file = "charset_normalizer-3.2.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = 
"sha256:1f30b48dd7fa1474554b0b0f3fdfdd4c13b5c737a3c6284d3cdc424ec0ffff3a"}, + {file = "charset_normalizer-3.2.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:246de67b99b6851627d945db38147d1b209a899311b1305dd84916f2b88526c6"}, + {file = "charset_normalizer-3.2.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9bd9b3b31adcb054116447ea22caa61a285d92e94d710aa5ec97992ff5eb7cf3"}, + {file = "charset_normalizer-3.2.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:8c2f5e83493748286002f9369f3e6607c565a6a90425a3a1fef5ae32a36d749d"}, + {file = "charset_normalizer-3.2.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:3170c9399da12c9dc66366e9d14da8bf7147e1e9d9ea566067bbce7bb74bd9c2"}, + {file = "charset_normalizer-3.2.0-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:7a4826ad2bd6b07ca615c74ab91f32f6c96d08f6fcc3902ceeedaec8cdc3bcd6"}, + {file = "charset_normalizer-3.2.0-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:3b1613dd5aee995ec6d4c69f00378bbd07614702a315a2cf6c1d21461fe17c23"}, + {file = "charset_normalizer-3.2.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:9e608aafdb55eb9f255034709e20d5a83b6d60c054df0802fa9c9883d0a937aa"}, + {file = "charset_normalizer-3.2.0-cp311-cp311-win32.whl", hash = "sha256:f2a1d0fd4242bd8643ce6f98927cf9c04540af6efa92323e9d3124f57727bfc1"}, + {file = "charset_normalizer-3.2.0-cp311-cp311-win_amd64.whl", hash = "sha256:681eb3d7e02e3c3655d1b16059fbfb605ac464c834a0c629048a30fad2b27489"}, + {file = "charset_normalizer-3.2.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:c57921cda3a80d0f2b8aec7e25c8aa14479ea92b5b51b6876d975d925a2ea346"}, + {file = "charset_normalizer-3.2.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:41b25eaa7d15909cf3ac4c96088c1f266a9a93ec44f87f1d13d4a0e86c81b982"}, + {file = "charset_normalizer-3.2.0-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f058f6963fd82eb143c692cecdc89e075fa0828db2e5b291070485390b2f1c9c"}, + {file = "charset_normalizer-3.2.0-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a7647ebdfb9682b7bb97e2a5e7cb6ae735b1c25008a70b906aecca294ee96cf4"}, + {file = "charset_normalizer-3.2.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eef9df1eefada2c09a5e7a40991b9fc6ac6ef20b1372abd48d2794a316dc0449"}, + {file = "charset_normalizer-3.2.0-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e03b8895a6990c9ab2cdcd0f2fe44088ca1c65ae592b8f795c3294af00a461c3"}, + {file = "charset_normalizer-3.2.0-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:ee4006268ed33370957f55bf2e6f4d263eaf4dc3cfc473d1d90baff6ed36ce4a"}, + {file = "charset_normalizer-3.2.0-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:c4983bf937209c57240cff65906b18bb35e64ae872da6a0db937d7b4af845dd7"}, + {file = "charset_normalizer-3.2.0-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:3bb7fda7260735efe66d5107fb7e6af6a7c04c7fce9b2514e04b7a74b06bf5dd"}, + {file = "charset_normalizer-3.2.0-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:72814c01533f51d68702802d74f77ea026b5ec52793c791e2da806a3844a46c3"}, + {file = "charset_normalizer-3.2.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:70c610f6cbe4b9fce272c407dd9d07e33e6bf7b4aa1b7ffb6f6ded8e634e3592"}, + {file = "charset_normalizer-3.2.0-cp37-cp37m-win32.whl", hash = "sha256:a401b4598e5d3f4a9a811f3daf42ee2291790c7f9d74b18d75d6e21dda98a1a1"}, 
+ {file = "charset_normalizer-3.2.0-cp37-cp37m-win_amd64.whl", hash = "sha256:c0b21078a4b56965e2b12f247467b234734491897e99c1d51cee628da9786959"}, + {file = "charset_normalizer-3.2.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:95eb302ff792e12aba9a8b8f8474ab229a83c103d74a750ec0bd1c1eea32e669"}, + {file = "charset_normalizer-3.2.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1a100c6d595a7f316f1b6f01d20815d916e75ff98c27a01ae817439ea7726329"}, + {file = "charset_normalizer-3.2.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:6339d047dab2780cc6220f46306628e04d9750f02f983ddb37439ca47ced7149"}, + {file = "charset_normalizer-3.2.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e4b749b9cc6ee664a3300bb3a273c1ca8068c46be705b6c31cf5d276f8628a94"}, + {file = "charset_normalizer-3.2.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a38856a971c602f98472050165cea2cdc97709240373041b69030be15047691f"}, + {file = "charset_normalizer-3.2.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f87f746ee241d30d6ed93969de31e5ffd09a2961a051e60ae6bddde9ec3583aa"}, + {file = "charset_normalizer-3.2.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:89f1b185a01fe560bc8ae5f619e924407efca2191b56ce749ec84982fc59a32a"}, + {file = "charset_normalizer-3.2.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e1c8a2f4c69e08e89632defbfabec2feb8a8d99edc9f89ce33c4b9e36ab63037"}, + {file = "charset_normalizer-3.2.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:2f4ac36d8e2b4cc1aa71df3dd84ff8efbe3bfb97ac41242fbcfc053c67434f46"}, + {file = "charset_normalizer-3.2.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:a386ebe437176aab38c041de1260cd3ea459c6ce5263594399880bbc398225b2"}, + {file = "charset_normalizer-3.2.0-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:ccd16eb18a849fd8dcb23e23380e2f0a354e8daa0c984b8a732d9cfaba3a776d"}, + {file = "charset_normalizer-3.2.0-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:e6a5bf2cba5ae1bb80b154ed68a3cfa2fa00fde979a7f50d6598d3e17d9ac20c"}, + {file = "charset_normalizer-3.2.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:45de3f87179c1823e6d9e32156fb14c1927fcc9aba21433f088fdfb555b77c10"}, + {file = "charset_normalizer-3.2.0-cp38-cp38-win32.whl", hash = "sha256:1000fba1057b92a65daec275aec30586c3de2401ccdcd41f8a5c1e2c87078706"}, + {file = "charset_normalizer-3.2.0-cp38-cp38-win_amd64.whl", hash = "sha256:8b2c760cfc7042b27ebdb4a43a4453bd829a5742503599144d54a032c5dc7e9e"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:855eafa5d5a2034b4621c74925d89c5efef61418570e5ef9b37717d9c796419c"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:203f0c8871d5a7987be20c72442488a0b8cfd0f43b7973771640fc593f56321f"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:e857a2232ba53ae940d3456f7533ce6ca98b81917d47adc3c7fd55dad8fab858"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5e86d77b090dbddbe78867a0275cb4df08ea195e660f1f7f13435a4649e954e5"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c4fb39a81950ec280984b3a44f5bd12819953dc5fa3a7e6fa7a80db5ee853952"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = 
"sha256:2dee8e57f052ef5353cf608e0b4c871aee320dd1b87d351c28764fc0ca55f9f4"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8700f06d0ce6f128de3ccdbc1acaea1ee264d2caa9ca05daaf492fde7c2a7200"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1920d4ff15ce893210c1f0c0e9d19bfbecb7983c76b33f046c13a8ffbd570252"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:c1c76a1743432b4b60ab3358c937a3fe1341c828ae6194108a94c69028247f22"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:f7560358a6811e52e9c4d142d497f1a6e10103d3a6881f18d04dbce3729c0e2c"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:c8063cf17b19661471ecbdb3df1c84f24ad2e389e326ccaf89e3fb2484d8dd7e"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:cd6dbe0238f7743d0efe563ab46294f54f9bc8f4b9bcf57c3c666cc5bc9d1299"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:1249cbbf3d3b04902ff081ffbb33ce3377fa6e4c7356f759f3cd076cc138d020"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-win32.whl", hash = "sha256:6c409c0deba34f147f77efaa67b8e4bb83d2f11c8806405f76397ae5b8c0d1c9"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-win_amd64.whl", hash = "sha256:7095f6fbfaa55defb6b733cfeb14efaae7a29f0b59d8cf213be4e7ca0b857b80"}, + {file = "charset_normalizer-3.2.0-py3-none-any.whl", hash = "sha256:8e098148dd37b4ce3baca71fb394c81dc5d9c7728c95df695d2dca218edf40e6"}, ] [[package]] name = "click" -version = "8.1.3" +version = "8.1.5" description = "Composable command line interface toolkit" optional = false python-versions = ">=3.7" files = [ - {file = "click-8.1.3-py3-none-any.whl", hash = "sha256:bb4d8133cb15a609f44e8213d9b391b0809795062913b383c62be0ee95b1db48"}, - {file = "click-8.1.3.tar.gz", hash = "sha256:7682dc8afb30297001674575ea00d1814d808d6a36af415a82bd481d37ba7b8e"}, + {file = "click-8.1.5-py3-none-any.whl", hash = "sha256:e576aa487d679441d7d30abb87e1b43d24fc53bffb8758443b1a9e1cee504548"}, + {file = "click-8.1.5.tar.gz", hash = "sha256:4be4b1af8d665c6d942909916d31a213a106800c47d0eeba73d34da3cbc11367"}, ] [package.dependencies] @@ -862,30 +873,34 @@ six = "*" [[package]] name = "cryptography" -version = "41.0.1" +version = "41.0.2" description = "cryptography is a package which provides cryptographic recipes and primitives to Python developers." 
optional = false python-versions = ">=3.7" files = [ - {file = "cryptography-41.0.1-cp37-abi3-macosx_10_12_universal2.whl", hash = "sha256:f73bff05db2a3e5974a6fd248af2566134d8981fd7ab012e5dd4ddb1d9a70699"}, - {file = "cryptography-41.0.1-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:1a5472d40c8f8e91ff7a3d8ac6dfa363d8e3138b961529c996f3e2df0c7a411a"}, - {file = "cryptography-41.0.1-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7fa01527046ca5facdf973eef2535a27fec4cb651e4daec4d043ef63f6ecd4ca"}, - {file = "cryptography-41.0.1-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b46e37db3cc267b4dea1f56da7346c9727e1209aa98487179ee8ebed09d21e43"}, - {file = "cryptography-41.0.1-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:d198820aba55660b4d74f7b5fd1f17db3aa5eb3e6893b0a41b75e84e4f9e0e4b"}, - {file = "cryptography-41.0.1-cp37-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:948224d76c4b6457349d47c0c98657557f429b4e93057cf5a2f71d603e2fc3a3"}, - {file = "cryptography-41.0.1-cp37-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:059e348f9a3c1950937e1b5d7ba1f8e968508ab181e75fc32b879452f08356db"}, - {file = "cryptography-41.0.1-cp37-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:b4ceb5324b998ce2003bc17d519080b4ec8d5b7b70794cbd2836101406a9be31"}, - {file = "cryptography-41.0.1-cp37-abi3-win32.whl", hash = "sha256:8f4ab7021127a9b4323537300a2acfb450124b2def3756f64dc3a3d2160ee4b5"}, - {file = "cryptography-41.0.1-cp37-abi3-win_amd64.whl", hash = "sha256:1fee5aacc7367487b4e22484d3c7e547992ed726d14864ee33c0176ae43b0d7c"}, - {file = "cryptography-41.0.1-pp38-pypy38_pp73-macosx_10_12_x86_64.whl", hash = "sha256:9a6c7a3c87d595608a39980ebaa04d5a37f94024c9f24eb7d10262b92f739ddb"}, - {file = "cryptography-41.0.1-pp38-pypy38_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:5d092fdfedaec4cbbffbf98cddc915ba145313a6fdaab83c6e67f4e6c218e6f3"}, - {file = "cryptography-41.0.1-pp38-pypy38_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:1a8e6c2de6fbbcc5e14fd27fb24414507cb3333198ea9ab1258d916f00bc3039"}, - {file = "cryptography-41.0.1-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:cb33ccf15e89f7ed89b235cff9d49e2e62c6c981a6061c9c8bb47ed7951190bc"}, - {file = "cryptography-41.0.1-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:5f0ff6e18d13a3de56f609dd1fd11470918f770c6bd5d00d632076c727d35485"}, - {file = "cryptography-41.0.1-pp39-pypy39_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:7bfc55a5eae8b86a287747053140ba221afc65eb06207bedf6e019b8934b477c"}, - {file = "cryptography-41.0.1-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:eb8163f5e549a22888c18b0d53d6bb62a20510060a22fd5a995ec8a05268df8a"}, - {file = "cryptography-41.0.1-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:8dde71c4169ec5ccc1087bb7521d54251c016f126f922ab2dfe6649170a3b8c5"}, - {file = "cryptography-41.0.1.tar.gz", hash = "sha256:d34579085401d3f49762d2f7d6634d6b6c2ae1242202e860f4d26b046e3a1006"}, + {file = "cryptography-41.0.2-cp37-abi3-macosx_10_12_universal2.whl", hash = "sha256:01f1d9e537f9a15b037d5d9ee442b8c22e3ae11ce65ea1f3316a41c78756b711"}, + {file = "cryptography-41.0.2-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:079347de771f9282fbfe0e0236c716686950c19dee1b76240ab09ce1624d76d7"}, + {file = "cryptography-41.0.2-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:439c3cc4c0d42fa999b83ded80a9a1fb54d53c58d6e59234cfe97f241e6c781d"}, + {file = "cryptography-41.0.2-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:f14ad275364c8b4e525d018f6716537ae7b6d369c094805cae45300847e0894f"}, + {file = "cryptography-41.0.2-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:84609ade00a6ec59a89729e87a503c6e36af98ddcd566d5f3be52e29ba993182"}, + {file = "cryptography-41.0.2-cp37-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:49c3222bb8f8e800aead2e376cbef687bc9e3cb9b58b29a261210456a7783d83"}, + {file = "cryptography-41.0.2-cp37-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:d73f419a56d74fef257955f51b18d046f3506270a5fd2ac5febbfa259d6c0fa5"}, + {file = "cryptography-41.0.2-cp37-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:2a034bf7d9ca894720f2ec1d8b7b5832d7e363571828037f9e0c4f18c1b58a58"}, + {file = "cryptography-41.0.2-cp37-abi3-win32.whl", hash = "sha256:d124682c7a23c9764e54ca9ab5b308b14b18eba02722b8659fb238546de83a76"}, + {file = "cryptography-41.0.2-cp37-abi3-win_amd64.whl", hash = "sha256:9c3fe6534d59d071ee82081ca3d71eed3210f76ebd0361798c74abc2bcf347d4"}, + {file = "cryptography-41.0.2-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:a719399b99377b218dac6cf547b6ec54e6ef20207b6165126a280b0ce97e0d2a"}, + {file = "cryptography-41.0.2-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:182be4171f9332b6741ee818ec27daff9fb00349f706629f5cbf417bd50e66fd"}, + {file = "cryptography-41.0.2-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:7a9a3bced53b7f09da251685224d6a260c3cb291768f54954e28f03ef14e3766"}, + {file = "cryptography-41.0.2-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:f0dc40e6f7aa37af01aba07277d3d64d5a03dc66d682097541ec4da03cc140ee"}, + {file = "cryptography-41.0.2-pp38-pypy38_pp73-macosx_10_12_x86_64.whl", hash = "sha256:674b669d5daa64206c38e507808aae49904c988fa0a71c935e7006a3e1e83831"}, + {file = "cryptography-41.0.2-pp38-pypy38_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:7af244b012711a26196450d34f483357e42aeddb04128885d95a69bd8b14b69b"}, + {file = "cryptography-41.0.2-pp38-pypy38_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:9b6d717393dbae53d4e52684ef4f022444fc1cce3c48c38cb74fca29e1f08eaa"}, + {file = "cryptography-41.0.2-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:192255f539d7a89f2102d07d7375b1e0a81f7478925b3bc2e0549ebf739dae0e"}, + {file = "cryptography-41.0.2-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:f772610fe364372de33d76edcd313636a25684edb94cee53fd790195f5989d14"}, + {file = "cryptography-41.0.2-pp39-pypy39_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:b332cba64d99a70c1e0836902720887fb4529ea49ea7f5462cf6640e095e11d2"}, + {file = "cryptography-41.0.2-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:9a6673c1828db6270b76b22cc696f40cde9043eb90373da5c2f8f2158957f42f"}, + {file = "cryptography-41.0.2-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:342f3767e25876751e14f8459ad85e77e660537ca0a066e10e75df9c9e9099f0"}, + {file = "cryptography-41.0.2.tar.gz", hash = "sha256:7d230bf856164de164ecb615ccc14c7fc6de6906ddd5b491f3af90d3514c925c"}, ] [package.dependencies] @@ -903,13 +918,13 @@ test-randomorder = ["pytest-randomly"] [[package]] name = "datasets" -version = "2.13.0" +version = "2.13.1" description = "HuggingFace community-driven open-source library of datasets" optional = false python-versions = ">=3.7.0" files = [ - {file = "datasets-2.13.0-py3-none-any.whl", hash = "sha256:26671d474990ad8fd7388e8c67cde4d72f6c1f0e87af685fc09af5d9a5992274"}, - {file = "datasets-2.13.0.tar.gz", hash = "sha256:b8c3bcf9c3d0c74f101c7645e42231de9f45206a2e742df15799da9bfa625608"}, + {file = "datasets-2.13.1-py3-none-any.whl", hash = 
"sha256:844d8dbc1759e0b6b8e5063af019dc95d6af07ea075002b03323a280bf8d53f6"}, + {file = "datasets-2.13.1.tar.gz", hash = "sha256:bacb7750b1a434417312b4281a55225a3f7e0163abdd12a2a3e2d700310d5221"}, ] [package.dependencies] @@ -1004,13 +1019,13 @@ files = [ [[package]] name = "distlib" -version = "0.3.6" +version = "0.3.7" description = "Distribution utilities" optional = false python-versions = "*" files = [ - {file = "distlib-0.3.6-py2.py3-none-any.whl", hash = "sha256:f35c4b692542ca110de7ef0bea44d73981caeb34ca0b9b6b2e6d7790dda8f80e"}, - {file = "distlib-0.3.6.tar.gz", hash = "sha256:14bad2d9b04d3a36127ac97f30b12a19268f211063d8f8ee4f47108896e11b46"}, + {file = "distlib-0.3.7-py2.py3-none-any.whl", hash = "sha256:2e24928bc811348f0feb63014e97aaae3037f2cf48712d51ae61df7fd6075057"}, + {file = "distlib-0.3.7.tar.gz", hash = "sha256:9dafe54b34a028eafd95039d5e5d4851a13734540f1331060d31c9916e7147a8"}, ] [[package]] @@ -1024,6 +1039,20 @@ files = [ {file = "distro-1.8.0.tar.gz", hash = "sha256:02e111d1dc6a50abb8eed6bf31c3e48ed8b0830d1ea2a1b78c61765c2513fdd8"}, ] +[[package]] +name = "docker-pycreds" +version = "0.4.0" +description = "Python bindings for the docker credentials store API" +optional = false +python-versions = "*" +files = [ + {file = "docker-pycreds-0.4.0.tar.gz", hash = "sha256:6ce3270bcaf404cc4c3e27e4b6c70d3521deae82fb508767870fdbf772d584d4"}, + {file = "docker_pycreds-0.4.0-py2.py3-none-any.whl", hash = "sha256:7266112468627868005106ec19cd0d722702d2b7d5912a28e19b826c3d37af49"}, +] + +[package.dependencies] +six = ">=1.4.0" + [[package]] name = "dpath" version = "2.1.6" @@ -1224,13 +1253,13 @@ tests = ["dvc[testing]", "flaky (==3.7.0)", "mypy (==0.910)", "pylint (==2.15.9) [[package]] name = "dvc-objects" -version = "0.23.0" +version = "0.23.1" description = "dvc objects" optional = false python-versions = ">=3.8" files = [ - {file = "dvc-objects-0.23.0.tar.gz", hash = "sha256:9e6a470eb910f3e2bd3156961e4d8617c51be9a464ffce5d8938cb2bc5bfddb3"}, - {file = "dvc_objects-0.23.0-py3-none-any.whl", hash = "sha256:6d4583a56d562cdc4b534790d3963a4401ffa4dde4f50685f3cc4d34c75d2e9b"}, + {file = "dvc-objects-0.23.1.tar.gz", hash = "sha256:159ec9bede7443fbcbc64d33e53071ae51bea86fc82a764ab652655c35b58776"}, + {file = "dvc_objects-0.23.1-py3-none-any.whl", hash = "sha256:118640f4cf83415cd2bf104be39712660e470a9ac061e55ac688f7ab703677c4"}, ] [package.dependencies] @@ -1285,13 +1314,13 @@ tests = ["Pygments (==2.10.0)", "collective.checkdocs (==0.2)", "dvc[testing]", [[package]] name = "dvc-studio-client" -version = "0.10.0" +version = "0.11.0" description = "Small library to post data from DVC/DVCLive to Iterative Studio" optional = false python-versions = ">=3.8" files = [ - {file = "dvc-studio-client-0.10.0.tar.gz", hash = "sha256:1d6c70d7c802e150a61e71c0205555a972898c340f25ab1f759564bd88c0f248"}, - {file = "dvc_studio_client-0.10.0-py3-none-any.whl", hash = "sha256:3b39eb242914b034c3e3634d74ded17cfa2de1d55473c833a511324bae870782"}, + {file = "dvc-studio-client-0.11.0.tar.gz", hash = "sha256:9179acc39bb9acfb54a5369142c835dc2428bd285e41281b005739ce63d9d55b"}, + {file = "dvc_studio_client-0.11.0-py3-none-any.whl", hash = "sha256:2832fe0bdf723dbe51320abcde238bd0a1e1a3befa15c1f05cc9ed2ca25fb39f"}, ] [package.dependencies] @@ -1329,13 +1358,13 @@ tests = ["celery-types (==0.15.0)", "flaky (==3.7.0)", "mypy (==0.971)", "pylint [[package]] name = "exceptiongroup" -version = "1.1.1" +version = "1.1.2" description = "Backport of PEP 654 (exception groups)" optional = false python-versions = ">=3.7" files = [ 
- {file = "exceptiongroup-1.1.1-py3-none-any.whl", hash = "sha256:232c37c63e4f682982c8b6459f33a8981039e5fb8756b2074364e5055c498c9e"}, - {file = "exceptiongroup-1.1.1.tar.gz", hash = "sha256:d484c3090ba2889ae2928419117447a14daf3c1231d5e30d0aae34f354f01785"}, + {file = "exceptiongroup-1.1.2-py3-none-any.whl", hash = "sha256:e346e69d186172ca7cf029c8c1d16235aa0e04035e5750b4b95039e65204328f"}, + {file = "exceptiongroup-1.1.2.tar.gz", hash = "sha256:12c3e887d6485d16943a309616de20ae5582633e0a2eda17f4e10fd61c1e8af5"}, ] [package.extras] @@ -1372,100 +1401,87 @@ six = ">=1.12,<2.0" [[package]] name = "flufl-lock" -version = "7.1.1" +version = "8.0.1" description = "NFS-safe file locking with timeouts for POSIX and Windows" optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "flufl.lock-7.1.1-py3-none-any.whl", hash = "sha256:96d2c0448ba9fd8fc65d5d681ed7217c8e1625149c1c880bba50559bb680a615"}, - {file = "flufl.lock-7.1.1.tar.gz", hash = "sha256:af14172b35bbc58687bd06b70d1693fd8d48cbf0ffde7e51a618c148ae24042d"}, + {file = "flufl_lock-8.0.1-py3-none-any.whl", hash = "sha256:a3df854d76173d59813fdcba91671234b59e2a14db3390793745c77a7bb92d9d"}, + {file = "flufl_lock-8.0.1.tar.gz", hash = "sha256:edb7f1f3f8b4805ef6a6a23b9a3975bfc9b7c15eb33e10b0b086d0caa2a97e04"}, ] [package.dependencies] -atpublic = ">=2.3" -psutil = ">=5.9.0" +atpublic = "*" +psutil = "*" [[package]] name = "frozenlist" -version = "1.3.3" +version = "1.4.0" description = "A list-like structure which implements collections.abc.MutableSequence" optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "frozenlist-1.3.3-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:ff8bf625fe85e119553b5383ba0fb6aa3d0ec2ae980295aaefa552374926b3f4"}, - {file = "frozenlist-1.3.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:dfbac4c2dfcc082fcf8d942d1e49b6aa0766c19d3358bd86e2000bf0fa4a9cf0"}, - {file = "frozenlist-1.3.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:b1c63e8d377d039ac769cd0926558bb7068a1f7abb0f003e3717ee003ad85530"}, - {file = "frozenlist-1.3.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7fdfc24dcfce5b48109867c13b4cb15e4660e7bd7661741a391f821f23dfdca7"}, - {file = "frozenlist-1.3.3-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2c926450857408e42f0bbc295e84395722ce74bae69a3b2aa2a65fe22cb14b99"}, - {file = "frozenlist-1.3.3-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1841e200fdafc3d51f974d9d377c079a0694a8f06de2e67b48150328d66d5483"}, - {file = "frozenlist-1.3.3-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f470c92737afa7d4c3aacc001e335062d582053d4dbe73cda126f2d7031068dd"}, - {file = "frozenlist-1.3.3-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:783263a4eaad7c49983fe4b2e7b53fa9770c136c270d2d4bbb6d2192bf4d9caf"}, - {file = "frozenlist-1.3.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:924620eef691990dfb56dc4709f280f40baee568c794b5c1885800c3ecc69816"}, - {file = "frozenlist-1.3.3-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:ae4dc05c465a08a866b7a1baf360747078b362e6a6dbeb0c57f234db0ef88ae0"}, - {file = "frozenlist-1.3.3-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:bed331fe18f58d844d39ceb398b77d6ac0b010d571cba8267c2e7165806b00ce"}, - {file = "frozenlist-1.3.3-cp310-cp310-musllinux_1_1_s390x.whl", hash = 
"sha256:02c9ac843e3390826a265e331105efeab489ffaf4dd86384595ee8ce6d35ae7f"}, - {file = "frozenlist-1.3.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:9545a33965d0d377b0bc823dcabf26980e77f1b6a7caa368a365a9497fb09420"}, - {file = "frozenlist-1.3.3-cp310-cp310-win32.whl", hash = "sha256:d5cd3ab21acbdb414bb6c31958d7b06b85eeb40f66463c264a9b343a4e238642"}, - {file = "frozenlist-1.3.3-cp310-cp310-win_amd64.whl", hash = "sha256:b756072364347cb6aa5b60f9bc18e94b2f79632de3b0190253ad770c5df17db1"}, - {file = "frozenlist-1.3.3-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:b4395e2f8d83fbe0c627b2b696acce67868793d7d9750e90e39592b3626691b7"}, - {file = "frozenlist-1.3.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:14143ae966a6229350021384870458e4777d1eae4c28d1a7aa47f24d030e6678"}, - {file = "frozenlist-1.3.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:5d8860749e813a6f65bad8285a0520607c9500caa23fea6ee407e63debcdbef6"}, - {file = "frozenlist-1.3.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:23d16d9f477bb55b6154654e0e74557040575d9d19fe78a161bd33d7d76808e8"}, - {file = "frozenlist-1.3.3-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:eb82dbba47a8318e75f679690190c10a5e1f447fbf9df41cbc4c3afd726d88cb"}, - {file = "frozenlist-1.3.3-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9309869032abb23d196cb4e4db574232abe8b8be1339026f489eeb34a4acfd91"}, - {file = "frozenlist-1.3.3-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a97b4fe50b5890d36300820abd305694cb865ddb7885049587a5678215782a6b"}, - {file = "frozenlist-1.3.3-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c188512b43542b1e91cadc3c6c915a82a5eb95929134faf7fd109f14f9892ce4"}, - {file = "frozenlist-1.3.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:303e04d422e9b911a09ad499b0368dc551e8c3cd15293c99160c7f1f07b59a48"}, - {file = "frozenlist-1.3.3-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:0771aed7f596c7d73444c847a1c16288937ef988dc04fb9f7be4b2aa91db609d"}, - {file = "frozenlist-1.3.3-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:66080ec69883597e4d026f2f71a231a1ee9887835902dbe6b6467d5a89216cf6"}, - {file = "frozenlist-1.3.3-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:41fe21dc74ad3a779c3d73a2786bdf622ea81234bdd4faf90b8b03cad0c2c0b4"}, - {file = "frozenlist-1.3.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:f20380df709d91525e4bee04746ba612a4df0972c1b8f8e1e8af997e678c7b81"}, - {file = "frozenlist-1.3.3-cp311-cp311-win32.whl", hash = "sha256:f30f1928162e189091cf4d9da2eac617bfe78ef907a761614ff577ef4edfb3c8"}, - {file = "frozenlist-1.3.3-cp311-cp311-win_amd64.whl", hash = "sha256:a6394d7dadd3cfe3f4b3b186e54d5d8504d44f2d58dcc89d693698e8b7132b32"}, - {file = "frozenlist-1.3.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:8df3de3a9ab8325f94f646609a66cbeeede263910c5c0de0101079ad541af332"}, - {file = "frozenlist-1.3.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0693c609e9742c66ba4870bcee1ad5ff35462d5ffec18710b4ac89337ff16e27"}, - {file = "frozenlist-1.3.3-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:cd4210baef299717db0a600d7a3cac81d46ef0e007f88c9335db79f8979c0d3d"}, - {file = "frozenlist-1.3.3-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = 
"sha256:394c9c242113bfb4b9aa36e2b80a05ffa163a30691c7b5a29eba82e937895d5e"}, - {file = "frozenlist-1.3.3-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6327eb8e419f7d9c38f333cde41b9ae348bec26d840927332f17e887a8dcb70d"}, - {file = "frozenlist-1.3.3-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2e24900aa13212e75e5b366cb9065e78bbf3893d4baab6052d1aca10d46d944c"}, - {file = "frozenlist-1.3.3-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:3843f84a6c465a36559161e6c59dce2f2ac10943040c2fd021cfb70d58c4ad56"}, - {file = "frozenlist-1.3.3-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:84610c1502b2461255b4c9b7d5e9c48052601a8957cd0aea6ec7a7a1e1fb9420"}, - {file = "frozenlist-1.3.3-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:c21b9aa40e08e4f63a2f92ff3748e6b6c84d717d033c7b3438dd3123ee18f70e"}, - {file = "frozenlist-1.3.3-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:efce6ae830831ab6a22b9b4091d411698145cb9b8fc869e1397ccf4b4b6455cb"}, - {file = "frozenlist-1.3.3-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:40de71985e9042ca00b7953c4f41eabc3dc514a2d1ff534027f091bc74416401"}, - {file = "frozenlist-1.3.3-cp37-cp37m-win32.whl", hash = "sha256:180c00c66bde6146a860cbb81b54ee0df350d2daf13ca85b275123bbf85de18a"}, - {file = "frozenlist-1.3.3-cp37-cp37m-win_amd64.whl", hash = "sha256:9bbbcedd75acdfecf2159663b87f1bb5cfc80e7cd99f7ddd9d66eb98b14a8411"}, - {file = "frozenlist-1.3.3-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:034a5c08d36649591be1cbb10e09da9f531034acfe29275fc5454a3b101ce41a"}, - {file = "frozenlist-1.3.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:ba64dc2b3b7b158c6660d49cdb1d872d1d0bf4e42043ad8d5006099479a194e5"}, - {file = "frozenlist-1.3.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:47df36a9fe24054b950bbc2db630d508cca3aa27ed0566c0baf661225e52c18e"}, - {file = "frozenlist-1.3.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:008a054b75d77c995ea26629ab3a0c0d7281341f2fa7e1e85fa6153ae29ae99c"}, - {file = "frozenlist-1.3.3-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:841ea19b43d438a80b4de62ac6ab21cfe6827bb8a9dc62b896acc88eaf9cecba"}, - {file = "frozenlist-1.3.3-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e235688f42b36be2b6b06fc37ac2126a73b75fb8d6bc66dd632aa35286238703"}, - {file = "frozenlist-1.3.3-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ca713d4af15bae6e5d79b15c10c8522859a9a89d3b361a50b817c98c2fb402a2"}, - {file = "frozenlist-1.3.3-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9ac5995f2b408017b0be26d4a1d7c61bce106ff3d9e3324374d66b5964325448"}, - {file = "frozenlist-1.3.3-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:a4ae8135b11652b08a8baf07631d3ebfe65a4c87909dbef5fa0cdde440444ee4"}, - {file = "frozenlist-1.3.3-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:4ea42116ceb6bb16dbb7d526e242cb6747b08b7710d9782aa3d6732bd8d27649"}, - {file = "frozenlist-1.3.3-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:810860bb4bdce7557bc0febb84bbd88198b9dbc2022d8eebe5b3590b2ad6c842"}, - {file = "frozenlist-1.3.3-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:ee78feb9d293c323b59a6f2dd441b63339a30edf35abcb51187d2fc26e696d13"}, - {file = "frozenlist-1.3.3-cp38-cp38-musllinux_1_1_x86_64.whl", hash = 
"sha256:0af2e7c87d35b38732e810befb9d797a99279cbb85374d42ea61c1e9d23094b3"}, - {file = "frozenlist-1.3.3-cp38-cp38-win32.whl", hash = "sha256:899c5e1928eec13fd6f6d8dc51be23f0d09c5281e40d9cf4273d188d9feeaf9b"}, - {file = "frozenlist-1.3.3-cp38-cp38-win_amd64.whl", hash = "sha256:7f44e24fa70f6fbc74aeec3e971f60a14dde85da364aa87f15d1be94ae75aeef"}, - {file = "frozenlist-1.3.3-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:2b07ae0c1edaa0a36339ec6cce700f51b14a3fc6545fdd32930d2c83917332cf"}, - {file = "frozenlist-1.3.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:ebb86518203e12e96af765ee89034a1dbb0c3c65052d1b0c19bbbd6af8a145e1"}, - {file = "frozenlist-1.3.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:5cf820485f1b4c91e0417ea0afd41ce5cf5965011b3c22c400f6d144296ccbc0"}, - {file = "frozenlist-1.3.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5c11e43016b9024240212d2a65043b70ed8dfd3b52678a1271972702d990ac6d"}, - {file = "frozenlist-1.3.3-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8fa3c6e3305aa1146b59a09b32b2e04074945ffcfb2f0931836d103a2c38f936"}, - {file = "frozenlist-1.3.3-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:352bd4c8c72d508778cf05ab491f6ef36149f4d0cb3c56b1b4302852255d05d5"}, - {file = "frozenlist-1.3.3-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:65a5e4d3aa679610ac6e3569e865425b23b372277f89b5ef06cf2cdaf1ebf22b"}, - {file = "frozenlist-1.3.3-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b1e2c1185858d7e10ff045c496bbf90ae752c28b365fef2c09cf0fa309291669"}, - {file = "frozenlist-1.3.3-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:f163d2fd041c630fed01bc48d28c3ed4a3b003c00acd396900e11ee5316b56bb"}, - {file = "frozenlist-1.3.3-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:05cdb16d09a0832eedf770cb7bd1fe57d8cf4eaf5aced29c4e41e3f20b30a784"}, - {file = "frozenlist-1.3.3-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:8bae29d60768bfa8fb92244b74502b18fae55a80eac13c88eb0b496d4268fd2d"}, - {file = "frozenlist-1.3.3-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:eedab4c310c0299961ac285591acd53dc6723a1ebd90a57207c71f6e0c2153ab"}, - {file = "frozenlist-1.3.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:3bbdf44855ed8f0fbcd102ef05ec3012d6a4fd7c7562403f76ce6a52aeffb2b1"}, - {file = "frozenlist-1.3.3-cp39-cp39-win32.whl", hash = "sha256:efa568b885bca461f7c7b9e032655c0c143d305bf01c30caf6db2854a4532b38"}, - {file = "frozenlist-1.3.3-cp39-cp39-win_amd64.whl", hash = "sha256:cfe33efc9cb900a4c46f91a5ceba26d6df370ffddd9ca386eb1d4f0ad97b9ea9"}, - {file = "frozenlist-1.3.3.tar.gz", hash = "sha256:58bcc55721e8a90b88332d6cd441261ebb22342e238296bb330968952fbb3a6a"}, + {file = "frozenlist-1.4.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:764226ceef3125e53ea2cb275000e309c0aa5464d43bd72abd661e27fffc26ab"}, + {file = "frozenlist-1.4.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d6484756b12f40003c6128bfcc3fa9f0d49a687e171186c2d85ec82e3758c559"}, + {file = "frozenlist-1.4.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9ac08e601308e41eb533f232dbf6b7e4cea762f9f84f6357136eed926c15d12c"}, + {file = "frozenlist-1.4.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d081f13b095d74b67d550de04df1c756831f3b83dc9881c38985834387487f1b"}, + {file = "frozenlist-1.4.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash 
= "sha256:71932b597f9895f011f47f17d6428252fc728ba2ae6024e13c3398a087c2cdea"}, + {file = "frozenlist-1.4.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:981b9ab5a0a3178ff413bca62526bb784249421c24ad7381e39d67981be2c326"}, + {file = "frozenlist-1.4.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e41f3de4df3e80de75845d3e743b3f1c4c8613c3997a912dbf0229fc61a8b963"}, + {file = "frozenlist-1.4.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6918d49b1f90821e93069682c06ffde41829c346c66b721e65a5c62b4bab0300"}, + {file = "frozenlist-1.4.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:0e5c8764c7829343d919cc2dfc587a8db01c4f70a4ebbc49abde5d4b158b007b"}, + {file = "frozenlist-1.4.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:8d0edd6b1c7fb94922bf569c9b092ee187a83f03fb1a63076e7774b60f9481a8"}, + {file = "frozenlist-1.4.0-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:e29cda763f752553fa14c68fb2195150bfab22b352572cb36c43c47bedba70eb"}, + {file = "frozenlist-1.4.0-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:0c7c1b47859ee2cac3846fde1c1dc0f15da6cec5a0e5c72d101e0f83dcb67ff9"}, + {file = "frozenlist-1.4.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:901289d524fdd571be1c7be054f48b1f88ce8dddcbdf1ec698b27d4b8b9e5d62"}, + {file = "frozenlist-1.4.0-cp310-cp310-win32.whl", hash = "sha256:1a0848b52815006ea6596c395f87449f693dc419061cc21e970f139d466dc0a0"}, + {file = "frozenlist-1.4.0-cp310-cp310-win_amd64.whl", hash = "sha256:b206646d176a007466358aa21d85cd8600a415c67c9bd15403336c331a10d956"}, + {file = "frozenlist-1.4.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:de343e75f40e972bae1ef6090267f8260c1446a1695e77096db6cfa25e759a95"}, + {file = "frozenlist-1.4.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:ad2a9eb6d9839ae241701d0918f54c51365a51407fd80f6b8289e2dfca977cc3"}, + {file = "frozenlist-1.4.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:bd7bd3b3830247580de99c99ea2a01416dfc3c34471ca1298bccabf86d0ff4dc"}, + {file = "frozenlist-1.4.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bdf1847068c362f16b353163391210269e4f0569a3c166bc6a9f74ccbfc7e839"}, + {file = "frozenlist-1.4.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:38461d02d66de17455072c9ba981d35f1d2a73024bee7790ac2f9e361ef1cd0c"}, + {file = "frozenlist-1.4.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d5a32087d720c608f42caed0ef36d2b3ea61a9d09ee59a5142d6070da9041b8f"}, + {file = "frozenlist-1.4.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:dd65632acaf0d47608190a71bfe46b209719bf2beb59507db08ccdbe712f969b"}, + {file = "frozenlist-1.4.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:261b9f5d17cac914531331ff1b1d452125bf5daa05faf73b71d935485b0c510b"}, + {file = "frozenlist-1.4.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:b89ac9768b82205936771f8d2eb3ce88503b1556324c9f903e7156669f521472"}, + {file = "frozenlist-1.4.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:008eb8b31b3ea6896da16c38c1b136cb9fec9e249e77f6211d479db79a4eaf01"}, + {file = "frozenlist-1.4.0-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:e74b0506fa5aa5598ac6a975a12aa8928cbb58e1f5ac8360792ef15de1aa848f"}, + {file = 
"frozenlist-1.4.0-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:490132667476f6781b4c9458298b0c1cddf237488abd228b0b3650e5ecba7467"}, + {file = "frozenlist-1.4.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:76d4711f6f6d08551a7e9ef28c722f4a50dd0fc204c56b4bcd95c6cc05ce6fbb"}, + {file = "frozenlist-1.4.0-cp311-cp311-win32.whl", hash = "sha256:a02eb8ab2b8f200179b5f62b59757685ae9987996ae549ccf30f983f40602431"}, + {file = "frozenlist-1.4.0-cp311-cp311-win_amd64.whl", hash = "sha256:515e1abc578dd3b275d6a5114030b1330ba044ffba03f94091842852f806f1c1"}, + {file = "frozenlist-1.4.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:f0ed05f5079c708fe74bf9027e95125334b6978bf07fd5ab923e9e55e5fbb9d3"}, + {file = "frozenlist-1.4.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:ca265542ca427bf97aed183c1676e2a9c66942e822b14dc6e5f42e038f92a503"}, + {file = "frozenlist-1.4.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:491e014f5c43656da08958808588cc6c016847b4360e327a62cb308c791bd2d9"}, + {file = "frozenlist-1.4.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:17ae5cd0f333f94f2e03aaf140bb762c64783935cc764ff9c82dff626089bebf"}, + {file = "frozenlist-1.4.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1e78fb68cf9c1a6aa4a9a12e960a5c9dfbdb89b3695197aa7064705662515de2"}, + {file = "frozenlist-1.4.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d5655a942f5f5d2c9ed93d72148226d75369b4f6952680211972a33e59b1dfdc"}, + {file = "frozenlist-1.4.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c11b0746f5d946fecf750428a95f3e9ebe792c1ee3b1e96eeba145dc631a9672"}, + {file = "frozenlist-1.4.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e66d2a64d44d50d2543405fb183a21f76b3b5fd16f130f5c99187c3fb4e64919"}, + {file = "frozenlist-1.4.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:88f7bc0fcca81f985f78dd0fa68d2c75abf8272b1f5c323ea4a01a4d7a614efc"}, + {file = "frozenlist-1.4.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:5833593c25ac59ede40ed4de6d67eb42928cca97f26feea219f21d0ed0959b79"}, + {file = "frozenlist-1.4.0-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:fec520865f42e5c7f050c2a79038897b1c7d1595e907a9e08e3353293ffc948e"}, + {file = "frozenlist-1.4.0-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:b826d97e4276750beca7c8f0f1a4938892697a6bcd8ec8217b3312dad6982781"}, + {file = "frozenlist-1.4.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:ceb6ec0a10c65540421e20ebd29083c50e6d1143278746a4ef6bcf6153171eb8"}, + {file = "frozenlist-1.4.0-cp38-cp38-win32.whl", hash = "sha256:2b8bcf994563466db019fab287ff390fffbfdb4f905fc77bc1c1d604b1c689cc"}, + {file = "frozenlist-1.4.0-cp38-cp38-win_amd64.whl", hash = "sha256:a6c8097e01886188e5be3e6b14e94ab365f384736aa1fca6a0b9e35bd4a30bc7"}, + {file = "frozenlist-1.4.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:6c38721585f285203e4b4132a352eb3daa19121a035f3182e08e437cface44bf"}, + {file = "frozenlist-1.4.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:a0c6da9aee33ff0b1a451e867da0c1f47408112b3391dd43133838339e410963"}, + {file = "frozenlist-1.4.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:93ea75c050c5bb3d98016b4ba2497851eadf0ac154d88a67d7a6816206f6fa7f"}, + {file = "frozenlist-1.4.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f61e2dc5ad442c52b4887f1fdc112f97caeff4d9e6ebe78879364ac59f1663e1"}, + 
{file = "frozenlist-1.4.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:aa384489fefeb62321b238e64c07ef48398fe80f9e1e6afeff22e140e0850eef"}, + {file = "frozenlist-1.4.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:10ff5faaa22786315ef57097a279b833ecab1a0bfb07d604c9cbb1c4cdc2ed87"}, + {file = "frozenlist-1.4.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:007df07a6e3eb3e33e9a1fe6a9db7af152bbd8a185f9aaa6ece10a3529e3e1c6"}, + {file = "frozenlist-1.4.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7f4f399d28478d1f604c2ff9119907af9726aed73680e5ed1ca634d377abb087"}, + {file = "frozenlist-1.4.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:c5374b80521d3d3f2ec5572e05adc94601985cc526fb276d0c8574a6d749f1b3"}, + {file = "frozenlist-1.4.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:ce31ae3e19f3c902de379cf1323d90c649425b86de7bbdf82871b8a2a0615f3d"}, + {file = "frozenlist-1.4.0-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:7211ef110a9194b6042449431e08c4d80c0481e5891e58d429df5899690511c2"}, + {file = "frozenlist-1.4.0-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:556de4430ce324c836789fa4560ca62d1591d2538b8ceb0b4f68fb7b2384a27a"}, + {file = "frozenlist-1.4.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:7645a8e814a3ee34a89c4a372011dcd817964ce8cb273c8ed6119d706e9613e3"}, + {file = "frozenlist-1.4.0-cp39-cp39-win32.whl", hash = "sha256:19488c57c12d4e8095a922f328df3f179c820c212940a498623ed39160bc3c2f"}, + {file = "frozenlist-1.4.0-cp39-cp39-win_amd64.whl", hash = "sha256:6221d84d463fb110bdd7619b69cb43878a11d51cbb9394ae3105d082d5199167"}, + {file = "frozenlist-1.4.0.tar.gz", hash = "sha256:09163bdf0b2907454042edb19f887c6d33806adc71fbd54afc14908bfdc22251"}, ] [[package]] @@ -1534,13 +1550,13 @@ smmap = ">=3.0.1,<6" [[package]] name = "gitpython" -version = "3.1.31" +version = "3.1.32" description = "GitPython is a Python library used to interact with Git repositories" optional = false python-versions = ">=3.7" files = [ - {file = "GitPython-3.1.31-py3-none-any.whl", hash = "sha256:f04893614f6aa713a60cbbe1e6a97403ef633103cdd0ef5eb6efe0deb98dbe8d"}, - {file = "GitPython-3.1.31.tar.gz", hash = "sha256:8ce3bcf69adfdf7c7d503e78fd3b1c492af782d58893b650adb2ac8912ddd573"}, + {file = "GitPython-3.1.32-py3-none-any.whl", hash = "sha256:e3d59b1c2c6ebb9dfa7a184daf3b6dd4914237e7488a1730a6d8f6f5d0b4187f"}, + {file = "GitPython-3.1.32.tar.gz", hash = "sha256:8d9b8cb1e80b9735e8717c9362079d3ce4c6e5ddeebedd0361b228c3a67a62f6"}, ] [package.dependencies] @@ -1620,13 +1636,13 @@ socks = ["socksio (==1.*)"] [[package]] name = "huggingface-hub" -version = "0.15.1" +version = "0.16.4" description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub" optional = false python-versions = ">=3.7.0" files = [ - {file = "huggingface_hub-0.15.1-py3-none-any.whl", hash = "sha256:05b0fb0abbf1f625dfee864648ac3049fe225ac4371c7bafaca0c2d3a2f83445"}, - {file = "huggingface_hub-0.15.1.tar.gz", hash = "sha256:a61b7d1a7769fe10119e730277c72ab99d95c48d86a3d6da3e9f3d0f632a4081"}, + {file = "huggingface_hub-0.16.4-py3-none-any.whl", hash = "sha256:0d3df29932f334fead024afc7cb4cc5149d955238b8b5e42dcf9740d6995a349"}, + {file = "huggingface_hub-0.16.4.tar.gz", hash = "sha256:608c7d4f3d368b326d1747f91523dbd1f692871e8e2e7a4750314a2dd8b63e14"}, ] [package.dependencies] @@ -1639,15 +1655,16 @@ tqdm = 
">=4.42.1" typing-extensions = ">=3.7.4.3" [package.extras] -all = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "black (>=23.1,<24.0)", "gradio", "jedi", "mypy (==0.982)", "numpy", "pytest", "pytest-cov", "pytest-env", "pytest-vcr", "pytest-xdist", "ruff (>=0.0.241)", "soundfile", "types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3", "urllib3 (<2.0)"] +all = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "black (>=23.1,<24.0)", "gradio", "jedi", "mypy (==0.982)", "numpy", "pydantic", "pytest", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-vcr", "pytest-xdist", "ruff (>=0.0.241)", "soundfile", "types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3", "urllib3 (<2.0)"] cli = ["InquirerPy (==0.3.4)"] -dev = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "black (>=23.1,<24.0)", "gradio", "jedi", "mypy (==0.982)", "numpy", "pytest", "pytest-cov", "pytest-env", "pytest-vcr", "pytest-xdist", "ruff (>=0.0.241)", "soundfile", "types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3", "urllib3 (<2.0)"] +dev = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "black (>=23.1,<24.0)", "gradio", "jedi", "mypy (==0.982)", "numpy", "pydantic", "pytest", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-vcr", "pytest-xdist", "ruff (>=0.0.241)", "soundfile", "types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3", "urllib3 (<2.0)"] fastai = ["fastai (>=2.4)", "fastcore (>=1.3.27)", "toml"] +inference = ["aiohttp", "pydantic"] quality = ["black (>=23.1,<24.0)", "mypy (==0.982)", "ruff (>=0.0.241)"] tensorflow = ["graphviz", "pydot", "tensorflow"] -testing = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "gradio", "jedi", "numpy", "pytest", "pytest-cov", "pytest-env", "pytest-vcr", "pytest-xdist", "soundfile", "urllib3 (<2.0)"] +testing = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "gradio", "jedi", "numpy", "pydantic", "pytest", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-vcr", "pytest-xdist", "soundfile", "urllib3 (<2.0)"] torch = ["torch"] -typing = ["types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3"] +typing = ["pydantic", "types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3"] [[package]] name = "hydra-core" @@ -1769,13 +1786,13 @@ files = [ [[package]] name = "joblib" -version = "1.2.0" +version = "1.3.1" description = "Lightweight pipelining with Python functions" optional = false python-versions = ">=3.7" files = [ - {file = "joblib-1.2.0-py3-none-any.whl", hash = "sha256:091138ed78f800342968c523bdde947e7a305b8594b910a0fea2ab83c3c6d385"}, - {file = "joblib-1.2.0.tar.gz", hash = "sha256:e1cee4a79e4af22881164f218d4311f60074197fb707e082e803b61f6d137018"}, + {file = "joblib-1.3.1-py3-none-any.whl", hash = "sha256:89cf0529520e01b3de7ac7b74a8102c90d16d54c64b5dd98cafcd14307fdf915"}, + {file = "joblib-1.3.1.tar.gz", hash = "sha256:1f937906df65329ba98013dc9692fe22a4c5e4a648112de500508b18a21b41e3"}, ] [[package]] @@ -1845,6 +1862,24 @@ files = [ {file = "lit-16.0.6.tar.gz", hash = "sha256:84623c9c23b6b14763d637f4e63e6b721b3446ada40bf7001d8fee70b8e77a9a"}, ] +[[package]] +name = "loguru" +version = "0.7.0" +description = "Python logging made (stupidly) simple" +optional = false +python-versions = ">=3.5" +files = [ + {file = "loguru-0.7.0-py3-none-any.whl", hash = 
"sha256:b93aa30099fa6860d4727f1b81f8718e965bb96253fa190fab2077aaad6d15d3"}, + {file = "loguru-0.7.0.tar.gz", hash = "sha256:1612053ced6ae84d7959dd7d5e431a0532642237ec21f7fd83ac73fe539e03e1"}, +] + +[package.dependencies] +colorama = {version = ">=0.3.4", markers = "sys_platform == \"win32\""} +win32-setctime = {version = ">=1.0.0", markers = "sys_platform == \"win32\""} + +[package.extras] +dev = ["Sphinx (==5.3.0)", "colorama (==0.4.5)", "colorama (==0.4.6)", "freezegun (==1.1.0)", "freezegun (==1.2.2)", "mypy (==v0.910)", "mypy (==v0.971)", "mypy (==v0.990)", "pre-commit (==3.2.1)", "pytest (==6.1.2)", "pytest (==7.2.1)", "pytest-cov (==2.12.1)", "pytest-cov (==4.0.0)", "pytest-mypy-plugins (==1.10.1)", "pytest-mypy-plugins (==1.9.3)", "sphinx-autobuild (==2021.3.14)", "sphinx-rtd-theme (==1.2.0)", "tox (==3.27.1)", "tox (==4.4.6)"] + [[package]] name = "markupsafe" version = "2.1.3" @@ -2146,59 +2181,97 @@ files = [ antlr4-python3-runtime = "==4.9.*" PyYAML = ">=5.1.0" +[[package]] +name = "openai" +version = "0.27.8" +description = "Python client library for the OpenAI API" +optional = false +python-versions = ">=3.7.1" +files = [ + {file = "openai-0.27.8-py3-none-any.whl", hash = "sha256:e0a7c2f7da26bdbe5354b03c6d4b82a2f34bd4458c7a17ae1a7092c3e397e03c"}, + {file = "openai-0.27.8.tar.gz", hash = "sha256:2483095c7db1eee274cebac79e315a986c4e55207bb4fa7b82d185b3a2ed9536"}, +] + +[package.dependencies] +aiohttp = "*" +requests = ">=2.20" +tqdm = "*" + +[package.extras] +datalib = ["numpy", "openpyxl (>=3.0.7)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)"] +dev = ["black (>=21.6b0,<22.0)", "pytest (==6.*)", "pytest-asyncio", "pytest-mock"] +embeddings = ["matplotlib", "numpy", "openpyxl (>=3.0.7)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)", "plotly", "scikit-learn (>=1.0.2)", "scipy", "tenacity (>=8.0.1)"] +wandb = ["numpy", "openpyxl (>=3.0.7)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)", "wandb"] + +[[package]] +name = "openai-multi-client" +version = "0.1.1" +description = "A parallel client for OpenAI API (and more)" +optional = false +python-versions = ">=3.7" +files = [ + {file = "openai_multi_client-0.1.1-py3-none-any.whl", hash = "sha256:894bb336ea63cf555693a80988e830a2c4dfbe669ab2076100073a854c7d7835"}, + {file = "openai_multi_client-0.1.1.tar.gz", hash = "sha256:0433b7fa717f522ab403ba4b689ff09220e843aa2c2c1251a6f9f14545600e49"}, +] + +[package.dependencies] +aioprocessing = "*" +openai = "*" +tenacity = "*" + [[package]] name = "orjson" -version = "3.9.1" +version = "3.9.2" description = "Fast, correct Python JSON library supporting dataclasses, datetimes, and numpy" optional = false python-versions = ">=3.7" files = [ - {file = "orjson-3.9.1-cp310-cp310-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:c4434b7b786fdc394b95d029fb99949d7c2b05bbd4bf5cb5e3906be96ffeee3b"}, - {file = "orjson-3.9.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:09faf14f74ed47e773fa56833be118e04aa534956f661eb491522970b7478e3b"}, - {file = "orjson-3.9.1-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:503eb86a8d53a187fe66aa80c69295a3ca35475804da89a9547e4fce5f803822"}, - {file = "orjson-3.9.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:20f2804b5a1dbd3609c086041bd243519224d47716efd7429db6c03ed28b7cc3"}, - {file = "orjson-3.9.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0fd828e0656615a711c4cc4da70f3cac142e66a6703ba876c20156a14e28e3fa"}, - 
{file = "orjson-3.9.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ec53d648176f873203b9c700a0abacab33ca1ab595066e9d616f98cdc56f4434"}, - {file = "orjson-3.9.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:e186ae76b0d97c505500664193ddf508c13c1e675d9b25f1f4414a7606100da6"}, - {file = "orjson-3.9.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:d4edee78503016f4df30aeede0d999b3cb11fb56f47e9db0e487bce0aaca9285"}, - {file = "orjson-3.9.1-cp310-none-win_amd64.whl", hash = "sha256:a4cc5d21e68af982d9a2528ac61e604f092c60eed27aef3324969c68f182ec7e"}, - {file = "orjson-3.9.1-cp311-cp311-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:761b6efd33c49de20dd73ce64cc59da62c0dab10aa6015f582680e0663cc792c"}, - {file = "orjson-3.9.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:31229f9d0b8dc2ef7ee7e4393f2e4433a28e16582d4b25afbfccc9d68dc768f8"}, - {file = "orjson-3.9.1-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:0b7ab18d55ecb1de543d452f0a5f8094b52282b916aa4097ac11a4c79f317b86"}, - {file = "orjson-3.9.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:db774344c39041f4801c7dfe03483df9203cbd6c84e601a65908e5552228dd25"}, - {file = "orjson-3.9.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ae47ef8c0fe89c4677db7e9e1fb2093ca6e66c3acbee5442d84d74e727edad5e"}, - {file = "orjson-3.9.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:103952c21575b9805803c98add2eaecd005580a1e746292ed2ec0d76dd3b9746"}, - {file = "orjson-3.9.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:2cb0121e6f2c9da3eddf049b99b95fef0adf8480ea7cb544ce858706cdf916eb"}, - {file = "orjson-3.9.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:24d4ddaa2876e657c0fd32902b5c451fd2afc35159d66a58da7837357044b8c2"}, - {file = "orjson-3.9.1-cp311-none-win_amd64.whl", hash = "sha256:0b53b5f72cf536dd8aa4fc4c95e7e09a7adb119f8ff8ee6cc60f735d7740ad6a"}, - {file = "orjson-3.9.1-cp37-cp37m-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:d4b68d01a506242316a07f1d2f29fb0a8b36cee30a7c35076f1ef59dce0890c1"}, - {file = "orjson-3.9.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d9dd4abe6c6fd352f00f4246d85228f6a9847d0cc14f4d54ee553718c225388f"}, - {file = "orjson-3.9.1-cp37-cp37m-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:9e20bca5e13041e31ceba7a09bf142e6d63c8a7467f5a9c974f8c13377c75af2"}, - {file = "orjson-3.9.1-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d8ae0467d01eb1e4bcffef4486d964bfd1c2e608103e75f7074ed34be5df48cc"}, - {file = "orjson-3.9.1-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:06f6ab4697fab090517f295915318763a97a12ee8186054adf21c1e6f6abbd3d"}, - {file = "orjson-3.9.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8515867713301fa065c58ec4c9053ba1a22c35113ab4acad555317b8fd802e50"}, - {file = "orjson-3.9.1-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:393d0697d1dfa18d27d193e980c04fdfb672c87f7765b87952f550521e21b627"}, - {file = "orjson-3.9.1-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:d96747662d3666f79119e5d28c124e7d356c7dc195cd4b09faea4031c9079dc9"}, - {file = "orjson-3.9.1-cp37-none-win_amd64.whl", hash = "sha256:6d173d3921dd58a068c88ec22baea7dbc87a137411501618b1292a9d6252318e"}, - {file = 
"orjson-3.9.1-cp38-cp38-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:d1c2b0b4246c992ce2529fc610a446b945f1429445ece1c1f826a234c829a918"}, - {file = "orjson-3.9.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:19f70ba1f441e1c4bb1a581f0baa092e8b3e3ce5b2aac2e1e090f0ac097966da"}, - {file = "orjson-3.9.1-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:375d65f002e686212aac42680aed044872c45ee4bc656cf63d4a215137a6124a"}, - {file = "orjson-3.9.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4751cee4a7b1daeacb90a7f5adf2170ccab893c3ab7c5cea58b45a13f89b30b3"}, - {file = "orjson-3.9.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:78d9a2a4b2302d5ebc3695498ebc305c3568e5ad4f3501eb30a6405a32d8af22"}, - {file = "orjson-3.9.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:46b4facc32643b2689dfc292c0c463985dac4b6ab504799cf51fc3c6959ed668"}, - {file = "orjson-3.9.1-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:ec7c8a0f1bf35da0d5fd14f8956f3b82a9a6918a3c6963d718dfd414d6d3b604"}, - {file = "orjson-3.9.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:d3a40b0fbe06ccd4d6a99e523d20b47985655bcada8d1eba485b1b32a43e4904"}, - {file = "orjson-3.9.1-cp38-none-win_amd64.whl", hash = "sha256:402f9d3edfec4560a98880224ec10eba4c5f7b4791e4bc0d4f4d8df5faf2a006"}, - {file = "orjson-3.9.1-cp39-cp39-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:49c0d78dcd34626e2e934f1192d7c052b94e0ecadc5f386fd2bda6d2e03dadf5"}, - {file = "orjson-3.9.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:125f63e56d38393daa0a1a6dc6fedefca16c538614b66ea5997c3bd3af35ef26"}, - {file = "orjson-3.9.1-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:08927970365d2e1f3ce4894f9ff928a7b865d53f26768f1bbdd85dd4fee3e966"}, - {file = "orjson-3.9.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f9a744e212d4780ecd67f4b6b128b2e727bee1df03e7059cddb2dfe1083e7dc4"}, - {file = "orjson-3.9.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5d1dbf36db7240c61eec98c8d21545d671bce70be0730deb2c0d772e06b71af3"}, - {file = "orjson-3.9.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:80a1e384626f76b66df615f7bb622a79a25c166d08c5d2151ffd41f24c4cc104"}, - {file = "orjson-3.9.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:15d28872fb055bf17ffca913826e618af61b2f689d2b170f72ecae1a86f80d52"}, - {file = "orjson-3.9.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:1e4d905338f9ef32c67566929dfbfbb23cc80287af8a2c38930fb0eda3d40b76"}, - {file = "orjson-3.9.1-cp39-none-win_amd64.whl", hash = "sha256:48a27da6c7306965846565cc385611d03382bbd84120008653aa2f6741e2105d"}, - {file = "orjson-3.9.1.tar.gz", hash = "sha256:db373a25ec4a4fccf8186f9a72a1b3442837e40807a736a815ab42481e83b7d0"}, + {file = "orjson-3.9.2-cp310-cp310-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:7323e4ca8322b1ecb87562f1ec2491831c086d9faa9a6c6503f489dadbed37d7"}, + {file = "orjson-3.9.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1272688ea1865f711b01ba479dea2d53e037ea00892fd04196b5875f7021d9d3"}, + {file = "orjson-3.9.2-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:0b9a26f1d1427a9101a1e8910f2e2df1f44d3d18ad5480ba031b15d5c1cb282e"}, + {file = 
"orjson-3.9.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6a5ca55b0d8f25f18b471e34abaee4b175924b6cd62f59992945b25963443141"}, + {file = "orjson-3.9.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:877872db2c0f41fbe21f852ff642ca842a43bc34895b70f71c9d575df31fffb4"}, + {file = "orjson-3.9.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4a39c2529d75373b7167bf84c814ef9b8f3737a339c225ed6c0df40736df8748"}, + {file = "orjson-3.9.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:84ebd6fdf138eb0eb4280045442331ee71c0aab5e16397ba6645f32f911bfb37"}, + {file = "orjson-3.9.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:5a60a1cfcfe310547a1946506dd4f1ed0a7d5bd5b02c8697d9d5dcd8d2e9245e"}, + {file = "orjson-3.9.2-cp310-none-win_amd64.whl", hash = "sha256:c290c4f81e8fd0c1683638802c11610b2f722b540f8e5e858b6914b495cf90c8"}, + {file = "orjson-3.9.2-cp311-cp311-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:02ef014f9a605e84b675060785e37ec9c0d2347a04f1307a9d6840ab8ecd6f55"}, + {file = "orjson-3.9.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:992af54265ada1c1579500d6594ed73fe333e726de70d64919cf37f93defdd06"}, + {file = "orjson-3.9.2-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:a40958f7af7c6d992ee67b2da4098dca8b770fc3b4b3834d540477788bfa76d3"}, + {file = "orjson-3.9.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:93864dec3e3dd058a2dbe488d11ac0345214a6a12697f53a63e34de7d28d4257"}, + {file = "orjson-3.9.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:16fdf5a82df80c544c3c91516ab3882cd1ac4f1f84eefeafa642e05cef5f6699"}, + {file = "orjson-3.9.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:275b5a18fd9ed60b2720543d3ddac170051c43d680e47d04ff5203d2c6d8ebf1"}, + {file = "orjson-3.9.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:b9aea6dcb99fcbc9f6d1dd84fca92322fda261da7fb014514bb4689c7c2097a8"}, + {file = "orjson-3.9.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:7d74ae0e101d17c22ef67b741ba356ab896fc0fa64b301c2bf2bb0a4d874b190"}, + {file = "orjson-3.9.2-cp311-none-win_amd64.whl", hash = "sha256:6320b28e7bdb58c3a3a5efffe04b9edad3318d82409e84670a9b24e8035a249d"}, + {file = "orjson-3.9.2-cp37-cp37m-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:368e9cc91ecb7ac21f2aa475e1901204110cf3e714e98649c2502227d248f947"}, + {file = "orjson-3.9.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:58e9e70f0dcd6a802c35887f306b555ff7a214840aad7de24901fc8bd9cf5dde"}, + {file = "orjson-3.9.2-cp37-cp37m-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:00c983896c2e01c94c0ef72fd7373b2aa06d0c0eed0342c4884559f812a6835b"}, + {file = "orjson-3.9.2-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2ee743e8890b16c87a2f89733f983370672272b61ee77429c0a5899b2c98c1a7"}, + {file = "orjson-3.9.2-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b7b065942d362aad4818ff599d2f104c35a565c2cbcbab8c09ec49edba91da75"}, + {file = "orjson-3.9.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e46e9c5b404bb9e41d5555762fd410d5466b7eb1ec170ad1b1609cbebe71df21"}, + {file = "orjson-3.9.2-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:8170157288714678ffd64f5de33039e1164a73fd8b6be40a8a273f80093f5c4f"}, + {file 
= "orjson-3.9.2-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:e3e2f087161947dafe8319ea2cfcb9cea4bb9d2172ecc60ac3c9738f72ef2909"}, + {file = "orjson-3.9.2-cp37-none-win_amd64.whl", hash = "sha256:d7de3dbbe74109ae598692113cec327fd30c5a30ebca819b21dfa4052f7b08ef"}, + {file = "orjson-3.9.2-cp38-cp38-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:8cd4385c59bbc1433cad4a80aca65d2d9039646a9c57f8084897549b55913b17"}, + {file = "orjson-3.9.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a74036aab1a80c361039290cdbc51aa7adc7ea13f56e5ef94e9be536abd227bd"}, + {file = "orjson-3.9.2-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:1aaa46d7d4ae55335f635eadc9be0bd9bcf742e6757209fc6dc697e390010adc"}, + {file = "orjson-3.9.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2e52c67ed6bb368083aa2078ea3ccbd9721920b93d4b06c43eb4e20c4c860046"}, + {file = "orjson-3.9.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1a6cdfcf9c7dd4026b2b01fdff56986251dc0cc1e980c690c79eec3ae07b36e7"}, + {file = "orjson-3.9.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1882a70bb69595b9ec5aac0040a819e94d2833fe54901e2b32f5e734bc259a8b"}, + {file = "orjson-3.9.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:fc05e060d452145ab3c0b5420769e7356050ea311fc03cb9d79c481982917cca"}, + {file = "orjson-3.9.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:f8bc2c40d9bb26efefb10949d261a47ca196772c308babc538dd9f4b73e8d386"}, + {file = "orjson-3.9.2-cp38-none-win_amd64.whl", hash = "sha256:3164fc20a585ec30a9aff33ad5de3b20ce85702b2b2a456852c413e3f0d7ab09"}, + {file = "orjson-3.9.2-cp39-cp39-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:7a6ccadf788531595ed4728aa746bc271955448d2460ff0ef8e21eb3f2a281ba"}, + {file = "orjson-3.9.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3245d230370f571c945f69aab823c279a868dc877352817e22e551de155cb06c"}, + {file = "orjson-3.9.2-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:205925b179550a4ee39b8418dd4c94ad6b777d165d7d22614771c771d44f57bd"}, + {file = "orjson-3.9.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0325fe2d69512187761f7368c8cda1959bcb75fc56b8e7a884e9569112320e57"}, + {file = "orjson-3.9.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:806704cd58708acc66a064a9a58e3be25cf1c3f9f159e8757bd3f515bfabdfa1"}, + {file = "orjson-3.9.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:03fb36f187a0c19ff38f6289418863df8b9b7880cdbe279e920bef3a09d8dab1"}, + {file = "orjson-3.9.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:20925d07a97c49c6305bff1635318d9fc1804aa4ccacb5fb0deb8a910e57d97a"}, + {file = "orjson-3.9.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:eebfed53bec5674e981ebe8ed2cf00b3f7bcda62d634733ff779c264307ea505"}, + {file = "orjson-3.9.2-cp39-none-win_amd64.whl", hash = "sha256:869b961df5fcedf6c79f4096119b35679b63272362e9b745e668f0391a892d39"}, + {file = "orjson-3.9.2.tar.gz", hash = "sha256:24257c8f641979bf25ecd3e27251b5cc194cdd3a6e96004aac8446f5e63d9664"}, ] [[package]] @@ -2270,15 +2343,25 @@ files = [ {file = "pathspec-0.11.1.tar.gz", hash = "sha256:2798de800fa92780e33acca925945e9a19a133b715067cf165b8866c15a31687"}, ] +[[package]] +name = "pathtools" +version = "0.1.2" +description = "File system general utilities" +optional = false 
+python-versions = "*" +files = [ + {file = "pathtools-0.1.2.tar.gz", hash = "sha256:7c35c5421a39bb82e58018febd90e3b6e5db34c5443aaaf742b3f33d4655f1c0"}, +] + [[package]] name = "platformdirs" -version = "3.6.0" +version = "3.9.1" description = "A small Python package for determining appropriate platform-specific dirs, e.g. a \"user data dir\"." optional = false python-versions = ">=3.7" files = [ - {file = "platformdirs-3.6.0-py3-none-any.whl", hash = "sha256:ffa199e3fbab8365778c4a10e1fbf1b9cd50707de826eb304b50e57ec0cc8d38"}, - {file = "platformdirs-3.6.0.tar.gz", hash = "sha256:57e28820ca8094678b807ff529196506d7a21e17156cb1cddb3e74cebce54640"}, + {file = "platformdirs-3.9.1-py3-none-any.whl", hash = "sha256:ad8291ae0ae5072f66c16945166cb11c63394c7a3ad1b1bc9828ca3162da8c2f"}, + {file = "platformdirs-3.9.1.tar.gz", hash = "sha256:1b42b450ad933e981d56e59f1b97495428c9bd60698baab9f3eb3d00d5822421"}, ] [package.extras] @@ -2287,13 +2370,13 @@ test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=7.3.1)", "pytest- [[package]] name = "pluggy" -version = "1.1.0" +version = "1.2.0" description = "plugin and hook calling mechanisms for python" optional = false python-versions = ">=3.7" files = [ - {file = "pluggy-1.1.0-py3-none-any.whl", hash = "sha256:d81d19a3a88d82ed06998353ce5d5c02587ef07ee2d808ae63904ab0ccef0087"}, - {file = "pluggy-1.1.0.tar.gz", hash = "sha256:c500b592c5512df35622e4faf2135aa0b7e989c7d31344194b4afb9d5e47b1bf"}, + {file = "pluggy-1.2.0-py3-none-any.whl", hash = "sha256:c2fd55a7d7a3863cba1a013e4e2414658b1d07b6bc57b3919e0c63c9abb99849"}, + {file = "pluggy-1.2.0.tar.gz", hash = "sha256:d12f0c4b579b15f5e054301bb226ee85eeeba08ffec228092f8defbaa3a4c4b3"}, ] [package.extras] @@ -2320,18 +2403,40 @@ virtualenv = ">=20.10.0" [[package]] name = "prompt-toolkit" -version = "3.0.38" +version = "3.0.39" description = "Library for building powerful interactive command lines in Python" optional = false python-versions = ">=3.7.0" files = [ - {file = "prompt_toolkit-3.0.38-py3-none-any.whl", hash = "sha256:45ea77a2f7c60418850331366c81cf6b5b9cf4c7fd34616f733c5427e6abbb1f"}, - {file = "prompt_toolkit-3.0.38.tar.gz", hash = "sha256:23ac5d50538a9a38c8bde05fecb47d0b403ecd0662857a86f886f798563d5b9b"}, + {file = "prompt_toolkit-3.0.39-py3-none-any.whl", hash = "sha256:9dffbe1d8acf91e3de75f3b544e4842382fc06c6babe903ac9acb74dc6e08d88"}, + {file = "prompt_toolkit-3.0.39.tar.gz", hash = "sha256:04505ade687dc26dc4284b1ad19a83be2f2afe83e7a828ace0c72f3a1df72aac"}, ] [package.dependencies] wcwidth = "*" +[[package]] +name = "protobuf" +version = "4.23.4" +description = "" +optional = false +python-versions = ">=3.7" +files = [ + {file = "protobuf-4.23.4-cp310-abi3-win32.whl", hash = "sha256:5fea3c64d41ea5ecf5697b83e41d09b9589e6f20b677ab3c48e5f242d9b7897b"}, + {file = "protobuf-4.23.4-cp310-abi3-win_amd64.whl", hash = "sha256:7b19b6266d92ca6a2a87effa88ecc4af73ebc5cfde194dc737cf8ef23a9a3b12"}, + {file = "protobuf-4.23.4-cp37-abi3-macosx_10_9_universal2.whl", hash = "sha256:8547bf44fe8cec3c69e3042f5c4fb3e36eb2a7a013bb0a44c018fc1e427aafbd"}, + {file = "protobuf-4.23.4-cp37-abi3-manylinux2014_aarch64.whl", hash = "sha256:fee88269a090ada09ca63551bf2f573eb2424035bcf2cb1b121895b01a46594a"}, + {file = "protobuf-4.23.4-cp37-abi3-manylinux2014_x86_64.whl", hash = "sha256:effeac51ab79332d44fba74660d40ae79985901ac21bca408f8dc335a81aa597"}, + {file = "protobuf-4.23.4-cp37-cp37m-win32.whl", hash = "sha256:c3e0939433c40796ca4cfc0fac08af50b00eb66a40bbbc5dee711998fb0bbc1e"}, + {file = 
"protobuf-4.23.4-cp37-cp37m-win_amd64.whl", hash = "sha256:9053df6df8e5a76c84339ee4a9f5a2661ceee4a0dab019e8663c50ba324208b0"}, + {file = "protobuf-4.23.4-cp38-cp38-win32.whl", hash = "sha256:e1c915778d8ced71e26fcf43c0866d7499891bca14c4368448a82edc61fdbc70"}, + {file = "protobuf-4.23.4-cp38-cp38-win_amd64.whl", hash = "sha256:351cc90f7d10839c480aeb9b870a211e322bf05f6ab3f55fcb2f51331f80a7d2"}, + {file = "protobuf-4.23.4-cp39-cp39-win32.whl", hash = "sha256:6dd9b9940e3f17077e820b75851126615ee38643c2c5332aa7a359988820c720"}, + {file = "protobuf-4.23.4-cp39-cp39-win_amd64.whl", hash = "sha256:0a5759f5696895de8cc913f084e27fd4125e8fb0914bb729a17816a33819f474"}, + {file = "protobuf-4.23.4-py3-none-any.whl", hash = "sha256:e9d0be5bf34b275b9f87ba7407796556abeeba635455d036c7351f7c183ef8ff"}, + {file = "protobuf-4.23.4.tar.gz", hash = "sha256:ccd9430c0719dce806b93f89c91de7977304729e55377f872a92465d548329a9"}, +] + [[package]] name = "psutil" version = "5.9.5" @@ -2408,47 +2513,47 @@ files = [ [[package]] name = "pydantic" -version = "1.10.9" +version = "1.10.12" description = "Data validation and settings management using python type hints" optional = false python-versions = ">=3.7" files = [ - {file = "pydantic-1.10.9-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e692dec4a40bfb40ca530e07805b1208c1de071a18d26af4a2a0d79015b352ca"}, - {file = "pydantic-1.10.9-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:3c52eb595db83e189419bf337b59154bdcca642ee4b2a09e5d7797e41ace783f"}, - {file = "pydantic-1.10.9-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:939328fd539b8d0edf244327398a667b6b140afd3bf7e347cf9813c736211896"}, - {file = "pydantic-1.10.9-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b48d3d634bca23b172f47f2335c617d3fcb4b3ba18481c96b7943a4c634f5c8d"}, - {file = "pydantic-1.10.9-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:f0b7628fb8efe60fe66fd4adadd7ad2304014770cdc1f4934db41fe46cc8825f"}, - {file = "pydantic-1.10.9-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:e1aa5c2410769ca28aa9a7841b80d9d9a1c5f223928ca8bec7e7c9a34d26b1d4"}, - {file = "pydantic-1.10.9-cp310-cp310-win_amd64.whl", hash = "sha256:eec39224b2b2e861259d6f3c8b6290d4e0fbdce147adb797484a42278a1a486f"}, - {file = "pydantic-1.10.9-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:d111a21bbbfd85c17248130deac02bbd9b5e20b303338e0dbe0faa78330e37e0"}, - {file = "pydantic-1.10.9-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2e9aec8627a1a6823fc62fb96480abe3eb10168fd0d859ee3d3b395105ae19a7"}, - {file = "pydantic-1.10.9-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:07293ab08e7b4d3c9d7de4949a0ea571f11e4557d19ea24dd3ae0c524c0c334d"}, - {file = "pydantic-1.10.9-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7ee829b86ce984261d99ff2fd6e88f2230068d96c2a582f29583ed602ef3fc2c"}, - {file = "pydantic-1.10.9-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:4b466a23009ff5cdd7076eb56aca537c745ca491293cc38e72bf1e0e00de5b91"}, - {file = "pydantic-1.10.9-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:7847ca62e581e6088d9000f3c497267868ca2fa89432714e21a4fb33a04d52e8"}, - {file = "pydantic-1.10.9-cp311-cp311-win_amd64.whl", hash = "sha256:7845b31959468bc5b78d7b95ec52fe5be32b55d0d09983a877cca6aedc51068f"}, - {file = "pydantic-1.10.9-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:517a681919bf880ce1dac7e5bc0c3af1e58ba118fd774da2ffcd93c5f96eaece"}, - {file 
= "pydantic-1.10.9-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:67195274fd27780f15c4c372f4ba9a5c02dad6d50647b917b6a92bf00b3d301a"}, - {file = "pydantic-1.10.9-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2196c06484da2b3fded1ab6dbe182bdabeb09f6318b7fdc412609ee2b564c49a"}, - {file = "pydantic-1.10.9-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:6257bb45ad78abacda13f15bde5886efd6bf549dd71085e64b8dcf9919c38b60"}, - {file = "pydantic-1.10.9-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:3283b574b01e8dbc982080d8287c968489d25329a463b29a90d4157de4f2baaf"}, - {file = "pydantic-1.10.9-cp37-cp37m-win_amd64.whl", hash = "sha256:5f8bbaf4013b9a50e8100333cc4e3fa2f81214033e05ac5aa44fa24a98670a29"}, - {file = "pydantic-1.10.9-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:b9cd67fb763248cbe38f0593cd8611bfe4b8ad82acb3bdf2b0898c23415a1f82"}, - {file = "pydantic-1.10.9-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:f50e1764ce9353be67267e7fd0da08349397c7db17a562ad036aa7c8f4adfdb6"}, - {file = "pydantic-1.10.9-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:73ef93e5e1d3c8e83f1ff2e7fdd026d9e063c7e089394869a6e2985696693766"}, - {file = "pydantic-1.10.9-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:128d9453d92e6e81e881dd7e2484e08d8b164da5507f62d06ceecf84bf2e21d3"}, - {file = "pydantic-1.10.9-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:ad428e92ab68798d9326bb3e5515bc927444a3d71a93b4a2ca02a8a5d795c572"}, - {file = "pydantic-1.10.9-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:fab81a92f42d6d525dd47ced310b0c3e10c416bbfae5d59523e63ea22f82b31e"}, - {file = "pydantic-1.10.9-cp38-cp38-win_amd64.whl", hash = "sha256:963671eda0b6ba6926d8fc759e3e10335e1dc1b71ff2a43ed2efd6996634dafb"}, - {file = "pydantic-1.10.9-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:970b1bdc6243ef663ba5c7e36ac9ab1f2bfecb8ad297c9824b542d41a750b298"}, - {file = "pydantic-1.10.9-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:7e1d5290044f620f80cf1c969c542a5468f3656de47b41aa78100c5baa2b8276"}, - {file = "pydantic-1.10.9-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:83fcff3c7df7adff880622a98022626f4f6dbce6639a88a15a3ce0f96466cb60"}, - {file = "pydantic-1.10.9-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0da48717dc9495d3a8f215e0d012599db6b8092db02acac5e0d58a65248ec5bc"}, - {file = "pydantic-1.10.9-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:0a2aabdc73c2a5960e87c3ffebca6ccde88665616d1fd6d3db3178ef427b267a"}, - {file = "pydantic-1.10.9-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:9863b9420d99dfa9c064042304868e8ba08e89081428a1c471858aa2af6f57c4"}, - {file = "pydantic-1.10.9-cp39-cp39-win_amd64.whl", hash = "sha256:e7c9900b43ac14110efa977be3da28931ffc74c27e96ee89fbcaaf0b0fe338e1"}, - {file = "pydantic-1.10.9-py3-none-any.whl", hash = "sha256:6cafde02f6699ce4ff643417d1a9223716ec25e228ddc3b436fe7e2d25a1f305"}, - {file = "pydantic-1.10.9.tar.gz", hash = "sha256:95c70da2cd3b6ddf3b9645ecaa8d98f3d80c606624b6d245558d202cd23ea3be"}, + {file = "pydantic-1.10.12-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:a1fcb59f2f355ec350073af41d927bf83a63b50e640f4dbaa01053a28b7a7718"}, + {file = "pydantic-1.10.12-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:b7ccf02d7eb340b216ec33e53a3a629856afe1c6e0ef91d84a4e6f2fb2ca70fe"}, + {file = 
"pydantic-1.10.12-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8fb2aa3ab3728d950bcc885a2e9eff6c8fc40bc0b7bb434e555c215491bcf48b"}, + {file = "pydantic-1.10.12-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:771735dc43cf8383959dc9b90aa281f0b6092321ca98677c5fb6125a6f56d58d"}, + {file = "pydantic-1.10.12-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:ca48477862372ac3770969b9d75f1bf66131d386dba79506c46d75e6b48c1e09"}, + {file = "pydantic-1.10.12-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:a5e7add47a5b5a40c49b3036d464e3c7802f8ae0d1e66035ea16aa5b7a3923ed"}, + {file = "pydantic-1.10.12-cp310-cp310-win_amd64.whl", hash = "sha256:e4129b528c6baa99a429f97ce733fff478ec955513630e61b49804b6cf9b224a"}, + {file = "pydantic-1.10.12-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b0d191db0f92dfcb1dec210ca244fdae5cbe918c6050b342d619c09d31eea0cc"}, + {file = "pydantic-1.10.12-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:795e34e6cc065f8f498c89b894a3c6da294a936ee71e644e4bd44de048af1405"}, + {file = "pydantic-1.10.12-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:69328e15cfda2c392da4e713443c7dbffa1505bc9d566e71e55abe14c97ddc62"}, + {file = "pydantic-1.10.12-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2031de0967c279df0d8a1c72b4ffc411ecd06bac607a212892757db7462fc494"}, + {file = "pydantic-1.10.12-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:ba5b2e6fe6ca2b7e013398bc7d7b170e21cce322d266ffcd57cca313e54fb246"}, + {file = "pydantic-1.10.12-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:2a7bac939fa326db1ab741c9d7f44c565a1d1e80908b3797f7f81a4f86bc8d33"}, + {file = "pydantic-1.10.12-cp311-cp311-win_amd64.whl", hash = "sha256:87afda5539d5140cb8ba9e8b8c8865cb5b1463924d38490d73d3ccfd80896b3f"}, + {file = "pydantic-1.10.12-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:549a8e3d81df0a85226963611950b12d2d334f214436a19537b2efed61b7639a"}, + {file = "pydantic-1.10.12-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:598da88dfa127b666852bef6d0d796573a8cf5009ffd62104094a4fe39599565"}, + {file = "pydantic-1.10.12-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ba5c4a8552bff16c61882db58544116d021d0b31ee7c66958d14cf386a5b5350"}, + {file = "pydantic-1.10.12-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:c79e6a11a07da7374f46970410b41d5e266f7f38f6a17a9c4823db80dadf4303"}, + {file = "pydantic-1.10.12-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:ab26038b8375581dc832a63c948f261ae0aa21f1d34c1293469f135fa92972a5"}, + {file = "pydantic-1.10.12-cp37-cp37m-win_amd64.whl", hash = "sha256:e0a16d274b588767602b7646fa05af2782576a6cf1022f4ba74cbb4db66f6ca8"}, + {file = "pydantic-1.10.12-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:6a9dfa722316f4acf4460afdf5d41d5246a80e249c7ff475c43a3a1e9d75cf62"}, + {file = "pydantic-1.10.12-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:a73f489aebd0c2121ed974054cb2759af8a9f747de120acd2c3394cf84176ccb"}, + {file = "pydantic-1.10.12-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6b30bcb8cbfccfcf02acb8f1a261143fab622831d9c0989707e0e659f77a18e0"}, + {file = "pydantic-1.10.12-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2fcfb5296d7877af406ba1547dfde9943b1256d8928732267e2653c26938cd9c"}, + {file = 
"pydantic-1.10.12-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:2f9a6fab5f82ada41d56b0602606a5506aab165ca54e52bc4545028382ef1c5d"}, + {file = "pydantic-1.10.12-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:dea7adcc33d5d105896401a1f37d56b47d443a2b2605ff8a969a0ed5543f7e33"}, + {file = "pydantic-1.10.12-cp38-cp38-win_amd64.whl", hash = "sha256:1eb2085c13bce1612da8537b2d90f549c8cbb05c67e8f22854e201bde5d98a47"}, + {file = "pydantic-1.10.12-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:ef6c96b2baa2100ec91a4b428f80d8f28a3c9e53568219b6c298c1125572ebc6"}, + {file = "pydantic-1.10.12-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:6c076be61cd0177a8433c0adcb03475baf4ee91edf5a4e550161ad57fc90f523"}, + {file = "pydantic-1.10.12-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2d5a58feb9a39f481eda4d5ca220aa8b9d4f21a41274760b9bc66bfd72595b86"}, + {file = "pydantic-1.10.12-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e5f805d2d5d0a41633651a73fa4ecdd0b3d7a49de4ec3fadf062fe16501ddbf1"}, + {file = "pydantic-1.10.12-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:1289c180abd4bd4555bb927c42ee42abc3aee02b0fb2d1223fb7c6e5bef87dbe"}, + {file = "pydantic-1.10.12-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:5d1197e462e0364906cbc19681605cb7c036f2475c899b6f296104ad42b9f5fb"}, + {file = "pydantic-1.10.12-cp39-cp39-win_amd64.whl", hash = "sha256:fdbdd1d630195689f325c9ef1a12900524dceb503b00a987663ff4f58669b93d"}, + {file = "pydantic-1.10.12-py3-none-any.whl", hash = "sha256:b749a43aa51e32839c9d71dc67eb1e4221bb04af1033a32e3923d46f9effa942"}, + {file = "pydantic-1.10.12.tar.gz", hash = "sha256:0fe8a415cea8f340e7a9af9c54fc71a649b43e8ca3cc732986116b3cb135d303"}, ] [package.dependencies] @@ -2474,42 +2579,42 @@ pyparsing = ">=2.1.4" [[package]] name = "pygit2" -version = "1.12.1" +version = "1.12.2" description = "Python bindings for libgit2." 
optional = false python-versions = ">=3.8" files = [ - {file = "pygit2-1.12.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:50a155528aa611e4a217be31a9d2d8da283cfd978dbba07494cd04ea3d7c8768"}, - {file = "pygit2-1.12.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:248e22ccb1ea31f569373a3da3fa73d110ba2585c6326ff74b03c9579fb7b913"}, - {file = "pygit2-1.12.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e575e672c5a6cb39234b0076423a560e016d6b88cd50947c2df3bf59c5ccdf3d"}, - {file = "pygit2-1.12.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ad9b46b52997d131b31ff46f699b074e9745c8fea8d0efb6b72ace43ab25828c"}, - {file = "pygit2-1.12.1-cp310-cp310-win32.whl", hash = "sha256:a8f495df877da04c572ecec4d532ae195680b4781dbf229bab4e801fa9ef20e9"}, - {file = "pygit2-1.12.1-cp310-cp310-win_amd64.whl", hash = "sha256:9f1e1355c7fe2938a2bca0d6204a00c02950d13008722879e54a335b3e874006"}, - {file = "pygit2-1.12.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:8a5c56b0b5dc8a317561070ef7557e180d4937d8b115c5a762d85e0109a216f3"}, - {file = "pygit2-1.12.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b7c9ca8bc8a722863fc873234748fef3422007d5a6ea90ba3ae338d2907d3d6e"}, - {file = "pygit2-1.12.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:71c02a11f10bc4e329ab941f0c70874d39053c8f78544aefeb506f04cedb621a"}, - {file = "pygit2-1.12.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2b3af334adf325b7c973417efa220fd5a9ce946b936262eceabc8ad8d46e0310"}, - {file = "pygit2-1.12.1-cp311-cp311-win32.whl", hash = "sha256:86c393962d1341893bbfa91829b3b8545e8ac7622f8b53b9a0b835b9cc1b5198"}, - {file = "pygit2-1.12.1-cp311-cp311-win_amd64.whl", hash = "sha256:86c7e75ddc76f4e5593b47f9c2074fff242322ed9f4126116749f7c86021520a"}, - {file = "pygit2-1.12.1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:939d11677f434024ea25a9137d8a525ef9f9ac474fb8b86399bc9526e6a7bff5"}, - {file = "pygit2-1.12.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:946f9215c0442995042ea512f764f7a6638d3a09f9d0484d3aeedbf8833f89e6"}, - {file = "pygit2-1.12.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fd574620d3cc80df0b23bf2b7b08d8726e75a338d0fa1b67e4d6738d3ee56635"}, - {file = "pygit2-1.12.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:24d0adeff5c43229913f3bdae71c36e77ed19f36bd8dd6b5c141820964b1f5b3"}, - {file = "pygit2-1.12.1-cp38-cp38-win32.whl", hash = "sha256:ed8e2ef97171e994bf4d46c6c6534a3c12dd2dbbc47741e5995eaf8c2c92f71c"}, - {file = "pygit2-1.12.1-cp38-cp38-win_amd64.whl", hash = "sha256:5318817055a3ca3906bf88344b9a6dc70c640f9b6bc236ac9e767d12bad54361"}, - {file = "pygit2-1.12.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:cb9c803151ffeb0b8de52a93381108a2c6a9a446c55d659a135f52645e1650eb"}, - {file = "pygit2-1.12.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:47bf1e196dc23fe38018ad49b021d425edc319328169c597df45d73cf46b62ef"}, - {file = "pygit2-1.12.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:948479df72223bbcd16b2a88904dc2a3886c15a0107a7cf3b5373c8e34f52f31"}, - {file = "pygit2-1.12.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e4bebe8b310edc2662cbffb94ef1a758252fe2e4c92bc83fac0eaf2bedf8b871"}, - {file = "pygit2-1.12.1-cp39-cp39-win32.whl", hash = "sha256:77bc0ab778ab6fe631f5f9eb831b426376a7b71426c5a913aaa9088382ef1dc9"}, - {file = "pygit2-1.12.1-cp39-cp39-win_amd64.whl", 
hash = "sha256:e87b2306a266f6abca94ab37dda807033a6f40faad05c4d1e089f9e8354130a8"}, - {file = "pygit2-1.12.1-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:5d5e8a3b67f5d4ba8e3838c492254688997747989b184b5f1a3af4fef7f9f53e"}, - {file = "pygit2-1.12.1-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2500b749759f2efdfa5096c0aafeb2d92152766708f5700284427bd658e5c407"}, - {file = "pygit2-1.12.1-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c21759ca9cc755faa2d17180cd49af004486ca84f3166cac089a2083dcb09114"}, - {file = "pygit2-1.12.1-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:d73074ab64b383e3a1ab03e8070f6b195ef89b9d379ca5682c38dd9c289cc6e2"}, - {file = "pygit2-1.12.1-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:865c0d1925c52426455317f29c1db718187ec69ed5474faaf3e1c68ff2135767"}, - {file = "pygit2-1.12.1-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1ebebbe9125b068337b5415565ec94c9e092c708e430851b2d02e51217bdce4a"}, - {file = "pygit2-1.12.1.tar.gz", hash = "sha256:8218922abedc88a65d5092308d533ca4c4ed634aec86a3493d3bdf1a25aeeff3"}, + {file = "pygit2-1.12.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:79fbd99d3e08ca7478150eeba28ca4d4103f564148eab8d00aba8f1e6fc60654"}, + {file = "pygit2-1.12.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:be3bb0139f464947523022a5af343a2e862c4ff250a57ec9f631449e7c0ba7c0"}, + {file = "pygit2-1.12.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f4df3e5745fdf3111a6ccc905eae99f22f1a180728f714795138ca540cc2a50a"}, + {file = "pygit2-1.12.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:214bd214784fcbef7a8494d1d59e0cd3a731c0d24ce0f230dcc843322ee33b08"}, + {file = "pygit2-1.12.2-cp310-cp310-win32.whl", hash = "sha256:336c864ac961e7be8ba06e9ed8c999e4f624a8ccd90121cc4e40956d8b57acac"}, + {file = "pygit2-1.12.2-cp310-cp310-win_amd64.whl", hash = "sha256:fb9eb57b75ce586928053692a25aae2a50fef3ad36661c57c07d4902899b1df3"}, + {file = "pygit2-1.12.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:f8f813d35d836c5b0d1962c387754786bcc7f1c3c8e11207b9eeb30238ac4cc7"}, + {file = "pygit2-1.12.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:25a6548930328c5247bfb7c67d29104e63b036cb5390f032d9f91f63efb70434"}, + {file = "pygit2-1.12.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a365ffca23d910381749fdbcc367db52fe808f9aa4852914dd9ef8b711384a32"}, + {file = "pygit2-1.12.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ec04c27be5d5af1ceecdcc0464e07081222f91f285f156dc53b23751d146569a"}, + {file = "pygit2-1.12.2-cp311-cp311-win32.whl", hash = "sha256:546091316c9a8c37b9867ddcc6c9f7402ca4d0b9db3f349212a7b5e71988e359"}, + {file = "pygit2-1.12.2-cp311-cp311-win_amd64.whl", hash = "sha256:8bf14196cbfffbcd286f459a1d4fc660c5d5dfa8fb422e21216961df575410d6"}, + {file = "pygit2-1.12.2-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:7bb30ab1fdaa4c30821fed33892958b6d92d50dbd03c76f7775b4e5d62f53a2e"}, + {file = "pygit2-1.12.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:e7e705aaecad85b883022e81e054fbd27d26023fc031618ee61c51516580517e"}, + {file = "pygit2-1.12.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ac2b5f408eb882e79645ebb43039ac37739c3edd25d857cc97d7482a684b613f"}, + {file = "pygit2-1.12.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:22e7f3ad2b7b0c80be991bb47d8a2f2535cc9bf090746eb8679231ee565fde81"}, + {file = "pygit2-1.12.2-cp38-cp38-win32.whl", hash = "sha256:5b3ab4d6302990f7adb2b015bcbda1f0715277008d0c66440497e6f8313bf9cb"}, + {file = "pygit2-1.12.2-cp38-cp38-win_amd64.whl", hash = "sha256:c74e7601cb8b8dc3d02fd32274e200a7761cffd20ee531442bf1fa115c8f99a5"}, + {file = "pygit2-1.12.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:6a4083ba093c69142e0400114a4ef75e87834637d2bbfd77b964614bf70f624f"}, + {file = "pygit2-1.12.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:926f2e48c4eaa179249d417b8382290b86b0f01dbf41d289f763576209276b9f"}, + {file = "pygit2-1.12.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:14ae27491347a0ac4bbe8347b09d752cfe7fea1121c14525415e0cca6db4a836"}, + {file = "pygit2-1.12.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5f65483ab5e3563c58f60debe2acc0979fdf6fd633432fcfbddf727a9a265ba4"}, + {file = "pygit2-1.12.2-cp39-cp39-win32.whl", hash = "sha256:8da8517809635ea3da950d9cf99c6d1851352d92b6db309382db88a01c3b0bfd"}, + {file = "pygit2-1.12.2-cp39-cp39-win_amd64.whl", hash = "sha256:b9c2359b99eed8e7fac30c06e6b4ae277a6a0537d6b4b88a190828c3d7eb9ef2"}, + {file = "pygit2-1.12.2-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:685378852ef8eb081333bc80dbdfc4f1333cf4a8f3baf614c4135e02ad1ee38a"}, + {file = "pygit2-1.12.2-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cdf655e5f801990f5cad721b6ccbe7610962f0a4f1c20373dbf9c0be39374a81"}, + {file = "pygit2-1.12.2-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:857c5cde635d470f58803d67bfb281dc4f6336065a0253bfbed001f18e2d0767"}, + {file = "pygit2-1.12.2-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:fe35a72af61961dbb7fb4abcdaa36d5f1c85b2cd3daae94137eeb9c07215cdd3"}, + {file = "pygit2-1.12.2-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8f443d3641762b2bb9c76400bb18beb4ba27dd35bc098a8bfae82e6a190c52ab"}, + {file = "pygit2-1.12.2-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5c1e26649e1540b6a774f812e2fc9890320ff4d33f16db1bb02626318b5ceae2"}, + {file = "pygit2-1.12.2.tar.gz", hash = "sha256:56e85d0e66de957d599d1efb2409d39afeefd8f01009bfda0796b42a4b678358"}, ] [package.dependencies] @@ -2556,13 +2661,13 @@ diagrams = ["jinja2", "railroad-diagrams"] [[package]] name = "pytest" -version = "7.3.2" +version = "7.4.0" description = "pytest: simple powerful testing with Python" optional = false python-versions = ">=3.7" files = [ - {file = "pytest-7.3.2-py3-none-any.whl", hash = "sha256:cdcbd012c9312258922f8cd3f1b62a6580fdced17db6014896053d47cddf9295"}, - {file = "pytest-7.3.2.tar.gz", hash = "sha256:ee990a3cc55ba808b80795a79944756f315c67c12b56abd3ac993a7b8c17030b"}, + {file = "pytest-7.4.0-py3-none-any.whl", hash = "sha256:78bf16451a2eb8c7a2ea98e32dc119fd2aa758f1d5d66dbf0a59d69a3969df32"}, + {file = "pytest-7.4.0.tar.gz", hash = "sha256:b4bf8c45bd59934ed84001ad51e11b4ee40d40a1229d2c79f9c592b0a3f6bd8a"}, ] [package.dependencies] @@ -2626,51 +2731,51 @@ files = [ [[package]] name = "pyyaml" -version = "6.0" +version = "6.0.1" description = "YAML parser and emitter for Python" optional = false python-versions = ">=3.6" files = [ - {file = "PyYAML-6.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d4db7c7aef085872ef65a8fd7d6d09a14ae91f691dec3e87ee5ee0539d516f53"}, - {file = "PyYAML-6.0-cp310-cp310-macosx_11_0_arm64.whl", hash = 
"sha256:9df7ed3b3d2e0ecfe09e14741b857df43adb5a3ddadc919a2d94fbdf78fea53c"}, - {file = "PyYAML-6.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:77f396e6ef4c73fdc33a9157446466f1cff553d979bd00ecb64385760c6babdc"}, - {file = "PyYAML-6.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a80a78046a72361de73f8f395f1f1e49f956c6be882eed58505a15f3e430962b"}, - {file = "PyYAML-6.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:f84fbc98b019fef2ee9a1cb3ce93e3187a6df0b2538a651bfb890254ba9f90b5"}, - {file = "PyYAML-6.0-cp310-cp310-win32.whl", hash = "sha256:2cd5df3de48857ed0544b34e2d40e9fac445930039f3cfe4bcc592a1f836d513"}, - {file = "PyYAML-6.0-cp310-cp310-win_amd64.whl", hash = "sha256:daf496c58a8c52083df09b80c860005194014c3698698d1a57cbcfa182142a3a"}, - {file = "PyYAML-6.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:d4b0ba9512519522b118090257be113b9468d804b19d63c71dbcf4a48fa32358"}, - {file = "PyYAML-6.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:81957921f441d50af23654aa6c5e5eaf9b06aba7f0a19c18a538dc7ef291c5a1"}, - {file = "PyYAML-6.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:afa17f5bc4d1b10afd4466fd3a44dc0e245382deca5b3c353d8b757f9e3ecb8d"}, - {file = "PyYAML-6.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:dbad0e9d368bb989f4515da330b88a057617d16b6a8245084f1b05400f24609f"}, - {file = "PyYAML-6.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:432557aa2c09802be39460360ddffd48156e30721f5e8d917f01d31694216782"}, - {file = "PyYAML-6.0-cp311-cp311-win32.whl", hash = "sha256:bfaef573a63ba8923503d27530362590ff4f576c626d86a9fed95822a8255fd7"}, - {file = "PyYAML-6.0-cp311-cp311-win_amd64.whl", hash = "sha256:01b45c0191e6d66c470b6cf1b9531a771a83c1c4208272ead47a3ae4f2f603bf"}, - {file = "PyYAML-6.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:897b80890765f037df3403d22bab41627ca8811ae55e9a722fd0392850ec4d86"}, - {file = "PyYAML-6.0-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:50602afada6d6cbfad699b0c7bb50d5ccffa7e46a3d738092afddc1f9758427f"}, - {file = "PyYAML-6.0-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:48c346915c114f5fdb3ead70312bd042a953a8ce5c7106d5bfb1a5254e47da92"}, - {file = "PyYAML-6.0-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:98c4d36e99714e55cfbaaee6dd5badbc9a1ec339ebfc3b1f52e293aee6bb71a4"}, - {file = "PyYAML-6.0-cp36-cp36m-win32.whl", hash = "sha256:0283c35a6a9fbf047493e3a0ce8d79ef5030852c51e9d911a27badfde0605293"}, - {file = "PyYAML-6.0-cp36-cp36m-win_amd64.whl", hash = "sha256:07751360502caac1c067a8132d150cf3d61339af5691fe9e87803040dbc5db57"}, - {file = "PyYAML-6.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:819b3830a1543db06c4d4b865e70ded25be52a2e0631ccd2f6a47a2822f2fd7c"}, - {file = "PyYAML-6.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:473f9edb243cb1935ab5a084eb238d842fb8f404ed2193a915d1784b5a6b5fc0"}, - {file = "PyYAML-6.0-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0ce82d761c532fe4ec3f87fc45688bdd3a4c1dc5e0b4a19814b9009a29baefd4"}, - {file = "PyYAML-6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:231710d57adfd809ef5d34183b8ed1eeae3f76459c18fb4a0b373ad56bedcdd9"}, - {file = 
"PyYAML-6.0-cp37-cp37m-win32.whl", hash = "sha256:c5687b8d43cf58545ade1fe3e055f70eac7a5a1a0bf42824308d868289a95737"}, - {file = "PyYAML-6.0-cp37-cp37m-win_amd64.whl", hash = "sha256:d15a181d1ecd0d4270dc32edb46f7cb7733c7c508857278d3d378d14d606db2d"}, - {file = "PyYAML-6.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:0b4624f379dab24d3725ffde76559cff63d9ec94e1736b556dacdfebe5ab6d4b"}, - {file = "PyYAML-6.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:213c60cd50106436cc818accf5baa1aba61c0189ff610f64f4a3e8c6726218ba"}, - {file = "PyYAML-6.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9fa600030013c4de8165339db93d182b9431076eb98eb40ee068700c9c813e34"}, - {file = "PyYAML-6.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:277a0ef2981ca40581a47093e9e2d13b3f1fbbeffae064c1d21bfceba2030287"}, - {file = "PyYAML-6.0-cp38-cp38-win32.whl", hash = "sha256:d4eccecf9adf6fbcc6861a38015c2a64f38b9d94838ac1810a9023a0609e1b78"}, - {file = "PyYAML-6.0-cp38-cp38-win_amd64.whl", hash = "sha256:1e4747bc279b4f613a09eb64bba2ba602d8a6664c6ce6396a4d0cd413a50ce07"}, - {file = "PyYAML-6.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:055d937d65826939cb044fc8c9b08889e8c743fdc6a32b33e2390f66013e449b"}, - {file = "PyYAML-6.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:e61ceaab6f49fb8bdfaa0f92c4b57bcfbea54c09277b1b4f7ac376bfb7a7c174"}, - {file = "PyYAML-6.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d67d839ede4ed1b28a4e8909735fc992a923cdb84e618544973d7dfc71540803"}, - {file = "PyYAML-6.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cba8c411ef271aa037d7357a2bc8f9ee8b58b9965831d9e51baf703280dc73d3"}, - {file = "PyYAML-6.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:40527857252b61eacd1d9af500c3337ba8deb8fc298940291486c465c8b46ec0"}, - {file = "PyYAML-6.0-cp39-cp39-win32.whl", hash = "sha256:b5b9eccad747aabaaffbc6064800670f0c297e52c12754eb1d976c57e4f74dcb"}, - {file = "PyYAML-6.0-cp39-cp39-win_amd64.whl", hash = "sha256:b3d267842bf12586ba6c734f89d1f5b871df0273157918b0ccefa29deb05c21c"}, - {file = "PyYAML-6.0.tar.gz", hash = "sha256:68fb519c14306fec9720a2a5b45bc9f0c8d1b9c72adf45c37baedfcd949c35a2"}, + {file = "PyYAML-6.0.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d858aa552c999bc8a8d57426ed01e40bef403cd8ccdd0fc5f6f04a00414cac2a"}, + {file = "PyYAML-6.0.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:fd66fc5d0da6d9815ba2cebeb4205f95818ff4b79c3ebe268e75d961704af52f"}, + {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:69b023b2b4daa7548bcfbd4aa3da05b3a74b772db9e23b982788168117739938"}, + {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:81e0b275a9ecc9c0c0c07b4b90ba548307583c125f54d5b6946cfee6360c733d"}, + {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba336e390cd8e4d1739f42dfe9bb83a3cc2e80f567d8805e11b46f4a943f5515"}, + {file = "PyYAML-6.0.1-cp310-cp310-win32.whl", hash = "sha256:bd4af7373a854424dabd882decdc5579653d7868b8fb26dc7d0e99f823aa5924"}, + {file = "PyYAML-6.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:fd1592b3fdf65fff2ad0004b5e363300ef59ced41c2e6b3a99d4089fa8c5435d"}, + {file = "PyYAML-6.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = 
"sha256:6965a7bc3cf88e5a1c3bd2e0b5c22f8d677dc88a455344035f03399034eb3007"}, + {file = "PyYAML-6.0.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f003ed9ad21d6a4713f0a9b5a7a0a79e08dd0f221aff4525a2be4c346ee60aab"}, + {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:42f8152b8dbc4fe7d96729ec2b99c7097d656dc1213a3229ca5383f973a5ed6d"}, + {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:062582fca9fabdd2c8b54a3ef1c978d786e0f6b3a1510e0ac93ef59e0ddae2bc"}, + {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2b04aac4d386b172d5b9692e2d2da8de7bfb6c387fa4f801fbf6fb2e6ba4673"}, + {file = "PyYAML-6.0.1-cp311-cp311-win32.whl", hash = "sha256:1635fd110e8d85d55237ab316b5b011de701ea0f29d07611174a1b42f1444741"}, + {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, + {file = "PyYAML-6.0.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:50550eb667afee136e9a77d6dc71ae76a44df8b3e51e41b77f6de2932bfe0f47"}, + {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1fe35611261b29bd1de0070f0b2f47cb6ff71fa6595c077e42bd0c419fa27b98"}, + {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:704219a11b772aea0d8ecd7058d0082713c3562b4e271b849ad7dc4a5c90c13c"}, + {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:afd7e57eddb1a54f0f1a974bc4391af8bcce0b444685d936840f125cf046d5bd"}, + {file = "PyYAML-6.0.1-cp36-cp36m-win32.whl", hash = "sha256:fca0e3a251908a499833aa292323f32437106001d436eca0e6e7833256674585"}, + {file = "PyYAML-6.0.1-cp36-cp36m-win_amd64.whl", hash = "sha256:f22ac1c3cac4dbc50079e965eba2c1058622631e526bd9afd45fedd49ba781fa"}, + {file = "PyYAML-6.0.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:b1275ad35a5d18c62a7220633c913e1b42d44b46ee12554e5fd39c70a243d6a3"}, + {file = "PyYAML-6.0.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:18aeb1bf9a78867dc38b259769503436b7c72f7a1f1f4c93ff9a17de54319b27"}, + {file = "PyYAML-6.0.1-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:596106435fa6ad000c2991a98fa58eeb8656ef2325d7e158344fb33864ed87e3"}, + {file = "PyYAML-6.0.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:baa90d3f661d43131ca170712d903e6295d1f7a0f595074f151c0aed377c9b9c"}, + {file = "PyYAML-6.0.1-cp37-cp37m-win32.whl", hash = "sha256:9046c58c4395dff28dd494285c82ba00b546adfc7ef001486fbf0324bc174fba"}, + {file = "PyYAML-6.0.1-cp37-cp37m-win_amd64.whl", hash = "sha256:4fb147e7a67ef577a588a0e2c17b6db51dda102c71de36f8549b6816a96e1867"}, + {file = "PyYAML-6.0.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1d4c7e777c441b20e32f52bd377e0c409713e8bb1386e1099c2415f26e479595"}, + {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a0cd17c15d3bb3fa06978b4e8958dcdc6e0174ccea823003a106c7d4d7899ac5"}, + {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:28c119d996beec18c05208a8bd78cbe4007878c6dd15091efb73a30e90539696"}, + {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7e07cbde391ba96ab58e532ff4803f79c4129397514e1413a7dc761ccd755735"}, + {file = "PyYAML-6.0.1-cp38-cp38-win32.whl", hash = 
"sha256:184c5108a2aca3c5b3d3bf9395d50893a7ab82a38004c8f61c258d4428e80206"}, + {file = "PyYAML-6.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:1e2722cc9fbb45d9b87631ac70924c11d3a401b2d7f410cc0e3bbf249f2dca62"}, + {file = "PyYAML-6.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9eb6caa9a297fc2c2fb8862bc5370d0303ddba53ba97e71f08023b6cd73d16a8"}, + {file = "PyYAML-6.0.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:c8098ddcc2a85b61647b2590f825f3db38891662cfc2fc776415143f599bb859"}, + {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5773183b6446b2c99bb77e77595dd486303b4faab2b086e7b17bc6bef28865f6"}, + {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b786eecbdf8499b9ca1d697215862083bd6d2a99965554781d0d8d1ad31e13a0"}, + {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc1bf2925a1ecd43da378f4db9e4f799775d6367bdb94671027b73b393a7c42c"}, + {file = "PyYAML-6.0.1-cp39-cp39-win32.whl", hash = "sha256:faca3bdcf85b2fc05d06ff3fbc1f83e1391b3e724afa3feba7d13eeab355484c"}, + {file = "PyYAML-6.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:510c9deebc5c0225e8c96813043e62b680ba2f9c50a08d3724c7f28a747d1486"}, + {file = "PyYAML-6.0.1.tar.gz", hash = "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43"}, ] [[package]] @@ -3035,13 +3140,13 @@ test = ["asv", "gmpy2", "mpmath", "pytest", "pytest-cov", "pytest-xdist", "sciki [[package]] name = "scmrepo" -version = "1.0.3" +version = "1.0.4" description = "SCM wrapper and fsspec filesystem for Git for use in DVC" optional = false python-versions = ">=3.8" files = [ - {file = "scmrepo-1.0.3-py3-none-any.whl", hash = "sha256:95a9f4da2f9bc4a1604a01b5d73c4d76432ceff17440ff5e70cfd5d53ae6c47b"}, - {file = "scmrepo-1.0.3.tar.gz", hash = "sha256:70e65e3d614040e6698c1a2c876a82ea1b2b6b85cb4e0a65e5b8c19320f3597d"}, + {file = "scmrepo-1.0.4-py3-none-any.whl", hash = "sha256:048ffd98ab72afb6d3dfb177e5cefe14652ea28377b4258d9dac098cd4461036"}, + {file = "scmrepo-1.0.4.tar.gz", hash = "sha256:d03278d6a86caa5c7f1e85918bc28ef69040a5e5fd04c97cc0eea611ea8be13c"}, ] [package.dependencies] @@ -3056,8 +3161,8 @@ pygtrie = ">=2.3.2" shortuuid = ">=0.5.0" [package.extras] -dev = ["mock (==5.0.1)", "mypy (==0.971)", "paramiko (==3.1.0)", "pylint (==2.15.0)", "pytest (==7.2.0)", "pytest-asyncio (==0.18.3)", "pytest-cov (==3.0.0)", "pytest-docker (==0.12.0)", "pytest-mock (==3.8.2)", "pytest-sugar (==0.9.5)", "pytest-test-utils (==0.0.8)", "types-certifi (==2021.10.8.3)", "types-mock (==5.0.0.6)", "types-paramiko (==3.0.0.7)"] -tests = ["mock (==5.0.1)", "mypy (==0.971)", "paramiko (==3.1.0)", "pylint (==2.15.0)", "pytest (==7.2.0)", "pytest-asyncio (==0.18.3)", "pytest-cov (==3.0.0)", "pytest-docker (==0.12.0)", "pytest-mock (==3.8.2)", "pytest-sugar (==0.9.5)", "pytest-test-utils (==0.0.8)", "types-certifi (==2021.10.8.3)", "types-mock (==5.0.0.6)", "types-paramiko (==3.0.0.7)"] +dev = ["mock (==5.0.1)", "mypy (==0.971)", "paramiko (==3.1.0)", "pylint (==2.15.0)", "pytest (==7.2.0)", "pytest-asyncio (==0.18.3)", "pytest-cov (==3.0.0)", "pytest-docker (==0.12.0)", "pytest-mock (==3.8.2)", "pytest-sugar (==0.9.5)", "pytest-test-utils (==0.0.8)", "types-certifi (==2021.10.8.3)", "types-mock (==5.0.0.6)", "types-paramiko (==3.0.0.10)"] +tests = ["mock (==5.0.1)", "mypy (==0.971)", "paramiko (==3.1.0)", "pylint (==2.15.0)", "pytest (==7.2.0)", "pytest-asyncio (==0.18.3)", "pytest-cov (==3.0.0)", "pytest-docker (==0.12.0)", 
"pytest-mock (==3.8.2)", "pytest-sugar (==0.9.5)", "pytest-test-utils (==0.0.8)", "types-certifi (==2021.10.8.3)", "types-mock (==5.0.0.6)", "types-paramiko (==3.0.0.10)"] [[package]] name = "sentencepiece" @@ -3113,15 +3218,141 @@ files = [ {file = "sentencepiece-0.1.99.tar.gz", hash = "sha256:189c48f5cb2949288f97ccdb97f0473098d9c3dcf5a3d99d4eabe719ec27297f"}, ] +[[package]] +name = "sentry-sdk" +version = "1.28.1" +description = "Python client for Sentry (https://sentry.io)" +optional = false +python-versions = "*" +files = [ + {file = "sentry-sdk-1.28.1.tar.gz", hash = "sha256:dcd88c68aa64dae715311b5ede6502fd684f70d00a7cd4858118f0ba3153a3ae"}, + {file = "sentry_sdk-1.28.1-py2.py3-none-any.whl", hash = "sha256:6bdb25bd9092478d3a817cb0d01fa99e296aea34d404eac3ca0037faa5c2aa0a"}, +] + +[package.dependencies] +certifi = "*" +urllib3 = {version = ">=1.26.11", markers = "python_version >= \"3.6\""} + +[package.extras] +aiohttp = ["aiohttp (>=3.5)"] +arq = ["arq (>=0.23)"] +beam = ["apache-beam (>=2.12)"] +bottle = ["bottle (>=0.12.13)"] +celery = ["celery (>=3)"] +chalice = ["chalice (>=1.16.0)"] +django = ["django (>=1.8)"] +falcon = ["falcon (>=1.4)"] +fastapi = ["fastapi (>=0.79.0)"] +flask = ["blinker (>=1.1)", "flask (>=0.11)", "markupsafe"] +grpcio = ["grpcio (>=1.21.1)"] +httpx = ["httpx (>=0.16.0)"] +huey = ["huey (>=2)"] +loguru = ["loguru (>=0.5)"] +opentelemetry = ["opentelemetry-distro (>=0.35b0)"] +pure-eval = ["asttokens", "executing", "pure-eval"] +pymongo = ["pymongo (>=3.1)"] +pyspark = ["pyspark (>=2.4.4)"] +quart = ["blinker (>=1.1)", "quart (>=0.16.1)"] +rq = ["rq (>=0.6)"] +sanic = ["sanic (>=0.8)"] +sqlalchemy = ["sqlalchemy (>=1.2)"] +starlette = ["starlette (>=0.19.1)"] +starlite = ["starlite (>=1.48)"] +tornado = ["tornado (>=5)"] + +[[package]] +name = "setproctitle" +version = "1.3.2" +description = "A Python module to customize the process title" +optional = false +python-versions = ">=3.7" +files = [ + {file = "setproctitle-1.3.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:288943dec88e178bb2fd868adf491197cc0fc8b6810416b1c6775e686bab87fe"}, + {file = "setproctitle-1.3.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:630f6fe5e24a619ccf970c78e084319ee8be5be253ecc9b5b216b0f474f5ef18"}, + {file = "setproctitle-1.3.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6c877691b90026670e5a70adfbcc735460a9f4c274d35ec5e8a43ce3f8443005"}, + {file = "setproctitle-1.3.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7a55fe05f15c10e8c705038777656fe45e3bd676d49ad9ac8370b75c66dd7cd7"}, + {file = "setproctitle-1.3.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ab45146c71ca6592c9cc8b354a2cc9cc4843c33efcbe1d245d7d37ce9696552d"}, + {file = "setproctitle-1.3.2-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e00c9d5c541a2713ba0e657e0303bf96ddddc412ef4761676adc35df35d7c246"}, + {file = "setproctitle-1.3.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:265ecbe2c6eafe82e104f994ddd7c811520acdd0647b73f65c24f51374cf9494"}, + {file = "setproctitle-1.3.2-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:c2c46200656280a064073447ebd363937562debef329482fd7e570c8d498f806"}, + {file = "setproctitle-1.3.2-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:fa2f50678f04fda7a75d0fe5dd02bbdd3b13cbe6ed4cf626e4472a7ccf47ae94"}, + {file = 
"setproctitle-1.3.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:7f2719a398e1a2c01c2a63bf30377a34d0b6ef61946ab9cf4d550733af8f1ef1"}, + {file = "setproctitle-1.3.2-cp310-cp310-win32.whl", hash = "sha256:e425be62524dc0c593985da794ee73eb8a17abb10fe692ee43bb39e201d7a099"}, + {file = "setproctitle-1.3.2-cp310-cp310-win_amd64.whl", hash = "sha256:e85e50b9c67854f89635a86247412f3ad66b132a4d8534ac017547197c88f27d"}, + {file = "setproctitle-1.3.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:2a97d51c17d438cf5be284775a322d57b7ca9505bb7e118c28b1824ecaf8aeaa"}, + {file = "setproctitle-1.3.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:587c7d6780109fbd8a627758063d08ab0421377c0853780e5c356873cdf0f077"}, + {file = "setproctitle-1.3.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d7d17c8bd073cbf8d141993db45145a70b307385b69171d6b54bcf23e5d644de"}, + {file = "setproctitle-1.3.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e932089c35a396dc31a5a1fc49889dd559548d14cb2237adae260382a090382e"}, + {file = "setproctitle-1.3.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8e4f8f12258a8739c565292a551c3db62cca4ed4f6b6126664e2381acb4931bf"}, + {file = "setproctitle-1.3.2-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:570d255fd99c7f14d8f91363c3ea96bd54f8742275796bca67e1414aeca7d8c3"}, + {file = "setproctitle-1.3.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:a8e0881568c5e6beff91ef73c0ec8ac2a9d3ecc9edd6bd83c31ca34f770910c4"}, + {file = "setproctitle-1.3.2-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:4bba3be4c1fabf170595b71f3af46c6d482fbe7d9e0563999b49999a31876f77"}, + {file = "setproctitle-1.3.2-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:37ece938110cab2bb3957e3910af8152ca15f2b6efdf4f2612e3f6b7e5459b80"}, + {file = "setproctitle-1.3.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:db684d6bbb735a80bcbc3737856385b55d53f8a44ce9b46e9a5682c5133a9bf7"}, + {file = "setproctitle-1.3.2-cp311-cp311-win32.whl", hash = "sha256:ca58cd260ea02759238d994cfae844fc8b1e206c684beb8f38877dcab8451dfc"}, + {file = "setproctitle-1.3.2-cp311-cp311-win_amd64.whl", hash = "sha256:88486e6cce2a18a033013d17b30a594f1c5cb42520c49c19e6ade40b864bb7ff"}, + {file = "setproctitle-1.3.2-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:92c626edc66169a1b09e9541b9c0c9f10488447d8a2b1d87c8f0672e771bc927"}, + {file = "setproctitle-1.3.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:710e16fa3bade3b026907e4a5e841124983620046166f355bbb84be364bf2a02"}, + {file = "setproctitle-1.3.2-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1f29b75e86260b0ab59adb12661ef9f113d2f93a59951373eb6d68a852b13e83"}, + {file = "setproctitle-1.3.2-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1c8d9650154afaa86a44ff195b7b10d683c73509d085339d174e394a22cccbb9"}, + {file = "setproctitle-1.3.2-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f0452282258dfcc01697026a8841258dd2057c4438b43914b611bccbcd048f10"}, + {file = "setproctitle-1.3.2-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:e49ae693306d7624015f31cb3e82708916759d592c2e5f72a35c8f4cc8aef258"}, + {file = "setproctitle-1.3.2-cp37-cp37m-musllinux_1_1_i686.whl", hash = 
"sha256:1ff863a20d1ff6ba2c24e22436a3daa3cd80be1dfb26891aae73f61b54b04aca"}, + {file = "setproctitle-1.3.2-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:55ce1e9925ce1765865442ede9dca0ba9bde10593fcd570b1f0fa25d3ec6b31c"}, + {file = "setproctitle-1.3.2-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:7fe9df7aeb8c64db6c34fc3b13271a363475d77bc157d3f00275a53910cb1989"}, + {file = "setproctitle-1.3.2-cp37-cp37m-win32.whl", hash = "sha256:e5c50e164cd2459bc5137c15288a9ef57160fd5cbf293265ea3c45efe7870865"}, + {file = "setproctitle-1.3.2-cp37-cp37m-win_amd64.whl", hash = "sha256:a499fff50387c1520c085a07578a000123f519e5f3eee61dd68e1d301659651f"}, + {file = "setproctitle-1.3.2-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:5b932c3041aa924163f4aab970c2f0e6b4d9d773f4d50326e0ea1cd69240e5c5"}, + {file = "setproctitle-1.3.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:f4bfc89bd33ebb8e4c0e9846a09b1f5a4a86f5cb7a317e75cc42fee1131b4f4f"}, + {file = "setproctitle-1.3.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fcd3cf4286a60fdc95451d8d14e0389a6b4f5cebe02c7f2609325eb016535963"}, + {file = "setproctitle-1.3.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5fb4f769c02f63fac90989711a3fee83919f47ae9afd4758ced5d86596318c65"}, + {file = "setproctitle-1.3.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5194b4969f82ea842a4f6af2f82cd16ebdc3f1771fb2771796e6add9835c1973"}, + {file = "setproctitle-1.3.2-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1f0cde41857a644b7353a0060b5f94f7ba7cf593ebde5a1094da1be581ac9a31"}, + {file = "setproctitle-1.3.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:9124bedd8006b0e04d4e8a71a0945da9b67e7a4ab88fdad7b1440dc5b6122c42"}, + {file = "setproctitle-1.3.2-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:c8a09d570b39517de10ee5b718730e171251ce63bbb890c430c725c8c53d4484"}, + {file = "setproctitle-1.3.2-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:8ff3c8cb26afaed25e8bca7b9dd0c1e36de71f35a3a0706b5c0d5172587a3827"}, + {file = "setproctitle-1.3.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:589be87172b238f839e19f146b9ea47c71e413e951ef0dc6db4218ddacf3c202"}, + {file = "setproctitle-1.3.2-cp38-cp38-win32.whl", hash = "sha256:4749a2b0c9ac52f864d13cee94546606f92b981b50e46226f7f830a56a9dc8e1"}, + {file = "setproctitle-1.3.2-cp38-cp38-win_amd64.whl", hash = "sha256:e43f315c68aa61cbdef522a2272c5a5b9b8fd03c301d3167b5e1343ef50c676c"}, + {file = "setproctitle-1.3.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:de3a540cd1817ede31f530d20e6a4935bbc1b145fd8f8cf393903b1e02f1ae76"}, + {file = "setproctitle-1.3.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:4058564195b975ddc3f0462375c533cce310ccdd41b80ac9aed641c296c3eff4"}, + {file = "setproctitle-1.3.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1c5d5dad7c28bdd1ec4187d818e43796f58a845aa892bb4481587010dc4d362b"}, + {file = "setproctitle-1.3.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ffc61a388a5834a97953d6444a2888c24a05f2e333f9ed49f977a87bb1ad4761"}, + {file = "setproctitle-1.3.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1fa1a0fbee72b47dc339c87c890d3c03a72ea65c061ade3204f285582f2da30f"}, + {file = 
"setproctitle-1.3.2-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fe8a988c7220c002c45347430993830666e55bc350179d91fcee0feafe64e1d4"}, + {file = "setproctitle-1.3.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:bae283e85fc084b18ffeb92e061ff7ac5af9e183c9d1345c93e178c3e5069cbe"}, + {file = "setproctitle-1.3.2-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:fed18e44711c5af4b681c2b3b18f85e6f0f1b2370a28854c645d636d5305ccd8"}, + {file = "setproctitle-1.3.2-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:b34baef93bfb20a8ecb930e395ccd2ae3268050d8cf4fe187de5e2bd806fd796"}, + {file = "setproctitle-1.3.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:7f0bed90a216ef28b9d227d8d73e28a8c9b88c0f48a082d13ab3fa83c581488f"}, + {file = "setproctitle-1.3.2-cp39-cp39-win32.whl", hash = "sha256:4d8938249a7cea45ab7e1e48b77685d0f2bab1ebfa9dde23e94ab97968996a7c"}, + {file = "setproctitle-1.3.2-cp39-cp39-win_amd64.whl", hash = "sha256:a47d97a75fd2d10c37410b180f67a5835cb1d8fdea2648fd7f359d4277f180b9"}, + {file = "setproctitle-1.3.2-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:dad42e676c5261eb50fdb16bdf3e2771cf8f99a79ef69ba88729aeb3472d8575"}, + {file = "setproctitle-1.3.2-pp37-pypy37_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c91b9bc8985d00239f7dc08a49927a7ca1ca8a6af2c3890feec3ed9665b6f91e"}, + {file = "setproctitle-1.3.2-pp37-pypy37_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e8579a43eafd246e285eb3a5b939e7158073d5087aacdd2308f23200eac2458b"}, + {file = "setproctitle-1.3.2-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:2fbd8187948284293f43533c150cd69a0e4192c83c377da837dbcd29f6b83084"}, + {file = "setproctitle-1.3.2-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:faec934cfe5fd6ac1151c02e67156c3f526e82f96b24d550b5d51efa4a5527c6"}, + {file = "setproctitle-1.3.2-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e1aafc91cbdacc9e5fe712c52077369168e6b6c346f3a9d51bf600b53eae56bb"}, + {file = "setproctitle-1.3.2-pp38-pypy38_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b617f12c9be61e8f4b2857be4a4319754756845dbbbd9c3718f468bbb1e17bcb"}, + {file = "setproctitle-1.3.2-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:b2c9cb2705fc84cb8798f1ba74194f4c080aaef19d9dae843591c09b97678e98"}, + {file = "setproctitle-1.3.2-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:a149a5f7f2c5a065d4e63cb0d7a4b6d3b66e6e80f12e3f8827c4f63974cbf122"}, + {file = "setproctitle-1.3.2-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2e3ac25bfc4a0f29d2409650c7532d5ddfdbf29f16f8a256fc31c47d0dc05172"}, + {file = "setproctitle-1.3.2-pp39-pypy39_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:65d884e22037b23fa25b2baf1a3316602ed5c5971eb3e9d771a38c3a69ce6e13"}, + {file = "setproctitle-1.3.2-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:7aa0aac1711fadffc1d51e9d00a3bea61f68443d6ac0241a224e4d622489d665"}, + {file = "setproctitle-1.3.2.tar.gz", hash = "sha256:b9fb97907c830d260fa0658ed58afd48a86b2b88aac521135c352ff7fd3477fd"}, +] + +[package.extras] +test = ["pytest"] + [[package]] name = "setuptools" -version = "67.8.0" +version = "68.0.0" description = "Easily download, build, install, 
upgrade, and uninstall Python packages" optional = false python-versions = ">=3.7" files = [ - {file = "setuptools-67.8.0-py3-none-any.whl", hash = "sha256:5df61bf30bb10c6f756eb19e7c9f3b473051f48db77fddbe06ff2ca307df9a6f"}, - {file = "setuptools-67.8.0.tar.gz", hash = "sha256:62642358adc77ffa87233bc4d2354c4b2682d214048f500964dbe760ccedf102"}, + {file = "setuptools-68.0.0-py3-none-any.whl", hash = "sha256:11e52c67415a381d10d6b462ced9cfb97066179f0e871399e006c4ab101fc85f"}, + {file = "setuptools-68.0.0.tar.gz", hash = "sha256:baf1fdb41c6da4cd2eae722e135500da913332ab3f2f5c7d33af9b492acb5235"}, ] [package.extras] @@ -3235,15 +3466,29 @@ files = [ [package.extras] widechars = ["wcwidth"] +[[package]] +name = "tenacity" +version = "8.2.3" +description = "Retry code until it succeeds" +optional = false +python-versions = ">=3.7" +files = [ + {file = "tenacity-8.2.3-py3-none-any.whl", hash = "sha256:ce510e327a630c9e1beaf17d42e6ffacc88185044ad85cf74c0a8887c6a0f88c"}, + {file = "tenacity-8.2.3.tar.gz", hash = "sha256:5398ef0d78e63f40007c1fb4c0bff96e1911394d2fa8d194f77619c05ff6cc8a"}, +] + +[package.extras] +doc = ["reno", "sphinx", "tornado (>=4.5)"] + [[package]] name = "threadpoolctl" -version = "3.1.0" +version = "3.2.0" description = "threadpoolctl" optional = false -python-versions = ">=3.6" +python-versions = ">=3.8" files = [ - {file = "threadpoolctl-3.1.0-py3-none-any.whl", hash = "sha256:8b99adda265feb6773280df41eece7b2e6561b772d21ffd52e372f999024907b"}, - {file = "threadpoolctl-3.1.0.tar.gz", hash = "sha256:a335baacfaa4400ae1f0d8e3a58d6674d2f8828e3716bb2802c44955ad391380"}, + {file = "threadpoolctl-3.2.0-py3-none-any.whl", hash = "sha256:2b7818516e423bdaebb97c723f86a7c6b0a83d3f3b0970328d66f4d9104dc032"}, + {file = "threadpoolctl-3.2.0.tar.gz", hash = "sha256:c96a0ba3bdddeaca37dc4cc7344aafad41cdb8c313f74fdfe387a867bba93355"}, ] [[package]] @@ -3503,13 +3748,13 @@ test = ["black (>=22.3.0,<23.0.0)", "coverage (>=6.2,<7.0)", "isort (>=5.0.6,<6. 
[[package]] name = "typing-extensions" -version = "4.6.3" +version = "4.7.1" description = "Backported and Experimental Type Hints for Python 3.7+" optional = false python-versions = ">=3.7" files = [ - {file = "typing_extensions-4.6.3-py3-none-any.whl", hash = "sha256:88a4153d8505aabbb4e13aacb7c486c2b4a33ca3b3f807914a9b4c844c471c26"}, - {file = "typing_extensions-4.6.3.tar.gz", hash = "sha256:d91d5919357fe7f681a9f2b5b4cb2a5f1ef0a1e9f59c4d8ff0d3491e05c0ffd5"}, + {file = "typing_extensions-4.7.1-py3-none-any.whl", hash = "sha256:440d5dd3af93b060174bf433bccd69b0babc3b15b1a8dca43789fd7f61514b36"}, + {file = "typing_extensions-4.7.1.tar.gz", hash = "sha256:b75ddc264f0ba5615db7ba217daeb99701ad295353c45f9e95963337ceeeffb2"}, ] [[package]] @@ -3552,13 +3797,13 @@ files = [ [[package]] name = "virtualenv" -version = "20.23.1" +version = "20.24.0" description = "Virtual Python Environment builder" optional = false python-versions = ">=3.7" files = [ - {file = "virtualenv-20.23.1-py3-none-any.whl", hash = "sha256:34da10f14fea9be20e0fd7f04aba9732f84e593dac291b757ce42e3368a39419"}, - {file = "virtualenv-20.23.1.tar.gz", hash = "sha256:8ff19a38c1021c742148edc4f81cb43d7f8c6816d2ede2ab72af5b84c749ade1"}, + {file = "virtualenv-20.24.0-py3-none-any.whl", hash = "sha256:18d1b37fc75cc2670625702d76849a91ebd383768b4e91382a8d51be3246049e"}, + {file = "virtualenv-20.24.0.tar.gz", hash = "sha256:e2a7cef9da880d693b933db7654367754f14e20650dc60e8ee7385571f8593a3"}, ] [package.dependencies] @@ -3581,6 +3826,43 @@ files = [ {file = "voluptuous-0.13.1.tar.gz", hash = "sha256:e8d31c20601d6773cb14d4c0f42aee29c6821bbd1018039aac7ac5605b489723"}, ] +[[package]] +name = "wandb" +version = "0.15.5" +description = "A CLI and library for interacting with the Weights and Biases API." +optional = false +python-versions = ">=3.6" +files = [ + {file = "wandb-0.15.5-py3-none-any.whl", hash = "sha256:8cfb8fdaaf0a35b636d0ca2c2c1262b0e3d835ac37f70fc3094b618f55f63f01"}, + {file = "wandb-0.15.5.tar.gz", hash = "sha256:40c1d9ae501194bff408bc9c555865ffcccf08d2d65dd413547df0c17ed20cb5"}, +] + +[package.dependencies] +appdirs = ">=1.4.3" +Click = ">=7.1,<8.0.0 || >8.0.0" +docker-pycreds = ">=0.4.0" +GitPython = ">=1.0.0,<3.1.29 || >3.1.29" +pathtools = "*" +protobuf = {version = ">=3.19.0,<4.21.0 || >4.21.0,<5", markers = "python_version > \"3.9\" or sys_platform != \"linux\""} +psutil = ">=5.0.0" +PyYAML = "*" +requests = ">=2.0.0,<3" +sentry-sdk = ">=1.0.0" +setproctitle = "*" +setuptools = "*" + +[package.extras] +async = ["httpx (>=0.22.0)"] +aws = ["boto3"] +azure = ["azure-identity", "azure-storage-blob"] +gcp = ["google-cloud-storage"] +grpc = ["grpcio (>=1.27.2)"] +kubeflow = ["google-cloud-storage", "kubernetes", "minio", "sh"] +launch = ["awscli", "azure-containerregistry", "azure-identity", "azure-storage-blob", "boto3", "botocore", "chardet", "google-auth", "google-cloud-artifact-registry", "google-cloud-compute", "google-cloud-storage", "iso8601", "kubernetes", "nbconvert", "nbformat", "optuna", "typing-extensions"] +media = ["bokeh", "moviepy", "numpy", "pillow", "plotly", "rdkit-pypi", "soundfile"] +models = ["cloudpickle"] +sweeps = ["sweeps (>=0.2.0)"] + [[package]] name = "wasabi" version = "1.1.1" @@ -3606,6 +3888,20 @@ files = [ {file = "wcwidth-0.2.6.tar.gz", hash = "sha256:a5220780a404dbe3353789870978e472cfe477761f06ee55077256e509b156d0"}, ] +[[package]] +name = "win32-setctime" +version = "1.1.0" +description = "A small Python utility to set file creation time on Windows" +optional = false +python-versions = ">=3.5" 
+files = [
+    {file = "win32_setctime-1.1.0-py3-none-any.whl", hash = "sha256:231db239e959c2fe7eb1d7dc129f11172354f98361c4fa2d6d2d7e278baa8aad"},
+    {file = "win32_setctime-1.1.0.tar.gz", hash = "sha256:15cf5750465118d6929ae4de4eb46e8edae9a5634350c01ba582df868e932cb2"},
+]
+
+[package.extras]
+dev = ["black (>=19.3b0)", "pytest (>=4.6.2)"]
+
 [[package]]
 name = "wrapt"
 version = "1.14.1"
@@ -3893,4 +4189,4 @@ test = ["zope.testing"]
 
 [metadata]
 lock-version = "2.0"
 python-versions = "^3.10"
-content-hash = "405d67996eae5299b1ca3e60712389ce8b793fc64d74d936b28d747e20315ce5"
+content-hash = "cb53df7c0ffa68f5c40fe32b94d716f6ee3d944bb795e67d653389a0c0070d93"
diff --git a/pyproject.toml b/pyproject.toml
index 7f431e5d..edcc4bf9 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -13,12 +13,18 @@ scikit-learn = "1.2.2"
 pandas = "1.5.3"
 wasabi = "1.1.1"
 typer = "^0.9.0"
-datasets = "^2.12.0"
+datasets = "2.13.1"
 accelerate = "^0.19.0"
 dvc = {extras = ["s3"], version = "^2.58.2"}
-torch = {version = "2.0.1", source = "torch-cpu"}
+# The torch-cpu source is no longer available, so the standard torch build is used instead.
+# torch = {version = "2.0.1", source = "torch-cpu"}
+torch = "2.0.1"
 transformers = "4.29.2"
 libpecos = "^1.0.0"
+loguru = "^0.7.0"
+wandb = "^0.15.4"
+openai = "0.27.8"
+openai-multi-client = "^0.1.1"
 
 [tool.poetry.group.dev]
diff --git a/scripts/create_xlinear_bertmesh_comparison_csv.py b/scripts/create_xlinear_bertmesh_comparison_csv.py
index 48e00ff0..1f525a2c 100644
--- a/scripts/create_xlinear_bertmesh_comparison_csv.py
+++ b/scripts/create_xlinear_bertmesh_comparison_csv.py
@@ -89,6 +89,8 @@ def create_comparison_csv(
     num_samples_per_cat: int,
     mesh_metadata_path: str,
     mesh_terms_list_path: str,
+    active_portfolio_path: str,
+    active_portfolio_sample: int,
     pre_annotate_bertmesh: bool,
     bertmesh_path: str,
     bertmesh_thresh: float,
@@ -121,11 +123,23 @@ def create_comparison_csv(
     grants_sample = all_grants.groupby("for_first_level_name", group_keys=False).apply(
         lambda x: x.sample(min(len(x), num_samples_per_cat))
     )
+    grants_sample["active_portfolio"] = 0
+
+    # Add active portfolio grants: drop rows without a synopsis, shuffle,
+    # then keep the first active_portfolio_sample rows
+    active_grants = pd.read_csv(active_portfolio_path)
+    active_grants = active_grants[~active_grants["Synopsis"].isna()]
+    active_grants = active_grants.sample(frac=1)
+    active_grants_sample = active_grants.iloc[:active_portfolio_sample]
+    active_grants_sample = pd.DataFrame({"abstract": active_grants_sample["Synopsis"]})
+    active_grants_sample["active_portfolio"] = 1
+    grants_sample = pd.concat([grants_sample, active_grants_sample])
 
     abstracts = grants_sample["abstract"].tolist()
+    print(f"{len(abstracts)} abstracts to tag")
 
     # Annotate with bertmesh
     if pre_annotate_bertmesh:
+        print("Tagging with bertmesh")
         tags = predict_tags_bertmesh(
             abstracts,
             bertmesh_path,
@@ -141,6 +155,7 @@
 
     # Annotate with xlinear
     if pre_annotate_xlinear:
+        print("Tagging with xlinear")
         model = MeshXLinear(
             model_path=xlinear_path,
             label_binarizer_path=xlinear_label_binarizer_path,
@@ -187,6 +202,8 @@ def create_comparison_csv(
     parser.add_argument("--num-samples-per-cat", type=int, default=10)
     parser.add_argument("--mesh-metadata-path", type=str)
     parser.add_argument("--mesh-terms-list-path", type=str)
+    parser.add_argument("--active-portfolio-path", type=str)
+    parser.add_argument("--active-portfolio-sample", type=int, default=200)
     parser.add_argument("--pre-annotate-bertmesh", action="store_true")
     parser.add_argument(
         "--bertmesh-path", type=str, default="Wellcome/WellcomeBertMesh"
     )
@@ -206,6 +223,8 @@ def create_comparison_csv(
         num_samples_per_cat=args.num_samples_per_cat,
         mesh_metadata_path=args.mesh_metadata_path,
         mesh_terms_list_path=args.mesh_terms_list_path,
+        active_portfolio_path=args.active_portfolio_path,
+        active_portfolio_sample=args.active_portfolio_sample,
         pre_annotate_bertmesh=args.pre_annotate_bertmesh,
         bertmesh_path=args.bertmesh_path,
         bertmesh_thresh=args.bertmesh_thresh,
diff --git a/scripts/mesh_json_to_jsonl.py b/scripts/mesh_json_to_jsonl.py
new file mode 100644
index 00000000..11d8efa3
--- /dev/null
+++ b/scripts/mesh_json_to_jsonl.py
@@ -0,0 +1,127 @@
+import json
+from argparse import ArgumentParser
+import numpy as np
+from loguru import logger
+
+
+def process_data(item, filter_tags: list = None, filter_years: list = None):
+    check_tags = filter_tags is not None
+    check_years = filter_years is not None
+
+    if check_tags and "meshMajor" not in item:
+        logger.warning("`meshMajor` not found in the fields. Unable to filter tags.")
+        check_tags = False
+
+    if check_years and "year" not in item:
+        logger.warning("`year` not found in the fields. Unable to filter years.")
+        check_years = False
+
+    if check_tags:
+        if filter_tags is None:
+            filter_tags = []
+        if len(filter_tags) > 0 and not any(np.isin(filter_tags, item["meshMajor"])):
+            return False
+
+    if check_years:
+        if filter_years is None:
+            filter_years = []
+
+        # Compare as str, since years may arrive as int
+        filter_years = [str(y) for y in filter_years]
+        if len(filter_years) > 0 and not any(
+            np.isin(filter_years, [str(item["year"])])
+        ):
+            return False
+
+    return True
+
+
+def mesh_json_to_jsonl(
+    input_path,
+    output_path,
+    input_encoding="latin1",
+    output_encoding="latin1",
+    filter_tags: str = "",
+    filter_years: str = "",
+    show_progress: bool = True,
+):
+    """
+    The MeSH json file is not optimized for parallel processing.
+    This script transforms it into `jsonl` so that, with one json per line,
+    the work can be distributed evenly among all the cores.
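+
+    As an illustration (hypothetical records; the script assumes the allMeSH
+    layout, i.e. a `{"articles":[` header line followed by one article per
+    line with a trailing comma), an input line such as
+        {"abstractText": "About malaria", "meshMajor": ["Malaria"], "year": "2023"},
+    becomes one self-contained json object on its own output line:
+        {"abstractText": "About malaria", "meshMajor": ["Malaria"], "year": "2023"}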
+
+    Args:
+        input_path: allMeSH_2021.json (or similar) path
+        output_path: path for the resulting jsonl file
+        input_encoding: encoding of the input json (default `latin1`)
+        output_encoding: encoding of the output jsonl (default `latin1`)
+        filter_tags: tags separated by commas (T1,T2,T3); only entries with at least one of those tags are kept
+        filter_years: years separated by commas (2008,2009); only entries from those years are kept
+        show_progress: print the number of the line currently being processed
+    """ # noqa
+    filter_tags_list = list(filter(lambda x: x.strip() != "", filter_tags.split(",")))
+    filter_years_list = list(filter(lambda x: x.strip() != "", filter_years.split(",")))
+    with open(output_path, "w", encoding=output_encoding) as fw:
+        with open(input_path, "r", encoding=input_encoding) as fr:
+            for idx, line in enumerate(fr):
+                if show_progress:
+                    print(idx, end="\r")
+                # Skip the first line, which only opens the `articles` array
+                if idx == 0:
+                    logger.info(f"Skipping first line (articles): {line}")
+                    continue
+                try:
+                    # Each article line carries a trailing comma and newline;
+                    # strip both before parsing
+                    sample = json.loads(line[:-2])
+                except json.JSONDecodeError:
+                    logger.warning(f"Skipping line in bad json format: {line}")
+                    continue
+                if process_data(sample, filter_tags_list, filter_years_list):
+                    fw.write(json.dumps(sample))
+                    fw.write("\n")
+
+
+if __name__ == "__main__":
+    parser = ArgumentParser()
+    parser.add_argument(
+        "--input_path",
+        required=True,
+        help="path to input allMeSH_2021.json (or equivalent)",
+    )
+    parser.add_argument("--output_path", required=True, help="path to output jsonl")
+    parser.add_argument(
+        "--input_encoding",
+        required=False,
+        default="latin1",
+        help="encoding of the input json",
+    )
+    parser.add_argument(
+        "--output_encoding",
+        required=False,
+        default="latin1",
+        help="encoding of the output jsonl",
+    )
+    parser.add_argument(
+        "--filter_tags",
+        required=False,
+        default="",
+        help="comma-separated tags to include (the rest will be discarded)",
+    )
+    parser.add_argument(
+        "--filter_years",
+        required=False,
+        default="",
+        help="comma-separated years to include (the rest will be discarded)",
+    )
+
+    args = parser.parse_args()
+
+    mesh_json_to_jsonl(
+        args.input_path,
+        args.output_path,
+        args.input_encoding,
+        args.output_encoding,
+        args.filter_tags,
+        args.filter_years,
+    )
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/test_preprocess_mesh.py b/tests/test_preprocess_mesh.py
index 07007cdb..d0e11a2d 100644
--- a/tests/test_preprocess_mesh.py
+++ b/tests/test_preprocess_mesh.py
@@ -3,45 +3,36 @@
 from grants_tagger_light.preprocessing.preprocess_mesh import (
     preprocess_mesh,
-    process_data,
 )
+from scripts.mesh_json_to_jsonl import process_data, mesh_json_to_jsonl
+import pytest
 
 
-def test_preprocess_mesh():
-    item = {
-        "abstractText": "This is an abstract",
-        "meshMajor": ["T1", "T2"],
-        "journal": "Journal",
-        "year": 2018,
-    }
-    with tempfile.NamedTemporaryFile(mode="r+") as input_tmp:
-        input_tmp.write("\n" + json.dumps(item) + ", ")
-        input_tmp.seek(0)
-        output_tmp = tempfile.NamedTemporaryFile(mode="r+")
-        preprocess_mesh(input_tmp.name, output_tmp.name)
-        output_tmp.seek(0)
-        expected_processed_item = {
-            "text": "This is an abstract",
-            "tags": ["T1", "T2"],
-            "meta": {"journal": "Journal", "year": 2018},
-        }
-        assert output_tmp.read() == json.dumps(expected_processed_item) + "\n"
+jsonl_data = """{"journal":"dummyJournal","meshMajor":["COVID-19","SARS-CoV-2"],"year":"2023","abstractText":"This is an article about coronavirus.","title":"article1","pmid":"pmid1"}
+{"journal":"dummyJournal","meshMajor":["Malaria"],"year":"2023","abstractText":"This is an article about malaria", "title": "article3", "pmid": "pmid3"}""" # noqa +json_data = """{"articles":[ +{"journal":"dummyJournal","meshMajor":["COVID-19","SARS-CoV-2"],"year":"2023","abstractText":"This is an article about coronavirus.","title":"article1","pmid":"pmid1"}, +{"journal":"dummyJournal","meshMajor":["Malaria"],"year":"2023","abstractText":"This is an article about malaria", "title": "article3", "pmid": "pmid3"}, +""" # noqa -def test_process_data(): - item = { - "abstractText": "This is an abstract", - "meshMajor": ["T1", "T2"], - "journal": "Journal", - "year": 2018, - } - expected_processed_item = { - "text": "This is an abstract", - "tags": ["T1", "T2"], - "meta": {"journal": "Journal", "year": 2018}, - } - processed_item = process_data(item) - assert processed_item == expected_processed_item + +@pytest.fixture +def json_data_path(): + with tempfile.TemporaryDirectory() as tmpdirname: + data_path = tmpdirname + "/data.json" + with open(data_path, "w") as f: + f.write(json_data) + yield data_path + + +@pytest.fixture +def jsonl_data_path(): + with tempfile.TemporaryDirectory() as tmpdirname: + data_path = tmpdirname + "/data.jsonl" + with open(data_path, "w") as f: + f.write(jsonl_data) + yield data_path def test_process_data_with_filter_tags(): @@ -51,13 +42,7 @@ def test_process_data_with_filter_tags(): "journal": "Journal", "year": 2018, } - expected_processed_item = { - "text": "This is an abstract", - "tags": ["T1"], - "meta": {"journal": "Journal", "year": 2018}, - } - processed_item = process_data(item, filter_tags=["T1"]) - assert processed_item == expected_processed_item + assert process_data(item, filter_tags=["T1"]) is True def test_process_data_with_missing_filter_tag(): @@ -67,8 +52,7 @@ def test_process_data_with_missing_filter_tag(): "journal": "Journal", "year": 2018, } - processed_item = process_data(item, filter_tags=["T3"]) - assert processed_item is None + assert process_data(item, filter_tags=["T3"]) is False def test_process_data_with_filter_years(): @@ -78,13 +62,40 @@ def test_process_data_with_filter_years(): "journal": "Journal", "year": 2018, } - processed_item = process_data(item, filter_years="2019,2020") - assert processed_item is None + assert process_data(item, filter_years=["2019", "2020"]) is False item["year"] = 2020 - expected_processed_item = { - "text": "This is an abstract", - "tags": ["T1", "T2"], - "meta": {"journal": "Journal", "year": 2020}, + assert process_data(item, filter_years=["2019", "2020"]) is True + + +def test_process_data_with_filter_years_and_tags(): + item = { + "abstractText": "This is an abstract", + "meshMajor": ["T1", "T2"], + "journal": "Journal", + "year": 2020, } - processed_item = process_data(item, filter_years="2019,2020") - assert processed_item == expected_processed_item + assert process_data(item, filter_years=["2019", "2020"], filter_tags=["T1"]) is True + assert process_data(item, filter_years=["2018"], filter_tags=["T1"]) is False + assert process_data(item, filter_years=["2020"], filter_tags=["T3"]) is False + + +def test_json_to_jsonl(json_data_path): + output_tmp = tempfile.NamedTemporaryFile(mode="w") + mesh_json_to_jsonl(json_data_path, output_tmp.name, show_progress=False) + + with open(output_tmp.name, "r") as f: + result = [json.loads(jline) for jline in f.read().splitlines()] + assert len(result) == 2 + + output_tmp.close() + + +def test_preprocess_mesh(jsonl_data_path): + dset, label2id, id2label = 
preprocess_mesh( + data_path=jsonl_data_path, model_key="", num_proc=2, batch_size=1, test_size=0.5 + ) + assert "train" in dset + assert "test" in dset + assert len(dset["train"]) == 1 + assert len(dset["test"]) == 1 + assert len(list(label2id.keys())) == 3 diff --git a/tests/test_split_data.py b/tests/test_split_data.py index c4e9ba6d..a225b63b 100644 --- a/tests/test_split_data.py +++ b/tests/test_split_data.py @@ -27,12 +27,12 @@ def test_split_data(): examples = 0 with open(train_output_path) as f: - for line in f: + for _ in f: examples += 1 assert examples == 9 examples = 0 with open(test_output_path) as f: - for line in f: + for _ in f: examples += 1 assert examples == 1 diff --git a/tests/test_train.py b/tests/test_train.py index d34b0f77..c12a4b2b 100644 --- a/tests/test_train.py +++ b/tests/test_train.py @@ -1,23 +1,14 @@ from grants_tagger_light.training.train import train_bertmesh +from grants_tagger_light.training.cli_args import ( + BertMeshModelArguments, + BertMeshTrainingArguments, +) import tempfile import pytest -import json # Note dummy data is not necessarily annotated correctly -dummy_data = [ - { - "text": "This grant is about malaria", - "tags": ["Humans", "Malaria"], - }, - { - "text": "This grant is about HIV", - "tags": ["HIV Infections", "Humans"], - }, - { - "text": "This grant is about diabetes", - "tags": ["Diabetes Mellitus", "Humans"], - }, -] +dummy_data = """{"journal":"dummyJournal","meshMajor":["COVID-19","SARS-CoV-2"],"year":"2023","abstractText":"This is an article about coronavirus.","title":"article1","pmid":"pmid1"} +{"journal":"dummyJournal","meshMajor":["Malaria"],"year":"2023","abstractText":"This is an article about malaria", "title": "article3", "pmid": "pmid3"}""" # noqa @pytest.fixture @@ -25,8 +16,7 @@ def data_path(): with tempfile.TemporaryDirectory() as tmpdirname: data_path = tmpdirname + "/data.jsonl" with open(data_path, "w") as f: - for sample in dummy_data: - f.write(json.dumps(sample) + "\n") + f.write(dummy_data) yield data_path @@ -36,17 +26,38 @@ def save_path(): yield tmpdirname + "/model" -def test_train_bertmesh(data_path, save_path): - model_key = "Wellcome/WellcomeBertMesh" - +def _train_bertmesh_from_model_key(data_path, save_path, model_key): # 1 train step, 1 eval step, save after training - user_args = { - "output_dir": save_path, - "max_steps": 1, - "evaluation_strategy": "steps", - "eval_steps": 1, - "save_strategy": "steps", - "save_steps": 1, - } - - train_bertmesh(model_key, data_path, **user_args) + training_args = BertMeshTrainingArguments( + output_dir=save_path, + max_steps=1, + per_device_train_batch_size=2, + per_device_eval_batch_size=2, + evaluation_strategy="no", + save_strategy="no", + report_to="none", + no_cuda=True, + num_train_epochs=1, + dataloader_num_workers=1, + ) + + model_args = BertMeshModelArguments() + + train_bertmesh( + model_key=model_key, + data_path=data_path, + max_samples=-1, + training_args=training_args, + model_args=model_args, + num_proc=1, + test_size=0.5, + shards=1, + ) + + +def test_train_bertmesh_from_model_key(data_path, save_path): + _train_bertmesh_from_model_key(data_path, save_path, "Wellcome/WellcomeBertMesh") + + +def test_train_bertmesh_from_scratch(data_path, save_path): + _train_bertmesh_from_model_key(data_path, save_path, "")
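+
+
+# NOTE: an empty model_key ("") exercises the from-scratch path of train_bertmesh,
+# whereas a hub id such as "Wellcome/WellcomeBertMesh" continues training from the
+# pretrained model; both tests share _train_bertmesh_from_model_key above.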