diff --git a/.all-contributorsrc b/.all-contributorsrc index bc6e83c6f7b..5b4285e8946 100644 --- a/.all-contributorsrc +++ b/.all-contributorsrc @@ -209,7 +209,9 @@ "avatar_url": "https://avatars.githubusercontent.com/u/54197164?v=4", "profile": "https://github.com/achieveordie", "contributions": [ - "test" + "bug", + "code", + "test" ] }, { @@ -285,6 +287,14 @@ "tutorial" ] }, + { + "login": "vnicholson1", + "name": "Vincent Nicholson", + "profile": "https://github.com/vnicholson1", + "contributions": [ + "code" + ] + }, { "login": "lnthach", "name": "Thach Le Nguyen", @@ -309,6 +319,14 @@ "tutorial" ] }, + { + "login": "Riyabelle25", + "name": "Riya Elizabeth John", + "avatar_url": "https://avatars.githubusercontent.com/u/55790848?v=4", + "contributions": [ + "code" + ] + }, { "login": "ninfueng", "name": "Ninnart Fuengfusin", @@ -2376,6 +2394,15 @@ "code" ] }, + { + "login": "sz85512678", + "name": "Zhen Shao", + "avatar_url": "https://avatars.githubusercontent.com/sz85512678", + "profile": "https://github.com/sz85512678", + "contributions": [ + "code" + ] + }, { "login": "Vasudeva-bit", "name": "Vasudeva Kilaru", @@ -2421,6 +2448,16 @@ "avatar_url": "https://avatars.githubusercontent.com/u/19709277?v=4", "profile": "https://github.com/adamkells", "contributions": [ + "test" + ] + }, + { + "login": "YHallouard", + "name": "Yann Hallouard", + "avatar_url": "https://avatars.githubusercontent.com/YHallouard", + "profile": "https://www.linkedin.com/in/yann-hallouard/", + "contributions": [ + "code", "test" ] }, @@ -2442,6 +2479,101 @@ "bug", "code" ] + }, + { + "login": "rahulporuri", + "name": "Poruri Sai Rahul", + "avatar_url": "https://avatars.githubusercontent.com/u/1926457?v=4", + "profile": "https://github.com/rahulporuri", + "contributions": [ + "doc" + ] + }, + { + "login": "fspinna", + "name": "Francesco Spinnato", + "avatar_url": "https://avatars.githubusercontent.com/u/35352023?v=4", + "profile": "https://github.com/fspinna", + "contributions": [ + "code" + ] + }, + { + "login": "sbuse", + "name": "Simon B.", + "avatar_url": "https://avatars.githubusercontent.com/u/24408707?v=4", + "profile": "https://github.com/sbuse", + "contributions": [ + "code" + ] + }, + { + "login": "sd2k", + "name": "Ben Sully", + "avatar_url": "https://avatars.githubusercontent.com/u/5464991?&v=4", + "profile": "https://github.com/sd2k", + "contributions": [ + "bug", + "code" + ] + }, + { + "login": "wayneadams", + "name": "Wayne Adams", + "avatar_url": "https://avatars.githubusercontent.com/u/15034841?s=400&u=d717e9945910bcc844c5e64cd56d570c6cc4e8e6&v=4", + "profile": "https://github.com/wayneadams", + "contributions": [ + "doc" + ] + }, + { + "login": "sssilvar", + "name": "Santiago Smith Silva", + "avatar_url": "https://avatars.githubusercontent.com/u/16252054?v=4", + "profile": "https://github.com/sssilvar", + "contributions": [ + "code" + ] + }, + { + "login": "DManowitz", + "name": "David Manowitz", + "avatar_url": "https://avatars.githubusercontent.com/u/66927103?v=4", + "profile": "https://github.com/DManowitz", + "contributions": [ + "bug", + "maintenance" + ] + }, + { + "login": "ninedigits", + "name": "Max Frohlich", + "avatar_url": "https://avatars.githubusercontent.com/u/16393653?v=4", + "profile": "https://www.linkedin.com/in/maxfrohlich/", + "contributions": [ + "code", + "ideas", + "maintenance" + ] + }, + { + "login": "steenrotsman", + "name": "Stijn J. Rotman", + "avatar_url": "https://avatars.githubusercontent.com/u/78110080?s=400&v=4", + "profile": "https://github.com/steenrotsman", + "contributions": [ + "code", + "doc" + ] + }, + { + "login": "tvdboom", + "name": "Mavs", + "avatar_url": "https://avatars.githubusercontent.com/u/32366550?v=4", + "profile": "https://github.com/tvdboom", + "contributions": [ + "code" + ] } ] } diff --git a/.binder/Dockerfile b/.binder/Dockerfile index e7cbaef9a80..8932745f889 100644 --- a/.binder/Dockerfile +++ b/.binder/Dockerfile @@ -1,9 +1,7 @@ # This Dockerfile is used to build sktime when launching binder. # Find out more at: https://mybinder.readthedocs.io/en/latest/index.html -# Load jupyter python 3.8 image -# 3.8 is the highest currently supported version we can use -FROM jupyter/scipy-notebook:python-3.8.8 +FROM jupyter/scipy-notebook:python-3.11.6 # Set up user to avoid running as root ARG NB_USER ARG NB_UID diff --git a/.github/actions/test-base/action.yml b/.github/actions/test-base/action.yml index c87d95235b9..5e6fc1196b9 100644 --- a/.github/actions/test-base/action.yml +++ b/.github/actions/test-base/action.yml @@ -15,23 +15,41 @@ runs: steps: - name: repository checkout step uses: actions/checkout@v4 + - name: update tracking reference step run: git remote set-branches origin main shell: bash + - name: shallow clone update step run: git fetch --depth 1 shell: bash + - name: python environment step uses: actions/setup-python@v4 with: python-version: ${{ inputs.python-version-identifier }} - - name: dependencies installation step + + - name: Display Python version + run: python -c "import sys; print(sys.version)" + shell: bash + + - name: Install sktime and dependencies run: python3 -m pip install .[tests] shell: bash + + - name: Show dependencies + run: python -m pip list + shell: bash + + - name: Show available branches + run: git branch -a + shell: bash + - name: unit test step - run: python3 -m pytest sktime/base --matrixdesign ${{ inputs.sub-sample-estimators }} --only_changed_modules ${{ inputs.test-affected-estimators }} + run: >- + python3 + -m pytest + sktime/base + --matrixdesign ${{ inputs.sub-sample-estimators }} + --only_changed_modules ${{ inputs.test-affected-estimators }} shell: bash - - name: test coverage step - uses: codecov/codecov-action@v3 - with: - flags: ${{ inputs.python-version-identifier }},base diff --git a/.github/actions/test-component/action.yml b/.github/actions/test-component/action.yml index 4b154aa1f90..3c1d72eac2e 100644 --- a/.github/actions/test-component/action.yml +++ b/.github/actions/test-component/action.yml @@ -1,5 +1,5 @@ -name: test specific sktime component -description: run unit tests on individual component of sktime framework +name: test +description: test inputs: sktime-component-identifier: description: name of sktime component @@ -18,23 +18,41 @@ runs: steps: - name: repository checkout step uses: actions/checkout@v4 + - name: update tracking reference step run: git remote set-branches origin main shell: bash + - name: shallow clone update step run: git fetch --depth 1 shell: bash + - name: python environment step uses: actions/setup-python@v4 with: python-version: ${{ inputs.python-version-identifier }} - - name: dependencies installation step + + - name: Display Python version + run: python -c "import sys; print(sys.version)" + shell: bash + + - name: Install sktime and dependencies run: python3 -m pip install .[${{ inputs.sktime-component-identifier }},tests] shell: bash + + - name: Show dependencies + run: python -m pip list + shell: bash + + - name: Show available branches + run: git branch -a + shell: bash + - name: unit test step - run: python3 -m pytest sktime/${{ inputs.sktime-component-identifier }} --matrixdesign ${{ inputs.sub-sample-estimators }} --only_changed_modules ${{ inputs.test-affected-estimators }} + run: >- + python3 + -m pytest + sktime/${{ inputs.sktime-component-identifier }} + --matrixdesign ${{ inputs.sub-sample-estimators }} + --only_changed_modules ${{ inputs.test-affected-estimators }} shell: bash - - name: test coverage step - uses: codecov/codecov-action@v3 - with: - flags: ${{ inputs.python-version-identifier }},${{ inputs.sktime-component-identifier }} diff --git a/.github/actions/validate-extra/action.yml b/.github/actions/test-install/action.yml similarity index 94% rename from .github/actions/validate-extra/action.yml rename to .github/actions/test-install/action.yml index 4358b31b154..f5bf2b58645 100644 --- a/.github/actions/validate-extra/action.yml +++ b/.github/actions/test-install/action.yml @@ -1,4 +1,4 @@ -name: test specific sktime extra +name: test install description: install individual extra of sktime inputs: sktime-extra-identifier: @@ -12,10 +12,12 @@ runs: steps: - name: repository checkout step uses: actions/checkout@v4 + - name: python environment step uses: actions/setup-python@v4 with: python-version: ${{ inputs.python-version-identifier }} + - name: extra installation step run: python3 -m pip install .[${{ inputs.sktime-extra-identifier }}] shell: bash diff --git a/.github/workflows/cancel.yml b/.github/workflows/cancel.yml index 685c74e8dec..3372a1100a8 100644 --- a/.github/workflows/cancel.yml +++ b/.github/workflows/cancel.yml @@ -8,6 +8,6 @@ jobs: cancel: runs-on: ubuntu-latest steps: - - uses: styfle/cancel-workflow-action@0.12.0 + - uses: styfle/cancel-workflow-action@0.12.1 with: workflow_id: ${{ github.event.workflow.id }} diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 0ee0b597340..a08da3e6bed 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -13,14 +13,14 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - - uses: actions/setup-python@v4 + - uses: actions/setup-python@v5 - id: file_changes uses: trilom/file-changes-action@v1.2.4 with: output: " " - name: List changed files run: echo '${{ steps.file_changes.outputs.files}}' - - uses: pre-commit/action@v3.0.0 + - uses: pre-commit/action@v3.0.1 with: extra_args: --files ${{ steps.file_changes.outputs.files}} - name: Check for missing init files @@ -33,7 +33,7 @@ jobs: steps: - uses: actions/checkout@v4 - name: Set up Python - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: 3.9 - name: Install dependencies @@ -44,13 +44,30 @@ jobs: run: build_tools/run_examples.sh shell: bash + run-blogpost-examples: + needs: code-quality + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: 3.9 + - name: Install dependencies + run: | + python -m pip install --upgrade pip + python -m pip install .[all_extras,binder,dev,mlflow] + - name: Run example notebooks + run: build_tools/run_blogposts.sh + shell: bash + test-nodevdeps: needs: code-quality runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - name: Set up Python - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: 3.9 - name: Display Python version @@ -70,7 +87,7 @@ jobs: steps: - uses: actions/checkout@v4 - name: Set up Python - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: 3.9 - name: Display Python version @@ -97,7 +114,7 @@ jobs: - run: git fetch --depth 1 - name: Set up Python - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: 3.9 @@ -120,7 +137,7 @@ jobs: steps: - uses: actions/checkout@v4 - name: Set up Python - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: 3.9 - name: Display Python version @@ -147,7 +164,7 @@ jobs: - run: git fetch --depth 1 - name: Set up Python - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: 3.9 - name: Display Python version @@ -167,7 +184,7 @@ jobs: run: make PYTESTOPTIONS="--cov --cov-report=xml --only_cython_estimators=True --matrixdesign=False --timeout=600" test_check_suite - name: Publish code coverage - uses: codecov/codecov-action@v3 + uses: codecov/codecov-action@v4 test-full: needs: test-nosoftdeps @@ -185,7 +202,7 @@ jobs: - run: git fetch --depth 1 - name: Set up Python - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} @@ -194,7 +211,7 @@ jobs: - name: Install sktime and dependencies run: | - python -m pip install .[all_extras_pandas2,dev] --no-cache-dir + python -m pip install .[all_extras_pandas2,dev,dl] --no-cache-dir - name: Show dependencies run: python -m pip list @@ -203,10 +220,10 @@ jobs: run: git branch -a - name: Run tests - run: make test + run: make test_without_datasets - name: Publish code coverage - uses: codecov/codecov-action@v3 + uses: codecov/codecov-action@v4 test-unix-pandas1: needs: test-nosoftdeps @@ -223,7 +240,7 @@ jobs: - run: git fetch --depth 1 - name: Set up Python - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} @@ -241,7 +258,7 @@ jobs: run: git branch -a - name: Run tests - run: make test + run: make test_without_datasets - name: Publish code coverage - uses: codecov/codecov-action@v3 + uses: codecov/codecov-action@v4 diff --git a/.github/workflows/test_all.yml b/.github/workflows/test_all.yml index c30a1547d36..3cdc7ffab96 100644 --- a/.github/workflows/test_all.yml +++ b/.github/workflows/test_all.yml @@ -1,17 +1,17 @@ -name: test all workflow +name: test all on: schedule: - cron: 0 0 * * 0 workflow_dispatch: jobs: code_quality: - name: validate code quality + name: code quality runs-on: ubuntu-latest steps: - name: repository checkout step uses: actions/checkout@v4 - name: python environment step - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: "3.8" - name: install pre-commit @@ -23,7 +23,7 @@ jobs: shell: bash test_base: needs: code_quality - name: test base framework + name: base strategy: fail-fast: false matrix: @@ -47,9 +47,13 @@ jobs: python-version-identifier: ${{ matrix.python-version }} sub-sample-estimators: "False" test-affected-estimators: "False" - test_components: + - name: upload coverage + uses: codecov/codecov-action@v4 + with: + flags: ${{ matrix.operating-system }},${{ matrix.python-version }},base,complete + test_module: needs: code_quality - name: test individual components + name: module strategy: fail-fast: false matrix: @@ -84,3 +88,59 @@ jobs: python-version-identifier: ${{ matrix.python-version }} sub-sample-estimators: "False" test-affected-estimators: "False" + - name: upload coverage + uses: codecov/codecov-action@v4 + with: + flags: ${{ matrix.operating-system }},${{ matrix.python-version }},${{ matrix.sktime-component }},complete + test_other: + needs: code_quality + name: other + strategy: + fail-fast: false + matrix: + python-version: + - "3.8" + - "3.9" + - "3.10" + - "3.11" + - "3.12" + operating-system: + - macos-latest + - ubuntu-latest + - windows-latest + runs-on: ${{ matrix.operating-system }} + steps: + - name: checkout pull request branch + uses: actions/checkout@v4 + - name: update local git tracking reference + run: git remote set-branches origin main + - name: update local shallow clone + run: git fetch --depth 1 + - name: create python virtual environment + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + - name: install core, test and all soft dependencies + run: python3 -m pip install .[all_extras_pandas2,tests] + - name: run unit tests + run: >- + python3 + -m pytest + sktime + --ignore sktime/base + --ignore sktime/datasets + --ignore sktime/alignment + --ignore sktime/annotation + --ignore sktime/classification + --ignore sktime/clustering + --ignore sktime/forecasting + --ignore sktime/networks + --ignore sktime/param_est + --ignore sktime/regression + --ignore sktime/transformations + --matrixdesign False + --only_changed_modules False + - name: upload coverage + uses: codecov/codecov-action@v4 + with: + flags: ${{ matrix.operating-system }},${{ matrix.python-version }},components_without_extras,complete diff --git a/.github/workflows/test_base.yml b/.github/workflows/test_base.yml index 921ede71977..0684c735f04 100644 --- a/.github/workflows/test_base.yml +++ b/.github/workflows/test_base.yml @@ -1,24 +1,25 @@ -name: test base workflow +name: base on: workflow_call: jobs: detect: - name: check for changes in base framework + name: detect runs-on: ubuntu-latest permissions: pull-requests: read outputs: base: ${{ steps.filter.outputs.base }} steps: - - uses: dorny/paths-filter@v2 + - uses: dorny/paths-filter@v3 id: filter with: filters: | base: + - pyproject.toml - sktime/base/** test: needs: detect - name: test base framework + name: if: ${{ needs.detect.outputs.base == 'true' }} strategy: fail-fast: false @@ -37,9 +38,15 @@ jobs: steps: - name: checkout pull request branch uses: actions/checkout@v4 + - name: run tests on python ${{ matrix.python-version }} uses: ./.github/actions/test-base with: python-version-identifier: ${{ matrix.python-version }} sub-sample-estimators: "True" test-affected-estimators: "True" + + - name: upload coverage + uses: codecov/codecov-action@v4 + with: + flags: ${{ matrix.operating-system }},${{ matrix.python-version }},base,incomplete diff --git a/.github/workflows/test_code_quality.yml b/.github/workflows/test_code_quality.yml deleted file mode 100644 index 20b2eb76be2..00000000000 --- a/.github/workflows/test_code_quality.yml +++ /dev/null @@ -1,34 +0,0 @@ -name: validate code quality workflow -on: - pull_request: - branches: - - main -jobs: - code_quality: - name: test code quality - runs-on: ubuntu-latest - steps: - - name: repository checkout step - uses: actions/checkout@v4 - - name: python environment step - uses: actions/setup-python@v4 - with: - python-version: "3.8" - - name: install pre-commit - run: python3 -m pip install pre-commit - - id: changed-files - name: identify modified files - uses: tj-actions/changed-files@v40 - - name: run pre-commit hooks on modified files - run: pre-commit run --color always --files ${{ steps.changed-files.outputs.all_changed_files }} --show-diff-on-failure - - name: check missing __init__ files - run: build_tools/fail_on_missing_init_files.sh - shell: bash - unit_test_base: - needs: code_quality - name: run unit tests for base framework - uses: ./.github/workflows/test_base.yml - unit_test_components: - needs: code_quality - name: run unit tests for individual components - uses: ./.github/workflows/test_components.yml diff --git a/.github/workflows/test_datasets.yml b/.github/workflows/test_datasets.yml new file mode 100644 index 00000000000..cc0ea1d9d6e --- /dev/null +++ b/.github/workflows/test_datasets.yml @@ -0,0 +1,85 @@ +name: datasets +on: + schedule: + - cron: 0 0 1 * * + workflow_call: + workflow_dispatch: +jobs: + test_internal_data: + name: onboard + strategy: + fail-fast: false + matrix: + python-version: + - "3.8" + - "3.9" + - "3.10" + - "3.11" + - "3.12" + operating-system: + - macos-latest + - ubuntu-latest + - windows-latest + runs-on: ${{ matrix.operating-system }} + steps: + - name: repository checkout step + uses: actions/checkout@v4 + - name: update tracking reference step + run: git remote set-branches origin main + shell: bash + - name: shallow clone update step + run: git fetch --depth 1 + shell: bash + - name: python environment step + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + - name: dependencies installation step + run: python3 -m pip install .[tests] + shell: bash + - name: unit test step + run: python3 -m pytest -m "not datadownload" sktime/datasets + shell: bash + - name: upload coverage step + uses: codecov/codecov-action@v4 + with: + flags: ${{ matrix.operating-system }},${{ matrix.python-version }},datasets + test_external_data: + name: downloads + strategy: + fail-fast: false + matrix: + python-version: + - "3.8" + - "3.9" + - "3.10" + - "3.11" + - "3.12" + operating-system: + - macos-latest + - ubuntu-latest + - windows-latest + runs-on: ${{ matrix.operating-system }} + steps: + - name: repository checkout step + uses: actions/checkout@v4 + - name: update tracking reference step + run: git remote set-branches origin main + shell: bash + - name: shallow clone update step + run: git fetch --depth 1 + shell: bash + - name: python environment step + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + - name: dependencies installation step + run: python3 -m pip install .[tests] + shell: bash + - name: unit test step + run: python3 -m pytest -m "datadownload" sktime/datasets + shell: bash + - name: upload coverage step + uses: codecov/codecov-action@v4 + with: + flags: ${{ matrix.operating-system }},${{ matrix.python-version }},datasets diff --git a/.github/workflows/validate_extras.yml b/.github/workflows/test_install.yml similarity index 84% rename from .github/workflows/validate_extras.yml rename to .github/workflows/test_install.yml index 1f0f8225904..f2820fcee9a 100644 --- a/.github/workflows/validate_extras.yml +++ b/.github/workflows/test_install.yml @@ -1,16 +1,12 @@ -name: extras validation +name: test softdep set install on: - pull_request: - branches: - - main - paths: - - pyproject.toml schedule: - cron: 0 0 1 * * + workflow_call: workflow_dispatch: jobs: test_installations: - name: extras installation + name: install strategy: fail-fast: false matrix: @@ -25,6 +21,7 @@ jobs: - ubuntu-latest - windows-latest sktime-extra: + - all_extras_pandas2 - alignment - annotation - classification @@ -39,7 +36,7 @@ jobs: - name: checkout pull request branch uses: actions/checkout@v4 - name: install ${{ matrix.sktime-extra }} extra on python ${{ matrix.python-version }} - uses: ./.github/actions/validate-extra + uses: ./.github/actions/test-install with: sktime-extra-identifier: ${{ matrix.sktime-extra }} python-version-identifier: ${{ matrix.python-version }} diff --git a/.github/workflows/test_main.yml b/.github/workflows/test_main.yml new file mode 100644 index 00000000000..c9a6281f4e7 --- /dev/null +++ b/.github/workflows/test_main.yml @@ -0,0 +1,67 @@ +name: CI +on: + pull_request: + branches: + - main +jobs: + code_quality: + name: code quality + runs-on: ubuntu-latest + steps: + - name: repository checkout step + uses: actions/checkout@v4 + - name: python environment step + uses: actions/setup-python@v5 + with: + python-version: "3.8" + - name: install pre-commit + run: python3 -m pip install pre-commit + - id: changed-files + name: identify modified files + uses: tj-actions/changed-files@v42 + - name: run pre-commit hooks on modified files + run: pre-commit run --color always --files ${{ steps.changed-files.outputs.all_changed_files }} --show-diff-on-failure + - name: check missing __init__ files + run: build_tools/fail_on_missing_init_files.sh + shell: bash + detect: + needs: code_quality + name: detect changes + runs-on: ubuntu-latest + permissions: + pull-requests: read + outputs: + datasets: ${{ steps.filter.outputs.datasets }} + pyproject: ${{ steps.filter.outputs.pyproject }} + steps: + - uses: dorny/paths-filter@v3 + id: filter + with: + filters: | + datasets: + - sktime/base/** + - sktime/datasets/** + pyproject: + - pyproject.toml + test: + needs: detect + name: install + if: ${{ needs.detect.outputs.pyproject == 'true' }} + uses: ./.github/workflows/test_install.yml + unit_test_base: + needs: code_quality + name: base + uses: ./.github/workflows/test_base.yml + unit_test_module: + needs: code_quality + name: module + uses: ./.github/workflows/test_module.yml + unit_test_other: + needs: code_quality + name: other + uses: ./.github/workflows/test_other.yml + unit_test_datasets: + needs: detect + name: datasets + if: ${{ needs.detect.outputs.datasets == 'true' }} + uses: ./.github/workflows/test_datasets.yml diff --git a/.github/workflows/test_components.yml b/.github/workflows/test_module.yml similarity index 57% rename from .github/workflows/test_components.yml rename to .github/workflows/test_module.yml index cf66c2a8e15..42ea6cf2739 100644 --- a/.github/workflows/test_components.yml +++ b/.github/workflows/test_module.yml @@ -1,40 +1,58 @@ -name: test individual components workflow +name: test module on: workflow_call: jobs: detect: - name: check for changes in individual components + name: detect runs-on: ubuntu-latest permissions: pull-requests: read outputs: - components: ${{ steps.filter.outputs.changes }} + module_changes: ${{ steps.filter.outputs.changes }} steps: - - uses: dorny/paths-filter@v2 + - uses: dorny/paths-filter@v3 id: filter with: filters: | alignment: + - pyproject.toml + - sktime/base/** - sktime/alignment/** annotation: + - pyproject.toml + - sktime/base/** - sktime/annotation/** classification: + - pyproject.toml + - sktime/base/** - sktime/classification/** clustering: + - pyproject.toml + - sktime/base/** - sktime/clustering/** forecasting: + - pyproject.toml + - sktime/base/** - sktime/forecasting/** networks: + - pyproject.toml + - sktime/base/** - sktime/networks/** param_est: + - pyproject.toml + - sktime/base/** - sktime/param_est/** regression: + - pyproject.toml + - sktime/base/** - sktime/regression/** transformations: + - pyproject.toml + - sktime/base/** - sktime/transformations/** test: needs: detect - name: test specific component + name: strategy: fail-fast: false matrix: @@ -48,15 +66,21 @@ jobs: - macos-latest - ubuntu-latest - windows-latest - sktime-component: ${{ fromJSON(needs.detect.outputs.components) }} + sktime-component: ${{ fromJSON(needs.detect.outputs.module_changes) }} runs-on: ${{ matrix.operating-system }} steps: - name: checkout pull request branch uses: actions/checkout@v4 - - name: run tests for component ${{ matrix.sktime-component }} on python ${{ matrix.python-version }} + + - name: ${{ matrix.sktime-component }}, ${{ matrix.operating-system }}, ${{ matrix.python-version }} uses: ./.github/actions/test-component with: sktime-component-identifier: ${{ matrix.sktime-component }} python-version-identifier: ${{ matrix.python-version }} sub-sample-estimators: "True" test-affected-estimators: "True" + + - name: upload coverage + uses: codecov/codecov-action@v4 + with: + flags: ${{ matrix.operating-system }},${{ matrix.python-version }},${{ matrix.sktime-component }},incomplete diff --git a/.github/workflows/test_other.yml b/.github/workflows/test_other.yml new file mode 100644 index 00000000000..76c1ad5f9fc --- /dev/null +++ b/.github/workflows/test_other.yml @@ -0,0 +1,85 @@ +name: test other +on: + workflow_call: +jobs: + detect: + name: detect + runs-on: ubuntu-latest + permissions: + pull-requests: read + outputs: + other_change: ${{ steps.filter.outputs.other_change }} + steps: + - uses: dorny/paths-filter@v3 + id: filter + with: + filters: | + other_change: + - pyproject.toml + - sktime/base/** + - sktime/_contrib/** + - sktime/benchmarking/** + - sktime/datatypes/** + - sktime/distances/** + - sktime/dists_kernels/** + - sktime/performance_metrics/** + - sktime/pipeline/** + - sktime/proba/** + - sktime/registry/** + - sktime/series_as_features/** + - sktime/split/** + - sktime/tests/** + - sktime/utils/** + test_module: + needs: detect + name: + if: ${{ needs.detect.outputs.other_change == 'true' }} + strategy: + fail-fast: false + matrix: + python-version: + - "3.8" + - "3.9" + - "3.10" + - "3.11" + - "3.12" + operating-system: + - macos-latest + - ubuntu-latest + - windows-latest + runs-on: ${{ matrix.operating-system }} + steps: + - name: checkout pull request branch + uses: actions/checkout@v4 + - name: update local git tracking reference + run: git remote set-branches origin main + - name: update local shallow clone + run: git fetch --depth 1 + - name: create python virtual environment + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + - name: install core, test and all soft dependencies + run: python3 -m pip install .[all_extras_pandas2,tests] + - name: run unit tests + run: >- + python3 + -m pytest + sktime + --ignore sktime/base + --ignore sktime/datasets + --ignore sktime/alignment + --ignore sktime/annotation + --ignore sktime/classification + --ignore sktime/clustering + --ignore sktime/forecasting + --ignore sktime/networks + --ignore sktime/param_est + --ignore sktime/regression + --ignore sktime/transformations + --matrixdesign True + --only_changed_modules True + - name: upload coverage + uses: codecov/codecov-action@v4 + with: + flags: ${{ matrix.operating-system }},${{ matrix.python-version }},components_without_extras,incomplete diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index c482476a130..4f40c1f6891 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -12,7 +12,7 @@ jobs: steps: - uses: actions/checkout@v4 - - uses: actions/setup-python@v4 + - uses: actions/setup-python@v5 with: python-version: '3.10' @@ -22,7 +22,7 @@ jobs: python -m build --wheel --sdist --outdir wheelhouse - name: Store wheels - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: wheels path: wheelhouse/* @@ -39,15 +39,18 @@ jobs: steps: - uses: actions/checkout@v4 - - uses: actions/setup-python@v4 + - uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} - - uses: actions/download-artifact@v3 + - uses: actions/download-artifact@v4 with: name: wheels path: wheelhouse + - name: Display downloaded artifacts + run: ls -l wheelhouse + - name: Get wheel filename run: echo "WHEELNAME=$(ls ./wheelhouse/sktime-*none-any.whl)" >> $GITHUB_ENV @@ -55,7 +58,7 @@ jobs: run: python -m pip install "${{ env.WHEELNAME }}[all_extras_pandas2,dev]" - name: Run tests - run: make test + run: make test_without_datasets test_windows_wheels: needs: build_wheels @@ -64,75 +67,31 @@ jobs: strategy: fail-fast: false # to not fail all combinations if just one fail matrix: - include: - # Window 64 bit - - os: windows-latest - python: 38 - python-version: '3.8' - bitness: 64 - platform_id: win_amd64 - - os: windows-latest - python: 39 - python-version: '3.9' - bitness: 64 - platform_id: win_amd64 - - os: windows-latest - python: 310 - python-version: '3.10' - bitness: 64 - platform_id: win_amd64 - - os: windows-latest - python: 311 - python-version: '3.11' - bitness: 64 - platform_id: win_amd64 - - os: windows-latest - python: 312 - python-version: '3.12' - bitness: 64 - platform_id: win_amd64 + os: [windows-latest] + python-version: ['3.8', '3.9', '3.10', '3.11', '3.12'] steps: - uses: actions/checkout@v4 - - uses: conda-incubator/setup-miniconda@v2 + - uses: actions/setup-python@v5 with: - activate-environment: test - auto-update-conda: true python-version: ${{ matrix.python-version }} - channels: anaconda, conda-forge, - - - run: conda --version - - run: which python - - uses: actions/download-artifact@v3 + - uses: actions/download-artifact@v4 with: name: wheels path: wheelhouse - - name: Install conda libpython - run: conda install -c anaconda -n test -y libpython - - name: Display downloaded artifacts run: ls -l wheelhouse - name: Get wheel filename run: echo "WHEELNAME=$(ls ./wheelhouse/sktime-*none-any.whl)" >> $env:GITHUB_ENV - - name: Activate conda env - run: conda activate test - - name: Install wheel and extras run: python -m pip install "${env:WHEELNAME}[all_extras_pandas2,dev]" - - name: Show conda packages - run: conda list -n test - - - name: Run tests - run: | - mkdir -p testdir/ - cp .coveragerc testdir/ - cp setup.cfg testdir/ - python -m pytest + - name: Run tests # explicit commands as windows does not support make + run: python -m pytest --ignore sktime/datasets upload_wheels: name: Upload wheels to PyPI @@ -140,7 +99,7 @@ jobs: needs: [build_wheels,test_unix_wheels,test_windows_wheels] steps: - - uses: actions/download-artifact@v3 + - uses: actions/download-artifact@v4 with: name: wheels path: wheelhouse diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 3bda1e00104..6e256366205 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,9 +1,10 @@ repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.4.0 + rev: v4.5.0 hooks: - id: check-added-large-files - args: ["--maxkb=1000"] + args: + - --maxkb=1000 - id: check-case-conflict - id: check-merge-conflict - id: check-symlinks @@ -18,27 +19,26 @@ repos: - id: trailing-whitespace - repo: https://github.com/asottile/pyupgrade - rev: v3.10.1 + rev: v3.15.0 hooks: - id: pyupgrade args: - --py38-plus - repo: https://github.com/pycqa/isort - rev: 5.12.0 + rev: 5.13.2 hooks: - id: isort name: isort - repo: https://github.com/psf/black - rev: 23.7.0 + rev: 23.12.1 hooks: - id: black language_version: python3 - # args: [--line-length 79] - repo: https://github.com/pycqa/flake8 - rev: 6.1.0 + rev: 7.0.0 hooks: - id: flake8 exclude: docs/conf.py @@ -51,28 +51,30 @@ repos: stages: [manual] - repo: https://github.com/nbQA-dev/nbQA - rev: 1.7.0 + rev: 1.7.1 hooks: - id: nbqa-black - args: [--nbqa-mutate, --nbqa-dont-skip-bad-cells] - additional_dependencies: [black==22.3.0] + args: + - --nbqa-dont-skip-bad-cells - id: nbqa-isort - args: [--nbqa-mutate, --nbqa-dont-skip-bad-cells] - additional_dependencies: [isort==5.6.4] + args: + - --nbqa-dont-skip-bad-cells - id: nbqa-flake8 - args: [--nbqa-dont-skip-bad-cells, "--extend-ignore=E402,E203"] - additional_dependencies: [flake8==3.8.3] + args: + - --nbqa-dont-skip-bad-cells + - --extend-ignore=E402,E203 - repo: https://github.com/pycqa/pydocstyle rev: 6.3.0 hooks: - id: pydocstyle - args: ["--config=setup.cfg"] + args: + - --config=setup.cfg # We use the Python version instead of the original version which seems to require Docker # https://github.com/koalaman/shellcheck-precommit - repo: https://github.com/shellcheck-py/shellcheck-py - rev: v0.9.0.5 + rev: v0.9.0.6 hooks: - id: shellcheck name: shellcheck diff --git a/CODEOWNERS b/CODEOWNERS index 0e2122e79af..720a0e57dd3 100644 --- a/CODEOWNERS +++ b/CODEOWNERS @@ -1,89 +1,14 @@ -# The file lists sktime's algorithm maintainers as specified in GOVERNANCE.md. -# Each line is a file pattern followed by one or more owners. +# The file specifies framework level core developers for automated review requests +# +# Note: historically, this file has been used to maintain a list of +# algorithm maintainers as specified in GOVERNANCE.md. +# This is no longer the case, algorithm maintainers are now +# specified directly in the estimator, +# in the "maintainers" tag of the respective scikit-base object. +# +# Algorithm maintainers are programmatically queriable +# via Estimator.get_class_tag("maintainers"). +# Further lookup such as "which algorithms does M maintain" +# can be carried out using registry.all_estimators * @achieveordie @benheid @fkiraly @yarnabrina - -sktime/annotation/hmm_learn/ @miraep8 -sktime/annotation/clasp.py @patrickzib @ermshaua -sktime/annotation/eagglo.py @KatieBuc -sktime/annotation/stray.py @KatieBuc - -sktime/classification/dictionary_based/_boss.py @patrickzib @MatthewMiddlehurst -sktime/classification/dictionary_based/_cboss.py @patrickzib @MatthewMiddlehurst -sktime/classification/dictionary_based/_muse.py @patrickzib @MatthewMiddlehurst -sktime/classification/dictionary_based/_tde.py @patrickzib @MatthewMiddlehurst -sktime/classification/dictionary_based/_weasel.py @patrickzib @MatthewMiddlehurst -sktime/classification/distance_based/ @goastler -sktime/classification/dummy/ @ZiyaoWei -sktime/classification/early_classification/_probability_threshold.py @MatthewMiddlehurst -sktime/classification/early_classification/_teaser.py @patrickzib @MatthewMiddlehurst -sktime/classification/feature_based/_catch22_classifier.py @MatthewMiddlehurst -sktime/classification/feature_based/_fresh_prince.py @MatthewMiddlehurst -sktime/classification/feature_based/_matrix_profile_classifier.py @MatthewMiddlehurst -sktime/classification/feature_based/_random_interval_classifier.py @MatthewMiddlehurst -sktime/classification/feature_based/_signature_classifier.py @jambo6 -sktime/classification/feature_based/_summary_classifier.py @MatthewMiddlehurst -sktime/classification/feature_based/_tsfresh_classifier.py @MatthewMiddlehurst -sktime/classification/hybrid/_hivecote_v1.py @MatthewMiddlehurst -sktime/classification/hybrid/_hivecote_v2.py @MatthewMiddlehurst -sktime/classification/interval_based/_cif.py @MatthewMiddlehurst -sktime/classification/interval_based/_drcif.py @MatthewMiddlehurst -sktime/classification/interval_based/_rise.py @MatthewMiddlehurst -sktime/classification/interval_based/_stsf.py @MatthewMiddlehurst -sktime/classification/interval_based/_tsf.py @MatthewMiddlehurst -sktime/classification/kernel_based/_arsenal.py @MatthewMiddlehurst -sktime/classification/kernel_based/_rocket_classifier.py @MatthewMiddlehurst -sktime/classification/shapelet_based/_stc.py @ABostrom @MatthewMiddlehurst -sktime/classification/sklearn/_continuous_interval_tree.py @MatthewMiddlehurst -sktime/classification/sklearn/_rotation_forest.py @MatthewMiddlehurst - -sktime/forecasting/adapters/_hcrystalball.py @MichalChromcak -sktime/forecasting/arch/_uarch.py @Vasudeva-bit -sktime/forecasting/arima.py @HYang1996 -sktime/forecasting/base/adapters/_statsforecast.py @FedericoGarza -sktime/forecasting/bats.py @aiwalter -sktime/forecasting/compose/_ensemble.py @aiwalter -sktime/forecasting/compose/_hierarchy_ensemble.py @VyomkeshVyas -sktime/forecasting/ets.py @HYang1996 -sktime/forecasting/fbprophet.py @aiwalter -sktime/forecasting/model_selection/_split @koralturkk -sktime/forecasting/online_learning/ @magittan -sktime/forecasting/sarimax.py @TNTran92 -sktime/forecasting/statsforecast.py @FedericoGarza -sktime/forecasting/structural.py @juanitorduz -sktime/forecasting/tests/test_ets.py @HYang1996 -sktime/forecasting/tbats.py @aiwalter - -sktime/regression/dummy/ @badrmarani - -sktime/transformations/panel/augmenter.py @MrPr3ntice @iljamaurer -sktime/transformations/panel/catch22.py @MatthewMiddlehurst -sktime/transformations/panel/catch22wrapper.py @MatthewMiddlehurst -sktime/transformations/panel/channel_selection.py @haskarb @a-pasos-ruiz -sktime/transformations/panel/dictionary_based/_paa.py @patrickzib @MatthewMiddlehurst -sktime/transformations/panel/dictionary_based/_sax.py @patrickzib @MatthewMiddlehurst -sktime/transformations/panel/dictionary_based/_sfa.py @patrickzib @MatthewMiddlehurst -sktime/transformations/panel/dictionary_based/_sfa_fast.py @patrickzib @MatthewMiddlehurst -sktime/transformations/panel/random_intervals.py @MatthewMiddlehurst -sktime/transformations/panel/rocket/ @angus924 -sktime/transformations/panel/rocket/_multirocket.py @ChangWeiTan @fstinner @angus924 -sktime/transformations/panel/rocket/_multirocket_multivariate.py @ChangWeiTan @fstinner @angus924 -sktime/transformations/panel/signature_based/ @jambo6 -sktime/transformations/panel/shapelet_transform.py @ABostrom @MatthewMiddlehurst -sktime/transformations/panel/supervised_intervals.py @MatthewMiddlehurst - -sktime/transformations/series/clear_sky.py @ciaran-g -sktime/transformations/series/clasp.py @patrickzib @ermshaua -sktime/transformations/series/date.py @danbartl @KishManani -sktime/transformations/series/difference.py @rnkuhns -sktime/transformations/series/exponent.py @rnkuhns -sktime/transformations/series/feature_selection.py @aiwalter -sktime/transformations/series/impute.py @aiwalter -sktime/transformations/series/kalman_filter.py @NoaBenAmi -sktime/transformations/series/outlier_detection.py @aiwalter -sktime/transformations/series/scaledlogit.py @ltsaprounis -sktime/transformations/series/time_since.py @KishManani -sktime/transformations/series/theta.py @GuzalBulatova -sktime/transformations/series/scaledasinh.py @ali-parizad - -sktime/utils/mlflow_sktime.py @benjaminbluhm diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md index 369a573ba47..5f938eba351 100644 --- a/CONTRIBUTORS.md +++ b/CONTRIBUTORS.md @@ -1,8 +1,12 @@ Contributors ============ +<!-- IMPORTANT - DO NOT MODIFY THIS FILE DIRECTLY. --> +<!-- This file is updated automatically from .all-contributorsrc --> +<!-- Please add your badges to .all-contibutorsrc --> + <!-- ALL-CONTRIBUTORS-BADGE:START - Do not remove or modify this section --> -[![All Contributors](https://img.shields.io/badge/all_contributors-238-orange.svg)](#contributors) +[![All Contributors](https://img.shields.io/badge/all_contributors-252-orange.svg)](#contributors) <!-- ALL-CONTRIBUTORS-BADGE:END --> This project follows the [all-contributors](https://github.com/all-contributors/all-contributors) specification. Contributions of any kind welcome! @@ -54,11 +58,12 @@ Thanks goes to these wonderful people ([emoji key](https://allcontributors.org/d <td align="center" valign="top" width="11.11%"><a href="https://www.linkedin.com/in/ayushmaan-seth-4a96364a/"><img src="https://avatars1.githubusercontent.com/u/29939762?v=4?s=100" width="100px;" alt="Ayushmaan Seth"/><br /><sub><b>Ayushmaan Seth</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=Ayushmaanseth" title="Code">π»</a> <a href="https://github.com/sktime/sktime/pulls?q=is%3Apr+reviewed-by%3AAyushmaanseth" title="Reviewed Pull Requests">π</a> <a href="https://github.com/sktime/sktime/commits?author=Ayushmaanseth" title="Tests">β οΈ</a> <a href="https://github.com/sktime/sktime/commits?author=Ayushmaanseth" title="Documentation">π</a> <a href="#eventOrganizing-Ayushmaanseth" title="Event Organizing">π</a> <a href="#tutorial-Ayushmaanseth" title="Tutorials">β </a></td> <td align="center" valign="top" width="11.11%"><a href="https://github.com/BandaSaiTejaReddy"><img src="https://avatars0.githubusercontent.com/u/31387911?v=4?s=100" width="100px;" alt="BANDASAITEJAREDDY"/><br /><sub><b>BANDASAITEJAREDDY</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=BandaSaiTejaReddy" title="Code">π»</a> <a href="https://github.com/sktime/sktime/commits?author=BandaSaiTejaReddy" title="Documentation">π</a></td> <td align="center" valign="top" width="11.11%"><a href="https://github.com/badrmarani"><img src="https://avatars.githubusercontent.com/badrmarani?s=100" width="100px;" alt="Badr-Eddine Marani"/><br /><sub><b>Badr-Eddine Marani</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=badrmarani" title="Code">π»</a></td> + <td align="center" valign="top" width="11.11%"><a href="https://github.com/sd2k"><img src="https://avatars.githubusercontent.com/u/5464991?&v=4?s=100" width="100px;" alt="Ben Sully"/><br /><sub><b>Ben Sully</b></sub></a><br /><a href="https://github.com/sktime/sktime/issues?q=author%3Asd2k" title="Bug reports">π</a> <a href="https://github.com/sktime/sktime/commits?author=sd2k" title="Code">π»</a></td> <td align="center" valign="top" width="11.11%"><a href="https://github.com/benheid"><img src="?s=100" width="100px;" alt="Benedikt Heidrich"/><br /><sub><b>Benedikt Heidrich</b></sub></a><br /><a href="https://github.com/sktime/sktime/issues?q=author%3Abenheid" title="Bug reports">π</a> <a href="https://github.com/sktime/sktime/commits?author=benheid" title="Code">π»</a> <a href="#design-benheid" title="Design">π¨</a> <a href="https://github.com/sktime/sktime/commits?author=benheid" title="Documentation">π</a> <a href="#example-benheid" title="Examples">π‘</a> <a href="#ideas-benheid" title="Ideas, Planning, & Feedback">π€</a> <a href="#mentoring-benheid" title="Mentoring">π§βπ«</a> <a href="#question-benheid" title="Answering Questions">π¬</a> <a href="https://github.com/sktime/sktime/pulls?q=is%3Apr+reviewed-by%3Abenheid" title="Reviewed Pull Requests">π</a> <a href="#tutorial-benheid" title="Tutorials">β </a></td> <td align="center" valign="top" width="11.11%"><a href="https://github.com/benjaminbluhm"><img src="?s=100" width="100px;" alt="Benjamin Bluhm"/><br /><sub><b>Benjamin Bluhm</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=benjaminbluhm" title="Code">π»</a> <a href="https://github.com/sktime/sktime/commits?author=benjaminbluhm" title="Documentation">π</a> <a href="#example-benjaminbluhm" title="Examples">π‘</a></td> - <td align="center" valign="top" width="11.11%"><a href="https://haskarb.github.io/"><img src="https://avatars.githubusercontent.com/u/20501023?v=4?s=100" width="100px;" alt="Bhaskar Dhariyal"/><br /><sub><b>Bhaskar Dhariyal</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=haskarb" title="Code">π»</a> <a href="https://github.com/sktime/sktime/commits?author=haskarb" title="Tests">β οΈ</a></td> </tr> <tr> + <td align="center" valign="top" width="11.11%"><a href="https://haskarb.github.io/"><img src="https://avatars.githubusercontent.com/u/20501023?v=4?s=100" width="100px;" alt="Bhaskar Dhariyal"/><br /><sub><b>Bhaskar Dhariyal</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=haskarb" title="Code">π»</a> <a href="https://github.com/sktime/sktime/commits?author=haskarb" title="Tests">β οΈ</a></td> <td align="center" valign="top" width="11.11%"><a href="https://github.com/BINAYKUMAR943"><img src="https://avatars.githubusercontent.com/u/38756834?v=4?s=100" width="100px;" alt="Binay Kumar"/><br /><sub><b>Binay Kumar</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=BINAYKUMAR943" title="Code">π»</a> <a href="https://github.com/sktime/sktime/commits?author=BINAYKUMAR943" title="Documentation">π</a> <a href="https://github.com/sktime/sktime/commits?author=BINAYKUMAR943" title="Tests">β οΈ</a></td> <td align="center" valign="top" width="11.11%"><a href="https://angelpone.github.io/"><img src="https://avatars.githubusercontent.com/u/32930283?v=4?s=100" width="100px;" alt="Bohan Zhang"/><br /><sub><b>Bohan Zhang</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=AngelPone" title="Code">π»</a></td> <td align="center" valign="top" width="11.11%"><a href="https://github.com/boukepostma"><img src="https://avatars.githubusercontent.com/boukepostma?s=100" width="100px;" alt="Bouke Postma"/><br /><sub><b>Bouke Postma</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=boukepostma" title="Code">π»</a> <a href="https://github.com/sktime/sktime/issues?q=author%3Aboukepostma" title="Bug reports">π</a> <a href="#ideas-boukepostma" title="Ideas, Planning, & Feedback">π€</a></td> @@ -67,9 +72,9 @@ Thanks goes to these wonderful people ([emoji key](https://allcontributors.org/d <td align="center" valign="top" width="11.11%"><a href="https://github.com/vnmabus"><img src="https://avatars1.githubusercontent.com/u/2364173?v=4?s=100" width="100px;" alt="Carlos Ramos CarreΓ±o"/><br /><sub><b>Carlos Ramos CarreΓ±o</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=vnmabus" title="Documentation">π</a></td> <td align="center" valign="top" width="11.11%"><a href="https://github.com/ChangWeiTan"><img src="https://avatars.githubusercontent.com/u/570744?v=4?s=100" width="100px;" alt="Chang Wei Tan"/><br /><sub><b>Chang Wei Tan</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=ChangWeiTan" title="Code">π»</a></td> <td align="center" valign="top" width="11.11%"><a href="http://cheuk.dev"><img src="https://avatars1.githubusercontent.com/u/28761465?v=4?s=100" width="100px;" alt="Cheuk Ting Ho"/><br /><sub><b>Cheuk Ting Ho</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=Cheukting" title="Code">π»</a></td> - <td align="center" valign="top" width="11.11%"><a href="https://github.com/ckastner"><img src="https://avatars.githubusercontent.com/u/15859947?v=4?s=100" width="100px;" alt="Christian Kastner"/><br /><sub><b>Christian Kastner</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=ckastner" title="Code">π»</a> <a href="https://github.com/sktime/sktime/issues?q=author%3Ackastner" title="Bug reports">π</a></td> </tr> <tr> + <td align="center" valign="top" width="11.11%"><a href="https://github.com/ckastner"><img src="https://avatars.githubusercontent.com/u/15859947?v=4?s=100" width="100px;" alt="Christian Kastner"/><br /><sub><b>Christian Kastner</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=ckastner" title="Code">π»</a> <a href="https://github.com/sktime/sktime/issues?q=author%3Ackastner" title="Bug reports">π</a></td> <td align="center" valign="top" width="11.11%"><a href="https://www.linkedin.com/in/hoesler/"><img src="https://avatars.githubusercontent.com/u/1052770?v=4?s=100" width="100px;" alt="Christoph HΓΆsler"/><br /><sub><b>Christoph HΓΆsler</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=hoesler" title="Code">π»</a></td> <td align="center" valign="top" width="11.11%"><a href="https://github.com/cdahlin"><img src="https://avatars.githubusercontent.com/u/1567780?v=4?s=100" width="100px;" alt="Christopher Dahlin"/><br /><sub><b>Christopher Dahlin</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=cdahlin" title="Code">π»</a></td> <td align="center" valign="top" width="11.11%"><a href="https://github.com/topher-lo"><img src="?s=100" width="100px;" alt="Christopher Lo"/><br /><sub><b>Christopher Lo</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=topher-lo" title="Code">π»</a> <a href="#ideas-topher-lo" title="Ideas, Planning, & Feedback">π€</a></td> @@ -78,20 +83,21 @@ Thanks goes to these wonderful people ([emoji key](https://allcontributors.org/d <td align="center" valign="top" width="11.11%"><a href="https://github.com/ClaudiaSanches"><img src="https://avatars3.githubusercontent.com/u/28742178?v=4?s=100" width="100px;" alt="ClaudiaSanches"/><br /><sub><b>ClaudiaSanches</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=ClaudiaSanches" title="Code">π»</a> <a href="https://github.com/sktime/sktime/commits?author=ClaudiaSanches" title="Tests">β οΈ</a></td> <td align="center" valign="top" width="11.11%"><a href="https://github.com/CTFallon"><img src="https://avatars.githubusercontent.com/u/19725980?v=4?s=100" width="100px;" alt="Colin Fallon"/><br /><sub><b>Colin Fallon</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=CTFallon" title="Documentation">π</a></td> <td align="center" valign="top" width="11.11%"><a href="https://sites.google.com/view/corvinpaul/"><img src="https://lh3.googleusercontent.com/zMvwkuxyIsRN1I0-HLojbcbbHaERXa-b9eztZ23z_C2m7cXdMiU4z36ekS5-cgBmikPhZA=w1280?s=100" width="100px;" alt="Corvin Paul"/><br /><sub><b>Corvin Paul</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=corvusrabus" title="Documentation">π</a></td> - <td align="center" valign="top" width="11.11%"><a href="https://github.com/DBCerigo"><img src="https://avatars.githubusercontent.com/u/8318425?v=4?s=100" width="100px;" alt="Daniel Burkhardt Cerigo"/><br /><sub><b>Daniel Burkhardt Cerigo</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=DBCerigo" title="Code">π»</a></td> </tr> <tr> + <td align="center" valign="top" width="11.11%"><a href="https://github.com/DBCerigo"><img src="https://avatars.githubusercontent.com/u/8318425?v=4?s=100" width="100px;" alt="Daniel Burkhardt Cerigo"/><br /><sub><b>Daniel Burkhardt Cerigo</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=DBCerigo" title="Code">π»</a></td> <td align="center" valign="top" width="11.11%"><a href="https://www.linkedin.com/in/daniel-martin-martinez"><img src="https://avatars.githubusercontent.com/dainelli98?s=100" width="100px;" alt="Daniel MartΓn MartΓnez"/><br /><sub><b>Daniel MartΓn MartΓnez</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=dainelli98" title="Documentation">π</a> <a href="https://github.com/sktime/sktime/issues?q=author%3Adainelli98" title="Bug reports">π</a></td> <td align="center" valign="top" width="11.11%"><a href="https://github.com/dashapetr"><img src="https://avatars.githubusercontent.com/u/54349415?v=4?s=100" width="100px;" alt="Darya Petrashka"/><br /><sub><b>Darya Petrashka</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=dashapetr" title="Documentation">π</a></td> <td align="center" valign="top" width="11.11%"><a href="https://dhirschfeld.github.io/"><img src="https://avatars1.githubusercontent.com/u/881019?v=4?s=100" width="100px;" alt="Dave Hirschfeld"/><br /><sub><b>Dave Hirschfeld</b></sub></a><br /><a href="#infra-dhirschfeld" title="Infrastructure (Hosting, Build-Tools, etc)">π</a></td> <td align="center" valign="top" width="11.11%"><a href="https://github.com/davidbp"><img src="https://avatars3.githubusercontent.com/u/4223580?v=4?s=100" width="100px;" alt="David Buchaca Prats"/><br /><sub><b>David Buchaca Prats</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=davidbp" title="Code">π»</a></td> <td align="center" valign="top" width="11.11%"><a href="https://github.com/davidgilbertson"><img src="https://avatars.githubusercontent.com/u/4443482?v=4?s=100" width="100px;" alt="David Gilbertson"/><br /><sub><b>David Gilbertson</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=davidgilbertson" title="Code">π»</a> <a href="https://github.com/sktime/sktime/issues?q=author%3Adavidgilbertson" title="Bug reports">π</a></td> <td align="center" valign="top" width="11.11%"><a href="http://www.uco.es/grupos/ayrna/index.php/es/publicaciones/articulos?publications_view_all=1&theses_view_all=0&projects_view_all=0&task=show&view=member&id=22"><img src="https://avatars1.githubusercontent.com/u/47889499?v=4?s=100" width="100px;" alt="David Guijo Rubio"/><br /><sub><b>David Guijo Rubio</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=dguijo" title="Code">π»</a> <a href="#ideas-dguijo" title="Ideas, Planning, & Feedback">π€</a></td> + <td align="center" valign="top" width="11.11%"><a href="https://github.com/DManowitz"><img src="https://avatars.githubusercontent.com/u/66927103?v=4?s=100" width="100px;" alt="David Manowitz"/><br /><sub><b>David Manowitz</b></sub></a><br /><a href="https://github.com/sktime/sktime/issues?q=author%3ADManowitz" title="Bug reports">π</a> <a href="#maintenance-DManowitz" title="Maintenance">π§</a></td> <td align="center" valign="top" width="11.11%"><a href="https://github.com/DmitriyValetov"><img src="https://avatars0.githubusercontent.com/u/27976850?v=4?s=100" width="100px;" alt="Dmitriy Valetov"/><br /><sub><b>Dmitriy Valetov</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=DmitriyValetov" title="Code">π»</a> <a href="#tutorial-DmitriyValetov" title="Tutorials">β </a></td> - <td align="center" valign="top" width="11.11%"><a href="https://github.com/dougollerenshaw"><img src="https://avatars.githubusercontent.com/u/19944442?v=4?s=100" width="100px;" alt="Doug Ollerenshaw"/><br /><sub><b>Doug Ollerenshaw</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=dougollerenshaw" title="Documentation">π</a></td> - <td align="center" valign="top" width="11.11%"><a href="https://github.com/Dbhasin1"><img src="https://avatars.githubusercontent.com/u/56479884?v=4?s=100" width="100px;" alt="Drishti Bhasin "/><br /><sub><b>Drishti Bhasin </b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=Dbhasin1" title="Code">π»</a></td> </tr> <tr> + <td align="center" valign="top" width="11.11%"><a href="https://github.com/dougollerenshaw"><img src="https://avatars.githubusercontent.com/u/19944442?v=4?s=100" width="100px;" alt="Doug Ollerenshaw"/><br /><sub><b>Doug Ollerenshaw</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=dougollerenshaw" title="Documentation">π</a></td> + <td align="center" valign="top" width="11.11%"><a href="https://github.com/Dbhasin1"><img src="https://avatars.githubusercontent.com/u/56479884?v=4?s=100" width="100px;" alt="Drishti Bhasin "/><br /><sub><b>Drishti Bhasin </b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=Dbhasin1" title="Code">π»</a></td> <td align="center" valign="top" width="11.11%"><a href="https://github.com/dsherry"><img src="https://avatars.githubusercontent.com/dsherry?s=100" width="100px;" alt="Dylan Sherry"/><br /><sub><b>Dylan Sherry</b></sub></a><br /><a href="#infra-dsherry" title="Infrastructure (Hosting, Build-Tools, etc)">π</a></td> <td align="center" valign="top" width="11.11%"><a href="https://github.com/Emiliathewolf"><img src="https://avatars2.githubusercontent.com/u/22026218?v=4?s=100" width="100px;" alt="Emilia Rose"/><br /><sub><b>Emilia Rose</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=Emiliathewolf" title="Code">π»</a> <a href="https://github.com/sktime/sktime/commits?author=Emiliathewolf" title="Tests">β οΈ</a></td> <td align="center" valign="top" width="11.11%"><a href="https://www.linkedin.com/in/erjieyong"><img src="https://avatars.githubusercontent.com/u/109052378?v=4?s=100" width="100px;" alt="Er Jie Yong"/><br /><sub><b>Er Jie Yong</b></sub></a><br /><a href="https://github.com/sktime/sktime/issues?q=author%3Aerjieyong" title="Bug reports">π</a> <a href="https://github.com/sktime/sktime/commits?author=erjieyong" title="Code">π»</a></td> @@ -99,153 +105,163 @@ Thanks goes to these wonderful people ([emoji key](https://allcontributors.org/d <td align="center" valign="top" width="11.11%"><a href="https://github.com/eyalshafran"><img src="https://avatars.githubusercontent.com/u/16999574?v=4?s=100" width="100px;" alt="Eyal Shafran"/><br /><sub><b>Eyal Shafran</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=eyalshafran" title="Code">π»</a></td> <td align="center" valign="top" width="11.11%"><a href="https://github.com/eyjo"><img src="https://avatars.githubusercontent.com/eyjo?s=100" width="100px;" alt="EyjΓ³lfur SigurΓ°sson"/><br /><sub><b>EyjΓ³lfur SigurΓ°sson</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=eyjo" title="Code">π»</a> <a href="https://github.com/sktime/sktime/commits?author=eyjo" title="Documentation">π</a></td> <td align="center" valign="top" width="11.11%"><a href="https://github.com/FedericoGarza"><img src="https://avatars.githubusercontent.com/u/10517170?v=4?s=100" width="100px;" alt="Federico Garza"/><br /><sub><b>Federico Garza</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=FedericoGarza" title="Code">π»</a> <a href="#example-FedericoGarza" title="Examples">π‘</a></td> - <td align="center" valign="top" width="11.11%"><a href="https://github.com/felipeangelimvieira"><img src="https://avatars.githubusercontent.com/felipeangelimvieira?s=100" width="100px;" alt="Felipe Angelim"/><br /><sub><b>Felipe Angelim</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=felipeangelimvieira" title="Code">π»</a> <a href="https://github.com/sktime/sktime/issues?q=author%3Afelipeangelimvieira" title="Bug reports">π</a></td> - <td align="center" valign="top" width="11.11%"><a href="https://github.com/flix6x"><img src="https://avatars.githubusercontent.com/u/30658763?v=4?s=100" width="100px;" alt="Felix Claessen"/><br /><sub><b>Felix Claessen</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=Flix6x" title="Code">π»</a> <a href="https://github.com/sktime/sktime/commits?author=Flix6x" title="Documentation">π</a> <a href="https://github.com/sktime/sktime/commits?author=Flix6x" title="Tests">β οΈ</a> <a href="https://github.com/sktime/sktime/issues?q=author%3AFlix6x" title="Bug reports">π</a></td> </tr> <tr> + <td align="center" valign="top" width="11.11%"><a href="https://github.com/felipeangelimvieira"><img src="https://avatars.githubusercontent.com/felipeangelimvieira?s=100" width="100px;" alt="Felipe Angelim"/><br /><sub><b>Felipe Angelim</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=felipeangelimvieira" title="Code">π»</a> <a href="https://github.com/sktime/sktime/issues?q=author%3Afelipeangelimvieira" title="Bug reports">π</a></td> + <td align="center" valign="top" width="11.11%"><a href="https://github.com/flix6x"><img src="https://avatars.githubusercontent.com/u/30658763?v=4?s=100" width="100px;" alt="Felix Claessen"/><br /><sub><b>Felix Claessen</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=Flix6x" title="Code">π»</a> <a href="https://github.com/sktime/sktime/commits?author=Flix6x" title="Documentation">π</a> <a href="https://github.com/sktime/sktime/commits?author=Flix6x" title="Tests">β οΈ</a> <a href="https://github.com/sktime/sktime/issues?q=author%3AFlix6x" title="Bug reports">π</a></td> <td align="center" valign="top" width="11.11%"><a href="https://github.com/fstinner"><img src="https://avatars.githubusercontent.com/u/11679462?v=4?s=100" width="100px;" alt="Florian Stinner"/><br /><sub><b>Florian Stinner</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=fstinner" title="Code">π»</a> <a href="https://github.com/sktime/sktime/commits?author=fstinner" title="Tests">β οΈ</a></td> + <td align="center" valign="top" width="11.11%"><a href="https://github.com/fspinna"><img src="https://avatars.githubusercontent.com/u/35352023?v=4?s=100" width="100px;" alt="Francesco Spinnato"/><br /><sub><b>Francesco Spinnato</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=fspinna" title="Code">π»</a></td> <td align="center" valign="top" width="11.11%"><a href="https://github.com/fkiraly"><img src="https://avatars1.githubusercontent.com/u/7985502?v=4?s=100" width="100px;" alt="Franz Kiraly"/><br /><sub><b>Franz Kiraly</b></sub></a><br /><a href="#blog-fkiraly" title="Blogposts">π</a> <a href="https://github.com/sktime/sktime/issues?q=author%3Afkiraly" title="Bug reports">π</a> <a href="#business-fkiraly" title="Business development">πΌ</a> <a href="https://github.com/sktime/sktime/commits?author=fkiraly" title="Code">π»</a> <a href="https://github.com/sktime/sktime/commits?author=fkiraly" title="Documentation">π</a> <a href="#design-fkiraly" title="Design">π¨</a> <a href="#eventOrganizing-fkiraly" title="Event Organizing">π</a> <a href="#example-fkiraly" title="Examples">π‘</a> <a href="#financial-fkiraly" title="Financial">π΅</a> <a href="#fundingFinding-fkiraly" title="Funding Finding">π</a> <a href="#ideas-fkiraly" title="Ideas, Planning, & Feedback">π€</a> <a href="#maintenance-fkiraly" title="Maintenance">π§</a> <a href="#mentoring-fkiraly" title="Mentoring">π§βπ«</a> <a href="#projectManagement-fkiraly" title="Project Management">π</a> <a href="#question-fkiraly" title="Answering Questions">π¬</a> <a href="https://github.com/sktime/sktime/pulls?q=is%3Apr+reviewed-by%3Afkiraly" title="Reviewed Pull Requests">π</a> <a href="#talk-fkiraly" title="Talks">π’</a> <a href="https://github.com/sktime/sktime/commits?author=fkiraly" title="Tests">β οΈ</a> <a href="#tutorial-fkiraly" title="Tutorials">β </a> <a href="#video-fkiraly" title="Videos">πΉ</a></td> <td align="center" valign="top" width="11.11%"><a href="https://github.com/freddyaboulton"><img src="https://avatars.githubusercontent.com/u/41651716?v=4?s=100" width="100px;" alt="Freddy A Boulton"/><br /><sub><b>Freddy A Boulton</b></sub></a><br /><a href="#infra-freddyaboulton" title="Infrastructure (Hosting, Build-Tools, etc)">π</a> <a href="https://github.com/sktime/sktime/commits?author=freddyaboulton" title="Tests">β οΈ</a></td> <td align="center" valign="top" width="11.11%"><a href="https://github.com/chernika158"><img src="https://avatars.githubusercontent.com/u/43787741?s=400&v=4?s=100" width="100px;" alt="Galina Chernikova"/><br /><sub><b>Galina Chernikova</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=chernika158" title="Code">π»</a></td> <td align="center" valign="top" width="11.11%"><a href="https://github.com/goastler"><img src="https://avatars0.githubusercontent.com/u/7059456?v=4?s=100" width="100px;" alt="George Oastler"/><br /><sub><b>George Oastler</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=goastler" title="Code">π»</a> <a href="https://github.com/sktime/sktime/commits?author=goastler" title="Tests">β οΈ</a> <a href="#platform-goastler" title="Packaging/porting to new platform">π¦</a> <a href="#example-goastler" title="Examples">π‘</a> <a href="https://github.com/sktime/sktime/commits?author=goastler" title="Documentation">π</a></td> <td align="center" valign="top" width="11.11%"><a href="https://github.com/geronimos"><img src="https://avatars.githubusercontent.com/u/29955288?s=96&v=4?s=100" width="100px;" alt="Geronimo Bergk"/><br /><sub><b>Geronimo Bergk</b></sub></a><br /><a href="https://github.com/sktime/sktime/issues?q=author%3Ageronimos" title="Bug reports">π</a> <a href="https://github.com/sktime/sktime/commits?author=geronimos" title="Code">π»</a></td> + </tr> + <tr> <td align="center" valign="top" width="11.11%"><a href="https://www.linkedin.com/in/gracewgao/"><img src="https://avatars0.githubusercontent.com/u/38268331?v=4?s=100" width="100px;" alt="Grace Gao"/><br /><sub><b>Grace Gao</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=gracewgao" title="Code">π»</a> <a href="https://github.com/sktime/sktime/issues?q=author%3Agracewgao" title="Bug reports">π</a></td> <td align="center" valign="top" width="11.11%"><a href="https://github.com/GuzalBulatova"><img src="https://avatars.githubusercontent.com/GuzalBulatova?s=100" width="100px;" alt="Guzal Bulatova"/><br /><sub><b>Guzal Bulatova</b></sub></a><br /><a href="https://github.com/sktime/sktime/issues?q=author%3AGuzalBulatova" title="Bug reports">π</a> <a href="https://github.com/sktime/sktime/commits?author=GuzalBulatova" title="Code">π»</a> <a href="#eventOrganizing-GuzalBulatova" title="Event Organizing">π</a> <a href="#mentoring-GuzalBulatova" title="Mentoring">π§βπ«</a> <a href="#projectManagement-GuzalBulatova" title="Project Management">π</a> <a href="https://github.com/sktime/sktime/pulls?q=is%3Apr+reviewed-by%3AGuzalBulatova" title="Reviewed Pull Requests">π</a> <a href="https://github.com/sktime/sktime/commits?author=GuzalBulatova" title="Tests">β οΈ</a></td> <td align="center" valign="top" width="11.11%"><a href="https://github.com/HYang1996"><img src="https://avatars0.githubusercontent.com/u/44179303?v=4?s=100" width="100px;" alt="HYang1996"/><br /><sub><b>HYang1996</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=HYang1996" title="Code">π»</a> <a href="https://github.com/sktime/sktime/commits?author=HYang1996" title="Tests">β οΈ</a> <a href="https://github.com/sktime/sktime/commits?author=HYang1996" title="Documentation">π</a> <a href="#tutorial-HYang1996" title="Tutorials">β </a></td> - </tr> - <tr> <td align="center" valign="top" width="11.11%"><a href="https://github.com/BensHamza"><img src="https://avatars.githubusercontent.com/u/96446862?v=4?s=100" width="100px;" alt="Hamza Benslimane"/><br /><sub><b>Hamza Benslimane</b></sub></a><br /><a href="https://github.com/sktime/sktime/issues?q=author%3ABensHamza" title="Bug reports">π</a> <a href="https://github.com/sktime/sktime/commits?author=BensHamza" title="Code">π»</a></td> <td align="center" valign="top" width="11.11%"><a href="https://github.com/hazrulakmal"><img src="https://avatars.githubusercontent.com/u/24774385?v=4?s=100" width="100px;" alt="Hazrul Akmal"/><br /><sub><b>Hazrul Akmal</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=hazrulakmal" title="Code">π»</a> <a href="https://github.com/sktime/sktime/commits?author=hazrulakmal" title="Documentation">π</a> <a href="https://github.com/sktime/sktime/issues?q=author%3Ahazrulakmal" title="Bug reports">π</a> <a href="https://github.com/sktime/sktime/commits?author=hazrulakmal" title="Tests">β οΈ</a></td> <td align="center" valign="top" width="11.11%"><a href="https://github.com/hliebert"><img src="https://avatars.githubusercontent.com/u/20834265?s=100" width="100px;" alt="Helge Liebert"/><br /><sub><b>Helge Liebert</b></sub></a><br /><a href="https://github.com/sktime/sktime/issues?q=author%3Ahliebert" title="Bug reports">π</a> <a href="https://github.com/sktime/sktime/commits?author=hliebert" title="Code">π»</a></td> <td align="center" valign="top" width="11.11%"><a href="https://www.linkedin.com/in/huayiwei/"><img src="https://avatars3.githubusercontent.com/u/22870735?v=4?s=100" width="100px;" alt="Huayi Wei"/><br /><sub><b>Huayi Wei</b></sub></a><br /><a href="#tutorial-huayicodes" title="Tutorials">β </a></td> <td align="center" valign="top" width="11.11%"><a href="https://github.com/Ifeanyi30"><img src="https://avatars.githubusercontent.com/u/49926145?v=4?s=100" width="100px;" alt="Ifeanyi30"/><br /><sub><b>Ifeanyi30</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=Ifeanyi30" title="Code">π»</a></td> <td align="center" valign="top" width="11.11%"><a href="https://github.com/iljamaurer"><img src="https://avatars.githubusercontent.com/u/45882103?v=4?s=100" width="100px;" alt="Ilja Maurer"/><br /><sub><b>Ilja Maurer</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=iljamaurer" title="Code">π»</a></td> + </tr> + <tr> <td align="center" valign="top" width="11.11%"><a href="https://github.com/IlyasMoutawwakil"><img src="https://avatars.githubusercontent.com/IlyasMoutawwakil?s=100" width="100px;" alt="Ilyas Moutawwakil"/><br /><sub><b>Ilyas Moutawwakil</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=IlyasMoutawwakil" title="Code">π»</a> <a href="https://github.com/sktime/sktime/commits?author=IlyasMoutawwakil" title="Documentation">π</a></td> <td align="center" valign="top" width="11.11%"><a href="https://www.linkedin.com/in/ireoluwatomiwa-sanusi/"><img src="https://avatars.githubusercontent.com/u/61966277?v=4?s=100" width="100px;" alt="Ireoluwatomiwa"/><br /><sub><b>Ireoluwatomiwa</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=Tomiiwa" title="Documentation">π</a></td> <td align="center" valign="top" width="11.11%"><a href="https://github.com/ishannangia001"><img src="https://avatars.githubusercontent.com/u/29480389?v=4?s=100" width="100px;" alt="Ishan Nangia"/><br /><sub><b>Ishan Nangia</b></sub></a><br /><a href="#ideas-ishannangia001" title="Ideas, Planning, & Feedback">π€</a></td> - </tr> - <tr> <td align="center" valign="top" width="11.11%"><a href="https://github.com/jnrusson1"><img src="https://avatars.githubusercontent.com/u/51986332?v=4?s=100" width="100px;" alt="Jack Russon"/><br /><sub><b>Jack Russon</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=jnrusson1" title="Code">π»</a></td> <td align="center" valign="top" width="11.11%"><a href="http://www.timeseriesclassification.com/"><img src="https://avatars0.githubusercontent.com/u/44509982?v=4?s=100" width="100px;" alt="James Large"/><br /><sub><b>James Large</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=James-Large" title="Code">π»</a> <a href="https://github.com/sktime/sktime/commits?author=James-Large" title="Documentation">π</a> <a href="https://github.com/sktime/sktime/commits?author=James-Large" title="Tests">β οΈ</a> <a href="#infra-James-Large" title="Infrastructure (Hosting, Build-Tools, etc)">π</a> <a href="#maintenance-James-Large" title="Maintenance">π§</a></td> <td align="center" valign="top" width="11.11%"><a href="https://github.com/jambo6"><img src="https://https://avatars.githubusercontent.com/jambo6?s=100" width="100px;" alt="James Morrill"/><br /><sub><b>James Morrill</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=jambo6" title="Code">π»</a></td> <td align="center" valign="top" width="11.11%"><a href="https://github.com/janpipek"><img src="https://avatars.githubusercontent.com/janpipek?s=100" width="100px;" alt="Jan Pipek"/><br /><sub><b>Jan Pipek</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=janpipek" title="Code">π»</a></td> <td align="center" valign="top" width="11.11%"><a href="https://github.com/jasmineliaw"><img src="?s=100" width="100px;" alt="Jasmine Liaw"/><br /><sub><b>Jasmine Liaw</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=jasmineliaw" title="Code">π»</a></td> <td align="center" valign="top" width="11.11%"><a href="http://www.timeseriesclassification.com"><img src="https://avatars1.githubusercontent.com/u/38794632?v=4?s=100" width="100px;" alt="Jason Lines"/><br /><sub><b>Jason Lines</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=jasonlines" title="Code">π»</a> <a href="#business-jasonlines" title="Business development">πΌ</a> <a href="https://github.com/sktime/sktime/commits?author=jasonlines" title="Documentation">π</a> <a href="#design-jasonlines" title="Design">π¨</a> <a href="#eventOrganizing-jasonlines" title="Event Organizing">π</a> <a href="#fundingFinding-jasonlines" title="Funding Finding">π</a> <a href="#ideas-jasonlines" title="Ideas, Planning, & Feedback">π€</a> <a href="#projectManagement-jasonlines" title="Project Management">π</a> <a href="#question-jasonlines" title="Answering Questions">π¬</a> <a href="https://github.com/sktime/sktime/pulls?q=is%3Apr+reviewed-by%3Ajasonlines" title="Reviewed Pull Requests">π</a> <a href="#talk-jasonlines" title="Talks">π’</a> <a href="#example-jasonlines" title="Examples">π‘</a></td> + </tr> + <tr> <td align="center" valign="top" width="11.11%"><a href="https://github.com/whackteachers"><img src="https://avatars0.githubusercontent.com/u/33785383?v=4?s=100" width="100px;" alt="Jason Pong"/><br /><sub><b>Jason Pong</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=whackteachers" title="Code">π»</a> <a href="https://github.com/sktime/sktime/commits?author=whackteachers" title="Tests">β οΈ</a></td> <td align="center" valign="top" width="11.11%"><a href="https://github.com/mateuja"><img src="https://avatars.githubusercontent.com/mateuja?s=100" width="100px;" alt="Jaume Mateu"/><br /><sub><b>Jaume Mateu</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=mateuja" title="Code">π»</a></td> <td align="center" valign="top" width="11.11%"><a href="https://github.com/pirnerjonas"><img src="https://avatars.githubusercontent.com/u/48887249?v=4?s=100" width="100px;" alt="Jonas Pirner"/><br /><sub><b>Jonas Pirner</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=pirnerjonas" title="Documentation">π</a></td> - </tr> - <tr> <td align="center" valign="top" width="11.11%"><a href="https://github.com/JonathanBechtel"><img src="https://avatars.githubusercontent.com/u/481696?v=4?s=100" width="100px;" alt="JonathanBechtel"/><br /><sub><b>JonathanBechtel</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=JonathanBechtel" title="Code">π»</a> <a href="#ideas-JonathanBechtel" title="Ideas, Planning, & Feedback">π€</a> <a href="https://github.com/sktime/sktime/commits?author=JonathanBechtel" title="Tests">β οΈ</a></td> <td align="center" valign="top" width="11.11%"><a href="https://github.com/jorenham"><img src="?s=100" width="100px;" alt="Joren Hammudoglu"/><br /><sub><b>Joren Hammudoglu</b></sub></a><br /><a href="#infra-jorenham" title="Infrastructure (Hosting, Build-Tools, etc)">π</a></td> <td align="center" valign="top" width="11.11%"><a href="https://juanitorduz.github.io/"><img src="https://avatars1.githubusercontent.com/u/22996444?v=4?s=100" width="100px;" alt="Juan Orduz"/><br /><sub><b>Juan Orduz</b></sub></a><br /><a href="#tutorial-juanitorduz" title="Tutorials">β </a> <a href="https://github.com/sktime/sktime/commits?author=juanitorduz" title="Documentation">π</a></td> <td align="center" valign="top" width="11.11%"><a href="https://github.com/julia-kraus"><img src="https://avatars.githubusercontent.com/julia-kraus?s=100" width="100px;" alt="Julia Kraus"/><br /><sub><b>Julia Kraus</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=julia-kraus" title="Documentation">π</a> <a href="https://github.com/sktime/sktime/commits?author=julia-kraus" title="Code">π»</a> <a href="https://github.com/sktime/sktime/commits?author=julia-kraus" title="Tests">β οΈ</a></td> <td align="center" valign="top" width="11.11%"><a href="https://github.com/jelc53"><img src="?s=100" width="100px;" alt="Julian Cooper"/><br /><sub><b>Julian Cooper</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=jelc53" title="Code">π»</a> <a href="#ideas-jelc53" title="Ideas, Planning, & Feedback">π€</a></td> <td align="center" valign="top" width="11.11%"><a href="https://github.com/julnow"><img src="https://avatars.githubusercontent.com/u/21206185?v=4?s=100" width="100px;" alt="Julian Nowak"/><br /><sub><b>Julian Nowak</b></sub></a><br /><a href="https://github.com/sktime/sktime/issues?q=author%3Ajulnow" title="Bug reports">π</a> <a href="https://github.com/sktime/sktime/commits?author=julnow" title="Code">π»</a></td> + </tr> + <tr> <td align="center" valign="top" width="11.11%"><a href="https://www.linkedin.com/in/julianarn/"><img src="https://avatars.githubusercontent.com/u/19613567?v=4?s=100" width="100px;" alt="Juliana"/><br /><sub><b>Juliana</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=julramos" title="Code">π»</a></td> <td align="center" valign="top" width="11.11%"><a href="https://www.justinshenk.com/"><img src="https://avatars.githubusercontent.com/u/10270308?v=4?s=100" width="100px;" alt="Justin Shenk"/><br /><sub><b>Justin Shenk</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=justinshenk" title="Documentation">π</a></td> <td align="center" valign="top" width="11.11%"><a href="https://github.com/kcc-lion"><img src="?s=100" width="100px;" alt="Kai Lion"/><br /><sub><b>Kai Lion</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=kcc-lion" title="Code">π»</a> <a href="https://github.com/sktime/sktime/commits?author=kcc-lion" title="Tests">β οΈ</a> <a href="https://github.com/sktime/sktime/commits?author=kcc-lion" title="Documentation">π</a></td> - </tr> - <tr> <td align="center" valign="top" width="11.11%"><a href="https://github.com/kanand77"><img src="https://avatars.githubusercontent.com/kanand77?s=100" width="100px;" alt="Kavin Anand"/><br /><sub><b>Kavin Anand</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=kanand77" title="Documentation">π</a></td> <td align="center" valign="top" width="11.11%"><a href="https://kejsitake.com/"><img src="https://avatars.githubusercontent.com/u/23707808?v=4?s=100" width="100px;" alt="Kejsi Take"/><br /><sub><b>Kejsi Take</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=kejsitake" title="Code">π»</a></td> <td align="center" valign="top" width="11.11%"><a href="https://www.linkedin.com/in/kevinlam2"><img src="https://avatars.githubusercontent.com/u/114420932?s=400&v=4?s=100" width="100px;" alt="Kevin Lam"/><br /><sub><b>Kevin Lam</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=klam-data" title="Code">π»</a> <a href="#example-klam-data" title="Examples">π‘</a> <a href="https://github.com/sktime/sktime/commits?author=klam-data" title="Tests">β οΈ</a></td> <td align="center" valign="top" width="11.11%"><a href="https://whitakerlab.github.io/"><img src="https://avatars1.githubusercontent.com/u/3626306?v=4?s=100" width="100px;" alt="Kirstie Whitaker"/><br /><sub><b>Kirstie Whitaker</b></sub></a><br /><a href="#ideas-KirstieJane" title="Ideas, Planning, & Feedback">π€</a> <a href="#fundingFinding-KirstieJane" title="Funding Finding">π</a></td> <td align="center" valign="top" width="11.11%"><a href="https://github.com/kishmanani"><img src="https://avatars.githubusercontent.com/u/30973056?v=4?s=100" width="100px;" alt="Kishan Manani"/><br /><sub><b>Kishan Manani</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=KishManani" title="Code">π»</a> <a href="https://github.com/sktime/sktime/commits?author=KishManani" title="Documentation">π</a> <a href="https://github.com/sktime/sktime/commits?author=KishManani" title="Tests">β οΈ</a> <a href="https://github.com/sktime/sktime/issues?q=author%3AKishManani" title="Bug reports">π</a> <a href="#ideas-KishManani" title="Ideas, Planning, & Feedback">π€</a></td> <td align="center" valign="top" width="11.11%"><a href="https://github.com/krumeto"><img src="https://avatars3.githubusercontent.com/u/11272436?v=4?s=100" width="100px;" alt="Krum Arnaudov"/><br /><sub><b>Krum Arnaudov</b></sub></a><br /><a href="https://github.com/sktime/sktime/issues?q=author%3Akrumeto" title="Bug reports">π</a> <a href="https://github.com/sktime/sktime/commits?author=krumeto" title="Code">π»</a></td> + </tr> + <tr> <td align="center" valign="top" width="11.11%"><a href="https://github.com/koralturkk"><img src="https://avatars2.githubusercontent.com/u/18037789?s=460&v=4?s=100" width="100px;" alt="Kutay Koralturk"/><br /><sub><b>Kutay Koralturk</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=koralturkk" title="Code">π»</a> <a href="https://github.com/sktime/sktime/issues?q=author%3Akoralturkk" title="Bug reports">π</a></td> <td align="center" valign="top" width="11.11%"><a href="https://github.com/ltsaprounis"><img src="https://avatars.githubusercontent.com/u/64217214?v=4?s=100" width="100px;" alt="Leonidas Tsaprounis"/><br /><sub><b>Leonidas Tsaprounis</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=ltsaprounis" title="Code">π»</a> <a href="https://github.com/sktime/sktime/issues?q=author%3Altsaprounis" title="Bug reports">π</a> <a href="#mentoring-ltsaprounis" title="Mentoring">π§βπ«</a> <a href="https://github.com/sktime/sktime/pulls?q=is%3Apr+reviewed-by%3Altsaprounis" title="Reviewed Pull Requests">π</a></td> <td align="center" valign="top" width="11.11%"><a href="https://github.com/lielleravid"><img src="https://avatars.githubusercontent.com/u/37774194?v=4?s=100" width="100px;" alt="Lielle Ravid"/><br /><sub><b>Lielle Ravid</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=lielleravid" title="Code">π»</a> <a href="https://github.com/sktime/sktime/commits?author=lielleravid" title="Documentation">π</a></td> - </tr> - <tr> <td align="center" valign="top" width="11.11%"><a href="https://github.com/chillerobscuro"><img src="https://avatars.githubusercontent.com/u/5232872?v=4?s=100" width="100px;" alt="Logan Duffy"/><br /><sub><b>Logan Duffy</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=chillerobscuro" title="Code">π»</a> <a href="https://github.com/sktime/sktime/commits?author=chillerobscuro" title="Documentation">π</a> <a href="https://github.com/sktime/sktime/commits?author=chillerobscuro" title="Tests">β οΈ</a> <a href="https://github.com/sktime/sktime/issues?q=author%3Achillerobscuro" title="Bug reports">π</a> <a href="#ideas-chillerobscuro" title="Ideas, Planning, & Feedback">π€</a></td> <td align="center" valign="top" width="11.11%"><a href="http://lpantano.github.io/"><img src="https://avatars2.githubusercontent.com/u/1621788?v=4?s=100" width="100px;" alt="Lorena Pantano"/><br /><sub><b>Lorena Pantano</b></sub></a><br /><a href="#ideas-lpantano" title="Ideas, Planning, & Feedback">π€</a></td> <td align="center" valign="top" width="11.11%"><a href="https://github.com/ltoniazzi"><img src="https://avatars.githubusercontent.com/u/61414566?s=100" width="100px;" alt="Lorenzo Toniazzi"/><br /><sub><b>Lorenzo Toniazzi</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=ltoniazzi" title="Code">π»</a></td> <td align="center" valign="top" width="11.11%"><a href="https://github.com/Lovkush-A"><img src="https://avatars.githubusercontent.com/u/25344832?v=4?s=100" width="100px;" alt="Lovkush"/><br /><sub><b>Lovkush</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=Lovkush-A" title="Code">π»</a> <a href="https://github.com/sktime/sktime/commits?author=Lovkush-A" title="Tests">β οΈ</a> <a href="#ideas-Lovkush-A" title="Ideas, Planning, & Feedback">π€</a> <a href="#mentoring-Lovkush-A" title="Mentoring">π§βπ«</a> <a href="#projectManagement-Lovkush-A" title="Project Management">π</a></td> <td align="center" valign="top" width="11.11%"><a href="https://github.com/luca-miniati"><img src="https://avatars.githubusercontent.com/u/87467600?v=4?s=100" width="100px;" alt="Luca Miniati"/><br /><sub><b>Luca Miniati</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=luca-miniati" title="Code">π»</a> <a href="https://github.com/sktime/sktime/commits?author=luca-miniati" title="Documentation">π</a></td> <td align="center" valign="top" width="11.11%"><a href="https://github.com/lbventura"><img src="https://avatars.githubusercontent.com/u/68004282?s=96&v=4?s=100" width="100px;" alt="Luis Ventura"/><br /><sub><b>Luis Ventura</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=lbventura" title="Code">π»</a></td> + </tr> + <tr> <td align="center" valign="top" width="11.11%"><a href="https://github.com/luiszugasti"><img src="https://avatars.githubusercontent.com/u/11198457?s=460&u=0645b72683e491824aca16db9702f1d3eb990389&v=4?s=100" width="100px;" alt="Luis Zugasti"/><br /><sub><b>Luis Zugasti</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=luiszugasti" title="Documentation">π</a></td> <td align="center" valign="top" width="11.11%"><a href="https://github.com/lmmentel"><img src="https://avatars.githubusercontent.com/u/8989838?v=4?s=100" width="100px;" alt="Lukasz Mentel"/><br /><sub><b>Lukasz Mentel</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=lmmentel" title="Code">π»</a> <a href="https://github.com/sktime/sktime/commits?author=lmmentel" title="Documentation">π</a> <a href="#infra-lmmentel" title="Infrastructure (Hosting, Build-Tools, etc)">π</a> <a href="https://github.com/sktime/sktime/commits?author=lmmentel" title="Tests">β οΈ</a> <a href="https://github.com/sktime/sktime/issues?q=author%3Almmentel" title="Bug reports">π</a> <a href="#maintenance-lmmentel" title="Maintenance">π§</a> <a href="#mentoring-lmmentel" title="Mentoring">π§βπ«</a></td> <td align="center" valign="top" width="11.11%"><a href="https://github.com/marrov"><img src="https://avatars.githubusercontent.com/u/54272586?v=4?s=100" width="100px;" alt="Marc Rovira"/><br /><sub><b>Marc Rovira</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=marrov" title="Documentation">π</a></td> - </tr> - <tr> <td align="center" valign="top" width="11.11%"><a href="http://marcelotryle.com"><img src="https://avatars3.githubusercontent.com/u/7353520?v=4?s=100" width="100px;" alt="Marcelo Trylesinski"/><br /><sub><b>Marcelo Trylesinski</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=Kludex" title="Documentation">π</a></td> <td align="center" valign="top" width="11.11%"><a href="https://github.com/MarcoGorelli"><img src="https://avatars2.githubusercontent.com/u/33491632?v=4?s=100" width="100px;" alt="Marco Gorelli"/><br /><sub><b>Marco Gorelli</b></sub></a><br /><a href="#infra-MarcoGorelli" title="Infrastructure (Hosting, Build-Tools, etc)">π</a></td> <td align="center" valign="top" width="11.11%"><a href="https://www.linkedin.com/in/margaret-gorlin/"><img src="?s=100" width="100px;" alt="Margaret Gorlin"/><br /><sub><b>Margaret Gorlin</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=mgorlin" title="Code">π»</a> <a href="#example-mgorlin" title="Examples">π‘</a> <a href="https://github.com/sktime/sktime/commits?author=mgorlin" title="Tests">β οΈ</a></td> <td align="center" valign="top" width="11.11%"><a href="https://github.com/mariamjabara"><img src="?s=100" width="100px;" alt="Mariam Jabara"/><br /><sub><b>Mariam Jabara</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=mariamjabara" title="Code">π»</a></td> <td align="center" valign="top" width="11.11%"><a href="https://twitter.com/marielli"><img src="https://avatars2.githubusercontent.com/u/13499809?v=4?s=100" width="100px;" alt="Marielle"/><br /><sub><b>Marielle</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=marielledado" title="Documentation">π</a> <a href="https://github.com/sktime/sktime/commits?author=marielledado" title="Code">π»</a> <a href="#ideas-marielledado" title="Ideas, Planning, & Feedback">π€</a></td> <td align="center" valign="top" width="11.11%"><a href="https://github.com/mloning"><img src="https://avatars3.githubusercontent.com/u/21020482?v=4?s=100" width="100px;" alt="Markus LΓΆning"/><br /><sub><b>Markus LΓΆning</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=mloning" title="Code">π»</a> <a href="https://github.com/sktime/sktime/commits?author=mloning" title="Tests">β οΈ</a> <a href="#maintenance-mloning" title="Maintenance">π§</a> <a href="#platform-mloning" title="Packaging/porting to new platform">π¦</a> <a href="https://github.com/sktime/sktime/pulls?q=is%3Apr+reviewed-by%3Amloning" title="Reviewed Pull Requests">π</a> <a href="#infra-mloning" title="Infrastructure (Hosting, Build-Tools, etc)">π</a> <a href="#example-mloning" title="Examples">π‘</a> <a href="https://github.com/sktime/sktime/issues?q=author%3Amloning" title="Bug reports">π</a> <a href="#tutorial-mloning" title="Tutorials">β </a> <a href="#business-mloning" title="Business development">πΌ</a> <a href="https://github.com/sktime/sktime/commits?author=mloning" title="Documentation">π</a> <a href="#design-mloning" title="Design">π¨</a> <a href="#eventOrganizing-mloning" title="Event Organizing">π</a> <a href="#fundingFinding-mloning" title="Funding Finding">π</a> <a href="#ideas-mloning" title="Ideas, Planning, & Feedback">π€</a> <a href="#projectManagement-mloning" title="Project Management">π</a> <a href="#question-mloning" title="Answering Questions">π¬</a> <a href="#talk-mloning" title="Talks">π’</a> <a href="#mentoring-mloning" title="Mentoring">π§βπ«</a> <a href="#video-mloning" title="Videos">πΉ</a></td> + </tr> + <tr> <td align="center" valign="top" width="11.11%"><a href="https://www.linkedin.com/in/martin-walter-1a33b3114/"><img src="https://avatars0.githubusercontent.com/u/29627036?v=4?s=100" width="100px;" alt="Martin Walter"/><br /><sub><b>Martin Walter</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=aiwalter" title="Code">π»</a> <a href="https://github.com/sktime/sktime/issues?q=author%3Aaiwalter" title="Bug reports">π</a> <a href="#projectManagement-aiwalter" title="Project Management">π</a> <a href="#fundingFinding-aiwalter" title="Funding Finding">π</a> <a href="#mentoring-aiwalter" title="Mentoring">π§βπ«</a> <a href="#ideas-aiwalter" title="Ideas, Planning, & Feedback">π€</a> <a href="#design-aiwalter" title="Design">π¨</a> <a href="https://github.com/sktime/sktime/pulls?q=is%3Apr+reviewed-by%3Aaiwalter" title="Reviewed Pull Requests">π</a> <a href="https://github.com/sktime/sktime/commits?author=aiwalter" title="Documentation">π</a> <a href="#talk-aiwalter" title="Talks">π’</a></td> <td align="center" valign="top" width="11.11%"><a href="https://github.com/martinagvilas"><img src="https://avatars2.githubusercontent.com/u/37339384?v=4?s=100" width="100px;" alt="Martina G. Vilas"/><br /><sub><b>Martina G. Vilas</b></sub></a><br /><a href="https://github.com/sktime/sktime/pulls?q=is%3Apr+reviewed-by%3Amartinagvilas" title="Reviewed Pull Requests">π</a> <a href="#ideas-martinagvilas" title="Ideas, Planning, & Feedback">π€</a></td> <td align="center" valign="top" width="11.11%"><a href="https://github.com/MCRE-BE"><img src="https://avatars.githubusercontent.com/u/99316631?s=100" width="100px;" alt="Mathias Creemers"/><br /><sub><b>Mathias Creemers</b></sub></a><br /><a href="https://github.com/sktime/sktime/issues?q=author%3AMCRE-BE" title="Bug reports">π</a> <a href="https://github.com/sktime/sktime/commits?author=MCRE-BE" title="Code">π»</a></td> - </tr> - <tr> <td align="center" valign="top" width="11.11%"><a href="http://www.timeseriesclassification.com"><img src="https://avatars0.githubusercontent.com/u/25731235?v=4?s=100" width="100px;" alt="Matthew Middlehurst"/><br /><sub><b>Matthew Middlehurst</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=MatthewMiddlehurst" title="Code">π»</a> <a href="https://github.com/sktime/sktime/commits?author=MatthewMiddlehurst" title="Documentation">π</a> <a href="https://github.com/sktime/sktime/commits?author=MatthewMiddlehurst" title="Tests">β οΈ</a> <a href="#tutorial-MatthewMiddlehurst" title="Tutorials">β </a> <a href="https://github.com/sktime/sktime/pulls?q=is%3Apr+reviewed-by%3AMatthewMiddlehurst" title="Reviewed Pull Requests">π</a> <a href="https://github.com/sktime/sktime/issues?q=author%3AMatthewMiddlehurst" title="Bug reports">π</a></td> + <td align="center" valign="top" width="11.11%"><a href="https://github.com/tvdboom"><img src="https://avatars.githubusercontent.com/u/32366550?v=4?s=100" width="100px;" alt="Mavs"/><br /><sub><b>Mavs</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=tvdboom" title="Code">π»</a></td> + <td align="center" valign="top" width="11.11%"><a href="https://www.linkedin.com/in/maxfrohlich/"><img src="https://avatars.githubusercontent.com/u/16393653?v=4?s=100" width="100px;" alt="Max Frohlich"/><br /><sub><b>Max Frohlich</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=ninedigits" title="Code">π»</a> <a href="#ideas-ninedigits" title="Ideas, Planning, & Feedback">π€</a> <a href="#maintenance-ninedigits" title="Maintenance">π§</a></td> <td align="center" valign="top" width="11.11%"><a href="https://github.com/solen0id"><img src="https://avatars.githubusercontent.com/u/20767606?v=4?s=100" width="100px;" alt="Max Patzelt"/><br /><sub><b>Max Patzelt</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=solen0id" title="Code">π»</a></td> <td align="center" valign="top" width="11.11%"><a href="https://github.com/Hephaest"><img src="https://avatars2.githubusercontent.com/u/37981444?v=4?s=100" width="100px;" alt="Miao Cai"/><br /><sub><b>Miao Cai</b></sub></a><br /><a href="https://github.com/sktime/sktime/issues?q=author%3AHephaest" title="Bug reports">π</a> <a href="https://github.com/sktime/sktime/commits?author=Hephaest" title="Code">π»</a></td> <td align="center" valign="top" width="11.11%"><a href="michaelfeil.eu"><img src="https://avatars.githubusercontent.com/u/63565275?v=4?s=100" width="100px;" alt="Michael Feil"/><br /><sub><b>Michael Feil</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=michaelfeil" title="Code">π»</a> <a href="https://github.com/sktime/sktime/commits?author=michaelfeil" title="Tests">β οΈ</a> <a href="#ideas-michaelfeil" title="Ideas, Planning, & Feedback">π€</a></td> + </tr> + <tr> <td align="center" valign="top" width="11.11%"><a href="https://github.com/mgazian000"><img src="https://avatars.githubusercontent.com/mgazian000?s=100" width="100px;" alt="Michael Gaziani"/><br /><sub><b>Michael Gaziani</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=mgazian000" title="Documentation">π</a></td> <td align="center" valign="top" width="11.11%"><a href="https://github.com/MichalChromcak"><img src="https://avatars1.githubusercontent.com/u/12393430?v=4?s=100" width="100px;" alt="Michal Chromcak"/><br /><sub><b>Michal Chromcak</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=MichalChromcak" title="Code">π»</a> <a href="https://github.com/sktime/sktime/commits?author=MichalChromcak" title="Documentation">π</a> <a href="https://github.com/sktime/sktime/commits?author=MichalChromcak" title="Tests">β οΈ</a> <a href="#tutorial-MichalChromcak" title="Tutorials">β </a></td> <td align="center" valign="top" width="11.11%"><a href="https://github.com/miraep8"><img src="https://avatars.githubusercontent.com/u/10511777?s=400&u=10a774fd4be767fa3b23a82a98bbfe102c17f0f3&v=4?s=100" width="100px;" alt="Mirae Parker"/><br /><sub><b>Mirae Parker</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=miraep8" title="Code">π»</a> <a href="https://github.com/sktime/sktime/commits?author=miraep8" title="Tests">β οΈ</a></td> <td align="center" valign="top" width="11.11%"><a href="https://github.com/MBristle"><img src="https://avatars.githubusercontent.com/MBristle?s=100" width="100px;" alt="Mirko Bristle"/><br /><sub><b>Mirko Bristle</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=MBristle" title="Documentation">π</a></td> <td align="center" valign="top" width="11.11%"><a href="https://mo-saif.github.io/"><img src="https://avatars0.githubusercontent.com/u/27867617?v=4?s=100" width="100px;" alt="Mohammed Saif Kazamel"/><br /><sub><b>Mohammed Saif Kazamel</b></sub></a><br /><a href="https://github.com/sktime/sktime/issues?q=author%3AMo-Saif" title="Bug reports">π</a></td> - </tr> - <tr> <td align="center" valign="top" width="11.11%"><a href="https://moradabaz.github.io/"><img src="https://avatars.githubusercontent.com/u/29915156?v=4?s=100" width="100px;" alt="Morad :)"/><br /><sub><b>Morad :)</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=moradabaz" title="Code">π»</a> <a href="https://github.com/sktime/sktime/commits?author=moradabaz" title="Tests">β οΈ</a> <a href="https://github.com/sktime/sktime/commits?author=moradabaz" title="Documentation">π</a></td> <td align="center" valign="top" width="11.11%"><a href="https://github.com/Multivin12"><img src="https://avatars3.githubusercontent.com/u/36476633?v=4?s=100" width="100px;" alt="Multivin12"/><br /><sub><b>Multivin12</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=Multivin12" title="Code">π»</a> <a href="https://github.com/sktime/sktime/commits?author=Multivin12" title="Tests">β οΈ</a></td> <td align="center" valign="top" width="11.11%"><a href="https://github.com/marcio55afr"><img src="https://avatars.githubusercontent.com/u/42646282?v=4?s=100" width="100px;" alt="MΓ‘rcio A. Freitas Jr"/><br /><sub><b>MΓ‘rcio A. Freitas Jr</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=marcio55afr" title="Documentation">π</a></td> <td align="center" valign="top" width="11.11%"><a href="https://github.com/niekvanderlaan"><img src="https://avatars.githubusercontent.com/u/9962825?v=4?s=100" width="100px;" alt="Niek van der Laan"/><br /><sub><b>Niek van der Laan</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=niekvanderlaan" title="Code">π»</a></td> + </tr> + <tr> <td align="center" valign="top" width="11.11%"><a href="https://github.com/ngupta23"><img src="https://avatars0.githubusercontent.com/u/33585645?v=4?s=100" width="100px;" alt="Nikhil Gupta"/><br /><sub><b>Nikhil Gupta</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=ngupta23" title="Code">π»</a> <a href="https://github.com/sktime/sktime/issues?q=author%3Angupta23" title="Bug reports">π</a> <a href="https://github.com/sktime/sktime/commits?author=ngupta23" title="Documentation">π</a></td> <td align="center" valign="top" width="11.11%"><a href="https://www.linkedin.com/in/nshahpazov/"><img src="https://avatars.githubusercontent.com/nshahpazov?s=100" width="100px;" alt="Nikola Shahpazov"/><br /><sub><b>Nikola Shahpazov</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=nshahpazov" title="Documentation">π</a></td> <td align="center" valign="top" width="11.11%"><a href="https://github.com/nilesh05apr"><img src="https://avatars.githubusercontent.com/u/65773314?v=4?s=100" width="100px;" alt="Nilesh Kumar"/><br /><sub><b>Nilesh Kumar</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=nilesh05apr" title="Code">π»</a></td> <td align="center" valign="top" width="11.11%"><a href="https://github.com/ninfueng"><img src="https://avatars2.githubusercontent.com/u/28499769?v=4?s=100" width="100px;" alt="Ninnart Fuengfusin"/><br /><sub><b>Ninnart Fuengfusin</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=ninfueng" title="Code">π»</a></td> <td align="center" valign="top" width="11.11%"><a href="https://github.com/NoaBenAmi"><img src="https://avatars.githubusercontent.com/u/37590002?v=4?s=100" width="100px;" alt="Noa Ben Ami"/><br /><sub><b>Noa Ben Ami</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=NoaBenAmi" title="Code">π»</a> <a href="https://github.com/sktime/sktime/commits?author=NoaBenAmi" title="Tests">β οΈ</a> <a href="https://github.com/sktime/sktime/commits?author=NoaBenAmi" title="Documentation">π</a></td> - </tr> - <tr> <td align="center" valign="top" width="11.11%"><a href="https://github.com/shchur"><img src="https://avatars.githubusercontent.com/u/6944857?v=4?s=100" width="100px;" alt="Oleksandr Shchur"/><br /><sub><b>Oleksandr Shchur</b></sub></a><br /><a href="https://github.com/sktime/sktime/issues?q=author%3Ashchur" title="Bug reports">π</a> <a href="https://github.com/sktime/sktime/commits?author=shchur" title="Code">π»</a></td> <td align="center" valign="top" width="11.11%"><a href="https://github.com/kachayev"><img src="https://avatars.githubusercontent.com/u/485647?v=4?s=100" width="100px;" alt="Oleksii Kachaiev"/><br /><sub><b>Oleksii Kachaiev</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=kachayev" title="Code">π»</a> <a href="https://github.com/sktime/sktime/commits?author=kachayev" title="Tests">β οΈ</a></td> <td align="center" valign="top" width="11.11%"><a href="https://github.com/olivermatthews"><img src="https://avatars.githubusercontent.com/u/31141490?v=4?s=100" width="100px;" alt="Oliver Matthews"/><br /><sub><b>Oliver Matthews</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=OliverMatthews" title="Code">π»</a></td> <td align="center" valign="top" width="11.11%"><a href="https://github.com/prockenschaub"><img src="https://avatars0.githubusercontent.com/u/15381732?v=4?s=100" width="100px;" alt="Patrick Rockenschaub"/><br /><sub><b>Patrick Rockenschaub</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=prockenschaub" title="Code">π»</a> <a href="#design-prockenschaub" title="Design">π¨</a> <a href="#ideas-prockenschaub" title="Ideas, Planning, & Feedback">π€</a> <a href="https://github.com/sktime/sktime/commits?author=prockenschaub" title="Tests">β οΈ</a></td> + </tr> + <tr> <td align="center" valign="top" width="11.11%"><a href="http://www2.informatik.hu-berlin.de/~schaefpa/"><img src="https://avatars0.githubusercontent.com/u/7783034?v=4?s=100" width="100px;" alt="Patrick SchΓ€fer"/><br /><sub><b>Patrick SchΓ€fer</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=patrickzib" title="Code">π»</a> <a href="#tutorial-patrickzib" title="Tutorials">β </a></td> <td align="center" valign="top" width="11.11%"><a href="https://ber.gp"><img src="https://avatars1.githubusercontent.com/u/9824244?v=4?s=100" width="100px;" alt="Paul"/><br /><sub><b>Paul</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=Pangoraw" title="Documentation">π</a></td> <td align="center" valign="top" width="11.11%"><a href="https://www.linkedin.com/in/paulyim97/"><img src="https://avatars.githubusercontent.com/pyyim?s=100" width="100px;" alt="Paul Yim"/><br /><sub><b>Paul Yim</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=pyyim" title="Code">π»</a> <a href="#example-pyyim" title="Examples">π‘</a> <a href="https://github.com/sktime/sktime/commits?author=pyyim" title="Tests">β οΈ</a></td> <td align="center" valign="top" width="11.11%"><a href="https://www.imes.uni-hannover.de/de/institut/team/m-sc-karl-philipp-kortmann/"><img src="https://avatars.githubusercontent.com/u/20466981?v=4?s=100" width="100px;" alt="Philipp Kortmann"/><br /><sub><b>Philipp Kortmann</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=MrPr3ntice" title="Code">π»</a> <a href="https://github.com/sktime/sktime/commits?author=MrPr3ntice" title="Documentation">π</a></td> <td align="center" valign="top" width="11.11%"><a href="https://github.com/Piyush1729"><img src="https://avatars2.githubusercontent.com/u/64950012?v=4?s=100" width="100px;" alt="Piyush Gade"/><br /><sub><b>Piyush Gade</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=Piyush1729" title="Code">π»</a> <a href="https://github.com/sktime/sktime/pulls?q=is%3Apr+reviewed-by%3APiyush1729" title="Reviewed Pull Requests">π</a></td> - </tr> - <tr> + <td align="center" valign="top" width="11.11%"><a href="https://github.com/rahulporuri"><img src="https://avatars.githubusercontent.com/u/1926457?v=4?s=100" width="100px;" alt="Poruri Sai Rahul"/><br /><sub><b>Poruri Sai Rahul</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=rahulporuri" title="Documentation">π</a></td> <td align="center" valign="top" width="11.11%"><a href="https://www.linkedin.com/in/pranav-prajapati-a5b413226/"><img src="https://avatars.githubusercontent.com/u/94780581?v=4?s=100" width="100px;" alt="Pranav Prajapati"/><br /><sub><b>Pranav Prajapati</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=pranavvp16" title="Code">π»</a> <a href="https://github.com/sktime/sktime/commits?author=pranavvp16" title="Tests">β οΈ</a></td> <td align="center" valign="top" width="11.11%"><a href="https://github.com/pul95"><img src="https://avatars.githubusercontent.com/pul95?s=100" width="100px;" alt="Pulkit Verma"/><br /><sub><b>Pulkit Verma</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=pul95" title="Documentation">π</a></td> <td align="center" valign="top" width="11.11%"><a href="https://github.com/Quaterion"><img src="https://avatars2.githubusercontent.com/u/23200273?v=4?s=100" width="100px;" alt="Quaterion"/><br /><sub><b>Quaterion</b></sub></a><br /><a href="https://github.com/sktime/sktime/issues?q=author%3AQuaterion" title="Bug reports">π</a></td> + </tr> + <tr> <td align="center" valign="top" width="11.11%"><a href="https://github.com/rakshitha123"><img src="https://avatars.githubusercontent.com/u/7654679?v=4?s=100" width="100px;" alt="Rakshitha Godahewa"/><br /><sub><b>Rakshitha Godahewa</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=rakshitha123" title="Code">π»</a> <a href="https://github.com/sktime/sktime/commits?author=rakshitha123" title="Documentation">π</a></td> <td align="center" valign="top" width="11.11%"><a href="https://github.com/Ram0nB"><img src="https://avatars.githubusercontent.com/u/45173421?s=100" width="100px;" alt="Ramon Bussing"/><br /><sub><b>Ramon Bussing</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=Ram0nB" title="Documentation">π</a> <a href="https://github.com/sktime/sktime/commits?author=Ram0nB" title="Code">π»</a></td> <td align="center" valign="top" width="11.11%"><a href="https://github.com/RavenRudi"><img src="https://avatars.githubusercontent.com/u/46402968?v=4?s=100" width="100px;" alt="RavenRudi"/><br /><sub><b>RavenRudi</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=RavenRudi" title="Code">π»</a></td> <td align="center" valign="top" width="11.11%"><a href="https://github.com/wolph"><img src="?s=100" width="100px;" alt="Rick van Hattem"/><br /><sub><b>Rick van Hattem</b></sub></a><br /><a href="#infra-wolph" title="Infrastructure (Hosting, Build-Tools, etc)">π</a></td> <td align="center" valign="top" width="11.11%"><a href="https://github.com/Ris-Bali"><img src="https://avatars.githubusercontent.com/u/81592570?v=4?s=100" width="100px;" alt="Rishabh Bali"/><br /><sub><b>Rishabh Bali</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=Ris-Bali" title="Code">π»</a></td> <td align="center" valign="top" width="11.11%"><a href="https://github.com/RishiKumarRay"><img src="https://avatars.githubusercontent.com/u/87641376?v=4?s=100" width="100px;" alt="Rishi Kumar Ray"/><br /><sub><b>Rishi Kumar Ray</b></sub></a><br /><a href="#infra-RishiKumarRay" title="Infrastructure (Hosting, Build-Tools, etc)">π</a></td> - </tr> - <tr> + <td align="center" valign="top" width="11.11%"><img src="https://avatars.githubusercontent.com/u/55790848?v=4?s=100" width="100px;" alt="Riya Elizabeth John"/><br /><sub><b>Riya Elizabeth John</b></sub><br /><a href="https://github.com/sktime/sktime/commits?author=Riyabelle25" title="Code">π»</a></td> <td align="center" valign="top" width="11.11%"><a href="https://github.com/Riyabelle25"><img src="https://avatars.githubusercontent.com/u/55790848?v=4?s=100" width="100px;" alt="Riya Elizabeth John"/><br /><sub><b>Riya Elizabeth John</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=Riyabelle25" title="Code">π»</a> <a href="https://github.com/sktime/sktime/commits?author=Riyabelle25" title="Tests">β οΈ</a> <a href="https://github.com/sktime/sktime/commits?author=Riyabelle25" title="Documentation">π</a></td> <td align="center" valign="top" width="11.11%"><a href="https://www.linkedin.com/in/romanlutz/"><img src="https://avatars.githubusercontent.com/u/10245648?v=4?s=100" width="100px;" alt="Roman Lutz"/><br /><sub><b>Roman Lutz</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=romanlutz" title="Documentation">π</a></td> + </tr> + <tr> <td align="center" valign="top" width="11.11%"><a href="https://github.com/ronnie-llamado"><img src="https://avatars.githubusercontent.com/ronnie-llamado?s=100" width="100px;" alt="Ronnie Llamado"/><br /><sub><b>Ronnie Llamado</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=ronnie-llamado" title="Documentation">π</a></td> <td align="center" valign="top" width="11.11%"><a href="https://github.com/rnkuhns"><img src="https://avatars0.githubusercontent.com/u/26907244?v=4?s=100" width="100px;" alt="Ryan Kuhns"/><br /><sub><b>Ryan Kuhns</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=RNKuhns" title="Code">π»</a> <a href="https://github.com/sktime/sktime/commits?author=RNKuhns" title="Documentation">π</a> <a href="#tutorial-RNKuhns" title="Tutorials">β </a> <a href="#example-RNKuhns" title="Examples">π‘</a> <a href="#ideas-RNKuhns" title="Ideas, Planning, & Feedback">π€</a> <a href="https://github.com/sktime/sktime/pulls?q=is%3Apr+reviewed-by%3ARNKuhns" title="Reviewed Pull Requests">π</a> <a href="https://github.com/sktime/sktime/commits?author=RNKuhns" title="Tests">β οΈ</a></td> - <td align="center" valign="top" width="11.11%"><a href="https://github.com/achieveordie"><img src="https://avatars.githubusercontent.com/u/54197164?v=4?s=100" width="100px;" alt="Sagar Mishra"/><br /><sub><b>Sagar Mishra</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=achieveordie" title="Tests">β οΈ</a></td> + <td align="center" valign="top" width="11.11%"><a href="https://github.com/achieveordie"><img src="https://avatars.githubusercontent.com/u/54197164?v=4?s=100" width="100px;" alt="Sagar Mishra"/><br /><sub><b>Sagar Mishra</b></sub></a><br /><a href="https://github.com/sktime/sktime/issues?q=author%3Aachieveordie" title="Bug reports">π</a> <a href="https://github.com/sktime/sktime/commits?author=achieveordie" title="Code">π»</a> <a href="https://github.com/sktime/sktime/commits?author=achieveordie" title="Tests">β οΈ</a></td> <td align="center" valign="top" width="11.11%"><a href="https://sajay.online"><img src="https://avatars2.githubusercontent.com/u/25329624?v=4?s=100" width="100px;" alt="Sajaysurya Ganesh"/><br /><sub><b>Sajaysurya Ganesh</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=sajaysurya" title="Code">π»</a> <a href="https://github.com/sktime/sktime/commits?author=sajaysurya" title="Documentation">π</a> <a href="#design-sajaysurya" title="Design">π¨</a> <a href="#example-sajaysurya" title="Examples">π‘</a> <a href="#ideas-sajaysurya" title="Ideas, Planning, & Feedback">π€</a> <a href="https://github.com/sktime/sktime/commits?author=sajaysurya" title="Tests">β οΈ</a> <a href="#tutorial-sajaysurya" title="Tutorials">β </a></td> <td align="center" valign="top" width="11.11%"><a href="https://github.com/SamiAlavi"><img src="https://avatars.githubusercontent.com/u/32700289?v=4?s=100" width="100px;" alt="Sami Alavi"/><br /><sub><b>Sami Alavi</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=SamiAlavi" title="Code">π»</a> <a href="#maintenance-SamiAlavi" title="Maintenance">π§</a></td> + <td align="center" valign="top" width="11.11%"><a href="https://github.com/sssilvar"><img src="https://avatars.githubusercontent.com/u/16252054?v=4?s=100" width="100px;" alt="Santiago Smith Silva"/><br /><sub><b>Santiago Smith Silva</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=sssilvar" title="Code">π»</a></td> <td align="center" valign="top" width="11.11%"><a href="https://github.com/Saransh-cpp"><img src="https://avatars.githubusercontent.com/u/74055102?v=4?s=100" width="100px;" alt="Saransh Chopra"/><br /><sub><b>Saransh Chopra</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=Saransh-cpp" title="Documentation">π</a> <a href="#infra-Saransh-cpp" title="Infrastructure (Hosting, Build-Tools, etc)">π</a></td> <td align="center" valign="top" width="11.11%"><a href="https://www.linkedin.com/in/satya-pattnaik-77a430144/"><img src="https://avatars.githubusercontent.com/u/22102468?v=4?s=100" width="100px;" alt="Satya Prakash Pattnaik"/><br /><sub><b>Satya Prakash Pattnaik</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=satya-pattnaik" title="Documentation">π</a></td> + <td align="center" valign="top" width="11.11%"><a href="https://github.com/dasgupsa"><img src="https://avatars2.githubusercontent.com/u/10398956?v=4?s=100" width="100px;" alt="Saurabh Dasgupta"/><br /><sub><b>Saurabh Dasgupta</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=dasgupsa" title="Code">π»</a></td> </tr> <tr> - <td align="center" valign="top" width="11.11%"><a href="https://github.com/dasgupsa"><img src="https://avatars2.githubusercontent.com/u/10398956?v=4?s=100" width="100px;" alt="Saurabh Dasgupta"/><br /><sub><b>Saurabh Dasgupta</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=dasgupsa" title="Code">π»</a></td> <td align="center" valign="top" width="11.11%"><a href="https://github.com/SebasKoel"><img src="https://avatars3.githubusercontent.com/u/66252156?v=4?s=100" width="100px;" alt="Sebastiaan Koel"/><br /><sub><b>Sebastiaan Koel</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=SebasKoel" title="Code">π»</a> <a href="https://github.com/sktime/sktime/commits?author=SebasKoel" title="Documentation">π</a></td> <td align="center" valign="top" width="11.11%"><a href="https://github.com/shagn"><img src="https://avatars.githubusercontent.com/u/16029092?v=4?s=100" width="100px;" alt="Sebastian Hagn"/><br /><sub><b>Sebastian Hagn</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=shagn" title="Documentation">π</a></td> <td align="center" valign="top" width="11.11%"><a href="https://github.com/ShivamPathak99"><img src="https://avatars.githubusercontent.com/u/98941325?s=400&v=4?s=100" width="100px;" alt="Shivam Pathak"/><br /><sub><b>Shivam Pathak</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=ShivamPathak99" title="Documentation">π</a></td> <td align="center" valign="top" width="11.11%"><a href="https://github.com/AurumnPegasus"><img src="https://avatars.githubusercontent.com/u/54315149?v=4?s=100" width="100px;" alt="Shivansh Subramanian"/><br /><sub><b>Shivansh Subramanian</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=AurumnPegasus" title="Documentation">π</a> <a href="https://github.com/sktime/sktime/commits?author=AurumnPegasus" title="Code">π»</a></td> + <td align="center" valign="top" width="11.11%"><a href="https://github.com/sbuse"><img src="https://avatars.githubusercontent.com/u/24408707?v=4?s=100" width="100px;" alt="Simon B."/><br /><sub><b>Simon B.</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=sbuse" title="Code">π»</a></td> <td align="center" valign="top" width="11.11%"><a href="https://www.linkedin.com/in/solomon-botchway-a1383821b/"><img src="https://avatars.githubusercontent.com/u/62394255?v=4?s=100" width="100px;" alt="Solomon Botchway"/><br /><sub><b>Solomon Botchway</b></sub></a><br /><a href="#maintenance-snnbotchway" title="Maintenance">π§</a></td> <td align="center" valign="top" width="11.11%"><a href="https://github.com/khrapovs"><img src="https://avatars.githubusercontent.com/u/3774663?v=4?s=100" width="100px;" alt="Stanislav Khrapov"/><br /><sub><b>Stanislav Khrapov</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=khrapovs" title="Code">π»</a></td> + <td align="center" valign="top" width="11.11%"><a href="https://github.com/steenrotsman"><img src="https://avatars.githubusercontent.com/u/78110080?s=400&v=4?s=100" width="100px;" alt="Stijn J. Rotman"/><br /><sub><b>Stijn J. Rotman</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=steenrotsman" title="Code">π»</a> <a href="https://github.com/sktime/sktime/commits?author=steenrotsman" title="Documentation">π</a></td> <td align="center" valign="top" width="11.11%"><a href="https://github.com/SveaMeyer13"><img src="https://avatars.githubusercontent.com/u/46671894?v=4?s=100" width="100px;" alt="Svea Marie Meyer"/><br /><sub><b>Svea Marie Meyer</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=SveaMeyer13" title="Documentation">π</a> <a href="https://github.com/sktime/sktime/commits?author=SveaMeyer13" title="Code">π»</a></td> - <td align="center" valign="top" width="11.11%"><a href="https://github.com/TNTran92"><img src="https://avatars.githubusercontent.com/u/55965636?v=4?s=100" width="100px;" alt="TNTran92"/><br /><sub><b>TNTran92</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=TNTran92" title="Code">π»</a></td> </tr> <tr> + <td align="center" valign="top" width="11.11%"><a href="https://github.com/TNTran92"><img src="https://avatars.githubusercontent.com/u/55965636?v=4?s=100" width="100px;" alt="TNTran92"/><br /><sub><b>TNTran92</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=TNTran92" title="Code">π»</a></td> <td align="center" valign="top" width="11.11%"><a href="https://github.com/Taise228"><img src="https://avatars.githubusercontent.com/u/95762401?s=400&v=4?s=100" width="100px;" alt="Taisei Yamamoto"/><br /><sub><b>Taisei Yamamoto</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=Taise228" title="Code">π»</a></td> <td align="center" valign="top" width="11.11%"><a href="https://thayeylolu.github.io/portfolio/"><img src="https://avatars.githubusercontent.com/u/13348874?v=4?s=100" width="100px;" alt="Taiwo Owoseni"/><br /><sub><b>Taiwo Owoseni</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=thayeylolu" title="Code">π»</a></td> <td align="center" valign="top" width="11.11%"><a href="https://github.com/lnthach"><img src="https://avatars0.githubusercontent.com/u/7788363?v=4?s=100" width="100px;" alt="Thach Le Nguyen"/><br /><sub><b>Thach Le Nguyen</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=lnthach" title="Code">π»</a> <a href="https://github.com/sktime/sktime/commits?author=lnthach" title="Tests">β οΈ</a></td> @@ -254,53 +270,57 @@ Thanks goes to these wonderful people ([emoji key](https://allcontributors.org/d <td align="center" valign="top" width="11.11%"><a href="https://github.com/xxl4tomxu98"><img src="https://avatars.githubusercontent.com/u/62292177?s=40&v=4?s=100" width="100px;" alt="Tom Xu"/><br /><sub><b>Tom Xu</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=xxl4tomxu98" title="Code">π»</a> <a href="https://github.com/sktime/sktime/commits?author=xxl4tomxu98" title="Documentation">π</a></td> <td align="center" valign="top" width="11.11%"><a href="https://github.com/tpvasconcelos"><img src="https://avatars.githubusercontent.com/u/17701527?v=4?s=100" width="100px;" alt="Tomas P. de Vasconcelos"/><br /><sub><b>Tomas P. de Vasconcelos</b></sub></a><br /><a href="https://github.com/sktime/sktime/issues?q=author%3Atpvasconcelos" title="Bug reports">π</a> <a href="https://github.com/sktime/sktime/commits?author=tpvasconcelos" title="Code">π»</a></td> <td align="center" valign="top" width="11.11%"><a href="https://github.com/tch"><img src="https://avatars3.githubusercontent.com/u/184076?v=4?s=100" width="100px;" alt="Tomasz Chodakowski"/><br /><sub><b>Tomasz Chodakowski</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=tch" title="Code">π»</a> <a href="https://github.com/sktime/sktime/commits?author=tch" title="Documentation">π</a> <a href="https://github.com/sktime/sktime/issues?q=author%3Atch" title="Bug reports">π</a></td> - <td align="center" valign="top" width="11.11%"><a href="http://www.timeseriesclassification.com"><img src="https://avatars1.githubusercontent.com/u/9594042?v=4?s=100" width="100px;" alt="Tony Bagnall"/><br /><sub><b>Tony Bagnall</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=TonyBagnall" title="Code">π»</a> <a href="#business-TonyBagnall" title="Business development">πΌ</a> <a href="https://github.com/sktime/sktime/commits?author=TonyBagnall" title="Documentation">π</a> <a href="#design-TonyBagnall" title="Design">π¨</a> <a href="#eventOrganizing-TonyBagnall" title="Event Organizing">π</a> <a href="#fundingFinding-TonyBagnall" title="Funding Finding">π</a> <a href="#ideas-TonyBagnall" title="Ideas, Planning, & Feedback">π€</a> <a href="#projectManagement-TonyBagnall" title="Project Management">π</a> <a href="#question-TonyBagnall" title="Answering Questions">π¬</a> <a href="https://github.com/sktime/sktime/pulls?q=is%3Apr+reviewed-by%3ATonyBagnall" title="Reviewed Pull Requests">π</a> <a href="#talk-TonyBagnall" title="Talks">π’</a> <a href="#data-TonyBagnall" title="Data">π£</a></td> </tr> <tr> + <td align="center" valign="top" width="11.11%"><a href="http://www.timeseriesclassification.com"><img src="https://avatars1.githubusercontent.com/u/9594042?v=4?s=100" width="100px;" alt="Tony Bagnall"/><br /><sub><b>Tony Bagnall</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=TonyBagnall" title="Code">π»</a> <a href="#business-TonyBagnall" title="Business development">πΌ</a> <a href="https://github.com/sktime/sktime/commits?author=TonyBagnall" title="Documentation">π</a> <a href="#design-TonyBagnall" title="Design">π¨</a> <a href="#eventOrganizing-TonyBagnall" title="Event Organizing">π</a> <a href="#fundingFinding-TonyBagnall" title="Funding Finding">π</a> <a href="#ideas-TonyBagnall" title="Ideas, Planning, & Feedback">π€</a> <a href="#projectManagement-TonyBagnall" title="Project Management">π</a> <a href="#question-TonyBagnall" title="Answering Questions">π¬</a> <a href="https://github.com/sktime/sktime/pulls?q=is%3Apr+reviewed-by%3ATonyBagnall" title="Reviewed Pull Requests">π</a> <a href="#talk-TonyBagnall" title="Talks">π’</a> <a href="#data-TonyBagnall" title="Data">π£</a></td> <td align="center" valign="top" width="11.11%"><a href="https://github.com/utsavcoding"><img src="https://avatars3.githubusercontent.com/u/55446385?v=4?s=100" width="100px;" alt="Utsav Kumar Tiwari"/><br /><sub><b>Utsav Kumar Tiwari</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=utsavcoding" title="Code">π»</a> <a href="https://github.com/sktime/sktime/commits?author=utsavcoding" title="Documentation">π</a></td> <td align="center" valign="top" width="11.11%"><a href="https://github.com/Vasudeva-bit"><img src="https://avatars.githubusercontent.com/u/70791259?v=4?s=100" width="100px;" alt="Vasudeva Kilaru"/><br /><sub><b>Vasudeva Kilaru</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=Vasudeva-bit" title="Code">π»</a> <a href="https://github.com/sktime/sktime/commits?author=Vasudeva-bit" title="Documentation">π</a></td> <td align="center" valign="top" width="11.11%"><a href="https://github.com/victordremov"><img src="https://avatars.githubusercontent.com/u/32140716?s=100" width="100px;" alt="Viktor Dremov"/><br /><sub><b>Viktor Dremov</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=victordremov" title="Code">π»</a></td> <td align="center" valign="top" width="11.11%"><a href="https://github.com/ViktorKaz"><img src="https://avatars0.githubusercontent.com/u/33499138?v=4?s=100" width="100px;" alt="ViktorKaz"/><br /><sub><b>ViktorKaz</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=ViktorKaz" title="Code">π»</a> <a href="https://github.com/sktime/sktime/commits?author=ViktorKaz" title="Documentation">π</a> <a href="#design-ViktorKaz" title="Design">π¨</a></td> + <td align="center" valign="top" width="11.11%"><a href="https://github.com/vnicholson1"><img src="?s=100" width="100px;" alt="Vincent Nicholson"/><br /><sub><b>Vincent Nicholson</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=vnicholson1" title="Code">π»</a></td> <td align="center" valign="top" width="11.11%"><a href="https://github.com/VyomkeshVyas"><img src="?s=100" width="100px;" alt="Vyomkesh Vyas"/><br /><sub><b>Vyomkesh Vyas</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=VyomkeshVyas" title="Code">π»</a> <a href="https://github.com/sktime/sktime/commits?author=VyomkeshVyas" title="Documentation">π</a> <a href="#example-VyomkeshVyas" title="Examples">π‘</a> <a href="https://github.com/sktime/sktime/commits?author=VyomkeshVyas" title="Tests">β οΈ</a></td> + <td align="center" valign="top" width="11.11%"><a href="https://github.com/wayneadams"><img src="https://avatars.githubusercontent.com/u/15034841?s=400&u=d717e9945910bcc844c5e64cd56d570c6cc4e8e6&v=4?s=100" width="100px;" alt="Wayne Adams"/><br /><sub><b>Wayne Adams</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=wayneadams" title="Documentation">π</a></td> <td align="center" valign="top" width="11.11%"><a href="https://www.linkedin.com/in/templierw/"><img src="https://github.com/templierw.png?s=100" width="100px;" alt="William Templier"/><br /><sub><b>William Templier</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=templierw" title="Documentation">π</a></td> + </tr> + <tr> <td align="center" valign="top" width="11.11%"><a href="https://github.com/magittan"><img src="https://avatars0.githubusercontent.com/u/14024202?v=4?s=100" width="100px;" alt="William Zheng"/><br /><sub><b>William Zheng</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=magittan" title="Code">π»</a> <a href="https://github.com/sktime/sktime/commits?author=magittan" title="Tests">β οΈ</a></td> <td align="center" valign="top" width="11.11%"><a href="https://github.com/yairbeer"><img src="https://avatars.githubusercontent.com/yairbeer?s=100" width="100px;" alt="Yair Beer"/><br /><sub><b>Yair Beer</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=yairbeer" title="Code">π»</a></td> + <td align="center" valign="top" width="11.11%"><a href="https://www.linkedin.com/in/yann-hallouard/"><img src="https://avatars.githubusercontent.com/YHallouard?s=100" width="100px;" alt="Yann Hallouard"/><br /><sub><b>Yann Hallouard</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=YHallouard" title="Code">π»</a> <a href="https://github.com/sktime/sktime/commits?author=YHallouard" title="Tests">β οΈ</a></td> <td align="center" valign="top" width="11.11%"><a href="https://github.com/yashlamba"><img src="https://avatars.githubusercontent.com/u/44164398?v=4?s=100" width="100px;" alt="Yash Lamba"/><br /><sub><b>Yash Lamba</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=yashlamba" title="Code">π»</a></td> - </tr> - <tr> <td align="center" valign="top" width="11.11%"><a href="https://github.com/xuyxu"><img src="https://avatars2.githubusercontent.com/u/22359569?v=4?s=100" width="100px;" alt="Yi-Xuan Xu"/><br /><sub><b>Yi-Xuan Xu</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=xuyxu" title="Code">π»</a> <a href="https://github.com/sktime/sktime/commits?author=xuyxu" title="Tests">β οΈ</a> <a href="#maintenance-xuyxu" title="Maintenance">π§</a> <a href="https://github.com/sktime/sktime/commits?author=xuyxu" title="Documentation">π</a></td> + <td align="center" valign="top" width="11.11%"><a href="https://github.com/sz85512678"><img src="https://avatars.githubusercontent.com/sz85512678?s=100" width="100px;" alt="Zhen Shao"/><br /><sub><b>Zhen Shao</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=sz85512678" title="Code">π»</a></td> <td align="center" valign="top" width="11.11%"><a href="https://github.com/ZiyaoWei"><img src="https://avatars.githubusercontent.com/u/940823?v=4?s=100" width="100px;" alt="Ziyao Wei"/><br /><sub><b>Ziyao Wei</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=ZiyaoWei" title="Code">π»</a></td> <td align="center" valign="top" width="11.11%"><a href="https://github.com/aa25desh"><img src="https://avatars1.githubusercontent.com/u/29518290?v=4?s=100" width="100px;" alt="aa25desh"/><br /><sub><b>aa25desh</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=aa25desh" title="Code">π»</a> <a href="https://github.com/sktime/sktime/issues?q=author%3Aaa25desh" title="Bug reports">π</a></td> <td align="center" valign="top" width="11.11%"><a href="https://github.com/abandus"><img src="https://avatars2.githubusercontent.com/u/46486474?v=4?s=100" width="100px;" alt="abandus"/><br /><sub><b>abandus</b></sub></a><br /><a href="#ideas-abandus" title="Ideas, Planning, & Feedback">π€</a> <a href="https://github.com/sktime/sktime/commits?author=abandus" title="Code">π»</a></td> + </tr> + <tr> <td align="center" valign="top" width="11.11%"><a href="https://github.com/adoherty21"><img src="https://avatars.githubusercontent.com/u/52799751?s=400&v=4?s=100" width="100px;" alt="adoherty21"/><br /><sub><b>adoherty21</b></sub></a><br /><a href="https://github.com/sktime/sktime/issues?q=author%3Aadoherty21" title="Bug reports">π</a></td> <td align="center" valign="top" width="11.11%"><a href="https://github.com/bethrice44"><img src="https://avatars.githubusercontent.com/u/11226988?v=4?s=100" width="100px;" alt="bethrice44"/><br /><sub><b>bethrice44</b></sub></a><br /><a href="https://github.com/sktime/sktime/issues?q=author%3Abethrice44" title="Bug reports">π</a> <a href="https://github.com/sktime/sktime/commits?author=bethrice44" title="Code">π»</a> <a href="https://github.com/sktime/sktime/pulls?q=is%3Apr+reviewed-by%3Abethrice44" title="Reviewed Pull Requests">π</a> <a href="https://github.com/sktime/sktime/commits?author=bethrice44" title="Tests">β οΈ</a></td> <td align="center" valign="top" width="11.11%"><a href="https://github.com/big-o"><img src="https://avatars1.githubusercontent.com/u/1134151?v=4?s=100" width="100px;" alt="big-o"/><br /><sub><b>big-o</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=big-o" title="Code">π»</a> <a href="https://github.com/sktime/sktime/commits?author=big-o" title="Tests">β οΈ</a> <a href="#design-big-o" title="Design">π¨</a> <a href="#ideas-big-o" title="Ideas, Planning, & Feedback">π€</a> <a href="https://github.com/sktime/sktime/pulls?q=is%3Apr+reviewed-by%3Abig-o" title="Reviewed Pull Requests">π</a> <a href="#tutorial-big-o" title="Tutorials">β </a> <a href="#mentoring-big-o" title="Mentoring">π§βπ«</a></td> <td align="center" valign="top" width="11.11%"><a href="https://github.com/bobbys-dev"><img src="https://avatars.githubusercontent.com/bobbys-dev?s=100" width="100px;" alt="bobbys"/><br /><sub><b>bobbys</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=bobbys-dev" title="Code">π»</a></td> <td align="center" valign="top" width="11.11%"><a href="https://github.com/brettkoonce"><img src="https://avatars2.githubusercontent.com/u/11281814?v=4?s=100" width="100px;" alt="brett koonce"/><br /><sub><b>brett koonce</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=brettkoonce" title="Documentation">π</a></td> - </tr> - <tr> <td align="center" valign="top" width="11.11%"><a href="https://github.com/btrtts"><img src="https://avatars3.githubusercontent.com/u/66252156?v=4?s=100" width="100px;" alt="btrtts"/><br /><sub><b>btrtts</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=btrtts" title="Documentation">π</a></td> <td align="center" valign="top" width="11.11%"><a href="https://github.com/chizzi25"><img src="https://avatars3.githubusercontent.com/u/67911243?v=4?s=100" width="100px;" alt="chizzi25"/><br /><sub><b>chizzi25</b></sub></a><br /><a href="#blog-chizzi25" title="Blogposts">π</a></td> <td align="center" valign="top" width="11.11%"><a href="https://github.com/chrisholder"><img src="https://avatars.githubusercontent.com/u/4674372?v=4?s=100" width="100px;" alt="chrisholder"/><br /><sub><b>chrisholder</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=chrisholder" title="Code">π»</a> <a href="https://github.com/sktime/sktime/commits?author=chrisholder" title="Tests">β οΈ</a> <a href="https://github.com/sktime/sktime/commits?author=chrisholder" title="Documentation">π</a> <a href="#design-chrisholder" title="Design">π¨</a> <a href="#example-chrisholder" title="Examples">π‘</a></td> <td align="center" valign="top" width="11.11%"><a href="https://github.com/danbartl"><img src="https://avatars.githubusercontent.com/u/19947407?v=4?s=100" width="100px;" alt="danbartl"/><br /><sub><b>danbartl</b></sub></a><br /><a href="https://github.com/sktime/sktime/issues?q=author%3Adanbartl" title="Bug reports">π</a> <a href="https://github.com/sktime/sktime/commits?author=danbartl" title="Code">π»</a> <a href="https://github.com/sktime/sktime/pulls?q=is%3Apr+reviewed-by%3Adanbartl" title="Reviewed Pull Requests">π</a> <a href="#talk-danbartl" title="Talks">π’</a> <a href="https://github.com/sktime/sktime/commits?author=danbartl" title="Tests">β οΈ</a> <a href="#tutorial-danbartl" title="Tutorials">β </a> <a href="#video-danbartl" title="Videos">πΉ</a></td> + </tr> + <tr> <td align="center" valign="top" width="11.11%"><a href="https://github.com/hamzahiqb"><img src="https://avatars3.githubusercontent.com/u/10302415?v=4?s=100" width="100px;" alt="hamzahiqb"/><br /><sub><b>hamzahiqb</b></sub></a><br /><a href="#infra-hamzahiqb" title="Infrastructure (Hosting, Build-Tools, etc)">π</a></td> <td align="center" valign="top" width="11.11%"><a href="https://github.com/hiqbal2"><img src="https://avatars3.githubusercontent.com/u/10302415?v=4?s=100" width="100px;" alt="hiqbal2"/><br /><sub><b>hiqbal2</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=hiqbal2" title="Documentation">π</a></td> <td align="center" valign="top" width="11.11%"><a href="https://github.com/jesellier"><img src="https://avatars0.githubusercontent.com/u/51952076?v=4?s=100" width="100px;" alt="jesellier"/><br /><sub><b>jesellier</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=jesellier" title="Code">π»</a></td> <td align="center" valign="top" width="11.11%"><a href="https://github.com/jschemm"><img src="https://avatars.githubusercontent.com/u/81151346?v=4?s=100" width="100px;" alt="jschemm"/><br /><sub><b>jschemm</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=jschemm" title="Code">π»</a></td> <td align="center" valign="top" width="11.11%"><a href="https://github.com/kkoziara"><img src="https://avatars1.githubusercontent.com/u/4346849?v=4?s=100" width="100px;" alt="kkoziara"/><br /><sub><b>kkoziara</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=kkoziara" title="Code">π»</a> <a href="https://github.com/sktime/sktime/issues?q=author%3Akkoziara" title="Bug reports">π</a></td> - </tr> - <tr> <td align="center" valign="top" width="11.11%"><a href="https://github.com/matteogales"><img src="https://avatars0.githubusercontent.com/u/9269326?v=4?s=100" width="100px;" alt="matteogales"/><br /><sub><b>matteogales</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=matteogales" title="Code">π»</a> <a href="#design-matteogales" title="Design">π¨</a> <a href="#ideas-matteogales" title="Ideas, Planning, & Feedback">π€</a></td> <td align="center" valign="top" width="11.11%"><a href="https://github.com/oleskiewicz"><img src="https://avatars1.githubusercontent.com/u/5682158?v=4?s=100" width="100px;" alt="oleskiewicz"/><br /><sub><b>oleskiewicz</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=oleskiewicz" title="Code">π»</a> <a href="https://github.com/sktime/sktime/commits?author=oleskiewicz" title="Documentation">π</a> <a href="https://github.com/sktime/sktime/commits?author=oleskiewicz" title="Tests">β οΈ</a></td> <td align="center" valign="top" width="11.11%"><a href="https://github.com/pabworks"><img src="https://avatars.githubusercontent.com/u/32725127?v=4?s=100" width="100px;" alt="pabworks"/><br /><sub><b>pabworks</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=pabworks" title="Code">π»</a> <a href="https://github.com/sktime/sktime/commits?author=pabworks" title="Tests">β οΈ</a></td> <td align="center" valign="top" width="11.11%"><a href="https://github.com/xloem"><img src="?s=100" width="100px;" alt="patiently pending world peace"/><br /><sub><b>patiently pending world peace</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=xloem" title="Code">π»</a></td> + </tr> + <tr> <td align="center" valign="top" width="11.11%"><a href="https://github.com/raishubham1"><img src="https://avatars3.githubusercontent.com/u/29356417?v=4?s=100" width="100px;" alt="raishubham1"/><br /><sub><b>raishubham1</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=raishubham1" title="Documentation">π</a></td> <td align="center" valign="top" width="11.11%"><a href="https://github.com/simone-pignotti"><img src="https://avatars1.githubusercontent.com/u/44410066?v=4?s=100" width="100px;" alt="simone-pignotti"/><br /><sub><b>simone-pignotti</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=simone-pignotti" title="Code">π»</a> <a href="https://github.com/sktime/sktime/issues?q=author%3Asimone-pignotti" title="Bug reports">π</a></td> <td align="center" valign="top" width="11.11%"><a href="https://github.com/sophijka"><img src="https://avatars2.githubusercontent.com/u/47450591?v=4?s=100" width="100px;" alt="sophijka"/><br /><sub><b>sophijka</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=sophijka" title="Documentation">π</a> <a href="#maintenance-sophijka" title="Maintenance">π§</a></td> <td align="center" valign="top" width="11.11%"><a href="https://github.com/sri1419"><img src="https://avatars2.githubusercontent.com/u/65078278?v=4?s=100" width="100px;" alt="sri1419"/><br /><sub><b>sri1419</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=sri1419" title="Code">π»</a></td> <td align="center" valign="top" width="11.11%"><a href="https://github.com/tensorflow-as-tf"><img src="https://avatars.githubusercontent.com/u/51345718?v=4?s=100" width="100px;" alt="tensorflow-as-tf"/><br /><sub><b>tensorflow-as-tf</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=tensorflow-as-tf" title="Code">π»</a></td> - </tr> - <tr> <td align="center" valign="top" width="11.11%"><a href="https://github.com/vedazeren"><img src="https://avatars3.githubusercontent.com/u/63582874?v=4?s=100" width="100px;" alt="vedazeren"/><br /><sub><b>vedazeren</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=vedazeren" title="Code">π»</a> <a href="https://github.com/sktime/sktime/commits?author=vedazeren" title="Tests">β οΈ</a></td> <td align="center" valign="top" width="11.11%"><a href="https://github.com/vincent-nich12"><img src="https://avatars3.githubusercontent.com/u/36476633?v=4?s=100" width="100px;" alt="vincent-nich12"/><br /><sub><b>vincent-nich12</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=vincent-nich12" title="Code">π»</a></td> <td align="center" valign="top" width="11.11%"><a href="https://github.com/vollmersj"><img src="https://avatars2.githubusercontent.com/u/12613127?v=4?s=100" width="100px;" alt="vollmersj"/><br /><sub><b>vollmersj</b></sub></a><br /><a href="https://github.com/sktime/sktime/commits?author=vollmersj" title="Documentation">π</a></td> diff --git a/Makefile b/Makefile index e916f804495..0697bdbf4a6 100644 --- a/Makefile +++ b/Makefile @@ -30,6 +30,20 @@ test: ## Run unit tests cp setup.cfg ${TEST_DIR} python -m pytest +full_test: ## Run all tests + -rm -rf ${TEST_DIR} + mkdir -p ${TEST_DIR} + cp .coveragerc ${TEST_DIR} + cp setup.cfg ${TEST_DIR} + python -m pytest --only_changed_modules False + +test_without_datasets: ## Run unit tests skipping sktime/datasets + -rm -rf ${TEST_DIR} + mkdir -p ${TEST_DIR} + cp .coveragerc ${TEST_DIR} + cp setup.cfg ${TEST_DIR} + python -m pytest --ignore sktime/datasets + test_check_suite: ## run only estimator contract tests in TestAll classes -rm -rf ${TEST_DIR} mkdir -p ${TEST_DIR} @@ -46,12 +60,12 @@ test_softdeps: ## Run unit tests to check soft dependency handling in estimators python -m pytest -v -n auto --showlocals -k 'test_check_estimator_does_not_raise' $(PYTESTOPTIONS) --pyargs sktime.utils.tests python -m pytest -v -n auto --showlocals $(PYTESTOPTIONS) --pyargs sktime.tests.test_softdeps -test_softdeps_full: ## Run all non-suite unit tests without soft dependencies +test_softdeps_full: ## Run all non-suite unit tests without soft dependencies or downloading datasets -rm -rf ${TEST_DIR} mkdir -p ${TEST_DIR} cp setup.cfg ${TEST_DIR} cd ${TEST_DIR} - python -m pytest -v --showlocals -k 'not TestAll' $(PYTESTOPTIONS) + python -m pytest -v --showlocals --ignore sktime/datasets -k 'not TestAll' $(PYTESTOPTIONS) test_mlflow: ## Run mlflow integration tests -rm -rf ${TEST_DIR} @@ -94,4 +108,4 @@ nb: clean dockertest: docker build -t sktime -f build_tools/docker/$(PYTHON_VERSION).dockerfile . - docker run -it --name sktime sktime bash -c "make test" + docker run -it --name sktime sktime bash -c "make full_test" diff --git a/README.md b/README.md index bcb9a99f126..33b146fe906 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ > A unified interface for machine learning with time series -:rocket: **Version 0.24.0 out now!** [Check out the release notes here](https://www.sktime.net/en/latest/changelog.html). +:rocket: **Version 0.26.0 out now!** [Check out the release notes here](https://www.sktime.net/en/latest/changelog.html). sktime is a library for time series analysis in Python. It provides a unified interface for multiple time series learning tasks. Currently, this includes time series classification, regression, clustering, annotation, and forecasting. It comes with [time series algorithms](https://www.sktime.net/en/stable/estimator_overview.html) and [scikit-learn] compatible tools to build, tune and validate time series models. @@ -66,8 +66,6 @@ Our objective is to enhance the interoperability and usability of the time serie sktime also provides **interfaces to related libraries**, for example [scikit-learn], [statsmodels], [tsfresh], [PyOD], and [fbprophet], among others. -For **deep learning**, see our companion package: [sktime-dl](https://github.com/sktime/sktime-dl). - [statsmodels]: https://www.statsmodels.org/stable/index.html [tsfresh]: https://tsfresh.readthedocs.io/en/latest/ [pyod]: https://pyod.readthedocs.io/en/latest/ @@ -84,6 +82,7 @@ For **deep learning**, see our companion package: [sktime-dl](https://github.com | **[Time Series Distances/Kernels]** | maturing | [Tutorial](https://github.com/sktime/sktime/blob/main/examples/03_transformers.ipynb) Β· [API Reference](https://www.sktime.net/en/latest/api_reference/dists_kernels.html) Β· [Extension Template](https://github.com/sktime/sktime/blob/main/extension_templates/dist_kern_panel.py) | | **[Time Series Alignment]** | experimental | [API Reference](https://www.sktime.net/en/latest/api_reference/alignment.html) Β· [Extension Template](https://github.com/sktime/sktime/blob/main/extension_templates/alignment.py) | | **[Annotation]** | experimental | [Extension Template](https://github.com/sktime/sktime/blob/main/extension_templates/annotation.py) | +| **[Time Series Splitters]** | maturing | [Extension Template](https://github.com/sktime/sktime/blob/main/extension_templates/split.py) | | | **[Distributions and simulation]** | experimental | | [forecasting]: https://github.com/sktime/sktime/tree/main/sktime/forecasting @@ -95,6 +94,7 @@ For **deep learning**, see our companion package: [sktime-dl](https://github.com [time series alignment]: https://github.com/sktime/sktime/tree/main/sktime/alignment [transformations]: https://github.com/sktime/sktime/tree/main/sktime/transformations [distributions and simulation]: https://github.com/sktime/sktime/tree/main/sktime/proba +[time series splitters]: https://github.com/sktime/sktime/tree/main/sktime/split [parameter fitting]: https://github.com/sktime/sktime/tree/main/sktime/param_est @@ -169,8 +169,8 @@ flexible choice of soft dependencies is unavailable via `conda`) ``` python from sktime.datasets import load_airline from sktime.forecasting.base import ForecastingHorizon -from sktime.forecasting.model_selection import temporal_train_test_split from sktime.forecasting.theta import ThetaForecaster +from sktime.split import temporal_train_test_split from sktime.performance_metrics.forecasting import mean_absolute_percentage_error y = load_airline() diff --git a/build_tools/run_blogposts.sh b/build_tools/run_blogposts.sh new file mode 100755 index 00000000000..95e00b97b7a --- /dev/null +++ b/build_tools/run_blogposts.sh @@ -0,0 +1,11 @@ +#!/bin/bash + +# Script to run all blog post notebooks. +set -euxo pipefail + +CMD="jupyter nbconvert --to notebook --inplace --execute --ExecutePreprocessor.timeout=600" + +for notebook in examples/blog_posts/*.ipynb; do + echo "Running: $notebook" + $CMD "$notebook" +done diff --git a/docs/source/api_reference.rst b/docs/source/api_reference.rst index 657c06571db..9dc6307d578 100644 --- a/docs/source/api_reference.rst +++ b/docs/source/api_reference.rst @@ -24,6 +24,7 @@ For a scientific manual, see the :ref:`user_guide`. api_reference/dists_kernels api_reference/param_est api_reference/performance_metrics + api_reference/split api_reference/alignment api_reference/annotation api_reference/datasets diff --git a/docs/source/api_reference/alignment.rst b/docs/source/api_reference/alignment.rst index 5306ed44cb4..eecec7e7ef3 100644 --- a/docs/source/api_reference/alignment.rst +++ b/docs/source/api_reference/alignment.rst @@ -6,6 +6,12 @@ Time series alignment The :mod:`sktime.alignment` module contains time series aligners, such as dynamic time warping aligners. +All time series aligners in ``sktime`` can be listed using the +``sktime.registry.all_estimators`` utility, +using ``estimator_types="aligner"``, optionally filtered by tags. +Valid tags can be listed using ``sktime.registry.all_tags``. + + Naive aligners -------------- diff --git a/docs/source/api_reference/classification.rst b/docs/source/api_reference/classification.rst index 1994630b232..ebb3d53b257 100644 --- a/docs/source/api_reference/classification.rst +++ b/docs/source/api_reference/classification.rst @@ -21,6 +21,18 @@ Composition ClassifierPipeline ColumnEnsembleClassifier SklearnClassifierPipeline + MultiplexClassifier + +Model selection and tuning +-------------------------- + +.. currentmodule:: sktime.classification.model_selection + +.. autosummary:: + :toctree: auto_generated/ + :template: class.rst + + TSCGridSearchCV Ensembles --------- @@ -45,12 +57,14 @@ Deep learning :template: class.rst CNNClassifier + CNTCClassifier FCNClassifier LSTMFCNClassifier InceptionTimeClassifier MACNNClassifier MLPClassifier MCDCNNClassifier + ResNetClassifier SimpleRNNClassifier TapNetClassifier @@ -180,8 +194,15 @@ Shapelet-based MrSEQL MrSQM -sklearn -------- + +sklearn classifiers +------------------- + +This section contains classifiers which are not time series classifiers but +simple tabular classifiers in ``sklearn`` compatible API. + +They are used internally in time series classifiers, but can also be used +directly in a tabular setting. .. currentmodule:: sktime.classification.sklearn diff --git a/docs/source/api_reference/forecasting.rst b/docs/source/api_reference/forecasting.rst index c73f70d3761..4abc257491d 100644 --- a/docs/source/api_reference/forecasting.rst +++ b/docs/source/api_reference/forecasting.rst @@ -51,6 +51,8 @@ Pipelines can also be constructed using ``*``, ``+``, and ``|`` dunders. Permute HierarchyEnsembleForecaster FhPlexForecaster + IgnoreX + FallbackForecaster Reduction --------- @@ -72,6 +74,7 @@ Use ``make_reduction`` for easy specification. DirectTabularRegressionForecaster DirectTimeSeriesRegressionForecaster + DirectReductionForecaster MultioutputTabularRegressionForecaster MultioutputTimeSeriesRegressionForecaster RecursiveTabularRegressionForecaster @@ -150,6 +153,7 @@ Trend forecasters PolynomialTrendForecaster STLForecaster CurveFitForecaster + ProphetPiecewiseLinearTrendForecaster .. currentmodule:: sktime.forecasting.statsforecast @@ -228,6 +232,7 @@ All "ARIMA" and "Auto-ARIMA" models below include SARIMAX capability. :template: class.rst ARIMA + StatsModelsARIMA .. currentmodule:: sktime.forecasting.sarimax @@ -256,6 +261,8 @@ All "ARIMA" and "Auto-ARIMA" models below include SARIMAX capability. Auto-ARIMA models ~~~~~~~~~~~~~~~~~ +.. currentmodule:: sktime.forecasting.arima + .. autosummary:: :toctree: auto_generated/ :template: class.rst @@ -287,6 +294,14 @@ ARCH models Structural time series models ----------------------------- +.. currentmodule:: sktime.forecasting.ardl + +.. autosummary:: + :toctree: auto_generated/ + :template: class.rst + + ARDL + .. currentmodule:: sktime.forecasting.bats .. autosummary:: @@ -303,6 +318,14 @@ Structural time series models TBATS +.. currentmodule:: sktime.forecasting.statsforecast + +.. autosummary:: + :toctree: auto_generated/ + :template: class.rst + + StatsForecastAutoTBATS + .. currentmodule:: sktime.forecasting.fbprophet .. autosummary:: @@ -327,6 +350,20 @@ Structural time series models DynamicFactor +Transformer (deep learning) based forecasters +--------------------------------------------- + +.. currentmodule:: sktime.forecasting.ltsf + +.. autosummary:: + :toctree: auto_generated/ + :template: class.rst + + LTSFLinearForecaster + LTSFDLinearForecaster + LTSFNLinearForecaster + + Intermittent time series forecasters ------------------------------------ @@ -422,25 +459,8 @@ Model Evaluation (Backtesting) evaluate -Time series splitters ---------------------- - -Time series splitters can be used in both evaluation and tuning. - -.. currentmodule:: sktime.forecasting.model_selection - -.. autosummary:: - :toctree: auto_generated/ - :template: class.rst - - CutoffSplitter - SingleWindowSplitter - SlidingWindowSplitter - ExpandingWindowSplitter - ExpandingGreedySplitter - -.. autosummary:: - :toctree: auto_generated/ - :template: function.rst +Time index splitters +-------------------- - temporal_train_test_split +Evaluation and tuning can be customized using time index based splitters, +for a list of these consult the :ref:`splitter API <split_ref>` diff --git a/docs/source/api_reference/param_est.rst b/docs/source/api_reference/param_est.rst index 4352b6d32ae..9bb1a2cdd07 100644 --- a/docs/source/api_reference/param_est.rst +++ b/docs/source/api_reference/param_est.rst @@ -14,10 +14,6 @@ All parameter estimators in ``sktime`` can be listed using the using ``estimator_types="param_est"``, optionally filtered by tags. Valid tags can be listed using ``sktime.registry.all_tags``. -.. automodule:: sktime.param_est - :no-members: - :no-inherited-members: - Parameter estimators -------------------- @@ -32,6 +28,12 @@ Composition ParamFitterPipeline +.. autosummary:: + :toctree: auto_generated/ + :template: class.rst + + FunctionParamFitter + .. currentmodule:: sktime.param_est.plugin .. autosummary:: @@ -75,3 +77,9 @@ Stationarity estimators StationarityADF StationarityKPSS + StationarityADFArch + StationarityDFGLS + StationarityPhillipsPerron + StationarityKPSSArch + StationarityZivotAndrews + StationarityVarianceRatio diff --git a/docs/source/api_reference/split.rst b/docs/source/api_reference/split.rst new file mode 100644 index 00000000000..7786983b334 --- /dev/null +++ b/docs/source/api_reference/split.rst @@ -0,0 +1,74 @@ + +.. _split_ref: + +Splitters +========= + +The :mod:`sktime.split` module contains algorithms for splitting and resampling data. + +All splitters in ``sktime`` can be listed using the ``sktime.registry.all_estimators`` utility, +using ``estimator_types="splitter"``, optionally filtered by tags. +Valid tags can be listed using ``sktime.registry.all_tags``. + + +Splitting utilities +------------------- + +``temporal_train_test_split`` is a quick utility function for +splitting a single time series into training and test fold. + +Forecasting users interested in performance evaluation are advised +to use full backtesting instead of a single split, e.g., via ``evaluate``, +see :ref:`forecasting API reference <forecasting_ref>`. + +.. currentmodule:: sktime.split + +.. autosummary:: + :toctree: auto_generated/ + :template: function.rst + + temporal_train_test_split + + +Time index splitters +-------------------- + +Time index splitters split one or multiple time series by temporal order. +They are typically used in both evaluation and tuning of forecasters. + +.. currentmodule:: sktime.split + +.. autosummary:: + :toctree: auto_generated/ + :template: class.rst + + CutoffSplitter + SingleWindowSplitter + SlidingWindowSplitter + ExpandingWindowSplitter + ExpandingGreedySplitter + TemporalTrainTestSplitter + + +Time index splitter composition +------------------------------- + +The following splitters are compositions that can be used to create +more complex time index based splitting strategies. + +.. currentmodule:: sktime.split.compose + +.. autosummary:: + :toctree: auto_generated/ + :template: class.rst + + Repeat + +.. currentmodule:: sktime.split + +.. autosummary:: + :toctree: auto_generated/ + :template: class.rst + + SameLocSplitter + TestPlusTrainSplitter diff --git a/docs/source/api_reference/transformations.rst b/docs/source/api_reference/transformations.rst index df9413e1051..0481a8739c1 100644 --- a/docs/source/api_reference/transformations.rst +++ b/docs/source/api_reference/transformations.rst @@ -7,7 +7,7 @@ The :mod:`sktime.transformations` module contains classes for data transformations. All (simple) transformers in ``sktime`` can be listed using the ``sktime.registry.all_estimators`` utility, -using ``estimator_types="regressor"``, optionally filtered by tags. +using ``estimator_types="transformer"``, optionally filtered by tags. Valid tags can be listed using ``sktime.registry.all_tags``. For pairwise transformers (time series distances, kernels), instead see :ref:`_transformations_pairwise_ref`. @@ -188,9 +188,7 @@ Dictionary-based features :toctree: auto_generated/ :template: class.rst - PAA SFA - SAX Moment-based features ~~~~~~~~~~~~~~~~~~~~~ @@ -242,6 +240,7 @@ Lagging :template: class.rst Lag + ReducerTransform Element-wise transforms ~~~~~~~~~~~~~~~~~~~~~~~ @@ -410,8 +409,8 @@ Differencing, slope, kinematics KinematicFeatures -Binning and segmentation -~~~~~~~~~~~~~~~~~~~~~~~~ +Binning, sampling and segmentation +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. currentmodule:: sktime.transformations.series.binning @@ -438,6 +437,39 @@ Binning and segmentation IntervalSegmenter RandomIntervalSegmenter +.. currentmodule:: sktime.transformations.series.dilation_mapping + +.. autosummary:: + :toctree: auto_generated/ + :template: class.rst + + DilationMappingTransformer + +.. currentmodule:: sktime.transformations.series.paa + +.. autosummary:: + :toctree: auto_generated/ + :template: class.rst + + PAA2 + +.. currentmodule:: sktime.transformations.series.sax + +.. autosummary:: + :toctree: auto_generated/ + :template: class.rst + + SAX2 + +.. currentmodule:: sktime.transformations.panel.dictionary_based + +.. autosummary:: + :toctree: auto_generated/ + :template: class.rst + + PAA + SAX + Missing value treatment ~~~~~~~~~~~~~~~~~~~~~~~~ @@ -447,9 +479,16 @@ Missing value treatment :toctree: auto_generated/ :template: class.rst - DropNA Imputer +.. currentmodule:: sktime.transformations.series.dropna + +.. autosummary:: + :toctree: auto_generated/ + :template: class.rst + + DropNA + Seasonality and Date-Time Features ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -657,6 +696,19 @@ Bootstrap transformations SplitterBootstrapTransformer STLBootstrapTransformer +Panel-to-Series transformers +---------------------------- + +These transformers create a single series from a panel. + +.. currentmodule:: sktime.transformations.merger + +.. autosummary:: + :toctree: auto_generated/ + :template: class.rst + + Merger + Outlier detection, changepoint detection ---------------------------------------- diff --git a/docs/source/changelog.rst b/docs/source/changelog.rst index 268a3bbb6c2..04e219f8643 100644 --- a/docs/source/changelog.rst +++ b/docs/source/changelog.rst @@ -3,7 +3,12 @@ Changelog ========= -All notable changes to this project will be documented in this file. We keep track of changes in this file since v0.4.0. The format is based on `Keep a Changelog <https://keepachangelog.com/en/1.0.0/>`_ and we adhere to `Semantic Versioning <https://semver.org/spec/v2.0.0.html>`_. The source code for all `releases <https://github.com/sktime/sktime/releases>`_ is available on GitHub. +All notable changes to this project will be documented in this file. +We keep track of changes in this file since v0.4.0. +The format is based on `Keep a Changelog <https://keepachangelog.com/en/1.0.0/>`_ and +we adhere to `Semantic Versioning <https://semver.org/spec/v2.0.0.html>`_. +The source code for all `releases <https://github.com/sktime/sktime/releases>`_ is +available on GitHub. .. note:: @@ -14,6 +19,936 @@ For upcoming changes and next releases, see our `milestones <https://github.com/ For our long-term plan, see our :ref:`roadmap`. +Version 0.26.0 - 2023-01-27 +--------------------------- + +Maintenance release: + +* support for ``scikit-learn 1.4.X`` +* scheduled deprecations +* minor bugfix + +For last non-maintenance content updates, see 0.25.1. + +Dependency changes +~~~~~~~~~~~~~~~~~~ + +* ``scikit-learn`` bounds have been updated to ``>=0.24.0,<1.5.0``. + +Deprecations and removals +~~~~~~~~~~~~~~~~~~~~~~~~~ + +Benchmarking, Metrics, Splitters +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +* in forecasting ``evaluate``, ``kwargs`` have been removed. + Users should pass backend parameters via the ``backend_params`` + parameter instead. + +Data types, checks, conversions +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +* in ``check_is_mtype``, the default of ``msg_return_dict`` has now changed to ``"dict"`` + +Forecasting tuners +^^^^^^^^^^^^^^^^^^ + +* in forecasting tuners ``ForecastingGridSearchCV``, ``ForecastingRandomizedSearchCV``, + ``ForecastingSkoptSearchCV``, use of ``joblib`` backend specific parameters ``n_jobs``, + ``pre_dispatch`` has been deprecated, and will be removed in ``sktime`` 0.27.0. + Users should pass backend parameters via the ``backend_params`` parameter instead. + +Time series classification +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +* In ``SimpleRNNClassifier``, the ``num_epochs`` parameter has been + renamed to ``n_epochs``. The original parameter of name ``num_epochs`` has now + been removed. + +Time series regression +^^^^^^^^^^^^^^^^^^^^^^ + +* In ``SimpleRNNRegressor``, the ``num_epochs`` parameter has been + renamed to ``n_epochs``. The original parameter of name ``num_epochs`` has now + been removed. + +Contents +~~~~~~~~ + +* [MNT] 0.26.0 deprecations and change actions (:pr:`5817`) :user:`fkiraly` +* [MNT] [Dependabot](deps-dev): Update ``scikit-learn`` requirement from + ``<1.4.0,>=0.24`` to ``>=0.24,<1.5.0`` (:pr:`5776`) :user:`dependabot[bot]` +* [MNT] [Dependabot](deps): Bump styfle/cancel-workflow-action from ``0.12.0`` + to ``0.12.1`` (:pr:`5839`) :user:`dependabot[bot]` +* [MNT] [Dependabot](deps): Bump dorny/paths-filter + from ``2`` to ``3`` (:pr:`5838`) :user:`dependabot[bot]` +* [BUG] fix tag handling in ``IgnoreX`` (:pr:`5843`) :user:`tpvasconcelos`, :user:`fkiraly` + + +Version 0.25.1 - 2023-01-24 +--------------------------- + +Highlights +~~~~~~~~~~ + +* in ``make_reduction``, direct reduction forecaster now supports probabilistic tabular regressors from ``skpro`` (:pr:`5536`) :user:`fkiraly` +* new, efficient, parallelizable PAA and SAX transformer implementations, available as ``PAA2``, ``SAX2`` (:pr:`5742`) :user:`steenrotsman` +* ``FallbackForecaster``, fallback chain of multiple forecaster for exception handling (:pr:`5779`) :user:`ninedigits` +* time series classification: ``sktime`` native grid search, multiplexer for autoML (:pr:`4596`, :pr:`5678`) :user:`achieveordie`, :user:`fkiraly` +* ``IgnoreX`` - forecasting compositor to ignore exogenous data, for use in tuning (:pr:`5769`) :user:`hliebert`, :user:`fkiraly` +* classifier migrated from ``sktime-dl``: CNTC classifier (:pr:`3978`) :user:`aurumnpegasus` +* authors and maintainers of algorithms are now tracked via tags ``"authors"`` and ``"maintainers"``, see below + +Dependency changes +~~~~~~~~~~~~~~~~~~ + +* ``arch`` (forecasting and parameter estimation soft dependency) bounds have been updated to ``>=5.6,<6.4.0`` (:pr:`5771`) :user:`dependabot[bot]` +* ``mne`` (transformations soft dependency) bounds have been updated to ``>=1.5,<1.7`` (:pr:`5585`) :user:`dependabot[bot]` +* ``dask`` (data container and parallelization back-end) bounds have been updated to ``<2024.1.1`` (:pr:`5748`) :user:`dependabot[bot]` + +Core interface changes +~~~~~~~~~~~~~~~~~~~~~~ + +BaseObject and base framework +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +* estimators and objects now record author and maintainer information in the new + tags ``"authors"`` and ``"maintainers"``. This is required only for estimators + in ``sktime`` proper and compatible third party packages. It is also used to generate + mini-package headers used in lookup functionality of the ``sktime`` webpage. +* author and maintainer information in the ``sktime`` package is no longer recorded in + ``CODEOWNERS``, but in the new tags ``"authors"`` and ``"maintainers"``. + Authors and maintainer do not need to action this change, as it has been carried out + by the ``sktime`` maintainers. However, authors and maintainers are encouraged to + check the information in the tags, and to flag any accidental omissions or errors. + +Benchmarking, Metrics, Splitters +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +* forecasting point prediction metrics now also support parallelization via + ``set_config``, for broadcasting on hierarchical or multivariate data + +Forecasting +^^^^^^^^^^^ + +* forecasters can now be prevented from storing a reference to all seen data + as ``self._y`` and ``self._X`` by setting the config ``"remember_data"`` to + ``False`` via ``set_config``. This is useful for serialization of forecasters. + Currently, the setting is only supported for a combination of data and forecasters + where instance or variable broadcasting is not triggered, + but the feature will be extended to all situations in the future. + +Parameter estimation and hypothesis testing +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +* Parameter plugin or estimation based parameter tuning estimators can now be quickly constructed + with the ``*`` dunder, which will construct a ``PluginParamsForecaster`` or ``PluginParamsTransformer`` + with all fitted paramters (``get_fitted_params``) of the left element plugged in into the right element + (``set_params``), where parameter names match. + For instance, ``SeasonalityACF() * Deseasonalizer()`` will construct + a ``Deseasonalizer`` whose ``sp`` (seasonality period) parameter is tuned + by ``SeasonalityACF``, estimating ``sp`` via the ACF significance criterion on the series. +* The ``*`` dunder binds to the left, for instance + ``Differencer() * SeasonalityACF() * Deseasonalizer()`` will construct + a ``Deseasonalizer`` whose ``sp`` (seasonality period) parameter is tuned + by ``SeasonalityACF``, estimating ``sp`` via the ACF significance criterion + on first differenced data (for stationarity). + Here first differencing is not applied to the ``Deseasonalizer``, + but only to the input of ``SeasonalityACF``, as the first ``*`` constructs + a parameter estimator, and the second ``*`` plugs in the parameter estimator into + the ``Deseasonalizer``. + +Transformations +^^^^^^^^^^^^^^^ + +* transformations, i.e., ``BaseTransformer`` descendant instances, + can now also return ``None`` in ``_transform``, this is interpreted as empty data. + +Deprecations and removals +~~~~~~~~~~~~~~~~~~~~~~~~~ + +Transformations +^^^^^^^^^^^^^^^ + +* ``panel.dictionary_based.PAA`` will be renamed to ``PAAlegacy`` in ``sktime`` 0.27.0, + while ``sktime.transformations.series.PAA2`` will be renamed to ``PAA``. + ``PAA2`` will become the primary PAA implementation in ``sktime``, + while the current ``PAA`` will continue to be available as ``PAAlegacy``. + Both estimators are also available under their future name at their + current location, and will be available under their deprecated name + until 0.28.0. + To prepare for the name change, do one of the following: + 1. replace use of ``PAA`` from ``sktime.transformations.panel.dictionary_based`` + by use of ``PAA2`` from ``sktime.transformations.series.paa``, switching + parameter names appropriately, or + 2. replace use of ``PAA`` from ``sktime.transformations.panel.dictionary_based`` + by use of ``PAAlegacy`` from ``sktime.transformations.panel.dictionary_based``, + without change of parameter values. +* ``panel.dictionary_based.SAX`` will be renamed to ``SAXlegacy`` in ``sktime`` 0.27.0, + while ``sktime.transformations.series.SAX2`` will be renamed to ``SAX``. + ``SAX2`` will become the primary SAX implementation in ``sktime``, + while the current ``SAX`` will continue to be available as ``SAXlegacy``. + Both estimators are also available under their future name at their + current location, and will be available under their deprecated name + until 0.28.0. + To prepare for the name change, do one of the following: + 1. replace use of ``SAX`` from ``sktime.transformations.panel.dictionary_based`` + by use of ``SAX2`` from ``sktime.transformations.series.paa``, switching + parameter names appropriately, or + 2. replace use of ``SAX`` from ``sktime.transformations.panel.dictionary_based`` + by use of ``SAXlegacy`` from ``sktime.transformations.panel.dictionary_based``, + without change of parameter values. + +Enhancements +~~~~~~~~~~~~ + +BaseObject and base framework +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +* [ENH] update ``deep_equals`` to accommodate plugins, e.g., for ``polars`` (:pr:`5504`) :user:`fkiraly` +* [ENH] Replace ``isinstance`` by ``object_type`` tag based checks (:pr:`5657`) :user:`benheid` +* [ENH] author and maintainer tags (:pr:`5754`) :user:`fkiraly` +* [ENH] enable ``all_tags`` to retrieve estimator and object tags (:pr:`5798`) :user:`fkiraly` +* [ENH] remove maintainer information from ``CODEOWNERS`` in favour of estimator tags (:pr:`5808`) :user:`fkiraly` +* [ENH] author and maintainer tags for alignment and distances modules (:pr:`5801`) :user:`fkiraly` +* [ENH] author and maintainer tags for forecasting module (:pr:`5802`) :user:`fkiraly` +* [ENH] author and maintainer tags for distributions and parameter fitting module (:pr:`5803`) :user:`fkiraly` +* [ENH] author and maintainer tags for classification, clustering and regression modules (:pr:`5807`) :user:`fkiraly` +* [ENH] author and maintainer tags for transformer module (:pr:`5800`) :user:`fkiraly` + +Benchmarking, Metrics, Splitters +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +* [ENH] Repeat splitter composition (:pr:`5737`) :user:`fkiraly` +* [ENH] parallelization support and config for forecasting performance metrics (:pr:`5813`) :user:`fkiraly` + +Data types, checks, conversions +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +* [ENH] in ``VectorizedDF``, partially decouple internal data store from methods (:pr:`5681`) :user:`fkiraly` + +Forecasting +^^^^^^^^^^^ + +* [ENH] ``Imputer``: conditional parameter handling logic (:pr:`3916`) :user:`aiwalter`, :user:`fkiraly`` +* [ENH] support for probabilistic regressors (``skpro``) in ``make_reduction``, direct reduction (:pr:`5536`) :user:`fkiraly` +* [ENH] private utility for ``BaseForecaster`` get columns, for all ``predict``-like functions (:pr:`5590`) :user:`fkiraly` +* [ENH] adding second test parameters for ``TBATS`` (:pr:`5689`) :user:`NguyenChienFelix33` +* [ENH] config to turn off data memory in forecasters (:pr:`5676`) :user:`fkiraly`, :user:`corradomio` +* [ENH] Simplify conditional statements in direct reducer (:pr:`5725`) :user:`fkiraly` +* [ENH] forecasting compositor to ignore exogenous data (:pr:`5769`) :user:`hliebert`, :user:`fkiraly` +* [ENH] add ``disp`` parameter to ``SARIMAX`` to control output verbosity (:pr:`5770`) :user:`tvdboom` +* [ENH] expose parameters supported by ``fit`` method of ``SARIMAX`` in ``statsmodels`` (:pr:`5787`) :user:`yarnabrina` +* [ENH] ``FallbackForecaster``, fallback upon fail with multiple forecaster chain (:pr:`5779`) :user:`ninedigits` + +Parameter estimation and hypothesis testing +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +* [ENH] Simplify ``BaseEstimator._get_fitted_params()`` and ``BaseParamFitter`` inheritance of that method (:pr:`5633`) :user:`tpvasconcelos` +* [ENH] parameter plugin for estimator into transformers, right concat dunder (:pr:`5764`) :user:`fkiraly` + +Probability distributions and simulators +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +* [ENH] bring distributions module on par with ``skpro`` distributions (:pr:`5708`) :user:`fkiraly`, :user:`alex-jg3` + +Time series classification +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +* [ENH] migrating CNTC network and classifier for classification from ``sktime-dl`` (:pr:`3978`) :user:`aurumnpegasus`, :user:`fkiraly` +* [ENH] grid search for time series classification (:pr:`4596`) :user:`achieveordie`, :user:`fkiraly` +* [ENH] reduce private coupling of ``IndividualBOSS`` classifier and ``BaseClassifier`` (:pr:`5654`) :user:`fkiraly` +* [ENH] multiplexer classifier (:pr:`5678`) :user:`fkiraly` +* [ENH] refactor structure of time series forest classifier related files (:pr:`5751`) :user:`fkiraly` + +Transformations +^^^^^^^^^^^^^^^ + +* [ENH] better explanation about fit/transform instance linking in instance-wise transformers in error messages, and pointer to common solution (:pr:`5652`) :user:`fkiraly` +* [ENH] New ``PAA`` and ``SAX`` transformer implementations (:pr:`5742`) :user:`steenrotsman` +* [ENH] feature upgrade for ``SplitterSummarizer`` - granular control of inner ``fit``/``transform`` input (:pr:`5750`) :user:`fkiraly` +* [ENH] allow ``BaseTransformer._transform`` to return ``None`` (:pr:`5772`) :user:`fkiraly`, :user:`hliebert` + +Test framework +^^^^^^^^^^^^^^ + +* [ENH] refactor tests with parallelization backend fixtures to programmatic backend fixture lookup (:pr:`5714`) :user:`fkiraly` +* [ENH] further refactor parallelization backend test fixtures to use central location (:pr:`5734`) :user:`fkiraly` + + +Fixes +~~~~~ + +BaseObject and base framework +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +* [BUG] fix scitype inference utility for all cases (:pr:`5672`) :user:`fkiraly` +* [BUG] fixes for minor typos in error message related to custom ``joblib`` backend selection (:pr:`5724`) :user:`fkiraly` +* [BUG] handles ``AttributeError`` in ``show_versions`` when dependency lacks ``__version__`` (:pr:`5793`) :user:`yarnabrina` +* [BUG] fix type error in parallelization backend test fixture refactor (:pr:`5760`) :user:`fkiraly` + +Benchmarking, Metrics, Splitters +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +* [BUG] Fix dynamic ``make_forecasting_scorer`` for newer ``sklearn`` metrics (:pr:`5717`) :user:`fkiraly` +* [BUG] fix ``test_evaluate_error_score`` to skip test of expected warning raised if the ``joblib`` backend is ``"loky"`` or ``"multiprocessing"`` (:pr:`5780`) :user:`fkiraly` + +Data loaders +^^^^^^^^^^^^ + +* [BUG] fix ``extract_path`` arg in ``sktime.datasets.load_UCR_UEA_dataset`` (:pr:`5744`) :user:`steenrotsman` + +Data types, checks, conversions +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +* [BUG] fix ``deep_equals`` for ``np.array`` with ``dtype="object"`` (:pr:`5697`) :user:`fkiraly` + +Forecasting +^^^^^^^^^^^ + +* [BUG] fix ``ForecastingHorizon.get_expected_pred_idx`` ``sort_time`` (:pr:`5726`) :user:`fkiraly` +* [BUG] in ``BaggingForecaster``, fix ``random_state`` handling (:pr:`5730`) :user:`fkiraly` + +Pipelines +^^^^^^^^^ + +* [BUG] Enable ``pipeline.fit`` without X (:pr:`5656`) :user:`benheid` + +Time series classification +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +* [BUG] fix ``predict`` output conversion failure in ``BaseClassifier``, ``BaseRegressor``, if ``y_inner_mtype`` tag is a list (:pr:`5680`) :user:`fkiraly` +* [BUG] fix ``test_multioutput`` for genuinely multioutput classifiers (:pr:`5700`) :user:`fkiraly` + +Time series regression +^^^^^^^^^^^^^^^^^^^^^^ + +* [BUG] fix ``predict`` output conversion failure in ``BaseClassifier``, ``BaseRegressor``, if ``y_inner_mtype`` tag is a list (:pr:`5680`) :user:`fkiraly` + +Transformations +^^^^^^^^^^^^^^^ + +* [BUG] skip sporadic test errors in ``ExponentialSmoothing`` (:pr:`5516`) :user:`achieveordie` +* [BUG] fix sporadic permutation of internal feature columns in ``TSFreshClassifier.predict`` (:pr:`5673`) :user:`fkiraly` +* [BUG] fix backend strings in transformer ``test_base`` (:pr:`5695`) :user:`fkiraly` +* [BUG] Ensure ``MultiRocketMultivariate`` uses ``random_state`` (:pr:`5710`) :user:`chrico-bu-uab` + +Test framework +^^^^^^^^^^^^^^ + +* [BUG] Fixing dockerized tests (:pr:`5426`) :user:`kurayami07734` + + +Maintenance +~~~~~~~~~~~ + +* [MNT] [Dependabot](deps-dev): Update sphinx-issues requirement from ``<4.0.0`` to ``<5.0.0`` (:pr:`5792`) :user:`dependabot[bot]` +* [MNT] [Dependabot](deps): Bump tj-actions/changed-files from 41 to 42 (:pr:`5777`) :user:`dependabot[bot]` +* [MNT] [Dependabot](deps-dev): Update arch requirement from ``<6.3.0,>=5.6`` to ``>=5.6,<6.4.0`` (:pr:`5771`) :user:`dependabot[bot]` +* [MNT] [Dependabot](deps-dev): Update mne requirement from ``<1.6,>=1.5`` to ``>=1.5,<1.7`` (:pr:`5585`) :user:`dependabot[bot]` +* [MNT] [Dependabot](deps-dev): Update dask requirement from ``<2023.12.2`` to ``<2024.1.1`` (:pr:`5748`) :user:`dependabot[bot]` +* [MNT] improvements to modular CI framework - clearer naming, ``pyproject`` handling (:pr:`5713`) :user:`fkiraly` +* [MNT] temporary deactivation of new CI (:pr:`5795`) :user:`fkiraly` +* [MNT] fix faulty deprecation logic for ``n_jobs``, ``pre_dispatch`` in forecasting tuners, bump deprecation to 0.27.0 (:pr:`5784`) :user:`fkiraly` +* [MNT] update python version in binder dockerfile to 3.11 (:pr:`5762`) :user:`fkiraly` +* [MNT] address various deprecations from ``pandas`` (:pr:`5733`) :user:`fkiraly`, :user:`yarnabrina` +* [MNT] ``scikit-learn 1.4.0`` compatibility patches (:pr:`5782`, :pr:`5811`) :user:`fkiraly` +* [MNT] Code quality updates (:pr:`5786`) :user:`yarnabrina` +* [MNT] change cycle for making ``SAX2`` and ``PAA2`` primary implementation renamed to ``SAX``, ``PAA`` (:pr:`5799`) :user:`fkiraly` +* [MNT] remove maintainer information from ``CODEOWNERS`` in favour of estimator tags (:pr:`5808`) :user:`fkiraly` +* [MNT] addressing more ``pandas`` deprecations (:pr:`5816`) :user:`fkiraly` +* [MNT] address ``pd.DataFrame.groupby(axis=1)`` deprecation in ``EnsembleForecaster`` (:pr:`5707`) :user:`ninedigits` +* [MNT] add missing ``__author__`` field for ``MultiRocket`` and ``MultiRocketMultivariate`` (:pr:`5698`) :user:`fkiraly` +* [MNT] addressing ``DataFrame.groupby(axis=1)`` deprecation in metric classes (:pr:`5709`) :user:`fkiraly` +* [MNT] added upper bound ``pycatch22<0.4.5`` in ``transformations`` dependency set to avoid installation error on windows (:pr:`5670`) :user:`yarnabrina` +* [MNT] refactoring new CI to fix some bugs and other minor enhancements (:pr:`5638`) :user:`yarnabrina` +* [MNT] Update ``tslearn`` dependency version in pyproject.toml (:pr:`5686`) :user:`DManowitz` +* [MNT] fix several spelling mistakes (:pr:`5639`) :user:`yarnabrina` + +Documentation +~~~~~~~~~~~~~ + +* [DOC] comment in ``CONTRIBUTORS.md`` that source file is ``all-contributorsrc`` (:pr:`5687`) :user:`fkiraly` +* [DOC] improved docstring for ``TrendForecaster`` and ``PolynomialTrendForecaster`` (:pr:`5747`) :user:`fkiraly` +* [DOC] updated algorithm inclusion guide (:pr:`5753`) :user:`fkiraly` +* [DOC] improved docstring for ``TimeSeriesForestClassifier`` (:pr:`5741`) :user:`fkiraly` +* [DOC] fix ``scitype`` string of transformers in API ref (:pr:`5759`) :user:`fkiraly` +* [DOC] improved formatting of tag section in extension templates (:pr:`5812`) :user:`fkiraly` +* [DOC] ``Imputer``: docstring clarity improvement, conditional parameter handling logic (:pr:`3916`) :user:`aiwalter`, :user:`fkiraly`` +* [DOC] extension template for time series splitters (:pr:`5769`) :user:`fkiraly` +* [DOC] update soft dependency handling guide for tests with tag based dependency checking (:pr:`5756`) :user:`fkiraly` +* [DOC] fix all import failures in API docs and related missing exports (:pr:`5752`) :user:`fkiraly` +* [DOC] improve clarity in describing ``strategy="refit"`` in forecasting tuners' docstrings (:pr:`5711`) :user:`fkiraly` +* [DOC] correct type statement in forecasting tuner regarding ``forecaster`` (:pr:`5699`) :user:`fkiraly` +* [DOC] various minor API reference improvements (:pr:`5721`) :user:`fkiraly` +* [DOC] add ``ReducerTransform`` and ``DirectReductionForecaster`` to API reference (:pr:`5690`) :user:`fkiraly` +* [DOC] remove outdated ``sktime-dl`` reference in ``README.md`` (:pr:`5685`) :user:`fkiraly` + +Contributors +~~~~~~~~~~~~ + +:user:`achieveordie`, +:user:`aiwalter`, +:user:`alex-jg3`, +:user:`aurumnpegasus`, +:user:`benheid`, +:user:`chrico-bu-uab`, +:user:`corradomio`, +:user:`DManowitz`, +:user:`fkiraly`, +:user:`hliebert`, +:user:`NguyenChienFelix33`, +:user:`ninedigits`, +:user:`kurayami07734`, +:user:`steenrotsman`, +:user:`tpvasconcelos`, +:user:`tvdboom`, +:user:`yarnabrina` + + +Version 0.25.0 - 2023-12-26 +--------------------------- + +Release with base class updates and scheduled deprecations: + +* framework support for multioutput classifiers, regressors + (:pr:`5408`, :pr:`5651`, :pr:`5662`) :user:`Vasudeva-bit`, :user:`fkiraly` +* framework support for panel-to-series transformers (:pr:`5351`) :user:`benHeid` +* scheduled deprecations + +For last larger feature update, see 0.24.2. + +Core interface changes +~~~~~~~~~~~~~~~~~~~~~~ + +Time series classification and regression +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +* the base class framework now supports multioutput classifiers or regressors. + All classifiers and regressors are now able to make multioutput predictions, + including all third party classifiers and regressors. + A multioutput ``y`` can now be passed, in the form of a 2D ``np.ndarray`` or + ``pd.DataFrame``, with one column per output. + The ``predict`` method will then return a predicted output of the same type. + To retain downwards compatibility, ``predict`` will always return a 1D ``np.ndarray`` + for univariate outputs, this is currently not subject to deprecation. + +* Genuinely multioutput classifiers and regressors are labelled with the new + tag ``capability:multioutput`` being ``True``. + All other classifiers and regressors broadcast by column of ``y``, + and a parallelization backend can be selected via ``set_config``, + by setting the ``backend:parallel`` and ``backend:parallel:params`` configuration + flags, see the ``set_config`` docstring for details. + Broadcasting extends automatically to all existing third party classifiers + and regressors via base class inheritance once ``sktime`` is updated, + the estimator classes themselves do not need to be updated. + +* classifiers and regressors now have a tag ``y_inner_mtype``, this allows extenders + to specify an internal ``mtype``, of ``Table`` scitype. + The mtype specified i the tag is the guaranteed + mtype of ``y`` seen in the private ``_fit`` method. + The default is the same as previously + implicit, the ``numpy1D`` mtype. + Therefore, third party classifiers and regressors do not need to be updated, + and should be fully upwards compatible. + +Transformations +^^^^^^^^^^^^^^^ + +* the base class framework now supports transformations that aggregate ``Panel`` data + to ``Series`` data, i.e., panel-to-series transformers, e.g., averaging. + Such transformers are identified by the tags + ``scitype:transform-input`` being ``"Panel"``, + and ``scitype:transform-output`` being ``"Series"``. + An example is ``Merger``. + +Deprecations and removals +~~~~~~~~~~~~~~~~~~~~~~~~~ + +Benchmarking, Metrics, Splitters +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +* time series splitters, i.e., descendants of ``BaseSplitter``, have moved from + ``sktime.forecasting.model_selection`` to ``sktime.split``. + They are no longer available in the old location + ``sktime.forecasting.model_selection``, since 0.25.0. + Forecasting tuners are still present in ``sktime.forecasting.model_selection``, + and their locationn is not subject to deprecation. + +* in forecasting ``evaluate``, the order of columns in the return data frame + has changed. Users should consult the docstring of ``evaluate`` for details. + +* in forecasting ``evaluate``, the ``compute`` argument was removed, + after deprecation in 0.24.0. + Its purpose was to distinguish lazy or eager evaluation in + the ``dask`` parallelization backend. + To switch between lazy and eager evaluation, users should instead + select ``dask`` or ``dask_lazy`` via the ``backend`` parameter. + +* in forecasting ``evaluate``, ``kwargs`` are deprecated, removal has been + moved to 0.26.0. Users should pass backend parameters via the ``backend_params`` + parameter instead. + + +Contents +~~~~~~~~ + +* [ENH] Multioutput capability for all time series classifiers and regressors, broadcasting and tag (:pr:`5408`) :user:`Vasudeva-bit` +* [ENH] Support for panel-to-series transformers, merger transformation (:pr:`5351`) :user:`benHeid` +* [ENH] allow object ``dtype``-s in ``pandas`` based ``Table`` mtype-s (:pr:`5651`) :user:`fkiraly` +* [ENH] intermediate base class for panel tasks - classification, regression (:pr:`5662`) :user:`fkiraly` +* [MNT] CI element to test blogpost notebooks (:pr:`5663`) :user:`fkiraly`, :user:`yarnabrina` +* [MNT] 0.25.0 deprecations and change actions (:pr:`5613`) :user:`fkiraly` + +Contributors +~~~~~~~~~~~~ + +:user:`benHeid`, +:user:`fkiraly`, +:user:`Vasudeva-bit`, +:user:`yarnabrina` + +Version 0.24.2 - 2023-12-24 +--------------------------- + +Highlights +~~~~~~~~~~ + +* ``FunctionParamFitter`` for custom parameter switching, e.g., applying forecaster or transformer + conditional on instance properties (:pr:`5630`) :user:`tpvasconcelos` +* ``calibration_plot`` for probabilistic forecasts (:pr:`5632`) :user:`benHeid` +* ``prophet`` based piecewise linear trend forecaster (:pr:`5592`) :user:`sbuse` +* new transformer: dilation mapping (:pr:`5557`) :user:`fspinna` +* custom ``joblib`` backends are now supported in parallelization via ``set_config`` (:pr:`5537`) :user:`fkiraly` + +Dependency changes +~~~~~~~~~~~~~~~~~~ + +* ``dask`` (data container and parallelization back-end) bounds have been updated to ``<2023.12.2``. +* ``holidays`` (transformations soft dependency) bounds have been updated to ``>=0.29,<0.40``. + +Core interface changes +~~~~~~~~~~~~~~~~~~~~~~ + +Forecasting +^^^^^^^^^^^ + +* ``fit_predict`` now allows specification of ``X_pred`` argument for ``predict``. + If passed, ``X_pred`` is used as ``X`` in ``predict``, instead of ``X``. + This is useful for forecasters that expect ``X`` to be subset to the + forecasting horizon. +* custom ``joblib`` backends for hierarchical and multivariate forecast broadcasting + are now supported. To use a custom ``joblib`` backend, use ``set_config`` to + set the ``backend:parallel`` configuration flag to ``"joblib"``, + and set the ``backend`` parameter in the ``dict`` set via ``backend:parallel:params`` + to the name of the custom ``joblib`` backend. Further bakcend parameters + can be passed in the same ``dict``. See docstring of ``set_config`` for details. + +Time series classification +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +* In ``SimpleRNNClassifier``, the ``num_epochs`` parameter is deprecated and has been + renamed to ``n_epochs``. ``num_epochs`` can be used until ``sktime`` 0.25.last, + but will be removed in ``sktime`` 0.26.0. A deprecation warning is raised if + ``num_epochs`` is used. + +Time series regression +^^^^^^^^^^^^^^^^^^^^^^ + +* In ``SimpleRNNRegressor``, the ``num_epochs`` parameter is deprecated and has been + renamed to ``n_epochs``. ``num_epochs`` can be used until ``sktime`` 0.25.last, + but will be removed in ``sktime`` 0.26.0. A deprecation warning is raised if + ``num_epochs`` is used. + +Transformations +^^^^^^^^^^^^^^^ + +* custom ``joblib`` backends for hierarchical and multivariate transformer broadcasting + are now supported. To use a custom ``joblib`` backend, use ``set_config`` to + set the ``backend:parallel`` configuration flag to ``"joblib"``, + and set the ``backend`` parameter in the ``dict`` set via ``backend:parallel:params`` + to the name of the custom ``joblib`` backend. Further bakcend parameters + can be passed in the same ``dict``. See docstring of ``set_config`` for details. + +Enhancements +~~~~~~~~~~~~ + +BaseObject and base framework +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +* [ENH] improved error messages for input checks in base classes (:pr:`5510`) :user:`fkiraly` +* [ENH] support for custom ``joblib`` backends in parallelization (:pr:`5537`) :user:`fkiraly` +* [ENH] consistent use of ``np.ndarray`` for mtype tags (:pr:`5648`) :user:`fkiraly` +* [ENH] set output format parameter in ``sktime`` internal ``check_is_mtype`` calls to silence deprecation warnings (:pr:`5563`) :user:`benHeid` + +Benchmarking, Metrics, Splitters +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +* [ENH] cutoff and forecasting horizon ``loc`` based splitter (:pr:`5575`) :user:`fkiraly` +* [ENH] enable tag related registry tests for ``splitter`` estimator type (:pr:`5576`) :user:`fkiraly` + +Data types, checks, conversions +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +* [ENH] ``sklearn`` facing coercion utility for ``pd.DataFrame``, to ``str`` columns (:pr:`5550`) :user:`fkiraly` +* [ENH] ``deep_equals`` - clearer return on diffs from ``dtypes`` and ``index``, relaxation of ``MultiIndex`` equality check (:pr:`5560`) :user:`fkiraly` +* [ENH] Uniformization of ``pandas`` index types in mtypes (:pr:`5561`) :user:`fkiraly` +* [ENH] ``n_features`` and ``feature_names`` metadata field for time series mtypes (:pr:`5596`) :user:`fkiraly` + +Forecasting +^^^^^^^^^^^ + +* [ENH] expected forecast prediction index utility in ``ForecastingHorizon`` (:pr:`5501`) :user:`fkiraly` +* [ENH] refactor index generation in reducers to use ``ForecastingHorizon`` method (:pr:`5539`) :user:`fkiraly` +* [ENH] fix index name check for reduction forecasters (:pr:`5543`) :user:`fkiraly` +* [ENH] forecaster ``fit_predict`` with ``X_pred`` argument for ``predict`` (:pr:`5562`) :user:`fkiraly` +* [ENH] refactor ``DirectReductionForecaster``to use ``sklearn`` input coercion utility (:pr:`5581`) :user:`fkiraly` +* [ENH] export and test ``DirectReductionForecaster`` (:pr:`5582`) :user:`fkiraly` +* [ENH] ``prophet`` based piecewise linear trend forecaster (:pr:`5592`) :user:`sbuse` +* [ENH] Add ``fit_kwargs`` to ``Prophet`` (:pr:`5597`) :user:`tpvasconcelos` +* [ENH] ``Croston`` test parameters - integer smoothing parameter (:pr:`5608`) :user:`NguyenChienFelix33` +* [ENH] ``prophet`` adapter - safer handling of ``fit_kwargs`` (:pr:`5622`) :user:`fkiraly` + +Parameter estimation and hypothesis testing +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +* [ENH] Add new ``FunctionParamFitter`` parameter estimator (:pr:`5630`) :user:`tpvasconcelos` + +Time series annotation +^^^^^^^^^^^^^^^^^^^^^^ +* [ENH] Change ``GGS`` to inherit from ``BaseSeriesAnnotator`` (:pr:`5315`) :user:`Alex-JG3` + +Time series classification +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +* [ENH] enable testing ``MrSQM`` for persistence in ``nsfa>0`` case after upstream bugfix (:pr:`5171`) :user:`fkiraly` +* [ENH] ``num_epochs`` renamed to ``n_epochs`` in ``SimpleRNNClassifier`` and ``SimpleRNNRegressor`` (:pr:`5607`) :user:`aeyazadil` + +Time series clustering +^^^^^^^^^^^^^^^^^^^^^^ + +* [ENH] enable tag related registry tests for ``clusterer`` estimator type (:pr:`5576`) :user:`fkiraly` + +Transformations +^^^^^^^^^^^^^^^ + +* [ENH] dilation mapping transformer (:pr:`5557`) :user:`fspinna` +* [ENH] second test parameter set for ``TSFreshRelevantFeatureExtractor`` (:pr:`5623`) :user:`fkiraly` + +Visualization +^^^^^^^^^^^^^ + +* [ENH] Add ``calibration_plot`` for probabilistic forecasts (:pr:`5632`) :user:`benHeid` + +Test framework +^^^^^^^^^^^^^^ + +* [ENH] reactivate and fix ``test_multiprocessing_idempotent`` (:pr:`5573`) :user:`fkiraly` +* [ENH] test class register, refactor ``check_estimator`` test gathering to central location (:pr:`5574`) :user:`fkiraly` +* [ENH] conditional testing of objects - test if covering test class has changed (:pr:`5579`) :user:`fkiraly` + + +Fixes +~~~~~ + +BaseObject and base framework +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +* [BUG] fix ``scitype`` ``coerce_to_list`` parameter, add test coverage (:pr:`5578`) :user:`fkiraly` + +Data types, checks, conversions +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +* [BUG] Fix typos in mtype tags ``np.ndarray``, from erroneous ``nd.array`` (:pr:`5645`) :user:`yarnabrina` + +Forecasting +^^^^^^^^^^^ + +* [BUG] in ``ARCH``, fix ``str`` coercion of ``pd.Series`` name (:pr:`5407`) :user:`Vasudeva-bit` +* [BUG] in reduced regressor, copy or truncate ``X`` if it does not fit the forecasting horizon (:pr:`5542`) :user:`benHeid` +* [BUG] pass correct level argument from ``StatsForecastBackAdapter`` to ``statsforecast`` (:pr:`5587`) :user:`sd2k` +* [BUG] fix ``HierarchyEnsembleForecaster`` returned unexpected predictions if data had only one hierarchy level and forecasters specified by node (:pr:`5615`) :user:`VyomkeshVyas` +* [BUG] fix loss of time zone attribute in ``ForecastingHorizon.to_absolute`` (:pr:`5628`) :user:`fkiraly` +* [BUG] change index match to integer in ``_StatsModelsAdapter`` predict (:pr:`5642`) :user:`ciaran-g` + +Transformations +^^^^^^^^^^^^^^^ + +* [BUG] ``TsFreshFeatureExtractor`` - correct wrong forwarded parameter name ``profiling`` (:pr:`5600`) :user:`sssilvar` +* [BUG] Correct inference of ``TransformerPipeline`` output type tag (:pr:`5625`) :user:`fkiraly` + +Visualization +^^^^^^^^^^^^^ + +* [BUG] Fix multiple figures created by ``plot_windows`` (:pr:`5636`) :user:`benHeid` + + +Maintenance +~~~~~~~~~~~ + +* [MNT] CI Modifications (:pr:`5498`) :user:`yarnabrina` +* [MNT] rename variables in base (:pr:`5502`) :user:`yarnabrina` +* [MNT] addressing various ``pandas`` related deprecations (:pr:`5583`) :user:`fkiraly` +* [MNT] Update pre commit hooks (:pr:`5646`) :user:`yarnabrina` +* [MNT] [Dependabot](deps-dev): Update ``pytest-xdist`` requirement from ``<3.4,>=3.3`` to ``>=3.3,<3.5`` (:pr:`5551`) :user:`dependabot[bot]` +* [MNT] [Dependabot](deps-dev): Update ``dask`` requirement from ``<2023.7.1`` to ``<2023.11.1`` (:pr:`5552`) :user:`dependabot[bot]` +* [MNT] [Dependabot](deps-dev): Update ``dask`` requirement from ``<2023.11.1`` to ``<2023.12.2`` (:pr:`5629`) :user:`dependabot[bot]` +* [MNT] [Dependabot](deps-dev): Update ``holidays`` requirement from ``<0.36,>=0.29`` to ``>=0.29,<0.37`` (:pr:`5538`) :user:`dependabot[bot]` +* [MNT] [Dependabot](deps-dev): Update ``holidays`` requirement from ``<0.37,>=0.29`` to ``>=0.29,<0.38`` (:pr:`5565`) :user:`dependabot[bot]` +* [MNT] [Dependabot](deps-dev): Update ``holidays`` requirement from ``<0.38,>=0.29`` to ``>=0.29,<0.40`` (:pr:`5637`) :user:`dependabot[bot]` +* [MNT] [Dependabot](deps-dev): Update ``sphinx-gallery`` requirement from ``<0.15.0`` to ``<0.16.0`` (:pr:`5566`) :user:`dependabot[bot]` +* [MNT] [Dependabot](deps-dev): Update ``pytest-xdist`` requirement from ``<3.5,>=3.3`` to ``>=3.3,<3.6`` (:pr:`5567`) :user:`dependabot[bot]` +* [MNT] [Dependabot](deps-dev): Update ``pycatch22`` requirement from ``<0.4.4`` to ``<0.4.5`` (:pr:`5542`) :user:`dependabot[bot]` +* [MNT] [Dependabot](deps): Bump actions/download-artifact from 3 to 4 (:pr:`5627`) :user:`dependabot[bot]` +* [MNT] [Dependabot](deps): Bump actions/setup-python from 4 to 5 (:pr:`5605`) :user:`dependabot[bot]` +* [MNT] [Dependabot](deps): Bump actions/upload-artifact from 3 to 4 (:pr:`5626`) :user:`dependabot[bot]` + +Documentation +~~~~~~~~~~~~~ + +* [DOC] splitter full API reference page (:pr:`5577`) :user:`fkiraly` +* [DOC] Correct ReST syntax in "RocketClassifier" (:pr:`5564`) :user:`rahulporuri` +* [DOC] Added notebook accompanying Joanna Lenczuk's blog post for testing (:pr:`5604`) :user:`onyekaugochukwu`, :user:`joanlenczuk` +* [DOC] Remove extra parameter in docstring with incorrect definition (:pr:`5617`) :user:`wayneadams` +* [DOC] fix and complete ``YfromX`` docstring (:pr:`5593`) :user:`fkiraly` +* [DOC] fix typo in ``AA_datatypes_and_datasets.ipynb`` panel data loading example (:pr:`5594`) :user:`fkiraly` +* [DOC] forecasting ``evaluate`` utility - improved algorithm description in docstring #5603 (:pr:`5603`) :user:`adamkells` +* [DOC] add explanation about fit/transform instance linking behaviour of rocket transformers (:pr:`5621`) :user:`fkiraly` +* [DOC] Adjust ``FunctionTransformer``'s docstring (:pr:`5634`) :user:`tpvasconcelos` +* [DOC] fixed typo in ``pytest.mark.skipif`` (:pr:`5640`) :user:`yarnabrina` + +Contributors +~~~~~~~~~~~~ + +:user:`adamkells`, +:user:`aeyazadil`, +:user:`Alex-JG3`, +:user:`benHeid`, +:user:`ciaran-g`, +:user:`fkiraly`, +:user:`fspinna`, +:user:`joanlenczuk`, +:user:`NguyenChienFelix33`, +:user:`onyekaugochukwu`, +:user:`rahulporuri`, +:user:`sbuse`, +:user:`sd2k`, +:user:`sssilvar`, +:user:`tpvasconcelos`, +:user:`Vasudeva-bit`, +:user:`VyomkeshVyas`, +:user:`wayneadams`, +:user:`yarnabrina` + +Version 0.24.1 - 2023-11-05 +--------------------------- + +Highlights +~~~~~~~~~~ + +* ``torch`` adapter, LTSF forecasters - linear, D-linear, N-linear (:pr:`4891`, :pr:`5514`) :user:`luca-miniati` +* more period options in ``FourierFeatures``: ``pandas`` period alias and from offset column (:pr:`5513`) :user:`Ram0nB` +* ``iisignature`` backend option for ``SignatureTransformer`` (:pr:`5398`) :user:`sz85512678` +* ``TimeSeriesForestClassifier`` feature importance and optimized interval generation (:pr:`5338`) :user:`YHallouard` +* all stationarity tests from ``arch`` package available as estimators (:pr:`5439`) :user:`Vasudeva-bit` +* Hyperbolic sine transformation and its inverse, ``ScaledAsinhTransformer``, for soft input or output clipping (:pr:`5389`) :user:`ali-parizad` +* estimator serialization: user choice of ``serialization_format`` in ``save`` method and ``mlfow`` plugin, + support for ``cloudpickle`` (:pr:`5486`, :pr:`5526`) :user:`achieveordie` + +Dependency changes +~~~~~~~~~~~~~~~~~~ + +* ``holidays`` (transformations soft dependency) bounds have been updated to ``>=0.29,<0.36``. +* ``torch`` is now a managed soft dependency for neural networks (``dl`` test set) + +Core interface changes +~~~~~~~~~~~~~~~~~~~~~~ + +* if using ``scikit-base>=0.6.1``: ``set_params`` now recognizes unique ``__``-separated + suffixes as aliases for full parameter string, e.g., ``set_params(foo="bar")`` + instead of ``set_params(estimator__detrender__forecaster__supercalifragilistic__foo="bar")``. + This extends to use of parameter names in tuners, e.g., ``ForecastingGridSearchCV`` grids, + and estimators internally using ``set_params``. The behaviour of ``get_params`` is unchanged. +* ``sktime`` now supports ``cloudpickle`` for estimator serialization, with ``pickle`` + being the standard serialization backend. + To select the serialization backend, use the ``serialization_format`` parameter + of estimators' ``save`` method. + ``cloudpickle`` is already a soft dependency, therefore no dependency change is required. + +Enhancements +~~~~~~~~~~~~ + +BaseObject and base framework +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +* [ENH] test that ``set_params`` recognizes unique suffixes as aliases for full parameter string (:pr:`2931`) :user:`fkiraly` +* [ENH] estimator serialization: user choice of ``serialization_format``, support for ``cloudpickle`` (:pr:`5486`) :user:`achieveordie` + +Benchmarking, Metrics, Splitters +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +* [ENH] in ``ExpandingGreedySplitter``, allow ``float`` ``step_size`` (:pr:`5329`) :user:`fkiraly` +* [ENH] Sensible default for ``BaseSplitter.get_n_splits`` (:pr:`5412`) :user:`fkiraly` + +Data sets and data loaders +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +* [ENH] Add tecator dataset for time series regression as ``sktime`` onboard dataset (:pr:`5428`) :user:`JonathanBechtel` + +Forecasting +^^^^^^^^^^^ + +* [ENH] ``LTSFLinearForecaster``, ``LTSFLinearNetwork``, ``BaseDeepNetworkPyTorch`` (:pr:`4891`) :user:`luca-miniati` +* [ENH] ``LTSFDLinearForecaster``, ``LTSFNLinearForecaster`` (:pr:`5514`) :user:`luca-miniati` +* [ENH] parallel backend selection for forecasting tuners (:pr:`5430`) :user:`fkiraly` +* [ENH] in ``NaiveForecaster``, add valid variance prediction for in-sample forecasts (:pr:`5499`) :user:`fkiraly` + +MLOps & Deployment +~~~~~~~~~~~~~~~~~~ + +* [ENH] in ``mlflow`` plugin, improve informativity of ``ModuleNotFoundError`` messages (:pr:`5487`) :user:`achieveordie` +* [ENH] Add support for DL estimator persistence in ``mlflow`` plugin (:pr:`5526`) :user:`achieveordie` + +Neural networks +^^^^^^^^^^^^^^^ + +* [ENH] ``pytorch`` adapter for neural networks (:pr:`4891`) :user:`luca-miniati` +* [ENH] add placeholder test suite for neural networks (:pr:`5511`) :user:`fkiraly` + +Parameter estimation and hypothesis testing +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +* [ENH] Interface to stationarity tests from ``arch`` package (:pr:`5439`) :user:`Vasudeva-bit` + +Time series annotation +^^^^^^^^^^^^^^^^^^^^^^ + +* [ENH] Add unit tests for change point and segmentation plotting functions (:pr:`5509`) :user:`adamkells` + +Time series classification +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +* [ENH] ``TimeSeriesForestClassifier`` feature importance and optimized interval generation (:pr:`5338`) :user:`YHallouard` + +Transformations +^^^^^^^^^^^^^^^ + +* [ENH] Add Hyperbolic Sine transformation and its inverse (ScaledAsinhTransformer) (:pr:`5389`) :user:`ali-parizad` +* [ENH] ``iisignature`` backend option for ``SignatureTransformer`` (:pr:`5398`) :user:`sz85512678` +* [ENH] general inverse transform for ``MSTL`` transformer (:pr:`5457`) :user:`fkiraly` +* [ENH] more period options in ``FourierFeatures``: ``pandas`` period alias and from offset column (:pr:`5513`) :user:`Ram0nB` + +Maintenance +~~~~~~~~~~~ + +* [MNT] Auto format pyproject (:pr:`5425`) :user:`yarnabrina` +* [MNT] bound ``pycatch22<0.4.4`` due to breaking change in patch version (:pr:`5434`) :user:`fkiraly` +* [MNT] removed two recently added hooks (:pr:`5453`) :user:`yarnabrina` +* [MNT] xfail remote data loaders to silence sporadic failures (:pr:`5461`) :user:`fkiraly` +* [MNT] new CI workflow to test extras (:pr:`5375`) :user:`yarnabrina` +* [MNT] Split CI jobs per components with specific soft-dependencies (:pr:`5304`) :user:`yarnabrina` +* [MNT] Programmatically fix (all) typos (:pr:`5424`) :user:`kianmeng` +* [MNT] fix typos in ``base`` module (:pr:`5313`) :user:`yarnabrina` +* [MNT] fix typos in ``forecasting`` module (:pr:`5314`) :user:`yarnabrina` +* [MNT] added missing checkout steps (:pr:`5471`) :user:`yarnabrina` +* [MNT] adds code quality checks without outdated/deprecated Github actions (:pr:`5427`) :user:`yarnabrina` +* [MNT] revert PR #4681 (:pr:`5508`) :user:`yarnabrina` +* [MNT] address ``pandas`` constructor deprecation message from ``ExpandingGreedySplitter`` (:pr:`5500`) :user:`fkiraly` +* [MNT] address deprecation of ``pd.DataFrame.fillna`` with ``method`` arg (:pr:`5497`) :user:`fkiraly` +* [MNT] Dataset downloader testing workflow (:pr:`5437`) :user:`yarnabrina` +* [MNT] shorter names for CI workflow elements (:pr:`5470`) :user:`fkiraly` +* [MNT] skip ``load_solar`` in doctests (:pr:`5528`) :user:`fkiraly` +* [MNT] revert PR #4681 (:pr:`5508`) :user:`yarnabrina` +* [MNT] exclude downloads in "no soft dependencies" CI element (:pr:`5529`) :user:`fkiraly` + +* [MNT] [Dependabot](deps): Bump actions/setup-node from 3 to 4 (:pr:`5483`) :user:`dependabot[bot]` +* [MNT] [Dependabot](deps-dev): Update pytest-timeout requirement from <2.2,>=2.1 to >=2.1,<2.3 (:pr:`5482`) :user:`dependabot[bot]` +* [MNT] [Dependabot](deps): Bump tj-actions/changed-files from 39 to 40 (:pr:`5492`) :user:`dependabot[bot]` +* [MNT] [Dependabot](deps-dev): Update holidays requirement from <0.35,>=0.29 to >=0.29,<0.36 (:pr:`5443`) :user:`dependabot[bot]` + +Documentation +~~~~~~~~~~~~~ + +* [DOC] fixing docstring example for ``FhPlexForecaster`` (:pr:`4931`) :user:`fkiraly` +* [DOC] Programmatically fix (all) typos (:pr:`5424`) :user:`kianmeng` +* [DOC] comments for readability of ``pyproject.toml`` (:pr:`5472`) :user:`fkiraly` +* [DOC] streamlining API reference, fixing minor issues (:pr:`5466`) :user:`fkiraly` +* [DOC] Fix more typos (:pr:`5478`) :user:`szepeviktor` +* [DOC] update docstring of ``STLTransformer`` to correct statements on inverse and pipelines (:pr:`5455`) :user:`fkiraly` +* [DOC] improved docstrings for ``statsforecast`` estimators (:pr:`5409`) :user:`fkiraly` +* [DOC] add missing API reference entries for five deep learning classifiers (:pr:`5522`) :user:`fkiraly` +* [DOC] fixed docstrings for stationarity tests (:pr:`5531`) :user:`fkiraly` + +Fixes +~~~~~ + +BaseObject and base framework +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +* [BUG] fix error message in ``_check_python_version`` (:pr:`5473`) :user:`fkiraly` + +Benchmarking, Metrics, Splitters +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +* [BUG] fix bug in deprecation logic of ``kwargs`` in ``evaluate`` that always set + backend to ``dask_lazy`` if deprecated ``kwargs`` are passed (:pr:`5469`) :user:`fkiraly` + +Forecasting +^^^^^^^^^^^ + +* [BUG] Fix ``pandas`` ``FutureWarning`` for silent upcasting (:pr:`5395`) :user:`tpvasconcelos` +* [BUG] fix predict function of ``make_reduction`` (recursive, global) to work with tz aware data (:pr:`5464`) :user:`ciaran-g` +* [BUG] in ``TransformedTargetForecaster``, ensure correct setting of ``ignores-exogenous-X`` tag if forecaster ignores ``X``, but at least one transformer uses ``y=X``, e.g., feature selector (:pr:`5521`) :user:`fkiraly` + +Parameter estimation and hypothesis testing +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +* [BUG] fixed incorrect signs for some stationarity tests (:pr:`5531`) :user:`fkiraly` + +Time series annotation +^^^^^^^^^^^^^^^^^^^^^^ + +* [BUG] CLASP logic: remove indexes from exclusion zone that are out of range (:pr:`5459`) :user:`Alex-JG3` +* [BUG] in ``ClaSPSegmentation``, deal with ``k`` when it is too large for ``np.argpartition`` (:pr:`5490`) :user:`Alex-JG3` + +Time series classification +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +* [BUG] fix missing epochs parameter in ``MCDCNNClassifier._fit`` (#4996) (:pr:`5422`) :user:`pseudomo` +* [BUG] add missing exports five deep learning classifiers (:pr:`5522`) :user:`fkiraly` + +Transformations +^^^^^^^^^^^^^^^ + +* [BUG] fix test excepts for ``SignatureTransformer`` (:pr:`5474`) :user:`fkiraly` + +Visualization +^^^^^^^^^^^^^ + +* [BUG] fix ``plot_series`` prediction interval plotting for 3 or less points in forecasting horizon (:pr:`5494`) :user:`fkiraly` + +Contributors +~~~~~~~~~~~~ + +:user:`achieveordie`, +:user:`adamkells`, +:user:`Alex-JG3`, +:user:`ali-parizad`, +:user:`ciaran-g`, +:user:`fkiraly`, +:user:`JonathanBechtel`, +:user:`kianmeng`, +:user:`luca-miniati`, +:user:`pseudomo`, +:user:`Ram0nB`, +:user:`sz85512678`, +:user:`szepeviktor`, +:user:`tpvasconcelos`, +:user:`Vasudeva-bit`, +:user:`yarnabrina`, +:user:`YHallouard` + + Version 0.24.0 - 2023-10-13 --------------------------- @@ -129,7 +1064,7 @@ Benchmarking, Metrics, Splitters ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ * time series splitters, i.e., descendants of ``BaseSplitter``, have moved from - ``sktime.forecasting.model_selection`` to ``sktime.`split``. + ``sktime.forecasting.model_selection`` to ``sktime.split``. The old location ``model_selection`` is deprecated and will be removed in 0.25.0. Until 0.25.0, it is still available but will raise an informative warning message. @@ -198,7 +1133,7 @@ Transformations * [ENH] add proper ``inverse_transform`` to ``STLTransformer`` (:pr:`5300`) :user:`fkiraly` * [ENH] ``joblib`` and ``dask`` backends in broadcasting of estimators in multivariate or hierarchical case - part 1, ``VectorizedDF.vectorize_est`` (:pr:`5267`) :user:`fkiraly` * [ENH] ``joblib`` and ``dask`` backends in broadcasting of estimators in multivariate or hierarchical case - part 2, base class config (:pr:`5301`) :user:`fkiraly` -* [ENH] Refactor of `DateTimeFeatures` tests to `pytest` fixtures (:pr:`5397`) :user:`adamkells` +* [ENH] Refactor of ``DateTimeFeatures`` tests to ``pytest`` fixtures (:pr:`5397`) :user:`adamkells` Testing framework ^^^^^^^^^^^^^^^^^ @@ -253,7 +1188,7 @@ Benchmarking, Metrics, Splitters Forecasting ^^^^^^^^^^^ -* [BUG] fix ``STLForecaster`` tag ``ignores-exogenous-X`` to be correctly set for composites (:pr:`5365`) :user:`yarnabrina` +* [BUG] fix ``STLForecaster`` tag ``ignores-exogeneous-X`` to be correctly set for composites (:pr:`5365`) :user:`yarnabrina` * [BUG] ``statsforecast 1.6.0`` compatibility - in ``statsforecast`` adapter, fixing ``RuntimeError: dictionary changed size during iteration`` (:pr:`5317`) :user:`arnaujc91` * [BUG] ``statsforecast 1.6.0`` compatibility - fix argument differences between ``sktime`` and ``statsforecast`` (:pr:`5393`) :user:`luca-miniati` * [BUG] Fix ``ARCH._check_predict_proba`` (:pr:`5384`) :user:`Vasudeva-bit` @@ -393,8 +1328,8 @@ Benchmarking, Metrics, Splitters * [ENH] speed up ``BaseSplitter`` boilerplate (:pr:`5063`) :user:`fkiraly` * [ENH] Allow unrestricted ID string for ``BaseBenchmarking`` (:pr:`5130`) :user:`hazrulakmal` -Data loaders -^^^^^^^^^^^^ +Data sets and data loaders +^^^^^^^^^^^^^^^^^^^^^^^^^^ * [ENH] set mirrors for time series classification data loaders (:pr:`5260`) :user:`fkiraly` @@ -670,8 +1605,8 @@ Benchmarking, Metrics, Splitters * [ENH] input checks for ``BaseBenchmark``, allow ``add_estimator`` to accept multiple estimators (:pr:`4877`) :user:`hazrulakmal` * [ENH] tests and fixes for ``numpy`` weights in performance metrics - probabilistic metrics (:pr:`5104`) :user:`fkiraly` -Data loaders -^^^^^^^^^^^^ +Data sets and data loaders +^^^^^^^^^^^^^^^^^^^^^^^^^^ * [ENH] rework data loader module, ability to specify download mirrors (:pr:`4985`) :user:`fkiraly` diff --git a/docs/source/conf.py b/docs/source/conf.py index 40e61719795..66c87cff0f5 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -4,7 +4,6 @@ import datetime import os import sys -from importlib import import_module import sktime @@ -325,33 +324,41 @@ def _process_author_info(author_info): Multiple author names will be separated by a comma, with the final name always preceded by "&". """ - if isinstance(author_info, list): - if len(author_info) > 1: - return ", ".join(author_info[:-1]) + " & " + author_info[-1] - else: - return author_info[0] + if isinstance(author_info, str) and author_info.lower() == "sktime developers": + link = ( + '<a href="https://www.sktime.net/en/stable/about/team.html">' + "sktime developers</a>" + ) + return link + + if not isinstance(author_info, list): + author_info = [author_info] + + def _add_link(github_id_str): + link = '<a href="https://www.github.com/{0}">{0}</a>'.format(github_id_str) + return link + + author_info = [_add_link(author) for author in author_info] + + if len(author_info) > 1: + return ", ".join(author_info[:-1]) + " & " + author_info[-1] else: - return author_info + return author_info[0] def _does_not_start_with_underscore(input_string): return not input_string.startswith("_") # creates dataframe as df - COLNAMES = ["Class Name", "Estimator Type", "Authors"] + COLNAMES = ["Class Name", "Estimator Type", "Authors", "Maintainers"] - df = pd.DataFrame([], columns=COLNAMES) + records = [] for modname, modclass in all_estimators(): - algorithm_type = "::".join(str(modclass).split(".")[1:-2]) - try: - author_info = _process_author_info(modclass.__author__) - except AttributeError: - try: - author_info = _process_author_info( - import_module(modclass.__module__).__author__ - ) - except AttributeError: - author_info = "no author info" + algorithm_type = modclass.get_class_tag("object_type", "object") + author_tag = modclass.get_class_tag("authors", "sktime developers") + author_info = _process_author_info(author_tag) + maintainer_tag = modclass.get_class_tag("maintainers", "sktime developers") + maintainer_info = _process_author_info(maintainer_tag) # includes part of class string modpath = str(modclass)[8:-2] @@ -368,8 +375,9 @@ def _does_not_start_with_underscore(input_string): + "</a>" ) - record = pd.DataFrame([modname, algorithm_type, author_info], index=COLNAMES).T - df = pd.concat([df, record], ignore_index=True) + records.append([modname, algorithm_type, author_info, maintainer_info]) + + df = pd.DataFrame(records, columns=COLNAMES) with open("estimator_overview_table.md", "w") as file: df.to_markdown(file, index=False) diff --git a/docs/source/developer_guide/dependencies.rst b/docs/source/developer_guide/dependencies.rst index 92f4f3b4ac2..c083398493a 100644 --- a/docs/source/developer_guide/dependencies.rst +++ b/docs/source/developer_guide/dependencies.rst @@ -52,11 +52,19 @@ Estimators with a soft dependency need to ensure the following: To do this, add a ``# doctest: +SKIP`` to the end of each line in the doctest to skip it entirely. See ``forecasting.arima.ARIMA`` as as an example. If concerned that skipping the test will reduce test coverage, consider exposing the doctest example as a pytest test function instead, see below how to handle soft dependencies in pytest functions. -* Decorate all ``pytest`` tests that import soft dependencies with a ``@pytest.mark.skipif(...)`` conditional on a check to ``_check_soft_dependencies`` - for your new soft dependency, with ``severity="none"``. Be sure that all soft dependencies imported for testing are imported within the test function itself, - rather than for the whole module! This decorator will then skip your test, including imports, unless the system has the required packages installed. - This prevents crashes for any users running ``check_estimator`` on all estimators, or a full local ``pytest`` run without the required soft dependency. - See the tests in ``forecasting.tests.test_pmdarima`` for a concrete example. +* Decorate all ``pytest`` tests that import soft dependencies with a ``@pytest.mark.skipif(...)`` conditional on a soft dependency check. + If the test is specific to a single estimator or object, use ``run_test_for_class`` from ``sktime.tests.test_switch`` + to mediate the condition through the class tags. + Otherwise, use ``_check_soft_dependencies`` for your new soft dependency, with ``severity="none"``. + Be sure that all soft dependencies imported for testing are imported within the test function itself, + rather than at root level (at the top) of the module. + This decorator will then skip your test, including imports, + unless the system has the required packages installed. + This prevents crashes for any users running ``check_estimator`` on all estimators, + or a full local ``pytest`` run without the required soft dependency. + See the tests in ``forecasting.tests.test_pmdarima`` for a concrete example of + ``run_test_for_class`` usage to decorate a test. See ``utils.tests.test_plotting`` + for an example of ``_check_soft_dependencies`` usage. Adding and maintaining soft dependencies ---------------------------------------- diff --git a/docs/source/developer_guide/deprecation.rst b/docs/source/developer_guide/deprecation.rst index 80f0ca94b55..a37a88f85be 100644 --- a/docs/source/developer_guide/deprecation.rst +++ b/docs/source/developer_guide/deprecation.rst @@ -83,6 +83,157 @@ Special deprecations This section outlines the deprecation process for some advanced cases. +Deprecating and change of parameters +------------------------------------ + +The following are common cases of deprecation or change around parameters +of functions or classes (e.g., estimators): + +* changing the default value of a parameter +* renaming a parameter +* adding a parameter with a default value that changes prior behaviour +* changing the sequence of parameters +* removing a parameter + +In all cases, it needs to be ensured that: + +* warnings are raised in cases where user logic would change +* the warning message includes a complete recipe for how to change the code, + to retain current behaviour, or change to alternative behaviour +* sufficient notice is given, i.e., the warning message is present for at least + one MINOR version cycle before the change is carried out +* "todo" comments are left for the release managers to carry out the change, + and optimally a merge-ready change branch/PR is provided, to be merged at the + scheduled version of change + +No such warning is necessary if no working user logic would change, this is the case if: + +* a parameter is added with a default value that retains prior behaviour, + at the end of the parameter list +* a parameter is removed where non-defaults would always raise unexpected exceptions + +Recipes for individual cases above follow. + +Changing the default value of a parameter +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +To change the default value of a parameter, follow steps 1-3 in the pull request +implementing the change. + +1. at current version, change the default value to ``"changing_value"``. + Internally, add logic that overrides the value of the parameter with the old default + value, if the parameter is set to ``"changing_value"``. If the parameter is an + ``__init__`` parameter of an estimator class, + the value cannot be directly overridden, but this needs to be done in a private + parameter copy, since all ``__init__`` parameters must be written + to ``self`` unchanged. I.e., write the parameter to ``self._<param_name>`` unchanged, + and add logic that overrides the value of ``self._<param_name>`` with the old default, + and ensure to use ``self._<param_name>`` in the rest of the code instead of + ``self.<param_name>``. +2. add a warning, using ``sktime.utils.warnings.warn``, if the parameter is called + with a non-default. This warning should always include the name of the estimator/function, + the version of change, and a clear instruction on how to change the code to retain + prior behaviour. E.g., ``"Parameter <param_name> of <estimator_name> will change + default value from <old_value> to <new_value> in sktime version <version_number>. + To retain prior behaviour, set <param_name> to <old_value> explicitly"``. +3. add a TODO comment to the code, to remove the warning and change the default value, + in the next MINOR version cycle. E.g., add the comment + ``# TODO <version_number>: change default of <param_name> to <new_value>, + update docstring, and remove warning``, + at the top of the function or class where the parameter is defined. +4. the release manager will carry out the TODO action in the next MINOR version cycle, + and remove the TODO comment. Optimally, a change branch is provided that the + release manager can merge, and its PR ID is mentioned in the todo. + +Renaming a parameter +~~~~~~~~~~~~~~~~~~~~ + +To rename a parameter, follow steps 1-6 in the pull request +implementing the change. + +1. at current version, add a parameter with the new name at the end of the + list of parameters, with the same default value as the old parameter. + Do not remove the old parameter. +2. change the value of the old parameter to the string ``"deprecated"``. + Change all code in the function or class that uses the old parameter to use + the new parameter instead. This can be done by a bulk-replace. +3. at the start of the function or class init, add logic that overrides the value + of the new parameter with the value of the old parameter, if the old parameter + is not ``"deprecated"``. If the parameter is an ``__init__`` parameter + of an estimator class, + the value cannot be directly overridden, but this needs to be done in a private + parameter, since all ``__init__`` parameters must be written to ``self`` unchanged. +4. add a warning, using ``sktime.utils.warnings.warn``, if the old parameter is called + with a non-default. This warning should always include the name of the estimator/function, + the version of change, and a clear instruction on how to change the code to retain + prior behaviour. E.g., ``"Parameter <param_name> of <estimator_name> will be renamed + from <old_name> to <new_name> in sktime version <version_number>. + To retain prior behaviour, use a kwargs call of <new_name> instead of <old_name>"``. +5. update the docstring of the function or class to refer only to the new parameter. +6. add a TODO comment to the code, to remove the warning and change the default value, + in the next MINOR version cycle. E.g., add the comment + ``# TODO <version_number>: change name of parameter <old_name> to <new_name>, + remove old parameter at the end, and remove warning``, + at the top of the function or class where the parameter is defined. +7. the release manager will carry out the TODO action in the next MINOR version cycle, + and remove the TODO comment. Optimally, a change branch is provided that the + release manager can merge, and its PR ID is mentioned in the todo. + +Adding a parameter with a default value that changes prior behaviour +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +This should be done in two steps: + +* adding the parameter, but with a default value that retains prior behaviour. + As this preserves prior behaviour, no deprecation or change mechanism is necessary. +* then, follow the steps for changing the default value of a parameter, above. + +Changing the sequence of parameters +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +This type of change should be avoided, as it it is difficult to carry out. +If instead one of the above change patterns can be used, that is preferred. + +To change the sequence of parameters, follow steps 1-6 in the pull request +implementing the change. + +1. at current version, change the defaults of all parameters after and including + the first parameter to change position to ``"position_change"``. +2. Internally, add logic that overrides the value of the parameter with the old default + value, if the parameter is set to ``"position_change"``. + For ``__init__`` parameters of an estimator class, + the values cannot be directly overridden, but this needs to be done in a private + parameter copy, since all ``__init__`` parameters must be written + to ``self`` unchanged. I.e., write the parameter to ``self._<param_name>`` unchanged, + and add logic that overrides the value of ``self._<param_name>`` with the old default, + and ensure to use ``self._<param_name>`` in the rest of the code instead of + ``self.<param_name>``. +3. add a warning, using ``sktime.utils.warnings.warn``, if any of the position changing + paramters are called with a non-default. This warning should always include + the name of the estimator/function, the version of change, and a clear instruction + on how to change the code to retain prior behaviour. The instruction + should direct the user to use ``kwargs`` calls instead of positional calls, for + all parameters that change position. +4. add a TODO comment to the code, to remove the warning and change the sequence, + as well as changing default values to the old defaults, + in the next MINOR version cycle. + The TODO comment should contain complete lines of code. + Optimally, a change branch is provided that the + release manager can merge, and its PR ID is mentioned in the todo. + +Removing a parameter +~~~~~~~~~~~~~~~~~~~~ + +If the parameter is removed a position that is not at the end of the parameter list, +it should be first moved to the end o the parameter list. + +For removal of a parameter, follow the steps of "changing the default value", +with a different warning message, namely that the parameter will be removed. + +The error message should contain details on whether prior behaviour can be retained, +if yes in which cases, and if yes, how. + + Deprecating tags ---------------- @@ -108,3 +259,76 @@ When removing/renaming tags after the deprecation period, ensure to remove the removed tags from the dictionaries in ``TagAliaserMixin`` class. If no tags are deprecated anymore (e.g., all deprecated tags are removed/renamed), ensure to remove this class as a parent of ``BaseObject`` or ``BaseEstimator``. + +Example +======= +Here is a simple example template for renaming a parameter of an estimator while changing default value as well. + +Step 1: before any change +------------------------- + +.. code:: python + + class EstimatorName: + def __init__(self, old_parameter="old"): + self.old_parameter = old_parameter + + def fit(self, X, y): + # Fit the model using old_parameter + pass + + def predict(self, X): + # Predict using the fitted model + pass + +Step 2: during deprecation period +--------------------------------- + +.. code:: python + + from sktime.utils.warnings import warn + + + class EstimatorName: + def __init__(self, old_parameter=None, new_parameter="new"): + # TODO (release <MAJOR>.<MINOR>.0) + # remove the 'old_parameter' argument from '__init__' signature + # remove the following 'if' check + # de-indent the following 'else' check + if old_parameter is not None: + warn( + "'old_parameter' of `EstimatorName` is deprecated and will be removed" + " in the version '<MAJOR>.<MINOR>.0'. This has been renamed to " + " 'new_parameter', where you can pass 'old' to keep current behaviour." + " The new argument will use 'new' as its default value.", + category=DeprecationWarning, + obj=self, + ) + self.new_parameter = old_parameter + else: + self.new_parameter = new_parameter + + def fit(self, X, y): + # Fit the model using new_parameter + pass + + def predict(self, X): + # Predict using the fitted model + pass + +Step 3: after deprecation period +-------------------------------- + +.. code:: python + + class FinalEstimator: + def __init__(self, new_parameter="new"): + self.new_parameter = new_parameter + + def fit(self, X, y): + # Fit the model using new_parameter + pass + + def predict(self, X): + # Predict using the fitted model + pass diff --git a/docs/source/get_involved/governance.rst b/docs/source/get_involved/governance.rst index 153f8f411fb..d20d7e62713 100644 --- a/docs/source/get_involved/governance.rst +++ b/docs/source/get_involved/governance.rst @@ -160,11 +160,17 @@ Appointment The contributor who contributes an algorithm is automatically appointed as its first maintainer. -Algorithm maintainers are listed in the `CODEOWNERS <https://github -.com/alan-turing-institute/sktime/blob/main/CODEOWNERS>`__ file. +Algorithm maintainers are listed in the ``"maintainers"`` tag of the estimator class, +by their GitHub ID. The GitHub ID can be linked to further information via +the ``all-contributorsrc`` file. +The tag can be inspected directly in the source code of the class, +or via ``EstimatorName.get_class_tag("maintainers").`` +Inverse lookup such as "which algorithms does maintainer M maintain" +can be carried out using ``registry.all_estimators``. When an algorithm maintainer resigns, they can appoint another contributor as the -new algorithm maintainer. No vote is required. This change should be reflected in the ``CODEOWNERS`` file. +new algorithm maintainer. No vote is required. +This change should be reflected in the ``"maintainers"`` tag. Algorithm maintainers can be appointed by CC simple majority for any algorithm without maintainers. @@ -664,18 +670,19 @@ We have the following guidelines: will have to be extended first. For extending current API, see the `decision making process <#Decision-making>`__ for major changes. -Note that your implementation need not be in sktime to be used together -with sktime tools. You can implement your favorite algorithm in a sktime -compatible way in one of `our companion -repositories <https://github.com/sktime>`__ on GitHub. We will be happy -to list it under `related +Note that an algorithm need not be in sktime to be fully compatible with +sktime interfaces. You can implement your favorite algorithm in a sktime +compatible way in a third party codebase - open or closed - following +the guide for implmenting compatible estimators (see :ref:`developer_guide_add_estimators:`). + +We are happy to list any compatible open source project under `related software <https://github.com/sktime/sktime/wiki/related-software>`__. +Contributions are also welcome to any one of `our companion +repositories <https://github.com/sktime>`__ on GitHub. -If algorithms require major dependencies, we encourage to create a -separate companion repository. For smaller -dependencies which are limited to a few files, we encourage to use soft -dependencies, which are only required for particular modules, but not -for most of sktime's functionality and not for installing sktime. +Dependencies are managed on the level of estimators, hence it is entirely possible +to maintain an algorithm primarily in a third or second party package, and add a +thin interface to sktime proper which has that package as a dependency. .. _acknowledging-contributions: diff --git a/docs/source/get_started.rst b/docs/source/get_started.rst index cce05ec1370..5295cf59476 100644 --- a/docs/source/get_started.rst +++ b/docs/source/get_started.rst @@ -75,9 +75,9 @@ Forecasting >>> from sktime.datasets import load_airline >>> from sktime.forecasting.base import ForecastingHorizon - >>> from sktime.forecasting.model_selection import temporal_train_test_split >>> from sktime.forecasting.theta import ThetaForecaster >>> from sktime.performance_metrics.forecasting import mean_absolute_percentage_error + >>> from sktime.split import temporal_train_test_split >>> y = load_airline() >>> y_train, y_test = temporal_train_test_split(y) diff --git a/docs/source/installation.rst b/docs/source/installation.rst index f5340428f33..5b7a1403813 100644 --- a/docs/source/installation.rst +++ b/docs/source/installation.rst @@ -10,8 +10,6 @@ Installation See here for a `full list of precompiled wheels available on PyPI <https://pypi.org/simple/sktime/>`_. -We appreciate community contributions towards compatibility with python 3.10, or other operating systems. - .. contents:: :local: diff --git a/docs/source/user_guide/introduction.rst b/docs/source/user_guide/introduction.rst index 672dca8a61a..70f6c7fd88b 100644 --- a/docs/source/user_guide/introduction.rst +++ b/docs/source/user_guide/introduction.rst @@ -31,8 +31,8 @@ can simply write: from sktime.datasets import load_airline from sktime.forecasting.compose import make_reduction from sklearn.ensemble import RandomForestRegressor - from sktime.forecasting.model_selection import temporal_train_test_split from sktime.performance_metrics.forecasting import MeanAbsolutePercentageError + from sktime.split import temporal_train_test_split y = load_airline() y_train, y_test = temporal_train_test_split(y) diff --git a/examples/01b_forecasting_proba.ipynb b/examples/01b_forecasting_proba.ipynb index 9f5b00dd08f..ec5ff971cb3 100644 --- a/examples/01b_forecasting_proba.ipynb +++ b/examples/01b_forecasting_proba.ipynb @@ -102,12 +102,12 @@ " </tr>\n", " <tr>\n", " <th>1961-02</th>\n", - " <td>344.853206</td>\n", + " <td>344.853205</td>\n", " <td>497.712761</td>\n", " </tr>\n", " <tr>\n", " <th>1961-03</th>\n", - " <td>324.223996</td>\n", + " <td>324.223995</td>\n", " <td>508.191104</td>\n", " </tr>\n", " </tbody>\n", @@ -119,8 +119,8 @@ " 0.9 \n", " lower upper\n", "1961-01 371.535093 481.554608\n", - "1961-02 344.853206 497.712761\n", - "1961-03 324.223996 508.191104" + "1961-02 344.853205 497.712761\n", + "1961-03 324.223995 508.191104" ] }, "execution_count": 2, @@ -206,243 +206,257 @@ " <tbody>\n", " <tr>\n", " <th>0</th>\n", + " <td>ARCH</td>\n", + " <td><class 'sktime.forecasting.arch._uarch.ARCH'></td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", " <td>ARIMA</td>\n", " <td><class 'sktime.forecasting.arima.ARIMA'></td>\n", " </tr>\n", " <tr>\n", - " <th>1</th>\n", + " <th>2</th>\n", " <td>AutoARIMA</td>\n", " <td><class 'sktime.forecasting.arima.AutoARIMA'></td>\n", " </tr>\n", " <tr>\n", - " <th>2</th>\n", + " <th>3</th>\n", " <td>AutoETS</td>\n", " <td><class 'sktime.forecasting.ets.AutoETS'></td>\n", " </tr>\n", " <tr>\n", - " <th>3</th>\n", + " <th>4</th>\n", " <td>BATS</td>\n", " <td><class 'sktime.forecasting.bats.BATS'></td>\n", " </tr>\n", " <tr>\n", - " <th>4</th>\n", + " <th>5</th>\n", " <td>BaggingForecaster</td>\n", " <td><class 'sktime.forecasting.compose._bagging.Ba...</td>\n", " </tr>\n", " <tr>\n", - " <th>5</th>\n", + " <th>6</th>\n", " <td>ColumnEnsembleForecaster</td>\n", " <td><class 'sktime.forecasting.compose._column_ens...</td>\n", " </tr>\n", " <tr>\n", - " <th>6</th>\n", + " <th>7</th>\n", " <td>ConformalIntervals</td>\n", " <td><class 'sktime.forecasting.conformal.Conformal...</td>\n", " </tr>\n", " <tr>\n", - " <th>7</th>\n", + " <th>8</th>\n", " <td>DynamicFactor</td>\n", " <td><class 'sktime.forecasting.dynamic_factor.Dyna...</td>\n", " </tr>\n", " <tr>\n", - " <th>8</th>\n", + " <th>9</th>\n", " <td>FhPlexForecaster</td>\n", " <td><class 'sktime.forecasting.compose._fhplex.FhP...</td>\n", " </tr>\n", " <tr>\n", - " <th>9</th>\n", + " <th>10</th>\n", " <td>ForecastX</td>\n", " <td><class 'sktime.forecasting.compose._pipeline.F...</td>\n", " </tr>\n", " <tr>\n", - " <th>10</th>\n", + " <th>11</th>\n", " <td>ForecastingGridSearchCV</td>\n", " <td><class 'sktime.forecasting.model_selection._tu...</td>\n", " </tr>\n", " <tr>\n", - " <th>11</th>\n", + " <th>12</th>\n", " <td>ForecastingPipeline</td>\n", " <td><class 'sktime.forecasting.compose._pipeline.F...</td>\n", " </tr>\n", " <tr>\n", - " <th>12</th>\n", + " <th>13</th>\n", " <td>ForecastingRandomizedSearchCV</td>\n", " <td><class 'sktime.forecasting.model_selection._tu...</td>\n", " </tr>\n", " <tr>\n", - " <th>13</th>\n", + " <th>14</th>\n", " <td>ForecastingSkoptSearchCV</td>\n", " <td><class 'sktime.forecasting.model_selection._tu...</td>\n", " </tr>\n", " <tr>\n", - " <th>14</th>\n", + " <th>15</th>\n", " <td>NaiveForecaster</td>\n", " <td><class 'sktime.forecasting.naive.NaiveForecast...</td>\n", " </tr>\n", " <tr>\n", - " <th>15</th>\n", + " <th>16</th>\n", " <td>NaiveVariance</td>\n", " <td><class 'sktime.forecasting.naive.NaiveVariance'></td>\n", " </tr>\n", " <tr>\n", - " <th>16</th>\n", + " <th>17</th>\n", " <td>Permute</td>\n", " <td><class 'sktime.forecasting.compose._pipeline.P...</td>\n", " </tr>\n", " <tr>\n", - " <th>17</th>\n", + " <th>18</th>\n", " <td>Prophet</td>\n", " <td><class 'sktime.forecasting.fbprophet.Prophet'></td>\n", " </tr>\n", " <tr>\n", - " <th>18</th>\n", + " <th>19</th>\n", " <td>SARIMAX</td>\n", " <td><class 'sktime.forecasting.sarimax.SARIMAX'></td>\n", " </tr>\n", " <tr>\n", - " <th>19</th>\n", + " <th>20</th>\n", " <td>SquaringResiduals</td>\n", " <td><class 'sktime.forecasting.squaring_residuals....</td>\n", " </tr>\n", " <tr>\n", - " <th>20</th>\n", + " <th>21</th>\n", " <td>StatsForecastARCH</td>\n", - " <td><class 'sktime.forecasting.arch.StatsForecastA...</td>\n", + " <td><class 'sktime.forecasting.arch._statsforecast...</td>\n", " </tr>\n", " <tr>\n", - " <th>21</th>\n", + " <th>22</th>\n", " <td>StatsForecastAutoARIMA</td>\n", " <td><class 'sktime.forecasting.statsforecast.Stats...</td>\n", " </tr>\n", " <tr>\n", - " <th>22</th>\n", + " <th>23</th>\n", " <td>StatsForecastAutoCES</td>\n", " <td><class 'sktime.forecasting.statsforecast.Stats...</td>\n", " </tr>\n", " <tr>\n", - " <th>23</th>\n", + " <th>24</th>\n", " <td>StatsForecastAutoETS</td>\n", " <td><class 'sktime.forecasting.statsforecast.Stats...</td>\n", " </tr>\n", " <tr>\n", - " <th>24</th>\n", + " <th>25</th>\n", " <td>StatsForecastAutoTheta</td>\n", " <td><class 'sktime.forecasting.statsforecast.Stats...</td>\n", " </tr>\n", " <tr>\n", - " <th>25</th>\n", + " <th>26</th>\n", " <td>StatsForecastGARCH</td>\n", - " <td><class 'sktime.forecasting.arch.StatsForecastG...</td>\n", + " <td><class 'sktime.forecasting.arch._statsforecast...</td>\n", " </tr>\n", " <tr>\n", - " <th>26</th>\n", + " <th>27</th>\n", " <td>StatsForecastMSTL</td>\n", " <td><class 'sktime.forecasting.statsforecast.Stats...</td>\n", " </tr>\n", " <tr>\n", - " <th>27</th>\n", + " <th>28</th>\n", " <td>TBATS</td>\n", " <td><class 'sktime.forecasting.tbats.TBATS'></td>\n", " </tr>\n", " <tr>\n", - " <th>28</th>\n", + " <th>29</th>\n", " <td>ThetaForecaster</td>\n", " <td><class 'sktime.forecasting.theta.ThetaForecast...</td>\n", " </tr>\n", " <tr>\n", - " <th>29</th>\n", + " <th>30</th>\n", " <td>TransformedTargetForecaster</td>\n", " <td><class 'sktime.forecasting.compose._pipeline.T...</td>\n", " </tr>\n", " <tr>\n", - " <th>30</th>\n", + " <th>31</th>\n", " <td>UnobservedComponents</td>\n", " <td><class 'sktime.forecasting.structural.Unobserv...</td>\n", " </tr>\n", " <tr>\n", - " <th>31</th>\n", + " <th>32</th>\n", " <td>VAR</td>\n", " <td><class 'sktime.forecasting.var.VAR'></td>\n", " </tr>\n", " <tr>\n", - " <th>32</th>\n", + " <th>33</th>\n", " <td>VECM</td>\n", " <td><class 'sktime.forecasting.vecm.VECM'></td>\n", " </tr>\n", + " <tr>\n", + " <th>34</th>\n", + " <td>YfromX</td>\n", + " <td><class 'sktime.forecasting.compose._reduce.Yfr...</td>\n", + " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " name \\\n", - "0 ARIMA \n", - "1 AutoARIMA \n", - "2 AutoETS \n", - "3 BATS \n", - "4 BaggingForecaster \n", - "5 ColumnEnsembleForecaster \n", - "6 ConformalIntervals \n", - "7 DynamicFactor \n", - "8 FhPlexForecaster \n", - "9 ForecastX \n", - "10 ForecastingGridSearchCV \n", - "11 ForecastingPipeline \n", - "12 ForecastingRandomizedSearchCV \n", - "13 ForecastingSkoptSearchCV \n", - "14 NaiveForecaster \n", - "15 NaiveVariance \n", - "16 Permute \n", - "17 Prophet \n", - "18 SARIMAX \n", - "19 SquaringResiduals \n", - "20 StatsForecastARCH \n", - "21 StatsForecastAutoARIMA \n", - "22 StatsForecastAutoCES \n", - "23 StatsForecastAutoETS \n", - "24 StatsForecastAutoTheta \n", - "25 StatsForecastGARCH \n", - "26 StatsForecastMSTL \n", - "27 TBATS \n", - "28 ThetaForecaster \n", - "29 TransformedTargetForecaster \n", - "30 UnobservedComponents \n", - "31 VAR \n", - "32 VECM \n", + "0 ARCH \n", + "1 ARIMA \n", + "2 AutoARIMA \n", + "3 AutoETS \n", + "4 BATS \n", + "5 BaggingForecaster \n", + "6 ColumnEnsembleForecaster \n", + "7 ConformalIntervals \n", + "8 DynamicFactor \n", + "9 FhPlexForecaster \n", + "10 ForecastX \n", + "11 ForecastingGridSearchCV \n", + "12 ForecastingPipeline \n", + "13 ForecastingRandomizedSearchCV \n", + "14 ForecastingSkoptSearchCV \n", + "15 NaiveForecaster \n", + "16 NaiveVariance \n", + "17 Permute \n", + "18 Prophet \n", + "19 SARIMAX \n", + "20 SquaringResiduals \n", + "21 StatsForecastARCH \n", + "22 StatsForecastAutoARIMA \n", + "23 StatsForecastAutoCES \n", + "24 StatsForecastAutoETS \n", + "25 StatsForecastAutoTheta \n", + "26 StatsForecastGARCH \n", + "27 StatsForecastMSTL \n", + "28 TBATS \n", + "29 ThetaForecaster \n", + "30 TransformedTargetForecaster \n", + "31 UnobservedComponents \n", + "32 VAR \n", + "33 VECM \n", + "34 YfromX \n", "\n", " object \n", - "0 <class 'sktime.forecasting.arima.ARIMA'> \n", - "1 <class 'sktime.forecasting.arima.AutoARIMA'> \n", - "2 <class 'sktime.forecasting.ets.AutoETS'> \n", - "3 <class 'sktime.forecasting.bats.BATS'> \n", - "4 <class 'sktime.forecasting.compose._bagging.Ba... \n", - "5 <class 'sktime.forecasting.compose._column_ens... \n", - "6 <class 'sktime.forecasting.conformal.Conformal... \n", - "7 <class 'sktime.forecasting.dynamic_factor.Dyna... \n", - "8 <class 'sktime.forecasting.compose._fhplex.FhP... \n", - "9 <class 'sktime.forecasting.compose._pipeline.F... \n", - "10 <class 'sktime.forecasting.model_selection._tu... \n", - "11 <class 'sktime.forecasting.compose._pipeline.F... \n", - "12 <class 'sktime.forecasting.model_selection._tu... \n", + "0 <class 'sktime.forecasting.arch._uarch.ARCH'> \n", + "1 <class 'sktime.forecasting.arima.ARIMA'> \n", + "2 <class 'sktime.forecasting.arima.AutoARIMA'> \n", + "3 <class 'sktime.forecasting.ets.AutoETS'> \n", + "4 <class 'sktime.forecasting.bats.BATS'> \n", + "5 <class 'sktime.forecasting.compose._bagging.Ba... \n", + "6 <class 'sktime.forecasting.compose._column_ens... \n", + "7 <class 'sktime.forecasting.conformal.Conformal... \n", + "8 <class 'sktime.forecasting.dynamic_factor.Dyna... \n", + "9 <class 'sktime.forecasting.compose._fhplex.FhP... \n", + "10 <class 'sktime.forecasting.compose._pipeline.F... \n", + "11 <class 'sktime.forecasting.model_selection._tu... \n", + "12 <class 'sktime.forecasting.compose._pipeline.F... \n", "13 <class 'sktime.forecasting.model_selection._tu... \n", - "14 <class 'sktime.forecasting.naive.NaiveForecast... \n", - "15 <class 'sktime.forecasting.naive.NaiveVariance'> \n", - "16 <class 'sktime.forecasting.compose._pipeline.P... \n", - "17 <class 'sktime.forecasting.fbprophet.Prophet'> \n", - "18 <class 'sktime.forecasting.sarimax.SARIMAX'> \n", - "19 <class 'sktime.forecasting.squaring_residuals.... \n", - "20 <class 'sktime.forecasting.arch.StatsForecastA... \n", - "21 <class 'sktime.forecasting.statsforecast.Stats... \n", + "14 <class 'sktime.forecasting.model_selection._tu... \n", + "15 <class 'sktime.forecasting.naive.NaiveForecast... \n", + "16 <class 'sktime.forecasting.naive.NaiveVariance'> \n", + "17 <class 'sktime.forecasting.compose._pipeline.P... \n", + "18 <class 'sktime.forecasting.fbprophet.Prophet'> \n", + "19 <class 'sktime.forecasting.sarimax.SARIMAX'> \n", + "20 <class 'sktime.forecasting.squaring_residuals.... \n", + "21 <class 'sktime.forecasting.arch._statsforecast... \n", "22 <class 'sktime.forecasting.statsforecast.Stats... \n", "23 <class 'sktime.forecasting.statsforecast.Stats... \n", "24 <class 'sktime.forecasting.statsforecast.Stats... \n", - "25 <class 'sktime.forecasting.arch.StatsForecastG... \n", - "26 <class 'sktime.forecasting.statsforecast.Stats... \n", - "27 <class 'sktime.forecasting.tbats.TBATS'> \n", - "28 <class 'sktime.forecasting.theta.ThetaForecast... \n", - "29 <class 'sktime.forecasting.compose._pipeline.T... \n", - "30 <class 'sktime.forecasting.structural.Unobserv... \n", - "31 <class 'sktime.forecasting.var.VAR'> \n", - "32 <class 'sktime.forecasting.vecm.VECM'> " + "25 <class 'sktime.forecasting.statsforecast.Stats... \n", + "26 <class 'sktime.forecasting.arch._statsforecast... \n", + "27 <class 'sktime.forecasting.statsforecast.Stats... \n", + "28 <class 'sktime.forecasting.tbats.TBATS'> \n", + "29 <class 'sktime.forecasting.theta.ThetaForecast... \n", + "30 <class 'sktime.forecasting.compose._pipeline.T... \n", + "31 <class 'sktime.forecasting.structural.Unobserv... \n", + "32 <class 'sktime.forecasting.var.VAR'> \n", + "33 <class 'sktime.forecasting.vecm.VECM'> \n", + "34 <class 'sktime.forecasting.compose._reduce.Yfr... " ] }, "execution_count": 3, @@ -609,7 +623,7 @@ { "data": { "text/html": [ - "<style>#sk-628dc63f-6155-4eae-8eb6-2bdc0dc4437e {color: black;background-color: white;}#sk-628dc63f-6155-4eae-8eb6-2bdc0dc4437e pre{padding: 0;}#sk-628dc63f-6155-4eae-8eb6-2bdc0dc4437e div.sk-toggleable {background-color: white;}#sk-628dc63f-6155-4eae-8eb6-2bdc0dc4437e label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-628dc63f-6155-4eae-8eb6-2bdc0dc4437e label.sk-toggleable__label-arrow:before {content: \"βΈ\";float: left;margin-right: 0.25em;color: #696969;}#sk-628dc63f-6155-4eae-8eb6-2bdc0dc4437e label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-628dc63f-6155-4eae-8eb6-2bdc0dc4437e div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-628dc63f-6155-4eae-8eb6-2bdc0dc4437e div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-628dc63f-6155-4eae-8eb6-2bdc0dc4437e div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-628dc63f-6155-4eae-8eb6-2bdc0dc4437e input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-628dc63f-6155-4eae-8eb6-2bdc0dc4437e input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"βΎ\";}#sk-628dc63f-6155-4eae-8eb6-2bdc0dc4437e div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-628dc63f-6155-4eae-8eb6-2bdc0dc4437e div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-628dc63f-6155-4eae-8eb6-2bdc0dc4437e input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-628dc63f-6155-4eae-8eb6-2bdc0dc4437e div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-628dc63f-6155-4eae-8eb6-2bdc0dc4437e div.sk-estimator:hover {background-color: #d4ebff;}#sk-628dc63f-6155-4eae-8eb6-2bdc0dc4437e div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-628dc63f-6155-4eae-8eb6-2bdc0dc4437e div.sk-label:hover label.sk-toggleable__label {background-color: #d4ebff;}#sk-628dc63f-6155-4eae-8eb6-2bdc0dc4437e div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 2em;bottom: 0;left: 50%;}#sk-628dc63f-6155-4eae-8eb6-2bdc0dc4437e div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;}#sk-628dc63f-6155-4eae-8eb6-2bdc0dc4437e div.sk-item {z-index: 1;}#sk-628dc63f-6155-4eae-8eb6-2bdc0dc4437e div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;}#sk-628dc63f-6155-4eae-8eb6-2bdc0dc4437e div.sk-parallel::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 2em;bottom: 0;left: 50%;}#sk-628dc63f-6155-4eae-8eb6-2bdc0dc4437e div.sk-parallel-item {display: flex;flex-direction: column;position: relative;background-color: white;}#sk-628dc63f-6155-4eae-8eb6-2bdc0dc4437e div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-628dc63f-6155-4eae-8eb6-2bdc0dc4437e div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-628dc63f-6155-4eae-8eb6-2bdc0dc4437e div.sk-parallel-item:only-child::after {width: 0;}#sk-628dc63f-6155-4eae-8eb6-2bdc0dc4437e div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;position: relative;}#sk-628dc63f-6155-4eae-8eb6-2bdc0dc4437e div.sk-label label {font-family: monospace;font-weight: bold;background-color: white;display: inline-block;line-height: 1.2em;}#sk-628dc63f-6155-4eae-8eb6-2bdc0dc4437e div.sk-label-container {position: relative;z-index: 2;text-align: center;}#sk-628dc63f-6155-4eae-8eb6-2bdc0dc4437e div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-628dc63f-6155-4eae-8eb6-2bdc0dc4437e div.sk-text-repr-fallback {display: none;}</style><div id='sk-628dc63f-6155-4eae-8eb6-2bdc0dc4437e' class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>ThetaForecaster(sp=12)</pre><b>Please rerun this cell to show the HTML repr or trust the notebook.</b></div><div class=\"sk-container\" hidden><div class='sk-item'><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=UUID('8ad53796-209d-4b30-87f5-e9980e415ced') type=\"checkbox\" checked><label for=UUID('8ad53796-209d-4b30-87f5-e9980e415ced') class='sk-toggleable__label sk-toggleable__label-arrow'>ThetaForecaster</label><div class=\"sk-toggleable__content\"><pre>ThetaForecaster(sp=12)</pre></div></div></div></div></div>" + "<style>#sk-9b5446bb-0900-46a6-988d-1aa88ae0e4ff {color: black;background-color: white;}#sk-9b5446bb-0900-46a6-988d-1aa88ae0e4ff pre{padding: 0;}#sk-9b5446bb-0900-46a6-988d-1aa88ae0e4ff div.sk-toggleable {background-color: white;}#sk-9b5446bb-0900-46a6-988d-1aa88ae0e4ff label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-9b5446bb-0900-46a6-988d-1aa88ae0e4ff label.sk-toggleable__label-arrow:before {content: \"βΈ\";float: left;margin-right: 0.25em;color: #696969;}#sk-9b5446bb-0900-46a6-988d-1aa88ae0e4ff label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-9b5446bb-0900-46a6-988d-1aa88ae0e4ff div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-9b5446bb-0900-46a6-988d-1aa88ae0e4ff div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-9b5446bb-0900-46a6-988d-1aa88ae0e4ff div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-9b5446bb-0900-46a6-988d-1aa88ae0e4ff input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-9b5446bb-0900-46a6-988d-1aa88ae0e4ff input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"βΎ\";}#sk-9b5446bb-0900-46a6-988d-1aa88ae0e4ff div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-9b5446bb-0900-46a6-988d-1aa88ae0e4ff div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-9b5446bb-0900-46a6-988d-1aa88ae0e4ff input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-9b5446bb-0900-46a6-988d-1aa88ae0e4ff div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-9b5446bb-0900-46a6-988d-1aa88ae0e4ff div.sk-estimator:hover {background-color: #d4ebff;}#sk-9b5446bb-0900-46a6-988d-1aa88ae0e4ff div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-9b5446bb-0900-46a6-988d-1aa88ae0e4ff div.sk-label:hover label.sk-toggleable__label {background-color: #d4ebff;}#sk-9b5446bb-0900-46a6-988d-1aa88ae0e4ff div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 2em;bottom: 0;left: 50%;}#sk-9b5446bb-0900-46a6-988d-1aa88ae0e4ff div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;}#sk-9b5446bb-0900-46a6-988d-1aa88ae0e4ff div.sk-item {z-index: 1;}#sk-9b5446bb-0900-46a6-988d-1aa88ae0e4ff div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;}#sk-9b5446bb-0900-46a6-988d-1aa88ae0e4ff div.sk-parallel::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 2em;bottom: 0;left: 50%;}#sk-9b5446bb-0900-46a6-988d-1aa88ae0e4ff div.sk-parallel-item {display: flex;flex-direction: column;position: relative;background-color: white;}#sk-9b5446bb-0900-46a6-988d-1aa88ae0e4ff div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-9b5446bb-0900-46a6-988d-1aa88ae0e4ff div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-9b5446bb-0900-46a6-988d-1aa88ae0e4ff div.sk-parallel-item:only-child::after {width: 0;}#sk-9b5446bb-0900-46a6-988d-1aa88ae0e4ff div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;position: relative;}#sk-9b5446bb-0900-46a6-988d-1aa88ae0e4ff div.sk-label label {font-family: monospace;font-weight: bold;background-color: white;display: inline-block;line-height: 1.2em;}#sk-9b5446bb-0900-46a6-988d-1aa88ae0e4ff div.sk-label-container {position: relative;z-index: 2;text-align: center;}#sk-9b5446bb-0900-46a6-988d-1aa88ae0e4ff div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-9b5446bb-0900-46a6-988d-1aa88ae0e4ff div.sk-text-repr-fallback {display: none;}</style><div id='sk-9b5446bb-0900-46a6-988d-1aa88ae0e4ff' class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>ThetaForecaster(sp=12)</pre><b>Please rerun this cell to show the HTML repr or trust the notebook.</b></div><div class=\"sk-container\" hidden><div class='sk-item'><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=UUID('119f3171-5281-4480-893d-ca664488469f') type=\"checkbox\" checked><label for=UUID('119f3171-5281-4480-893d-ca664488469f') class='sk-toggleable__label sk-toggleable__label-arrow'>ThetaForecaster</label><div class=\"sk-toggleable__content\"><pre>ThetaForecaster(sp=12)</pre></div></div></div></div></div>" ], "text/plain": [ "ThetaForecaster(sp=12)" @@ -745,7 +759,7 @@ " </tr>\n", " <tr>\n", " <th>1961-09</th>\n", - " <td>477.658056</td>\n", + " <td>477.658055</td>\n", " <td>573.047752</td>\n", " </tr>\n", " <tr>\n", @@ -756,7 +770,7 @@ " <tr>\n", " <th>1961-11</th>\n", " <td>346.942924</td>\n", - " <td>451.082016</td>\n", + " <td>451.082017</td>\n", " </tr>\n", " <tr>\n", " <th>1961-12</th>\n", @@ -779,9 +793,9 @@ "1961-06 506.585814 587.087737\n", "1961-07 561.496768 647.248956\n", "1961-08 557.363322 648.062363\n", - "1961-09 477.658056 573.047752\n", + "1961-09 477.658055 573.047752\n", "1961-10 407.915090 507.775355\n", - "1961-11 346.942924 451.082016\n", + "1961-11 346.942924 451.082017\n", "1961-12 394.708221 502.957142" ] }, @@ -812,7 +826,7 @@ "outputs": [ { "data": { - "image/png": "", + "image/png": "", "text/plain": [ "<Figure size 1600x400 with 1 Axes>" ] @@ -964,7 +978,7 @@ " <th>1961-09</th>\n", " <td>505.795123</td>\n", " <td>544.910684</td>\n", - " <td>477.658056</td>\n", + " <td>477.658055</td>\n", " <td>573.047752</td>\n", " <td>468.520987</td>\n", " <td>582.184821</td>\n", @@ -983,9 +997,9 @@ " <td>377.660798</td>\n", " <td>420.364142</td>\n", " <td>346.942924</td>\n", - " <td>451.082016</td>\n", + " <td>451.082017</td>\n", " <td>336.967779</td>\n", - " <td>461.057161</td>\n", + " <td>461.057162</td>\n", " </tr>\n", " <tr>\n", " <th>1961-12</th>\n", @@ -993,7 +1007,7 @@ " <td>471.026993</td>\n", " <td>394.708221</td>\n", " <td>502.957142</td>\n", - " <td>384.339409</td>\n", + " <td>384.339408</td>\n", " <td>513.325954</td>\n", " </tr>\n", " </tbody>\n", @@ -1012,9 +1026,9 @@ "1961-06 530.331440 563.342111 506.585814 587.087737 \n", "1961-07 586.791063 621.954661 561.496768 647.248956 \n", "1961-08 584.116789 621.308897 557.363322 648.062363 \n", - "1961-09 505.795123 544.910684 477.658056 573.047752 \n", + "1961-09 505.795123 544.910684 477.658055 573.047752 \n", "1961-10 437.370840 478.319605 407.915090 507.775355 \n", - "1961-11 377.660798 420.364142 346.942924 451.082016 \n", + "1961-11 377.660798 420.364142 346.942924 451.082017 \n", "1961-12 426.638370 471.026993 394.708221 502.957142 \n", "\n", " \n", @@ -1030,8 +1044,8 @@ "1961-08 548.675556 656.750129 \n", "1961-09 468.520987 582.184821 \n", "1961-10 398.349800 517.340645 \n", - "1961-11 336.967779 461.057161 \n", - "1961-12 384.339409 513.325954 " + "1961-11 336.967779 461.057162 \n", + "1961-12 384.339408 513.325954 " ] }, "execution_count": 7, @@ -1153,7 +1167,7 @@ " </tr>\n", " <tr>\n", " <th>1961-06</th>\n", - " <td>515.476124</td>\n", + " <td>515.476123</td>\n", " <td>530.331440</td>\n", " <td>546.836776</td>\n", " <td>563.342111</td>\n", @@ -1161,7 +1175,7 @@ " </tr>\n", " <tr>\n", " <th>1961-07</th>\n", - " <td>570.966896</td>\n", + " <td>570.966895</td>\n", " <td>586.791063</td>\n", " <td>604.372862</td>\n", " <td>621.954661</td>\n", @@ -1219,8 +1233,8 @@ "1961-03 466.829089 478.296822 491.038306 503.779790 \n", "1961-04 450.188398 462.886144 476.994261 491.102379 \n", "1961-05 451.794965 465.613670 480.967253 496.320837 \n", - "1961-06 515.476124 530.331440 546.836776 563.342111 \n", - "1961-07 570.966896 586.791063 604.372862 621.954661 \n", + "1961-06 515.476123 530.331440 546.836776 563.342111 \n", + "1961-07 570.966895 586.791063 604.372862 621.954661 \n", "1961-08 567.379760 584.116789 602.712843 621.308897 \n", "1961-09 488.192511 505.795123 525.352904 544.910684 \n", "1961-10 418.943257 437.370840 457.845222 478.319605 \n", @@ -1270,7 +1284,7 @@ "outputs": [ { "data": { - "image/png": "", + "image/png": "", "text/plain": [ "<Figure size 1600x400 with 1 Axes>" ] @@ -1348,47 +1362,47 @@ " </tr>\n", " <tr>\n", " <th>1961-02</th>\n", - " <td>276.196509</td>\n", + " <td>276.196510</td>\n", " </tr>\n", " <tr>\n", " <th>1961-03</th>\n", - " <td>356.852968</td>\n", + " <td>356.852970</td>\n", " </tr>\n", " <tr>\n", " <th>1961-04</th>\n", - " <td>437.509428</td>\n", + " <td>437.509430</td>\n", " </tr>\n", " <tr>\n", " <th>1961-05</th>\n", - " <td>518.165887</td>\n", + " <td>518.165890</td>\n", " </tr>\n", " <tr>\n", " <th>1961-06</th>\n", - " <td>598.822347</td>\n", + " <td>598.822350</td>\n", " </tr>\n", " <tr>\n", " <th>1961-07</th>\n", - " <td>679.478807</td>\n", + " <td>679.478810</td>\n", " </tr>\n", " <tr>\n", " <th>1961-08</th>\n", - " <td>760.135266</td>\n", + " <td>760.135270</td>\n", " </tr>\n", " <tr>\n", " <th>1961-09</th>\n", - " <td>840.791726</td>\n", + " <td>840.791730</td>\n", " </tr>\n", " <tr>\n", " <th>1961-10</th>\n", - " <td>921.448185</td>\n", + " <td>921.448190</td>\n", " </tr>\n", " <tr>\n", " <th>1961-11</th>\n", - " <td>1002.104645</td>\n", + " <td>1002.104650</td>\n", " </tr>\n", " <tr>\n", " <th>1961-12</th>\n", - " <td>1082.761105</td>\n", + " <td>1082.761110</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", @@ -1397,17 +1411,17 @@ "text/plain": [ " Number of airline passengers\n", "1961-01 195.540049\n", - "1961-02 276.196509\n", - "1961-03 356.852968\n", - "1961-04 437.509428\n", - "1961-05 518.165887\n", - "1961-06 598.822347\n", - "1961-07 679.478807\n", - "1961-08 760.135266\n", - "1961-09 840.791726\n", - "1961-10 921.448185\n", - "1961-11 1002.104645\n", - "1961-12 1082.761105" + "1961-02 276.196510\n", + "1961-03 356.852970\n", + "1961-04 437.509430\n", + "1961-05 518.165890\n", + "1961-06 598.822350\n", + "1961-07 679.478810\n", + "1961-08 760.135270\n", + "1961-09 840.791730\n", + "1961-10 921.448190\n", + "1961-11 1002.104650\n", + "1961-12 1082.761110" ] }, "execution_count": 10, @@ -1476,183 +1490,183 @@ " <tbody>\n", " <tr>\n", " <th>1961-01</th>\n", - " <td>292.337338</td>\n", - " <td>255.743002</td>\n", - " <td>264.805454</td>\n", - " <td>227.703065</td>\n", - " <td>146.093860</td>\n", - " <td>154.452835</td>\n", - " <td>157.976801</td>\n", - " <td>105.160769</td>\n", - " <td>78.330257</td>\n", - " <td>81.835803</td>\n", + " <td>292.337333</td>\n", + " <td>255.742991</td>\n", + " <td>264.805437</td>\n", + " <td>227.703049</td>\n", + " <td>146.093848</td>\n", + " <td>154.452828</td>\n", + " <td>157.976795</td>\n", + " <td>105.160767</td>\n", + " <td>78.330263</td>\n", + " <td>81.835807</td>\n", " <td>78.048880</td>\n", - " <td>197.364513</td>\n", + " <td>197.364510</td>\n", " </tr>\n", " <tr>\n", " <th>1961-02</th>\n", - " <td>255.743002</td>\n", - " <td>422.704619</td>\n", - " <td>402.539279</td>\n", - " <td>353.437066</td>\n", - " <td>291.205423</td>\n", - " <td>236.587887</td>\n", - " <td>227.199386</td>\n", - " <td>205.653016</td>\n", - " <td>152.067422</td>\n", - " <td>121.629136</td>\n", - " <td>156.199113</td>\n", - " <td>245.437913</td>\n", + " <td>255.742991</td>\n", + " <td>422.704601</td>\n", + " <td>402.539255</td>\n", + " <td>353.437043</td>\n", + " <td>291.205404</td>\n", + " <td>236.587874</td>\n", + " <td>227.199374</td>\n", + " <td>205.653010</td>\n", + " <td>152.067425</td>\n", + " <td>121.629138</td>\n", + " <td>156.199110</td>\n", + " <td>245.437907</td>\n", " </tr>\n", " <tr>\n", " <th>1961-03</th>\n", - " <td>264.805454</td>\n", - " <td>402.539279</td>\n", - " <td>588.085358</td>\n", - " <td>506.095484</td>\n", - " <td>426.997535</td>\n", - " <td>394.503941</td>\n", - " <td>311.457854</td>\n", - " <td>282.072157</td>\n", + " <td>264.805437</td>\n", + " <td>402.539255</td>\n", + " <td>588.085328</td>\n", + " <td>506.095455</td>\n", + " <td>426.997512</td>\n", + " <td>394.503923</td>\n", + " <td>311.457837</td>\n", + " <td>282.072145</td>\n", " <td>243.688600</td>\n", - " <td>185.938841</td>\n", - " <td>185.070365</td>\n", - " <td>305.461220</td>\n", + " <td>185.938840</td>\n", + " <td>185.070360</td>\n", + " <td>305.461211</td>\n", " </tr>\n", " <tr>\n", " <th>1961-04</th>\n", - " <td>227.703065</td>\n", - " <td>353.437066</td>\n", - " <td>506.095484</td>\n", - " <td>634.350469</td>\n", - " <td>526.180900</td>\n", - " <td>482.653111</td>\n", - " <td>422.777319</td>\n", - " <td>323.453753</td>\n", - " <td>280.749314</td>\n", - " <td>242.065791</td>\n", - " <td>211.397170</td>\n", - " <td>294.971041</td>\n", + " <td>227.703049</td>\n", + " <td>353.437043</td>\n", + " <td>506.095455</td>\n", + " <td>634.350443</td>\n", + " <td>526.180879</td>\n", + " <td>482.653094</td>\n", + " <td>422.777303</td>\n", + " <td>323.453741</td>\n", + " <td>280.749312</td>\n", + " <td>242.065788</td>\n", + " <td>211.397164</td>\n", + " <td>294.971031</td>\n", " </tr>\n", " <tr>\n", " <th>1961-05</th>\n", - " <td>146.093860</td>\n", - " <td>291.205423</td>\n", - " <td>426.997535</td>\n", - " <td>526.180900</td>\n", - " <td>628.659359</td>\n", - " <td>570.277532</td>\n", - " <td>499.460195</td>\n", - " <td>419.166450</td>\n", + " <td>146.093848</td>\n", + " <td>291.205404</td>\n", + " <td>426.997512</td>\n", + " <td>526.180879</td>\n", + " <td>628.659343</td>\n", + " <td>570.277520</td>\n", + " <td>499.460184</td>\n", + " <td>419.166444</td>\n", " <td>325.582777</td>\n", - " <td>281.608607</td>\n", - " <td>269.847443</td>\n", - " <td>318.534683</td>\n", + " <td>281.608605</td>\n", + " <td>269.847439</td>\n", + " <td>318.534675</td>\n", " </tr>\n", " <tr>\n", " <th>1961-06</th>\n", - " <td>154.452835</td>\n", - " <td>236.587887</td>\n", - " <td>394.503941</td>\n", - " <td>482.653111</td>\n", - " <td>570.277532</td>\n", - " <td>728.132505</td>\n", - " <td>629.184846</td>\n", - " <td>527.767036</td>\n", - " <td>444.690514</td>\n", - " <td>330.643653</td>\n", - " <td>313.248427</td>\n", - " <td>382.803221</td>\n", + " <td>154.452828</td>\n", + " <td>236.587874</td>\n", + " <td>394.503923</td>\n", + " <td>482.653094</td>\n", + " <td>570.277520</td>\n", + " <td>728.132497</td>\n", + " <td>629.184840</td>\n", + " <td>527.767034</td>\n", + " <td>444.690518</td>\n", + " <td>330.643655</td>\n", + " <td>313.248426</td>\n", + " <td>382.803216</td>\n", " </tr>\n", " <tr>\n", " <th>1961-07</th>\n", - " <td>157.976801</td>\n", - " <td>227.199386</td>\n", - " <td>311.457854</td>\n", - " <td>422.777319</td>\n", - " <td>499.460195</td>\n", - " <td>629.184846</td>\n", - " <td>753.550007</td>\n", + " <td>157.976795</td>\n", + " <td>227.199374</td>\n", + " <td>311.457837</td>\n", + " <td>422.777303</td>\n", + " <td>499.460184</td>\n", + " <td>629.184840</td>\n", + " <td>753.550004</td>\n", " <td>629.138725</td>\n", - " <td>536.407564</td>\n", - " <td>441.998603</td>\n", - " <td>352.570968</td>\n", - " <td>415.110922</td>\n", + " <td>536.407567</td>\n", + " <td>441.998605</td>\n", + " <td>352.570966</td>\n", + " <td>415.110916</td>\n", " </tr>\n", " <tr>\n", " <th>1961-08</th>\n", - " <td>105.160769</td>\n", - " <td>205.653016</td>\n", - " <td>282.072157</td>\n", - " <td>323.453753</td>\n", - " <td>419.166450</td>\n", - " <td>527.767036</td>\n", + " <td>105.160767</td>\n", + " <td>205.653010</td>\n", + " <td>282.072145</td>\n", + " <td>323.453741</td>\n", + " <td>419.166444</td>\n", + " <td>527.767034</td>\n", " <td>629.138725</td>\n", - " <td>729.423302</td>\n", - " <td>615.142484</td>\n", - " <td>506.155610</td>\n", - " <td>439.994837</td>\n", - " <td>430.992295</td>\n", + " <td>729.423304</td>\n", + " <td>615.142491</td>\n", + " <td>506.155614</td>\n", + " <td>439.994838</td>\n", + " <td>430.992291</td>\n", " </tr>\n", " <tr>\n", " <th>1961-09</th>\n", - " <td>78.330257</td>\n", - " <td>152.067422</td>\n", + " <td>78.330263</td>\n", + " <td>152.067425</td>\n", " <td>243.688600</td>\n", - " <td>280.749314</td>\n", + " <td>280.749312</td>\n", " <td>325.582777</td>\n", - " <td>444.690514</td>\n", - " <td>536.407564</td>\n", - " <td>615.142484</td>\n", - " <td>744.225555</td>\n", - " <td>609.227136</td>\n", - " <td>527.489574</td>\n", - " <td>546.637590</td>\n", + " <td>444.690518</td>\n", + " <td>536.407567</td>\n", + " <td>615.142491</td>\n", + " <td>744.225561</td>\n", + " <td>609.227140</td>\n", + " <td>527.489573</td>\n", + " <td>546.637585</td>\n", " </tr>\n", " <tr>\n", " <th>1961-10</th>\n", - " <td>81.835803</td>\n", - " <td>121.629136</td>\n", - " <td>185.938841</td>\n", - " <td>242.065791</td>\n", - " <td>281.608607</td>\n", - " <td>330.643653</td>\n", - " <td>441.998603</td>\n", - " <td>506.155610</td>\n", - " <td>609.227136</td>\n", - " <td>697.805477</td>\n", - " <td>590.542045</td>\n", - " <td>604.681136</td>\n", + " <td>81.835807</td>\n", + " <td>121.629138</td>\n", + " <td>185.938840</td>\n", + " <td>242.065788</td>\n", + " <td>281.608605</td>\n", + " <td>330.643655</td>\n", + " <td>441.998605</td>\n", + " <td>506.155614</td>\n", + " <td>609.227140</td>\n", + " <td>697.805479</td>\n", + " <td>590.542043</td>\n", + " <td>604.681130</td>\n", " </tr>\n", " <tr>\n", " <th>1961-11</th>\n", " <td>78.048880</td>\n", - " <td>156.199113</td>\n", - " <td>185.070365</td>\n", - " <td>211.397170</td>\n", - " <td>269.847443</td>\n", - " <td>313.248427</td>\n", - " <td>352.570968</td>\n", - " <td>439.994837</td>\n", - " <td>527.489574</td>\n", - " <td>590.542045</td>\n", - " <td>706.960631</td>\n", - " <td>698.982589</td>\n", + " <td>156.199110</td>\n", + " <td>185.070360</td>\n", + " <td>211.397164</td>\n", + " <td>269.847439</td>\n", + " <td>313.248426</td>\n", + " <td>352.570966</td>\n", + " <td>439.994838</td>\n", + " <td>527.489573</td>\n", + " <td>590.542043</td>\n", + " <td>706.960626</td>\n", + " <td>698.982580</td>\n", " </tr>\n", " <tr>\n", " <th>1961-12</th>\n", - " <td>197.364513</td>\n", - " <td>245.437913</td>\n", - " <td>305.461220</td>\n", - " <td>294.971041</td>\n", - " <td>318.534683</td>\n", - " <td>382.803221</td>\n", - " <td>415.110922</td>\n", - " <td>430.992295</td>\n", - " <td>546.637590</td>\n", - " <td>604.681136</td>\n", - " <td>698.982589</td>\n", - " <td>913.698243</td>\n", + " <td>197.364510</td>\n", + " <td>245.437907</td>\n", + " <td>305.461211</td>\n", + " <td>294.971031</td>\n", + " <td>318.534675</td>\n", + " <td>382.803216</td>\n", + " <td>415.110916</td>\n", + " <td>430.992291</td>\n", + " <td>546.637585</td>\n", + " <td>604.681130</td>\n", + " <td>698.982580</td>\n", + " <td>913.698229</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", @@ -1660,46 +1674,46 @@ ], "text/plain": [ " 1961-01 1961-02 1961-03 1961-04 1961-05 \\\n", - "1961-01 292.337338 255.743002 264.805454 227.703065 146.093860 \n", - "1961-02 255.743002 422.704619 402.539279 353.437066 291.205423 \n", - "1961-03 264.805454 402.539279 588.085358 506.095484 426.997535 \n", - "1961-04 227.703065 353.437066 506.095484 634.350469 526.180900 \n", - "1961-05 146.093860 291.205423 426.997535 526.180900 628.659359 \n", - "1961-06 154.452835 236.587887 394.503941 482.653111 570.277532 \n", - "1961-07 157.976801 227.199386 311.457854 422.777319 499.460195 \n", - "1961-08 105.160769 205.653016 282.072157 323.453753 419.166450 \n", - "1961-09 78.330257 152.067422 243.688600 280.749314 325.582777 \n", - "1961-10 81.835803 121.629136 185.938841 242.065791 281.608607 \n", - "1961-11 78.048880 156.199113 185.070365 211.397170 269.847443 \n", - "1961-12 197.364513 245.437913 305.461220 294.971041 318.534683 \n", + "1961-01 292.337333 255.742991 264.805437 227.703049 146.093848 \n", + "1961-02 255.742991 422.704601 402.539255 353.437043 291.205404 \n", + "1961-03 264.805437 402.539255 588.085328 506.095455 426.997512 \n", + "1961-04 227.703049 353.437043 506.095455 634.350443 526.180879 \n", + "1961-05 146.093848 291.205404 426.997512 526.180879 628.659343 \n", + "1961-06 154.452828 236.587874 394.503923 482.653094 570.277520 \n", + "1961-07 157.976795 227.199374 311.457837 422.777303 499.460184 \n", + "1961-08 105.160767 205.653010 282.072145 323.453741 419.166444 \n", + "1961-09 78.330263 152.067425 243.688600 280.749312 325.582777 \n", + "1961-10 81.835807 121.629138 185.938840 242.065788 281.608605 \n", + "1961-11 78.048880 156.199110 185.070360 211.397164 269.847439 \n", + "1961-12 197.364510 245.437907 305.461211 294.971031 318.534675 \n", "\n", " 1961-06 1961-07 1961-08 1961-09 1961-10 \\\n", - "1961-01 154.452835 157.976801 105.160769 78.330257 81.835803 \n", - "1961-02 236.587887 227.199386 205.653016 152.067422 121.629136 \n", - "1961-03 394.503941 311.457854 282.072157 243.688600 185.938841 \n", - "1961-04 482.653111 422.777319 323.453753 280.749314 242.065791 \n", - "1961-05 570.277532 499.460195 419.166450 325.582777 281.608607 \n", - "1961-06 728.132505 629.184846 527.767036 444.690514 330.643653 \n", - "1961-07 629.184846 753.550007 629.138725 536.407564 441.998603 \n", - "1961-08 527.767036 629.138725 729.423302 615.142484 506.155610 \n", - "1961-09 444.690514 536.407564 615.142484 744.225555 609.227136 \n", - "1961-10 330.643653 441.998603 506.155610 609.227136 697.805477 \n", - "1961-11 313.248427 352.570968 439.994837 527.489574 590.542045 \n", - "1961-12 382.803221 415.110922 430.992295 546.637590 604.681136 \n", + "1961-01 154.452828 157.976795 105.160767 78.330263 81.835807 \n", + "1961-02 236.587874 227.199374 205.653010 152.067425 121.629138 \n", + "1961-03 394.503923 311.457837 282.072145 243.688600 185.938840 \n", + "1961-04 482.653094 422.777303 323.453741 280.749312 242.065788 \n", + "1961-05 570.277520 499.460184 419.166444 325.582777 281.608605 \n", + "1961-06 728.132497 629.184840 527.767034 444.690518 330.643655 \n", + "1961-07 629.184840 753.550004 629.138725 536.407567 441.998605 \n", + "1961-08 527.767034 629.138725 729.423304 615.142491 506.155614 \n", + "1961-09 444.690518 536.407567 615.142491 744.225561 609.227140 \n", + "1961-10 330.643655 441.998605 506.155614 609.227140 697.805479 \n", + "1961-11 313.248426 352.570966 439.994838 527.489573 590.542043 \n", + "1961-12 382.803216 415.110916 430.992291 546.637585 604.681130 \n", "\n", " 1961-11 1961-12 \n", - "1961-01 78.048880 197.364513 \n", - "1961-02 156.199113 245.437913 \n", - "1961-03 185.070365 305.461220 \n", - "1961-04 211.397170 294.971041 \n", - "1961-05 269.847443 318.534683 \n", - "1961-06 313.248427 382.803221 \n", - "1961-07 352.570968 415.110922 \n", - "1961-08 439.994837 430.992295 \n", - "1961-09 527.489574 546.637590 \n", - "1961-10 590.542045 604.681136 \n", - "1961-11 706.960631 698.982589 \n", - "1961-12 698.982589 913.698243 " + "1961-01 78.048880 197.364510 \n", + "1961-02 156.199110 245.437907 \n", + "1961-03 185.070360 305.461211 \n", + "1961-04 211.397164 294.971031 \n", + "1961-05 269.847439 318.534675 \n", + "1961-06 313.248426 382.803216 \n", + "1961-07 352.570966 415.110916 \n", + "1961-08 439.994838 430.992291 \n", + "1961-09 527.489573 546.637585 \n", + "1961-10 590.542043 604.681130 \n", + "1961-11 706.960626 698.982580 \n", + "1961-12 698.982580 913.698229 " ] }, "execution_count": 11, @@ -1748,7 +1762,7 @@ { "data": { "text/html": [ - "<style>#sk-b9a6a3c0-6c86-4f30-b3b2-b0775dd73409 {color: black;background-color: white;}#sk-b9a6a3c0-6c86-4f30-b3b2-b0775dd73409 pre{padding: 0;}#sk-b9a6a3c0-6c86-4f30-b3b2-b0775dd73409 div.sk-toggleable {background-color: white;}#sk-b9a6a3c0-6c86-4f30-b3b2-b0775dd73409 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-b9a6a3c0-6c86-4f30-b3b2-b0775dd73409 label.sk-toggleable__label-arrow:before {content: \"βΈ\";float: left;margin-right: 0.25em;color: #696969;}#sk-b9a6a3c0-6c86-4f30-b3b2-b0775dd73409 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-b9a6a3c0-6c86-4f30-b3b2-b0775dd73409 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-b9a6a3c0-6c86-4f30-b3b2-b0775dd73409 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-b9a6a3c0-6c86-4f30-b3b2-b0775dd73409 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-b9a6a3c0-6c86-4f30-b3b2-b0775dd73409 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-b9a6a3c0-6c86-4f30-b3b2-b0775dd73409 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"βΎ\";}#sk-b9a6a3c0-6c86-4f30-b3b2-b0775dd73409 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-b9a6a3c0-6c86-4f30-b3b2-b0775dd73409 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-b9a6a3c0-6c86-4f30-b3b2-b0775dd73409 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-b9a6a3c0-6c86-4f30-b3b2-b0775dd73409 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-b9a6a3c0-6c86-4f30-b3b2-b0775dd73409 div.sk-estimator:hover {background-color: #d4ebff;}#sk-b9a6a3c0-6c86-4f30-b3b2-b0775dd73409 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-b9a6a3c0-6c86-4f30-b3b2-b0775dd73409 div.sk-label:hover label.sk-toggleable__label {background-color: #d4ebff;}#sk-b9a6a3c0-6c86-4f30-b3b2-b0775dd73409 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 2em;bottom: 0;left: 50%;}#sk-b9a6a3c0-6c86-4f30-b3b2-b0775dd73409 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;}#sk-b9a6a3c0-6c86-4f30-b3b2-b0775dd73409 div.sk-item {z-index: 1;}#sk-b9a6a3c0-6c86-4f30-b3b2-b0775dd73409 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;}#sk-b9a6a3c0-6c86-4f30-b3b2-b0775dd73409 div.sk-parallel::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 2em;bottom: 0;left: 50%;}#sk-b9a6a3c0-6c86-4f30-b3b2-b0775dd73409 div.sk-parallel-item {display: flex;flex-direction: column;position: relative;background-color: white;}#sk-b9a6a3c0-6c86-4f30-b3b2-b0775dd73409 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-b9a6a3c0-6c86-4f30-b3b2-b0775dd73409 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-b9a6a3c0-6c86-4f30-b3b2-b0775dd73409 div.sk-parallel-item:only-child::after {width: 0;}#sk-b9a6a3c0-6c86-4f30-b3b2-b0775dd73409 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;position: relative;}#sk-b9a6a3c0-6c86-4f30-b3b2-b0775dd73409 div.sk-label label {font-family: monospace;font-weight: bold;background-color: white;display: inline-block;line-height: 1.2em;}#sk-b9a6a3c0-6c86-4f30-b3b2-b0775dd73409 div.sk-label-container {position: relative;z-index: 2;text-align: center;}#sk-b9a6a3c0-6c86-4f30-b3b2-b0775dd73409 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-b9a6a3c0-6c86-4f30-b3b2-b0775dd73409 div.sk-text-repr-fallback {display: none;}</style><div id='sk-b9a6a3c0-6c86-4f30-b3b2-b0775dd73409' class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>Normal(columns=Index(['Number of airline passengers'], dtype='object'),\n", + "<style>#sk-8125c019-59cd-4edc-b962-4062375337e4 {color: black;background-color: white;}#sk-8125c019-59cd-4edc-b962-4062375337e4 pre{padding: 0;}#sk-8125c019-59cd-4edc-b962-4062375337e4 div.sk-toggleable {background-color: white;}#sk-8125c019-59cd-4edc-b962-4062375337e4 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-8125c019-59cd-4edc-b962-4062375337e4 label.sk-toggleable__label-arrow:before {content: \"βΈ\";float: left;margin-right: 0.25em;color: #696969;}#sk-8125c019-59cd-4edc-b962-4062375337e4 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-8125c019-59cd-4edc-b962-4062375337e4 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-8125c019-59cd-4edc-b962-4062375337e4 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-8125c019-59cd-4edc-b962-4062375337e4 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-8125c019-59cd-4edc-b962-4062375337e4 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-8125c019-59cd-4edc-b962-4062375337e4 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"βΎ\";}#sk-8125c019-59cd-4edc-b962-4062375337e4 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-8125c019-59cd-4edc-b962-4062375337e4 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-8125c019-59cd-4edc-b962-4062375337e4 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-8125c019-59cd-4edc-b962-4062375337e4 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-8125c019-59cd-4edc-b962-4062375337e4 div.sk-estimator:hover {background-color: #d4ebff;}#sk-8125c019-59cd-4edc-b962-4062375337e4 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-8125c019-59cd-4edc-b962-4062375337e4 div.sk-label:hover label.sk-toggleable__label {background-color: #d4ebff;}#sk-8125c019-59cd-4edc-b962-4062375337e4 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 2em;bottom: 0;left: 50%;}#sk-8125c019-59cd-4edc-b962-4062375337e4 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;}#sk-8125c019-59cd-4edc-b962-4062375337e4 div.sk-item {z-index: 1;}#sk-8125c019-59cd-4edc-b962-4062375337e4 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;}#sk-8125c019-59cd-4edc-b962-4062375337e4 div.sk-parallel::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 2em;bottom: 0;left: 50%;}#sk-8125c019-59cd-4edc-b962-4062375337e4 div.sk-parallel-item {display: flex;flex-direction: column;position: relative;background-color: white;}#sk-8125c019-59cd-4edc-b962-4062375337e4 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-8125c019-59cd-4edc-b962-4062375337e4 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-8125c019-59cd-4edc-b962-4062375337e4 div.sk-parallel-item:only-child::after {width: 0;}#sk-8125c019-59cd-4edc-b962-4062375337e4 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;position: relative;}#sk-8125c019-59cd-4edc-b962-4062375337e4 div.sk-label label {font-family: monospace;font-weight: bold;background-color: white;display: inline-block;line-height: 1.2em;}#sk-8125c019-59cd-4edc-b962-4062375337e4 div.sk-label-container {position: relative;z-index: 2;text-align: center;}#sk-8125c019-59cd-4edc-b962-4062375337e4 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-8125c019-59cd-4edc-b962-4062375337e4 div.sk-text-repr-fallback {display: none;}</style><div id='sk-8125c019-59cd-4edc-b962-4062375337e4' class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>Normal(columns=Index(['Number of airline passengers'], dtype='object'),\n", " index=PeriodIndex(['1961-01', '1961-02', '1961-03', '1961-04', '1961-05', '1961-06',\n", " '1961-07', '1961-08', '1961-09', '1961-10', '1961-11', '1961-12'],\n", " dtype='period[M]'),\n", @@ -1776,8 +1790,8 @@ "1961-08 27.570551\n", "1961-09 28.996409\n", "1961-10 30.355365\n", - "1961-11 31.656036\n", - "1961-12 32.905336)</pre><b>Please rerun this cell to show the HTML repr or trust the notebook.</b></div><div class=\"sk-container\" hidden><div class='sk-item'><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=UUID('bb0b02b9-326f-47df-87cb-c88bfdc5a336') type=\"checkbox\" checked><label for=UUID('bb0b02b9-326f-47df-87cb-c88bfdc5a336') class='sk-toggleable__label sk-toggleable__label-arrow'>Normal</label><div class=\"sk-toggleable__content\"><pre>Normal(columns=Index(['Number of airline passengers'], dtype='object'),\n", + "1961-11 31.656037\n", + "1961-12 32.905336)</pre><b>Please rerun this cell to show the HTML repr or trust the notebook.</b></div><div class=\"sk-container\" hidden><div class='sk-item'><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=UUID('7e30e48d-3be6-4e33-a8b5-ba4ff8e29a87') type=\"checkbox\" checked><label for=UUID('7e30e48d-3be6-4e33-a8b5-ba4ff8e29a87') class='sk-toggleable__label sk-toggleable__label-arrow'>Normal</label><div class=\"sk-toggleable__content\"><pre>Normal(columns=Index(['Number of airline passengers'], dtype='object'),\n", " index=PeriodIndex(['1961-01', '1961-02', '1961-03', '1961-04', '1961-05', '1961-06',\n", " '1961-07', '1961-08', '1961-09', '1961-10', '1961-11', '1961-12'],\n", " dtype='period[M]'),\n", @@ -1805,7 +1819,7 @@ "1961-08 27.570551\n", "1961-09 28.996409\n", "1961-10 30.355365\n", - "1961-11 31.656036\n", + "1961-11 31.656037\n", "1961-12 32.905336)</pre></div></div></div></div></div>" ], "text/plain": [ @@ -1837,7 +1851,7 @@ "1961-08 27.570551\n", "1961-09 28.996409\n", "1961-10 30.355365\n", - "1961-11 31.656036\n", + "1961-11 31.656037\n", "1961-12 32.905336)" ] }, @@ -1914,12 +1928,12 @@ " </tr>\n", " <tr>\n", " <th>1961-06</th>\n", - " <td>515.476124</td>\n", + " <td>515.476123</td>\n", " <td>578.197428</td>\n", " </tr>\n", " <tr>\n", " <th>1961-07</th>\n", - " <td>570.966896</td>\n", + " <td>570.966895</td>\n", " <td>637.778829</td>\n", " </tr>\n", " <tr>\n", @@ -1959,8 +1973,8 @@ "1961-03 466.829089 515.247523\n", "1961-04 450.188398 503.800124\n", "1961-05 451.794965 510.139542\n", - "1961-06 515.476124 578.197428\n", - "1961-07 570.966896 637.778829\n", + "1961-06 515.476123 578.197428\n", + "1961-07 570.966895 637.778829\n", "1961-08 567.379760 638.045925\n", "1961-09 488.192511 562.513297\n", "1961-10 418.943257 496.747188\n", @@ -2067,63 +2081,63 @@ " <tbody>\n", " <tr>\n", " <th>1951-01</th>\n", - " <td>125.707998</td>\n", - " <td>141.744251</td>\n", + " <td>125.708002</td>\n", + " <td>141.744261</td>\n", " </tr>\n", " <tr>\n", " <th>1951-02</th>\n", - " <td>135.554586</td>\n", - " <td>154.422381</td>\n", + " <td>135.554588</td>\n", + " <td>154.422393</td>\n", " </tr>\n", " <tr>\n", " <th>1951-03</th>\n", - " <td>149.921348</td>\n", - " <td>171.247998</td>\n", + " <td>149.921349</td>\n", + " <td>171.248013</td>\n", " </tr>\n", " <tr>\n", " <th>1951-04</th>\n", - " <td>140.807417</td>\n", - " <td>164.337362</td>\n", + " <td>140.807416</td>\n", + " <td>164.337377</td>\n", " </tr>\n", " <tr>\n", " <th>1951-05</th>\n", - " <td>127.941097</td>\n", - " <td>153.484993</td>\n", + " <td>127.941095</td>\n", + " <td>153.485009</td>\n", " </tr>\n", " <tr>\n", " <th>1951-06</th>\n", - " <td>152.968277</td>\n", - " <td>180.378548</td>\n", + " <td>152.968275</td>\n", + " <td>180.378566</td>\n", " </tr>\n", " <tr>\n", " <th>1951-07</th>\n", - " <td>167.193935</td>\n", - " <td>196.351356</td>\n", + " <td>167.193932</td>\n", + " <td>196.351377</td>\n", " </tr>\n", " <tr>\n", " <th>1951-08</th>\n", - " <td>166.316512</td>\n", - " <td>197.122153</td>\n", + " <td>166.316508</td>\n", + " <td>197.122174</td>\n", " </tr>\n", " <tr>\n", " <th>1951-09</th>\n", - " <td>150.425516</td>\n", - " <td>182.795561</td>\n", + " <td>150.425511</td>\n", + " <td>182.795583</td>\n", " </tr>\n", " <tr>\n", " <th>1951-10</th>\n", - " <td>128.623033</td>\n", - " <td>162.485285</td>\n", + " <td>128.623026</td>\n", + " <td>162.485306</td>\n", " </tr>\n", " <tr>\n", " <th>1951-11</th>\n", - " <td>109.567283</td>\n", - " <td>144.858705</td>\n", + " <td>109.567274</td>\n", + " <td>144.858726</td>\n", " </tr>\n", " <tr>\n", " <th>1951-12</th>\n", - " <td>125.641292</td>\n", - " <td>162.306217</td>\n", + " <td>125.641283</td>\n", + " <td>162.306240</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", @@ -2133,18 +2147,18 @@ " Number of airline passengers \n", " 0.9 \n", " lower upper\n", - "1951-01 125.707998 141.744251\n", - "1951-02 135.554586 154.422381\n", - "1951-03 149.921348 171.247998\n", - "1951-04 140.807417 164.337362\n", - "1951-05 127.941097 153.484993\n", - "1951-06 152.968277 180.378548\n", - "1951-07 167.193935 196.351356\n", - "1951-08 166.316512 197.122153\n", - "1951-09 150.425516 182.795561\n", - "1951-10 128.623033 162.485285\n", - "1951-11 109.567283 144.858705\n", - "1951-12 125.641292 162.306217" + "1951-01 125.708002 141.744261\n", + "1951-02 135.554588 154.422393\n", + "1951-03 149.921349 171.248013\n", + "1951-04 140.807416 164.337377\n", + "1951-05 127.941095 153.485009\n", + "1951-06 152.968275 180.378566\n", + "1951-07 167.193932 196.351377\n", + "1951-08 166.316508 197.122174\n", + "1951-09 150.425511 182.795583\n", + "1951-10 128.623026 162.485306\n", + "1951-11 109.567274 144.858726\n", + "1951-12 125.641283 162.306240" ] }, "execution_count": 15, @@ -2206,63 +2220,63 @@ " <tbody>\n", " <tr>\n", " <th>1951-02</th>\n", - " <td>136.659398</td>\n", - " <td>152.695651</td>\n", + " <td>136.659402</td>\n", + " <td>152.695661</td>\n", " </tr>\n", " <tr>\n", " <th>1951-03</th>\n", - " <td>150.894540</td>\n", - " <td>169.762336</td>\n", + " <td>150.894543</td>\n", + " <td>169.762349</td>\n", " </tr>\n", " <tr>\n", " <th>1951-04</th>\n", - " <td>141.748826</td>\n", - " <td>163.075476</td>\n", + " <td>141.748827</td>\n", + " <td>163.075491</td>\n", " </tr>\n", " <tr>\n", " <th>1951-05</th>\n", - " <td>128.876521</td>\n", - " <td>152.406466</td>\n", + " <td>128.876520</td>\n", + " <td>152.406481</td>\n", " </tr>\n", " <tr>\n", " <th>1951-06</th>\n", - " <td>153.906406</td>\n", - " <td>179.450302</td>\n", + " <td>153.906405</td>\n", + " <td>179.450320</td>\n", " </tr>\n", " <tr>\n", " <th>1951-07</th>\n", - " <td>168.170069</td>\n", - " <td>195.580339</td>\n", + " <td>168.170068</td>\n", + " <td>195.580359</td>\n", " </tr>\n", " <tr>\n", " <th>1951-08</th>\n", - " <td>167.339648</td>\n", - " <td>196.497069</td>\n", + " <td>167.339646</td>\n", + " <td>196.497090</td>\n", " </tr>\n", " <tr>\n", " <th>1951-09</th>\n", - " <td>151.478088</td>\n", - " <td>182.283729</td>\n", + " <td>151.478084</td>\n", + " <td>182.283750</td>\n", " </tr>\n", " <tr>\n", " <th>1951-10</th>\n", - " <td>129.681615</td>\n", - " <td>162.051660</td>\n", + " <td>129.681609</td>\n", + " <td>162.051681</td>\n", " </tr>\n", " <tr>\n", " <th>1951-11</th>\n", - " <td>110.621201</td>\n", - " <td>144.483453</td>\n", + " <td>110.621193</td>\n", + " <td>144.483474</td>\n", " </tr>\n", " <tr>\n", " <th>1951-12</th>\n", - " <td>126.786551</td>\n", - " <td>162.077973</td>\n", + " <td>126.786543</td>\n", + " <td>162.077995</td>\n", " </tr>\n", " <tr>\n", " <th>1952-01</th>\n", - " <td>121.345120</td>\n", - " <td>158.010045</td>\n", + " <td>121.345111</td>\n", + " <td>158.010067</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", @@ -2272,18 +2286,18 @@ " Number of airline passengers \n", " 0.9 \n", " lower upper\n", - "1951-02 136.659398 152.695651\n", - "1951-03 150.894540 169.762336\n", - "1951-04 141.748826 163.075476\n", - "1951-05 128.876521 152.406466\n", - "1951-06 153.906406 179.450302\n", - "1951-07 168.170069 195.580339\n", - "1951-08 167.339648 196.497069\n", - "1951-09 151.478088 182.283729\n", - "1951-10 129.681615 162.051660\n", - "1951-11 110.621201 144.483453\n", - "1951-12 126.786551 162.077973\n", - "1952-01 121.345120 158.010045" + "1951-02 136.659402 152.695661\n", + "1951-03 150.894543 169.762349\n", + "1951-04 141.748827 163.075491\n", + "1951-05 128.876520 152.406481\n", + "1951-06 153.906405 179.450320\n", + "1951-07 168.170068 195.580359\n", + "1951-08 167.339646 196.497090\n", + "1951-09 151.478084 182.283750\n", + "1951-10 129.681609 162.051681\n", + "1951-11 110.621193 144.483474\n", + "1951-12 126.786543 162.077995\n", + "1952-01 121.345111 158.010067" ] }, "execution_count": 16, @@ -2352,63 +2366,63 @@ " <tbody>\n", " <tr>\n", " <th>1951-03</th>\n", - " <td>151.754366</td>\n", - " <td>167.790619</td>\n", + " <td>151.754371</td>\n", + " <td>167.790630</td>\n", " </tr>\n", " <tr>\n", " <th>1951-04</th>\n", - " <td>142.481687</td>\n", - " <td>161.349482</td>\n", + " <td>142.481690</td>\n", + " <td>161.349495</td>\n", " </tr>\n", " <tr>\n", " <th>1951-05</th>\n", " <td>129.549186</td>\n", - " <td>150.875836</td>\n", + " <td>150.875849</td>\n", " </tr>\n", " <tr>\n", " <th>1951-06</th>\n", " <td>154.439360</td>\n", - " <td>177.969305</td>\n", + " <td>177.969321</td>\n", " </tr>\n", " <tr>\n", " <th>1951-07</th>\n", " <td>168.623239</td>\n", - " <td>194.167135</td>\n", + " <td>194.167153</td>\n", " </tr>\n", " <tr>\n", " <th>1951-08</th>\n", - " <td>167.770039</td>\n", - " <td>195.180310</td>\n", + " <td>167.770038</td>\n", + " <td>195.180329</td>\n", " </tr>\n", " <tr>\n", " <th>1951-09</th>\n", - " <td>151.929281</td>\n", - " <td>181.086702</td>\n", + " <td>151.929278</td>\n", + " <td>181.086722</td>\n", " </tr>\n", " <tr>\n", " <th>1951-10</th>\n", - " <td>130.167033</td>\n", - " <td>160.972675</td>\n", + " <td>130.167028</td>\n", + " <td>160.972694</td>\n", " </tr>\n", " <tr>\n", " <th>1951-11</th>\n", - " <td>111.133102</td>\n", - " <td>143.503147</td>\n", + " <td>111.133094</td>\n", + " <td>143.503166</td>\n", " </tr>\n", " <tr>\n", " <th>1951-12</th>\n", - " <td>127.264390</td>\n", - " <td>161.126643</td>\n", + " <td>127.264383</td>\n", + " <td>161.126664</td>\n", " </tr>\n", " <tr>\n", " <th>1952-01</th>\n", - " <td>121.830227</td>\n", - " <td>157.121649</td>\n", + " <td>121.830219</td>\n", + " <td>157.121670</td>\n", " </tr>\n", " <tr>\n", " <th>1952-02</th>\n", - " <td>132.976436</td>\n", - " <td>169.641361</td>\n", + " <td>132.976427</td>\n", + " <td>169.641384</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", @@ -2418,18 +2432,18 @@ " Number of airline passengers \n", " 0.9 \n", " lower upper\n", - "1951-03 151.754366 167.790619\n", - "1951-04 142.481687 161.349482\n", - "1951-05 129.549186 150.875836\n", - "1951-06 154.439360 177.969305\n", - "1951-07 168.623239 194.167135\n", - "1951-08 167.770039 195.180310\n", - "1951-09 151.929281 181.086702\n", - "1951-10 130.167033 160.972675\n", - "1951-11 111.133102 143.503147\n", - "1951-12 127.264390 161.126643\n", - "1952-01 121.830227 157.121649\n", - "1952-02 132.976436 169.641361" + "1951-03 151.754371 167.790630\n", + "1951-04 142.481690 161.349495\n", + "1951-05 129.549186 150.875849\n", + "1951-06 154.439360 177.969321\n", + "1951-07 168.623239 194.167153\n", + "1951-08 167.770038 195.180329\n", + "1951-09 151.929278 181.086722\n", + "1951-10 130.167028 160.972694\n", + "1951-11 111.133094 143.503166\n", + "1951-12 127.264383 161.126664\n", + "1952-01 121.830219 157.121670\n", + "1952-02 132.976427 169.641384" ] }, "execution_count": 17, @@ -2489,63 +2503,63 @@ " <tbody>\n", " <tr>\n", " <th>1951-04</th>\n", - " <td>143.421741</td>\n", - " <td>159.457994</td>\n", + " <td>143.421746</td>\n", + " <td>159.458004</td>\n", " </tr>\n", " <tr>\n", " <th>1951-05</th>\n", - " <td>130.401488</td>\n", - " <td>149.269284</td>\n", + " <td>130.401490</td>\n", + " <td>149.269296</td>\n", " </tr>\n", " <tr>\n", " <th>1951-06</th>\n", - " <td>155.166803</td>\n", - " <td>176.493453</td>\n", + " <td>155.166804</td>\n", + " <td>176.493468</td>\n", " </tr>\n", " <tr>\n", " <th>1951-07</th>\n", - " <td>169.300650</td>\n", - " <td>192.830595</td>\n", + " <td>169.300651</td>\n", + " <td>192.830612</td>\n", " </tr>\n", " <tr>\n", " <th>1951-08</th>\n", " <td>168.451755</td>\n", - " <td>193.995651</td>\n", + " <td>193.995669</td>\n", " </tr>\n", " <tr>\n", " <th>1951-09</th>\n", - " <td>152.643333</td>\n", - " <td>180.053604</td>\n", + " <td>152.643331</td>\n", + " <td>180.053622</td>\n", " </tr>\n", " <tr>\n", " <th>1951-10</th>\n", - " <td>130.913435</td>\n", - " <td>160.070856</td>\n", + " <td>130.913430</td>\n", + " <td>160.070874</td>\n", " </tr>\n", " <tr>\n", " <th>1951-11</th>\n", - " <td>111.900919</td>\n", - " <td>142.706560</td>\n", + " <td>111.900912</td>\n", + " <td>142.706578</td>\n", " </tr>\n", " <tr>\n", " <th>1951-12</th>\n", - " <td>128.054402</td>\n", - " <td>160.424448</td>\n", + " <td>128.054396</td>\n", + " <td>160.424468</td>\n", " </tr>\n", " <tr>\n", " <th>1952-01</th>\n", - " <td>122.645052</td>\n", - " <td>156.507304</td>\n", + " <td>122.645044</td>\n", + " <td>156.507325</td>\n", " </tr>\n", " <tr>\n", " <th>1952-02</th>\n", - " <td>133.834107</td>\n", - " <td>169.125529</td>\n", + " <td>133.834100</td>\n", + " <td>169.125551</td>\n", " </tr>\n", " <tr>\n", " <th>1952-03</th>\n", - " <td>149.605277</td>\n", - " <td>186.270202</td>\n", + " <td>149.605269</td>\n", + " <td>186.270225</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", @@ -2555,18 +2569,18 @@ " Number of airline passengers \n", " 0.9 \n", " lower upper\n", - "1951-04 143.421741 159.457994\n", - "1951-05 130.401488 149.269284\n", - "1951-06 155.166803 176.493453\n", - "1951-07 169.300650 192.830595\n", - "1951-08 168.451755 193.995651\n", - "1951-09 152.643333 180.053604\n", - "1951-10 130.913435 160.070856\n", - "1951-11 111.900919 142.706560\n", - "1951-12 128.054402 160.424448\n", - "1952-01 122.645052 156.507304\n", - "1952-02 133.834107 169.125529\n", - "1952-03 149.605277 186.270202" + "1951-04 143.421746 159.458004\n", + "1951-05 130.401490 149.269296\n", + "1951-06 155.166804 176.493468\n", + "1951-07 169.300651 192.830612\n", + "1951-08 168.451755 193.995669\n", + "1951-09 152.643331 180.053622\n", + "1951-10 130.913430 160.070874\n", + "1951-11 111.900912 142.706578\n", + "1951-12 128.054396 160.424468\n", + "1952-01 122.645044 156.507325\n", + "1952-02 133.834100 169.125551\n", + "1952-03 149.605269 186.270225" ] }, "execution_count": 18, @@ -2600,7 +2614,18 @@ "outputs": [ { "data": { - "image/png": "", + "text/plain": [ + "(<Figure size 1600x480 with 1 Axes>,\n", + " <Axes: xlabel='Time', ylabel='Window number'>)" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", "text/plain": [ "<Figure size 1600x480 with 1 Axes>" ] @@ -2650,7 +2675,7 @@ { "data": { "text/html": [ - "<style>#sk-98a069bc-b795-48e2-9eac-e0374eab901a {color: black;background-color: white;}#sk-98a069bc-b795-48e2-9eac-e0374eab901a pre{padding: 0;}#sk-98a069bc-b795-48e2-9eac-e0374eab901a div.sk-toggleable {background-color: white;}#sk-98a069bc-b795-48e2-9eac-e0374eab901a label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-98a069bc-b795-48e2-9eac-e0374eab901a label.sk-toggleable__label-arrow:before {content: \"βΈ\";float: left;margin-right: 0.25em;color: #696969;}#sk-98a069bc-b795-48e2-9eac-e0374eab901a label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-98a069bc-b795-48e2-9eac-e0374eab901a div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-98a069bc-b795-48e2-9eac-e0374eab901a div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-98a069bc-b795-48e2-9eac-e0374eab901a div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-98a069bc-b795-48e2-9eac-e0374eab901a input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-98a069bc-b795-48e2-9eac-e0374eab901a input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"βΎ\";}#sk-98a069bc-b795-48e2-9eac-e0374eab901a div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-98a069bc-b795-48e2-9eac-e0374eab901a div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-98a069bc-b795-48e2-9eac-e0374eab901a input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-98a069bc-b795-48e2-9eac-e0374eab901a div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-98a069bc-b795-48e2-9eac-e0374eab901a div.sk-estimator:hover {background-color: #d4ebff;}#sk-98a069bc-b795-48e2-9eac-e0374eab901a div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-98a069bc-b795-48e2-9eac-e0374eab901a div.sk-label:hover label.sk-toggleable__label {background-color: #d4ebff;}#sk-98a069bc-b795-48e2-9eac-e0374eab901a div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 2em;bottom: 0;left: 50%;}#sk-98a069bc-b795-48e2-9eac-e0374eab901a div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;}#sk-98a069bc-b795-48e2-9eac-e0374eab901a div.sk-item {z-index: 1;}#sk-98a069bc-b795-48e2-9eac-e0374eab901a div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;}#sk-98a069bc-b795-48e2-9eac-e0374eab901a div.sk-parallel::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 2em;bottom: 0;left: 50%;}#sk-98a069bc-b795-48e2-9eac-e0374eab901a div.sk-parallel-item {display: flex;flex-direction: column;position: relative;background-color: white;}#sk-98a069bc-b795-48e2-9eac-e0374eab901a div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-98a069bc-b795-48e2-9eac-e0374eab901a div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-98a069bc-b795-48e2-9eac-e0374eab901a div.sk-parallel-item:only-child::after {width: 0;}#sk-98a069bc-b795-48e2-9eac-e0374eab901a div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;position: relative;}#sk-98a069bc-b795-48e2-9eac-e0374eab901a div.sk-label label {font-family: monospace;font-weight: bold;background-color: white;display: inline-block;line-height: 1.2em;}#sk-98a069bc-b795-48e2-9eac-e0374eab901a div.sk-label-container {position: relative;z-index: 2;text-align: center;}#sk-98a069bc-b795-48e2-9eac-e0374eab901a div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-98a069bc-b795-48e2-9eac-e0374eab901a div.sk-text-repr-fallback {display: none;}</style><div id='sk-98a069bc-b795-48e2-9eac-e0374eab901a' class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>VAR()</pre><b>Please rerun this cell to show the HTML repr or trust the notebook.</b></div><div class=\"sk-container\" hidden><div class='sk-item'><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=UUID('c1503cc9-3f13-4fe9-aa27-2267fe0497e1') type=\"checkbox\" checked><label for=UUID('c1503cc9-3f13-4fe9-aa27-2267fe0497e1') class='sk-toggleable__label sk-toggleable__label-arrow'>VAR</label><div class=\"sk-toggleable__content\"><pre>VAR()</pre></div></div></div></div></div>" + "<style>#sk-f23c3876-2cde-4425-8d64-a316fb81fd51 {color: black;background-color: white;}#sk-f23c3876-2cde-4425-8d64-a316fb81fd51 pre{padding: 0;}#sk-f23c3876-2cde-4425-8d64-a316fb81fd51 div.sk-toggleable {background-color: white;}#sk-f23c3876-2cde-4425-8d64-a316fb81fd51 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-f23c3876-2cde-4425-8d64-a316fb81fd51 label.sk-toggleable__label-arrow:before {content: \"βΈ\";float: left;margin-right: 0.25em;color: #696969;}#sk-f23c3876-2cde-4425-8d64-a316fb81fd51 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-f23c3876-2cde-4425-8d64-a316fb81fd51 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-f23c3876-2cde-4425-8d64-a316fb81fd51 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-f23c3876-2cde-4425-8d64-a316fb81fd51 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-f23c3876-2cde-4425-8d64-a316fb81fd51 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-f23c3876-2cde-4425-8d64-a316fb81fd51 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"βΎ\";}#sk-f23c3876-2cde-4425-8d64-a316fb81fd51 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-f23c3876-2cde-4425-8d64-a316fb81fd51 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-f23c3876-2cde-4425-8d64-a316fb81fd51 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-f23c3876-2cde-4425-8d64-a316fb81fd51 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-f23c3876-2cde-4425-8d64-a316fb81fd51 div.sk-estimator:hover {background-color: #d4ebff;}#sk-f23c3876-2cde-4425-8d64-a316fb81fd51 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-f23c3876-2cde-4425-8d64-a316fb81fd51 div.sk-label:hover label.sk-toggleable__label {background-color: #d4ebff;}#sk-f23c3876-2cde-4425-8d64-a316fb81fd51 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 2em;bottom: 0;left: 50%;}#sk-f23c3876-2cde-4425-8d64-a316fb81fd51 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;}#sk-f23c3876-2cde-4425-8d64-a316fb81fd51 div.sk-item {z-index: 1;}#sk-f23c3876-2cde-4425-8d64-a316fb81fd51 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;}#sk-f23c3876-2cde-4425-8d64-a316fb81fd51 div.sk-parallel::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 2em;bottom: 0;left: 50%;}#sk-f23c3876-2cde-4425-8d64-a316fb81fd51 div.sk-parallel-item {display: flex;flex-direction: column;position: relative;background-color: white;}#sk-f23c3876-2cde-4425-8d64-a316fb81fd51 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-f23c3876-2cde-4425-8d64-a316fb81fd51 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-f23c3876-2cde-4425-8d64-a316fb81fd51 div.sk-parallel-item:only-child::after {width: 0;}#sk-f23c3876-2cde-4425-8d64-a316fb81fd51 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;position: relative;}#sk-f23c3876-2cde-4425-8d64-a316fb81fd51 div.sk-label label {font-family: monospace;font-weight: bold;background-color: white;display: inline-block;line-height: 1.2em;}#sk-f23c3876-2cde-4425-8d64-a316fb81fd51 div.sk-label-container {position: relative;z-index: 2;text-align: center;}#sk-f23c3876-2cde-4425-8d64-a316fb81fd51 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-f23c3876-2cde-4425-8d64-a316fb81fd51 div.sk-text-repr-fallback {display: none;}</style><div id='sk-f23c3876-2cde-4425-8d64-a316fb81fd51' class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>VAR()</pre><b>Please rerun this cell to show the HTML repr or trust the notebook.</b></div><div class=\"sk-container\" hidden><div class='sk-item'><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=UUID('890d0cd6-eea0-4b29-a628-0deaaf61884c') type=\"checkbox\" checked><label for=UUID('890d0cd6-eea0-4b29-a628-0deaaf61884c') class='sk-toggleable__label sk-toggleable__label-arrow'>VAR</label><div class=\"sk-toggleable__content\"><pre>VAR()</pre></div></div></div></div></div>" ], "text/plain": [ "VAR()" @@ -2724,23 +2749,23 @@ " <th rowspan=\"5\" valign=\"top\">h0_0</th>\n", " <th rowspan=\"5\" valign=\"top\">h1_0</th>\n", " <th>2000-01-01</th>\n", - " <td>1.000000</td>\n", + " <td>5.272974</td>\n", " </tr>\n", " <tr>\n", " <th>2000-01-02</th>\n", - " <td>3.518414</td>\n", + " <td>4.416770</td>\n", " </tr>\n", " <tr>\n", " <th>2000-01-03</th>\n", - " <td>3.439018</td>\n", + " <td>2.991815</td>\n", " </tr>\n", " <tr>\n", " <th>2000-01-04</th>\n", - " <td>4.930397</td>\n", + " <td>2.360916</td>\n", " </tr>\n", " <tr>\n", " <th>2000-01-05</th>\n", - " <td>4.612988</td>\n", + " <td>2.269617</td>\n", " </tr>\n", " <tr>\n", " <th>...</th>\n", @@ -2752,23 +2777,23 @@ " <th rowspan=\"5\" valign=\"top\">h0_1</th>\n", " <th rowspan=\"5\" valign=\"top\">h1_3</th>\n", " <th>2000-01-08</th>\n", - " <td>3.877603</td>\n", + " <td>4.388797</td>\n", " </tr>\n", " <tr>\n", " <th>2000-01-09</th>\n", - " <td>2.867442</td>\n", + " <td>5.096147</td>\n", " </tr>\n", " <tr>\n", " <th>2000-01-10</th>\n", - " <td>5.020762</td>\n", + " <td>3.347833</td>\n", " </tr>\n", " <tr>\n", " <th>2000-01-11</th>\n", - " <td>4.566137</td>\n", + " <td>3.560713</td>\n", " </tr>\n", " <tr>\n", " <th>2000-01-12</th>\n", - " <td>3.806208</td>\n", + " <td>4.467743</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", @@ -2778,17 +2803,17 @@ "text/plain": [ " c0\n", "h0 h1 time \n", - "h0_0 h1_0 2000-01-01 1.000000\n", - " 2000-01-02 3.518414\n", - " 2000-01-03 3.439018\n", - " 2000-01-04 4.930397\n", - " 2000-01-05 4.612988\n", + "h0_0 h1_0 2000-01-01 5.272974\n", + " 2000-01-02 4.416770\n", + " 2000-01-03 2.991815\n", + " 2000-01-04 2.360916\n", + " 2000-01-05 2.269617\n", "... ...\n", - "h0_1 h1_3 2000-01-08 3.877603\n", - " 2000-01-09 2.867442\n", - " 2000-01-10 5.020762\n", - " 2000-01-11 4.566137\n", - " 2000-01-12 3.806208\n", + "h0_1 h1_3 2000-01-08 4.388797\n", + " 2000-01-09 5.096147\n", + " 2000-01-10 3.347833\n", + " 2000-01-11 3.560713\n", + " 2000-01-12 4.467743\n", "\n", "[96 rows x 1 columns]" ] @@ -2867,131 +2892,131 @@ " <th rowspan=\"12\" valign=\"top\">h0_0</th>\n", " <th rowspan=\"3\" valign=\"top\">h1_0</th>\n", " <th>2000-01-13</th>\n", - " <td>1.547240</td>\n", - " <td>5.656626</td>\n", + " <td>1.722621</td>\n", + " <td>5.035875</td>\n", " </tr>\n", " <tr>\n", " <th>2000-01-14</th>\n", - " <td>1.486515</td>\n", - " <td>6.111832</td>\n", + " <td>1.880358</td>\n", + " <td>5.280790</td>\n", " </tr>\n", " <tr>\n", " <th>2000-01-15</th>\n", - " <td>1.524294</td>\n", - " <td>6.277836</td>\n", + " <td>1.924552</td>\n", + " <td>5.329570</td>\n", " </tr>\n", " <tr>\n", " <th rowspan=\"3\" valign=\"top\">h1_1</th>\n", " <th>2000-01-13</th>\n", - " <td>3.285397</td>\n", - " <td>5.304444</td>\n", + " <td>1.847150</td>\n", + " <td>4.690652</td>\n", " </tr>\n", " <tr>\n", " <th>2000-01-14</th>\n", - " <td>3.357664</td>\n", - " <td>5.387562</td>\n", + " <td>1.874098</td>\n", + " <td>4.740716</td>\n", " </tr>\n", " <tr>\n", " <th>2000-01-15</th>\n", - " <td>3.349540</td>\n", - " <td>5.379555</td>\n", + " <td>1.878830</td>\n", + " <td>4.745823</td>\n", " </tr>\n", " <tr>\n", " <th rowspan=\"3\" valign=\"top\">h1_2</th>\n", " <th>2000-01-13</th>\n", - " <td>2.916079</td>\n", - " <td>6.618686</td>\n", + " <td>2.012331</td>\n", + " <td>5.262287</td>\n", " </tr>\n", " <tr>\n", " <th>2000-01-14</th>\n", - " <td>2.920492</td>\n", - " <td>6.653291</td>\n", + " <td>1.717852</td>\n", + " <td>4.986732</td>\n", " </tr>\n", " <tr>\n", " <th>2000-01-15</th>\n", - " <td>2.922742</td>\n", - " <td>6.656034</td>\n", + " <td>1.748543</td>\n", + " <td>5.017644</td>\n", " </tr>\n", " <tr>\n", " <th rowspan=\"3\" valign=\"top\">h1_3</th>\n", " <th>2000-01-13</th>\n", - " <td>2.830648</td>\n", - " <td>5.936913</td>\n", + " <td>2.673739</td>\n", + " <td>4.996850</td>\n", " </tr>\n", " <tr>\n", " <th>2000-01-14</th>\n", - " <td>2.753652</td>\n", - " <td>5.974169</td>\n", + " <td>2.589237</td>\n", + " <td>4.975105</td>\n", " </tr>\n", " <tr>\n", " <th>2000-01-15</th>\n", - " <td>2.744015</td>\n", - " <td>5.972928</td>\n", + " <td>2.599973</td>\n", + " <td>4.989230</td>\n", " </tr>\n", " <tr>\n", " <th rowspan=\"12\" valign=\"top\">h0_1</th>\n", " <th rowspan=\"3\" valign=\"top\">h1_0</th>\n", " <th>2000-01-13</th>\n", - " <td>2.371798</td>\n", - " <td>4.871334</td>\n", + " <td>2.596552</td>\n", + " <td>4.620861</td>\n", " </tr>\n", " <tr>\n", " <th>2000-01-14</th>\n", - " <td>2.447080</td>\n", - " <td>5.252941</td>\n", + " <td>2.144881</td>\n", + " <td>4.272040</td>\n", " </tr>\n", " <tr>\n", " <th>2000-01-15</th>\n", - " <td>2.526418</td>\n", - " <td>5.406628</td>\n", + " <td>2.268863</td>\n", + " <td>4.406453</td>\n", " </tr>\n", " <tr>\n", " <th rowspan=\"3\" valign=\"top\">h1_1</th>\n", " <th>2000-01-13</th>\n", - " <td>2.648028</td>\n", - " <td>5.302283</td>\n", + " <td>2.353941</td>\n", + " <td>5.390139</td>\n", " </tr>\n", " <tr>\n", " <th>2000-01-14</th>\n", - " <td>2.673720</td>\n", - " <td>5.360916</td>\n", + " <td>2.267849</td>\n", + " <td>5.321420</td>\n", " </tr>\n", " <tr>\n", " <th>2000-01-15</th>\n", - " <td>2.679975</td>\n", - " <td>5.367988</td>\n", + " <td>2.259457</td>\n", + " <td>5.313227</td>\n", " </tr>\n", " <tr>\n", " <th rowspan=\"3\" valign=\"top\">h1_2</th>\n", " <th>2000-01-13</th>\n", - " <td>3.541852</td>\n", - " <td>5.942392</td>\n", + " <td>1.877079</td>\n", + " <td>5.224196</td>\n", " </tr>\n", " <tr>\n", " <th>2000-01-14</th>\n", - " <td>3.423455</td>\n", - " <td>5.869728</td>\n", + " <td>1.975364</td>\n", + " <td>5.387870</td>\n", " </tr>\n", " <tr>\n", " <th>2000-01-15</th>\n", - " <td>3.441320</td>\n", - " <td>5.889336</td>\n", + " <td>2.000103</td>\n", + " <td>5.415163</td>\n", " </tr>\n", " <tr>\n", " <th rowspan=\"3\" valign=\"top\">h1_3</th>\n", " <th>2000-01-13</th>\n", - " <td>3.150096</td>\n", - " <td>5.231645</td>\n", + " <td>2.655375</td>\n", + " <td>5.049080</td>\n", " </tr>\n", " <tr>\n", " <th>2000-01-14</th>\n", - " <td>2.981736</td>\n", - " <td>5.161234</td>\n", + " <td>2.577533</td>\n", + " <td>4.986714</td>\n", " </tr>\n", " <tr>\n", " <th>2000-01-15</th>\n", - " <td>3.014187</td>\n", - " <td>5.202889</td>\n", + " <td>2.569449</td>\n", + " <td>4.978829</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", @@ -3002,30 +3027,30 @@ " 0.9 \n", " lower upper\n", "h0 h1 time \n", - "h0_0 h1_0 2000-01-13 1.547240 5.656626\n", - " 2000-01-14 1.486515 6.111832\n", - " 2000-01-15 1.524294 6.277836\n", - " h1_1 2000-01-13 3.285397 5.304444\n", - " 2000-01-14 3.357664 5.387562\n", - " 2000-01-15 3.349540 5.379555\n", - " h1_2 2000-01-13 2.916079 6.618686\n", - " 2000-01-14 2.920492 6.653291\n", - " 2000-01-15 2.922742 6.656034\n", - " h1_3 2000-01-13 2.830648 5.936913\n", - " 2000-01-14 2.753652 5.974169\n", - " 2000-01-15 2.744015 5.972928\n", - "h0_1 h1_0 2000-01-13 2.371798 4.871334\n", - " 2000-01-14 2.447080 5.252941\n", - " 2000-01-15 2.526418 5.406628\n", - " h1_1 2000-01-13 2.648028 5.302283\n", - " 2000-01-14 2.673720 5.360916\n", - " 2000-01-15 2.679975 5.367988\n", - " h1_2 2000-01-13 3.541852 5.942392\n", - " 2000-01-14 3.423455 5.869728\n", - " 2000-01-15 3.441320 5.889336\n", - " h1_3 2000-01-13 3.150096 5.231645\n", - " 2000-01-14 2.981736 5.161234\n", - " 2000-01-15 3.014187 5.202889" + "h0_0 h1_0 2000-01-13 1.722621 5.035875\n", + " 2000-01-14 1.880358 5.280790\n", + " 2000-01-15 1.924552 5.329570\n", + " h1_1 2000-01-13 1.847150 4.690652\n", + " 2000-01-14 1.874098 4.740716\n", + " 2000-01-15 1.878830 4.745823\n", + " h1_2 2000-01-13 2.012331 5.262287\n", + " 2000-01-14 1.717852 4.986732\n", + " 2000-01-15 1.748543 5.017644\n", + " h1_3 2000-01-13 2.673739 4.996850\n", + " 2000-01-14 2.589237 4.975105\n", + " 2000-01-15 2.599973 4.989230\n", + "h0_1 h1_0 2000-01-13 2.596552 4.620861\n", + " 2000-01-14 2.144881 4.272040\n", + " 2000-01-15 2.268863 4.406453\n", + " h1_1 2000-01-13 2.353941 5.390139\n", + " 2000-01-14 2.267849 5.321420\n", + " 2000-01-15 2.259457 5.313227\n", + " h1_2 2000-01-13 1.877079 5.224196\n", + " 2000-01-14 1.975364 5.387870\n", + " 2000-01-15 2.000103 5.415163\n", + " h1_3 2000-01-13 2.655375 5.049080\n", + " 2000-01-14 2.577533 4.986714\n", + " 2000-01-15 2.569449 4.978829" ] }, "execution_count": 22, @@ -3175,99 +3200,99 @@ " <tbody>\n", " <tr>\n", " <th>1951-01</th>\n", - " <td>127.478978</td>\n", - " <td>130.438207</td>\n", - " <td>133.726125</td>\n", - " <td>137.014043</td>\n", - " <td>139.973272</td>\n", + " <td>127.478982</td>\n", + " <td>130.438212</td>\n", + " <td>133.726132</td>\n", + " <td>137.014051</td>\n", + " <td>139.973281</td>\n", " </tr>\n", " <tr>\n", " <th>1951-02</th>\n", - " <td>137.638269</td>\n", - " <td>141.120013</td>\n", - " <td>144.988483</td>\n", - " <td>148.856953</td>\n", - " <td>152.338697</td>\n", + " <td>137.638273</td>\n", + " <td>141.120018</td>\n", + " <td>144.988491</td>\n", + " <td>148.856963</td>\n", + " <td>152.338709</td>\n", " </tr>\n", " <tr>\n", " <th>1951-03</th>\n", - " <td>152.276577</td>\n", - " <td>156.212063</td>\n", - " <td>160.584673</td>\n", - " <td>164.957283</td>\n", - " <td>168.892768</td>\n", + " <td>152.276580</td>\n", + " <td>156.212068</td>\n", + " <td>160.584681</td>\n", + " <td>164.957294</td>\n", + " <td>168.892782</td>\n", " </tr>\n", " <tr>\n", " <th>1951-04</th>\n", - " <td>143.405969</td>\n", - " <td>147.748037</td>\n", - " <td>152.572389</td>\n", - " <td>157.396741</td>\n", - " <td>161.738809</td>\n", + " <td>143.405971</td>\n", + " <td>147.748041</td>\n", + " <td>152.572397</td>\n", + " <td>157.396752</td>\n", + " <td>161.738823</td>\n", " </tr>\n", " <tr>\n", " <th>1951-05</th>\n", " <td>130.762062</td>\n", - " <td>135.475772</td>\n", - " <td>140.713045</td>\n", - " <td>145.950318</td>\n", - " <td>150.664027</td>\n", + " <td>135.475775</td>\n", + " <td>140.713052</td>\n", + " <td>145.950329</td>\n", + " <td>150.664042</td>\n", " </tr>\n", " <tr>\n", " <th>1951-06</th>\n", " <td>155.995358</td>\n", - " <td>161.053476</td>\n", - " <td>166.673412</td>\n", - " <td>172.293349</td>\n", - " <td>177.351467</td>\n", + " <td>161.053480</td>\n", + " <td>166.673421</td>\n", + " <td>172.293362</td>\n", + " <td>177.351484</td>\n", " </tr>\n", " <tr>\n", " <th>1951-07</th>\n", - " <td>170.413963</td>\n", - " <td>175.794490</td>\n", - " <td>181.772645</td>\n", - " <td>187.750801</td>\n", - " <td>193.131328</td>\n", + " <td>170.413964</td>\n", + " <td>175.794494</td>\n", + " <td>181.772655</td>\n", + " <td>187.750815</td>\n", + " <td>193.131346</td>\n", " </tr>\n", " <tr>\n", " <th>1951-08</th>\n", - " <td>169.718563</td>\n", - " <td>175.403241</td>\n", - " <td>181.719332</td>\n", - " <td>188.035423</td>\n", - " <td>193.720101</td>\n", + " <td>169.718562</td>\n", + " <td>175.403245</td>\n", + " <td>181.719341</td>\n", + " <td>188.035437</td>\n", + " <td>193.720120</td>\n", " </tr>\n", " <tr>\n", " <th>1951-09</th>\n", - " <td>154.000334</td>\n", - " <td>159.973698</td>\n", - " <td>166.610539</td>\n", - " <td>173.247380</td>\n", - " <td>179.220743</td>\n", + " <td>154.000332</td>\n", + " <td>159.973701</td>\n", + " <td>166.610547</td>\n", + " <td>173.247393</td>\n", + " <td>179.220762</td>\n", " </tr>\n", " <tr>\n", " <th>1951-10</th>\n", - " <td>132.362644</td>\n", - " <td>138.611370</td>\n", - " <td>145.554159</td>\n", - " <td>152.496947</td>\n", - " <td>158.745674</td>\n", + " <td>132.362640</td>\n", + " <td>138.611371</td>\n", + " <td>145.554166</td>\n", + " <td>152.496960</td>\n", + " <td>158.745692</td>\n", " </tr>\n", " <tr>\n", " <th>1951-11</th>\n", - " <td>113.464726</td>\n", + " <td>113.464721</td>\n", " <td>119.977182</td>\n", - " <td>127.212994</td>\n", - " <td>134.448805</td>\n", - " <td>140.961262</td>\n", + " <td>127.213000</td>\n", + " <td>134.448818</td>\n", + " <td>140.961280</td>\n", " </tr>\n", " <tr>\n", " <th>1951-12</th>\n", - " <td>129.690419</td>\n", + " <td>129.690414</td>\n", " <td>136.456333</td>\n", - " <td>143.973754</td>\n", - " <td>151.491176</td>\n", - " <td>158.257090</td>\n", + " <td>143.973761</td>\n", + " <td>151.491189</td>\n", + " <td>158.257109</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", @@ -3276,33 +3301,33 @@ "text/plain": [ " Number of airline passengers \\\n", " 0.10 0.25 0.50 0.75 \n", - "1951-01 127.478978 130.438207 133.726125 137.014043 \n", - "1951-02 137.638269 141.120013 144.988483 148.856953 \n", - "1951-03 152.276577 156.212063 160.584673 164.957283 \n", - "1951-04 143.405969 147.748037 152.572389 157.396741 \n", - "1951-05 130.762062 135.475772 140.713045 145.950318 \n", - "1951-06 155.995358 161.053476 166.673412 172.293349 \n", - "1951-07 170.413963 175.794490 181.772645 187.750801 \n", - "1951-08 169.718563 175.403241 181.719332 188.035423 \n", - "1951-09 154.000334 159.973698 166.610539 173.247380 \n", - "1951-10 132.362644 138.611370 145.554159 152.496947 \n", - "1951-11 113.464726 119.977182 127.212994 134.448805 \n", - "1951-12 129.690419 136.456333 143.973754 151.491176 \n", + "1951-01 127.478982 130.438212 133.726132 137.014051 \n", + "1951-02 137.638273 141.120018 144.988491 148.856963 \n", + "1951-03 152.276580 156.212068 160.584681 164.957294 \n", + "1951-04 143.405971 147.748041 152.572397 157.396752 \n", + "1951-05 130.762062 135.475775 140.713052 145.950329 \n", + "1951-06 155.995358 161.053480 166.673421 172.293362 \n", + "1951-07 170.413964 175.794494 181.772655 187.750815 \n", + "1951-08 169.718562 175.403245 181.719341 188.035437 \n", + "1951-09 154.000332 159.973701 166.610547 173.247393 \n", + "1951-10 132.362640 138.611371 145.554166 152.496960 \n", + "1951-11 113.464721 119.977182 127.213000 134.448818 \n", + "1951-12 129.690414 136.456333 143.973761 151.491189 \n", "\n", " \n", " 0.90 \n", - "1951-01 139.973272 \n", - "1951-02 152.338697 \n", - "1951-03 168.892768 \n", - "1951-04 161.738809 \n", - "1951-05 150.664027 \n", - "1951-06 177.351467 \n", - "1951-07 193.131328 \n", - "1951-08 193.720101 \n", - "1951-09 179.220743 \n", - "1951-10 158.745674 \n", - "1951-11 140.961262 \n", - "1951-12 158.257090 " + "1951-01 139.973281 \n", + "1951-02 152.338709 \n", + "1951-03 168.892782 \n", + "1951-04 161.738823 \n", + "1951-05 150.664042 \n", + "1951-06 177.351484 \n", + "1951-07 193.131346 \n", + "1951-08 193.720120 \n", + "1951-09 179.220762 \n", + "1951-10 158.745692 \n", + "1951-11 140.961280 \n", + "1951-12 158.257109 " ] }, "execution_count": 23, @@ -3350,9 +3375,9 @@ "text/plain": [ "0.10 2.706601\n", "0.25 5.494502\n", - "0.50 8.162435\n", - "0.75 8.003799\n", - "0.90 5.220249\n", + "0.50 8.162432\n", + "0.75 8.003790\n", + "0.90 5.220235\n", "Name: 0, dtype: float64" ] }, @@ -3416,98 +3441,98 @@ " <tr>\n", " <th>0</th>\n", " <td>1.752102</td>\n", - " <td>3.640448</td>\n", - " <td>5.636938</td>\n", - " <td>5.989468</td>\n", - " <td>4.524056</td>\n", + " <td>3.640447</td>\n", + " <td>5.636934</td>\n", + " <td>5.989462</td>\n", + " <td>4.524047</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>1.236173</td>\n", - " <td>2.219997</td>\n", - " <td>2.505758</td>\n", - " <td>0.857285</td>\n", - " <td>0.233870</td>\n", + " <td>2.219995</td>\n", + " <td>2.505755</td>\n", + " <td>0.857278</td>\n", + " <td>0.233871</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>2.572342</td>\n", - " <td>5.446984</td>\n", - " <td>8.707664</td>\n", - " <td>9.782038</td>\n", - " <td>8.196509</td>\n", + " <td>5.446983</td>\n", + " <td>8.707660</td>\n", + " <td>9.782030</td>\n", + " <td>8.196497</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>1.959403</td>\n", - " <td>3.812991</td>\n", - " <td>5.213805</td>\n", - " <td>4.202444</td>\n", - " <td>1.135072</td>\n", + " <td>3.812990</td>\n", + " <td>5.213802</td>\n", + " <td>4.202436</td>\n", + " <td>1.135059</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>4.123794</td>\n", - " <td>9.131057</td>\n", - " <td>15.643478</td>\n", - " <td>19.537261</td>\n", - " <td>19.202375</td>\n", + " <td>9.131056</td>\n", + " <td>15.643474</td>\n", + " <td>19.537253</td>\n", + " <td>19.202362</td>\n", " </tr>\n", " <tr>\n", " <th>5</th>\n", " <td>2.200464</td>\n", - " <td>4.236631</td>\n", - " <td>5.663294</td>\n", - " <td>4.279988</td>\n", - " <td>0.583679</td>\n", + " <td>4.236630</td>\n", + " <td>5.663290</td>\n", + " <td>4.279979</td>\n", + " <td>0.583664</td>\n", " </tr>\n", " <tr>\n", " <th>6</th>\n", " <td>2.858604</td>\n", - " <td>5.801378</td>\n", - " <td>8.613677</td>\n", - " <td>8.436899</td>\n", - " <td>5.281805</td>\n", + " <td>5.801376</td>\n", + " <td>8.613673</td>\n", + " <td>8.436889</td>\n", + " <td>5.281789</td>\n", " </tr>\n", " <tr>\n", " <th>7</th>\n", " <td>2.928144</td>\n", - " <td>5.899190</td>\n", - " <td>8.640334</td>\n", - " <td>8.223433</td>\n", - " <td>4.751909</td>\n", + " <td>5.899189</td>\n", + " <td>8.640329</td>\n", + " <td>8.223422</td>\n", + " <td>4.751892</td>\n", " </tr>\n", " <tr>\n", " <th>8</th>\n", " <td>2.999967</td>\n", - " <td>6.006576</td>\n", - " <td>8.694731</td>\n", - " <td>8.064465</td>\n", - " <td>4.301331</td>\n", + " <td>6.006575</td>\n", + " <td>8.694726</td>\n", + " <td>8.064455</td>\n", + " <td>4.301314</td>\n", " </tr>\n", " <tr>\n", " <th>9</th>\n", " <td>2.963736</td>\n", " <td>5.847157</td>\n", - " <td>8.222921</td>\n", - " <td>7.127290</td>\n", - " <td>2.928894</td>\n", + " <td>8.222917</td>\n", + " <td>7.127280</td>\n", + " <td>2.928877</td>\n", " </tr>\n", " <tr>\n", " <th>10</th>\n", - " <td>3.253527</td>\n", + " <td>3.253528</td>\n", " <td>6.505704</td>\n", - " <td>9.393503</td>\n", - " <td>8.663396</td>\n", - " <td>4.534864</td>\n", + " <td>9.393500</td>\n", + " <td>8.663387</td>\n", + " <td>4.534848</td>\n", " </tr>\n", " <tr>\n", " <th>11</th>\n", - " <td>3.630958</td>\n", + " <td>3.630959</td>\n", " <td>7.385917</td>\n", - " <td>11.013123</td>\n", - " <td>10.881618</td>\n", - " <td>6.968619</td>\n", + " <td>11.013119</td>\n", + " <td>10.881608</td>\n", + " <td>6.968602</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", @@ -3515,18 +3540,18 @@ ], "text/plain": [ " 0.10 0.25 0.50 0.75 0.90\n", - "0 1.752102 3.640448 5.636938 5.989468 4.524056\n", - "1 1.236173 2.219997 2.505758 0.857285 0.233870\n", - "2 2.572342 5.446984 8.707664 9.782038 8.196509\n", - "3 1.959403 3.812991 5.213805 4.202444 1.135072\n", - "4 4.123794 9.131057 15.643478 19.537261 19.202375\n", - "5 2.200464 4.236631 5.663294 4.279988 0.583679\n", - "6 2.858604 5.801378 8.613677 8.436899 5.281805\n", - "7 2.928144 5.899190 8.640334 8.223433 4.751909\n", - "8 2.999967 6.006576 8.694731 8.064465 4.301331\n", - "9 2.963736 5.847157 8.222921 7.127290 2.928894\n", - "10 3.253527 6.505704 9.393503 8.663396 4.534864\n", - "11 3.630958 7.385917 11.013123 10.881618 6.968619" + "0 1.752102 3.640447 5.636934 5.989462 4.524047\n", + "1 1.236173 2.219995 2.505755 0.857278 0.233871\n", + "2 2.572342 5.446983 8.707660 9.782030 8.196497\n", + "3 1.959403 3.812990 5.213802 4.202436 1.135059\n", + "4 4.123794 9.131056 15.643474 19.537253 19.202362\n", + "5 2.200464 4.236630 5.663290 4.279979 0.583664\n", + "6 2.858604 5.801376 8.613673 8.436889 5.281789\n", + "7 2.928144 5.899189 8.640329 8.223422 4.751892\n", + "8 2.999967 6.006575 8.694726 8.064455 4.301314\n", + "9 2.963736 5.847157 8.222917 7.127280 2.928877\n", + "10 3.253528 6.505704 9.393500 8.663387 4.534848\n", + "11 3.630959 7.385917 11.013119 10.881608 6.968602" ] }, "execution_count": 25, @@ -3557,7 +3582,7 @@ { "data": { "text/plain": [ - "5.917517291566615" + "5.917511873790087" ] }, "execution_count": 26, @@ -3591,18 +3616,18 @@ { "data": { "text/plain": [ - "0 4.308602\n", - "1 1.410617\n", - "2 6.941107\n", - "3 3.264743\n", - "4 13.527593\n", - "5 3.392811\n", - "6 6.198473\n", - "7 6.088602\n", - "8 6.013414\n", - "9 5.417999\n", - "10 6.470199\n", - "11 7.976047\n", + "0 4.308598\n", + "1 1.410614\n", + "2 6.941102\n", + "3 3.264738\n", + "4 13.527588\n", + "5 3.392805\n", + "6 6.198466\n", + "7 6.088595\n", + "8 6.013407\n", + "9 5.417993\n", + "10 6.470193\n", + "11 7.976041\n", "dtype: float64" ] }, @@ -3690,63 +3715,63 @@ " <tbody>\n", " <tr>\n", " <th>1951-01</th>\n", - " <td>127.478978</td>\n", - " <td>139.973272</td>\n", + " <td>127.478982</td>\n", + " <td>139.973281</td>\n", " </tr>\n", " <tr>\n", " <th>1951-02</th>\n", - " <td>137.638269</td>\n", - " <td>152.338697</td>\n", + " <td>137.638273</td>\n", + " <td>152.338709</td>\n", " </tr>\n", " <tr>\n", " <th>1951-03</th>\n", - " <td>152.276577</td>\n", - " <td>168.892768</td>\n", + " <td>152.276580</td>\n", + " <td>168.892782</td>\n", " </tr>\n", " <tr>\n", " <th>1951-04</th>\n", - " <td>143.405969</td>\n", - " <td>161.738809</td>\n", + " <td>143.405971</td>\n", + " <td>161.738823</td>\n", " </tr>\n", " <tr>\n", " <th>1951-05</th>\n", " <td>130.762062</td>\n", - " <td>150.664027</td>\n", + " <td>150.664042</td>\n", " </tr>\n", " <tr>\n", " <th>1951-06</th>\n", " <td>155.995358</td>\n", - " <td>177.351467</td>\n", + " <td>177.351484</td>\n", " </tr>\n", " <tr>\n", " <th>1951-07</th>\n", - " <td>170.413963</td>\n", - " <td>193.131328</td>\n", + " <td>170.413964</td>\n", + " <td>193.131346</td>\n", " </tr>\n", " <tr>\n", " <th>1951-08</th>\n", - " <td>169.718563</td>\n", - " <td>193.720101</td>\n", + " <td>169.718562</td>\n", + " <td>193.720120</td>\n", " </tr>\n", " <tr>\n", " <th>1951-09</th>\n", - " <td>154.000334</td>\n", - " <td>179.220743</td>\n", + " <td>154.000332</td>\n", + " <td>179.220762</td>\n", " </tr>\n", " <tr>\n", " <th>1951-10</th>\n", - " <td>132.362644</td>\n", - " <td>158.745674</td>\n", + " <td>132.362640</td>\n", + " <td>158.745692</td>\n", " </tr>\n", " <tr>\n", " <th>1951-11</th>\n", - " <td>113.464726</td>\n", - " <td>140.961262</td>\n", + " <td>113.464721</td>\n", + " <td>140.961280</td>\n", " </tr>\n", " <tr>\n", " <th>1951-12</th>\n", - " <td>129.690419</td>\n", - " <td>158.257090</td>\n", + " <td>129.690414</td>\n", + " <td>158.257109</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", @@ -3756,18 +3781,18 @@ " Number of airline passengers \n", " 0.8 \n", " lower upper\n", - "1951-01 127.478978 139.973272\n", - "1951-02 137.638269 152.338697\n", - "1951-03 152.276577 168.892768\n", - "1951-04 143.405969 161.738809\n", - "1951-05 130.762062 150.664027\n", - "1951-06 155.995358 177.351467\n", - "1951-07 170.413963 193.131328\n", - "1951-08 169.718563 193.720101\n", - "1951-09 154.000334 179.220743\n", - "1951-10 132.362644 158.745674\n", - "1951-11 113.464726 140.961262\n", - "1951-12 129.690419 158.257090" + "1951-01 127.478982 139.973281\n", + "1951-02 137.638273 152.338709\n", + "1951-03 152.276580 168.892782\n", + "1951-04 143.405971 161.738823\n", + "1951-05 130.762062 150.664042\n", + "1951-06 155.995358 177.351484\n", + "1951-07 170.413964 193.131346\n", + "1951-08 169.718562 193.720120\n", + "1951-09 154.000332 179.220762\n", + "1951-10 132.362640 158.745692\n", + "1951-11 113.464721 140.961280\n", + "1951-12 129.690414 158.257109" ] }, "execution_count": 28, @@ -3798,7 +3823,7 @@ "data": { "text/plain": [ "0.1 2.706601\n", - "0.9 5.220249\n", + "0.9 5.220235\n", "Name: 0, dtype: float64" ] }, @@ -3820,7 +3845,7 @@ { "data": { "text/plain": [ - "3.9634248763342055" + "3.9634182197580174" ] }, "execution_count": 30, @@ -3869,7 +3894,7 @@ " <th></th>\n", " <th>test_PinballLoss</th>\n", " <th>fit_time</th>\n", - " <th>pred_time</th>\n", + " <th>pred_quantiles_time</th>\n", " <th>len_train_window</th>\n", " <th>cutoff</th>\n", " </tr>\n", @@ -3878,40 +3903,40 @@ " <tr>\n", " <th>0</th>\n", " <td>0.865788</td>\n", - " <td>0.013478</td>\n", - " <td>0.013374</td>\n", + " <td>0.004671</td>\n", + " <td>0.002910</td>\n", " <td>72</td>\n", " <td>1954-12</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>0.958340</td>\n", - " <td>0.011405</td>\n", - " <td>0.012645</td>\n", + " <td>0.003195</td>\n", + " <td>0.003149</td>\n", " <td>84</td>\n", " <td>1955-12</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>0.981744</td>\n", - " <td>0.012250</td>\n", - " <td>0.016469</td>\n", + " <td>0.003315</td>\n", + " <td>0.002972</td>\n", " <td>96</td>\n", " <td>1956-12</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>1.411309</td>\n", - " <td>0.011964</td>\n", - " <td>0.011275</td>\n", + " <td>0.003242</td>\n", + " <td>0.003038</td>\n", " <td>108</td>\n", " <td>1957-12</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>1.187198</td>\n", - " <td>0.012172</td>\n", - " <td>0.011219</td>\n", + " <td>0.003085</td>\n", + " <td>0.002992</td>\n", " <td>120</td>\n", " <td>1958-12</td>\n", " </tr>\n", @@ -3920,12 +3945,12 @@ "</div>" ], "text/plain": [ - " test_PinballLoss fit_time pred_time len_train_window cutoff\n", - "0 0.865788 0.013478 0.013374 72 1954-12\n", - "1 0.958340 0.011405 0.012645 84 1955-12\n", - "2 0.981744 0.012250 0.016469 96 1956-12\n", - "3 1.411309 0.011964 0.011275 108 1957-12\n", - "4 1.187198 0.012172 0.011219 120 1958-12" + " test_PinballLoss fit_time pred_quantiles_time len_train_window cutoff\n", + "0 0.865788 0.004671 0.002910 72 1954-12\n", + "1 0.958340 0.003195 0.003149 84 1955-12\n", + "2 0.981744 0.003315 0.002972 96 1956-12\n", + "3 1.411309 0.003242 0.003038 108 1957-12\n", + "4 1.187198 0.003085 0.002992 120 1958-12" ] }, "execution_count": 31, @@ -3984,6 +4009,49 @@ "contributions are appreciated!" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Visual Evaluation \n", + "\n", + "Often, the probabilistic forecast's calibration is important. I.e., how many values are smaller then the 0.1 quantile, 0.2 quantile, etc.\n", + "\n", + "This evaluation can be made using calibration plots:" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(<Figure size 1600x400 with 1 Axes>, <Axes: >)" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "<Figure size 1600x400 with 1 Axes>" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "from sktime.utils.plotting import plot_calibration\n", + "\n", + "plot_calibration(y_true=y_test.loc[pred_quantiles.index], y_pred=pred_quantiles)" + ] + }, { "cell_type": "markdown", "id": "public-union", @@ -4027,7 +4095,7 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 33, "id": "6abda0ed", "metadata": {}, "outputs": [ @@ -4037,7 +4105,7 @@ "False" ] }, - "execution_count": 32, + "execution_count": 33, "metadata": {}, "output_type": "execute_result" } @@ -4061,7 +4129,7 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 34, "id": "60b0664a", "metadata": {}, "outputs": [ @@ -4071,7 +4139,7 @@ "True" ] }, - "execution_count": 33, + "execution_count": 34, "metadata": {}, "output_type": "execute_result" } @@ -4097,7 +4165,7 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": 35, "id": "99e6bc89", "metadata": {}, "outputs": [ @@ -4137,18 +4205,18 @@ " <tbody>\n", " <tr>\n", " <th>1961-01</th>\n", - " <td>341.950660</td>\n", - " <td>522.049339</td>\n", + " <td>341.960792</td>\n", + " <td>522.039207</td>\n", " </tr>\n", " <tr>\n", " <th>1961-02</th>\n", - " <td>319.828426</td>\n", - " <td>544.171573</td>\n", + " <td>319.835453</td>\n", + " <td>544.164546</td>\n", " </tr>\n", " <tr>\n", " <th>1961-03</th>\n", - " <td>307.334366</td>\n", - " <td>556.665632</td>\n", + " <td>307.334056</td>\n", + " <td>556.665943</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", @@ -4158,12 +4226,12 @@ " Number of airline passengers \n", " 0.9 \n", " lower upper\n", - "1961-01 341.950660 522.049339\n", - "1961-02 319.828426 544.171573\n", - "1961-03 307.334366 556.665632" + "1961-01 341.960792 522.039207\n", + "1961-02 319.835453 544.164546\n", + "1961-03 307.334056 556.665943" ] }, - "execution_count": 34, + "execution_count": 35, "metadata": {}, "output_type": "execute_result" } @@ -4218,7 +4286,7 @@ }, { "cell_type": "code", - "execution_count": 35, + "execution_count": 36, "id": "difficult-belarus", "metadata": {}, "outputs": [], @@ -4247,20 +4315,20 @@ }, { "cell_type": "code", - "execution_count": 36, + "execution_count": 37, "id": "562b301e", "metadata": {}, "outputs": [ { "data": { "text/html": [ - "<style>#sk-cd404a54-ceab-4e6f-957f-192d402ecaba {color: black;background-color: white;}#sk-cd404a54-ceab-4e6f-957f-192d402ecaba pre{padding: 0;}#sk-cd404a54-ceab-4e6f-957f-192d402ecaba div.sk-toggleable {background-color: white;}#sk-cd404a54-ceab-4e6f-957f-192d402ecaba label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-cd404a54-ceab-4e6f-957f-192d402ecaba label.sk-toggleable__label-arrow:before {content: \"βΈ\";float: left;margin-right: 0.25em;color: #696969;}#sk-cd404a54-ceab-4e6f-957f-192d402ecaba label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-cd404a54-ceab-4e6f-957f-192d402ecaba div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-cd404a54-ceab-4e6f-957f-192d402ecaba div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-cd404a54-ceab-4e6f-957f-192d402ecaba div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-cd404a54-ceab-4e6f-957f-192d402ecaba input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-cd404a54-ceab-4e6f-957f-192d402ecaba input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"βΎ\";}#sk-cd404a54-ceab-4e6f-957f-192d402ecaba div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-cd404a54-ceab-4e6f-957f-192d402ecaba div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-cd404a54-ceab-4e6f-957f-192d402ecaba input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-cd404a54-ceab-4e6f-957f-192d402ecaba div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-cd404a54-ceab-4e6f-957f-192d402ecaba div.sk-estimator:hover {background-color: #d4ebff;}#sk-cd404a54-ceab-4e6f-957f-192d402ecaba div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-cd404a54-ceab-4e6f-957f-192d402ecaba div.sk-label:hover label.sk-toggleable__label {background-color: #d4ebff;}#sk-cd404a54-ceab-4e6f-957f-192d402ecaba div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 2em;bottom: 0;left: 50%;}#sk-cd404a54-ceab-4e6f-957f-192d402ecaba div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;}#sk-cd404a54-ceab-4e6f-957f-192d402ecaba div.sk-item {z-index: 1;}#sk-cd404a54-ceab-4e6f-957f-192d402ecaba div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;}#sk-cd404a54-ceab-4e6f-957f-192d402ecaba div.sk-parallel::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 2em;bottom: 0;left: 50%;}#sk-cd404a54-ceab-4e6f-957f-192d402ecaba div.sk-parallel-item {display: flex;flex-direction: column;position: relative;background-color: white;}#sk-cd404a54-ceab-4e6f-957f-192d402ecaba div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-cd404a54-ceab-4e6f-957f-192d402ecaba div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-cd404a54-ceab-4e6f-957f-192d402ecaba div.sk-parallel-item:only-child::after {width: 0;}#sk-cd404a54-ceab-4e6f-957f-192d402ecaba div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;position: relative;}#sk-cd404a54-ceab-4e6f-957f-192d402ecaba div.sk-label label {font-family: monospace;font-weight: bold;background-color: white;display: inline-block;line-height: 1.2em;}#sk-cd404a54-ceab-4e6f-957f-192d402ecaba div.sk-label-container {position: relative;z-index: 2;text-align: center;}#sk-cd404a54-ceab-4e6f-957f-192d402ecaba div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-cd404a54-ceab-4e6f-957f-192d402ecaba div.sk-text-repr-fallback {display: none;}</style><div id='sk-cd404a54-ceab-4e6f-957f-192d402ecaba' class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>ForecastingGridSearchCV(cv=SlidingWindowSplitter(fh=[1, 2, 3],\n", + "<style>#sk-6648d0d2-8b27-4abf-ad8b-a038b4f6e935 {color: black;background-color: white;}#sk-6648d0d2-8b27-4abf-ad8b-a038b4f6e935 pre{padding: 0;}#sk-6648d0d2-8b27-4abf-ad8b-a038b4f6e935 div.sk-toggleable {background-color: white;}#sk-6648d0d2-8b27-4abf-ad8b-a038b4f6e935 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-6648d0d2-8b27-4abf-ad8b-a038b4f6e935 label.sk-toggleable__label-arrow:before {content: \"βΈ\";float: left;margin-right: 0.25em;color: #696969;}#sk-6648d0d2-8b27-4abf-ad8b-a038b4f6e935 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-6648d0d2-8b27-4abf-ad8b-a038b4f6e935 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-6648d0d2-8b27-4abf-ad8b-a038b4f6e935 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-6648d0d2-8b27-4abf-ad8b-a038b4f6e935 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-6648d0d2-8b27-4abf-ad8b-a038b4f6e935 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-6648d0d2-8b27-4abf-ad8b-a038b4f6e935 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"βΎ\";}#sk-6648d0d2-8b27-4abf-ad8b-a038b4f6e935 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-6648d0d2-8b27-4abf-ad8b-a038b4f6e935 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-6648d0d2-8b27-4abf-ad8b-a038b4f6e935 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-6648d0d2-8b27-4abf-ad8b-a038b4f6e935 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-6648d0d2-8b27-4abf-ad8b-a038b4f6e935 div.sk-estimator:hover {background-color: #d4ebff;}#sk-6648d0d2-8b27-4abf-ad8b-a038b4f6e935 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-6648d0d2-8b27-4abf-ad8b-a038b4f6e935 div.sk-label:hover label.sk-toggleable__label {background-color: #d4ebff;}#sk-6648d0d2-8b27-4abf-ad8b-a038b4f6e935 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 2em;bottom: 0;left: 50%;}#sk-6648d0d2-8b27-4abf-ad8b-a038b4f6e935 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;}#sk-6648d0d2-8b27-4abf-ad8b-a038b4f6e935 div.sk-item {z-index: 1;}#sk-6648d0d2-8b27-4abf-ad8b-a038b4f6e935 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;}#sk-6648d0d2-8b27-4abf-ad8b-a038b4f6e935 div.sk-parallel::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 2em;bottom: 0;left: 50%;}#sk-6648d0d2-8b27-4abf-ad8b-a038b4f6e935 div.sk-parallel-item {display: flex;flex-direction: column;position: relative;background-color: white;}#sk-6648d0d2-8b27-4abf-ad8b-a038b4f6e935 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-6648d0d2-8b27-4abf-ad8b-a038b4f6e935 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-6648d0d2-8b27-4abf-ad8b-a038b4f6e935 div.sk-parallel-item:only-child::after {width: 0;}#sk-6648d0d2-8b27-4abf-ad8b-a038b4f6e935 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;position: relative;}#sk-6648d0d2-8b27-4abf-ad8b-a038b4f6e935 div.sk-label label {font-family: monospace;font-weight: bold;background-color: white;display: inline-block;line-height: 1.2em;}#sk-6648d0d2-8b27-4abf-ad8b-a038b4f6e935 div.sk-label-container {position: relative;z-index: 2;text-align: center;}#sk-6648d0d2-8b27-4abf-ad8b-a038b4f6e935 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-6648d0d2-8b27-4abf-ad8b-a038b4f6e935 div.sk-text-repr-fallback {display: none;}</style><div id='sk-6648d0d2-8b27-4abf-ad8b-a038b4f6e935' class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>ForecastingGridSearchCV(cv=SlidingWindowSplitter(fh=[1, 2, 3],\n", " window_length=36),\n", " forecaster=ThetaForecaster(),\n", - " param_grid={'sp': [1, 6, 12]}, scoring=PinballLoss())</pre><b>Please rerun this cell to show the HTML repr or trust the notebook.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item sk-dashed-wrapped\"><div class='sk-label-container'><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=UUID('95124b26-981a-40b4-8d48-671c42998f34') type=\"checkbox\" ><label for=UUID('95124b26-981a-40b4-8d48-671c42998f34') class='sk-toggleable__label sk-toggleable__label-arrow'>ForecastingGridSearchCV</label><div class=\"sk-toggleable__content\"><pre>ForecastingGridSearchCV(cv=SlidingWindowSplitter(fh=[1, 2, 3],\n", + " param_grid={'sp': [1, 6, 12]}, scoring=PinballLoss())</pre><b>Please rerun this cell to show the HTML repr or trust the notebook.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item sk-dashed-wrapped\"><div class='sk-label-container'><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=UUID('5eb0427b-8e34-4027-8226-bf4b7929717e') type=\"checkbox\" ><label for=UUID('5eb0427b-8e34-4027-8226-bf4b7929717e') class='sk-toggleable__label sk-toggleable__label-arrow'>ForecastingGridSearchCV</label><div class=\"sk-toggleable__content\"><pre>ForecastingGridSearchCV(cv=SlidingWindowSplitter(fh=[1, 2, 3],\n", " window_length=36),\n", " forecaster=ThetaForecaster(),\n", - " param_grid={'sp': [1, 6, 12]}, scoring=PinballLoss())</pre></div></div></div><div class=\"sk-parallel\"><div class=\"sk-parallel-item\"><div class=\"sk-item\"><div class=\"sk-serial\"><div class='sk-item'><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=UUID('79234729-8ca2-4697-9e23-5d63111a1261') type=\"checkbox\" ><label for=UUID('79234729-8ca2-4697-9e23-5d63111a1261') class='sk-toggleable__label sk-toggleable__label-arrow'>SlidingWindowSplitter</label><div class=\"sk-toggleable__content\"><pre>SlidingWindowSplitter(fh=[1, 2, 3], window_length=36)</pre></div></div></div></div></div></div><div class=\"sk-parallel-item\"><div class=\"sk-item\"><div class=\"sk-serial\"><div class='sk-item'><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=UUID('41954e69-e437-4eea-a4db-9d71aee1f9d9') type=\"checkbox\" ><label for=UUID('41954e69-e437-4eea-a4db-9d71aee1f9d9') class='sk-toggleable__label sk-toggleable__label-arrow'>ThetaForecaster</label><div class=\"sk-toggleable__content\"><pre>ThetaForecaster()</pre></div></div></div></div></div></div><div class=\"sk-parallel-item\"><div class=\"sk-item\"><div class=\"sk-serial\"><div class='sk-item'><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=UUID('512b4501-de7b-4f64-be99-62b0bef6117f') type=\"checkbox\" ><label for=UUID('512b4501-de7b-4f64-be99-62b0bef6117f') class='sk-toggleable__label sk-toggleable__label-arrow'>PinballLoss</label><div class=\"sk-toggleable__content\"><pre>PinballLoss()</pre></div></div></div></div></div></div></div></div></div></div>" + " param_grid={'sp': [1, 6, 12]}, scoring=PinballLoss())</pre></div></div></div><div class=\"sk-parallel\"><div class=\"sk-parallel-item\"><div class=\"sk-item\"><div class=\"sk-serial\"><div class='sk-item'><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=UUID('21d23b04-8b92-4647-828b-a38d62f12de7') type=\"checkbox\" ><label for=UUID('21d23b04-8b92-4647-828b-a38d62f12de7') class='sk-toggleable__label sk-toggleable__label-arrow'>SlidingWindowSplitter</label><div class=\"sk-toggleable__content\"><pre>SlidingWindowSplitter(fh=[1, 2, 3], window_length=36)</pre></div></div></div></div></div></div><div class=\"sk-parallel-item\"><div class=\"sk-item\"><div class=\"sk-serial\"><div class='sk-item'><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=UUID('50d9faf8-d515-4ab7-a4d0-f337413ecd96') type=\"checkbox\" ><label for=UUID('50d9faf8-d515-4ab7-a4d0-f337413ecd96') class='sk-toggleable__label sk-toggleable__label-arrow'>ThetaForecaster</label><div class=\"sk-toggleable__content\"><pre>ThetaForecaster()</pre></div></div></div></div></div></div><div class=\"sk-parallel-item\"><div class=\"sk-item\"><div class=\"sk-serial\"><div class='sk-item'><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=UUID('32a30784-a4fc-472d-827d-c779193775f4') type=\"checkbox\" ><label for=UUID('32a30784-a4fc-472d-827d-c779193775f4') class='sk-toggleable__label sk-toggleable__label-arrow'>PinballLoss</label><div class=\"sk-toggleable__content\"><pre>PinballLoss()</pre></div></div></div></div></div></div></div></div></div></div>" ], "text/plain": [ "ForecastingGridSearchCV(cv=SlidingWindowSplitter(fh=[1, 2, 3],\n", @@ -4269,7 +4337,7 @@ " param_grid={'sp': [1, 6, 12]}, scoring=PinballLoss())" ] }, - "execution_count": 36, + "execution_count": 37, "metadata": {}, "output_type": "execute_result" } @@ -4292,7 +4360,7 @@ }, { "cell_type": "code", - "execution_count": 37, + "execution_count": 38, "id": "comparative-sampling", "metadata": {}, "outputs": [ @@ -4302,7 +4370,7 @@ "{'sp': 12}" ] }, - "execution_count": 37, + "execution_count": 38, "metadata": {}, "output_type": "execute_result" } @@ -4321,7 +4389,7 @@ }, { "cell_type": "code", - "execution_count": 38, + "execution_count": 39, "id": "133d779e", "metadata": {}, "outputs": [ @@ -4367,11 +4435,11 @@ " <tr>\n", " <th>1954-02</th>\n", " <td>195.638436</td>\n", - " <td>226.620354</td>\n", + " <td>226.620355</td>\n", " </tr>\n", " <tr>\n", " <th>1954-03</th>\n", - " <td>221.947953</td>\n", + " <td>221.947952</td>\n", " <td>256.967883</td>\n", " </tr>\n", " </tbody>\n", @@ -4383,11 +4451,11 @@ " 0.9 \n", " lower upper\n", "1954-01 190.832917 217.164705\n", - "1954-02 195.638436 226.620354\n", - "1954-03 221.947953 256.967883" + "1954-02 195.638436 226.620355\n", + "1954-03 221.947952 256.967883" ] }, - "execution_count": 38, + "execution_count": 39, "metadata": {}, "output_type": "execute_result" } @@ -4406,7 +4474,7 @@ }, { "cell_type": "code", - "execution_count": 39, + "execution_count": 40, "id": "differential-growth", "metadata": {}, "outputs": [ @@ -4416,7 +4484,7 @@ "{'selected_forecaster': 'naive'}" ] }, - "execution_count": 39, + "execution_count": 40, "metadata": {}, "output_type": "execute_result" } @@ -4491,7 +4559,7 @@ }, { "cell_type": "code", - "execution_count": 40, + "execution_count": 41, "id": "dcab36a8", "metadata": {}, "outputs": [], @@ -4505,7 +4573,7 @@ }, { "cell_type": "code", - "execution_count": 41, + "execution_count": 42, "id": "610257c8", "metadata": {}, "outputs": [], @@ -4519,7 +4587,7 @@ }, { "cell_type": "code", - "execution_count": 42, + "execution_count": 43, "id": "1c04ef29", "metadata": {}, "outputs": [ @@ -4607,7 +4675,7 @@ "1998Q1 4.501319 5.913611" ] }, - "execution_count": 42, + "execution_count": 43, "metadata": {}, "output_type": "execute_result" } @@ -4671,7 +4739,7 @@ }, { "cell_type": "code", - "execution_count": 43, + "execution_count": 44, "id": "behavioral-anger", "metadata": {}, "outputs": [], @@ -4684,7 +4752,7 @@ }, { "cell_type": "code", - "execution_count": 44, + "execution_count": 45, "id": "5b4f08da", "metadata": {}, "outputs": [], @@ -4695,7 +4763,7 @@ }, { "cell_type": "code", - "execution_count": 45, + "execution_count": 46, "id": "underlying-australia", "metadata": {}, "outputs": [ @@ -4761,7 +4829,7 @@ "2010Q2 8.438112 10.337207" ] }, - "execution_count": 45, + "execution_count": 46, "metadata": {}, "output_type": "execute_result" } @@ -4781,7 +4849,7 @@ }, { "cell_type": "code", - "execution_count": 46, + "execution_count": 47, "id": "8c47e57b", "metadata": {}, "outputs": [ @@ -4842,7 +4910,7 @@ "2010Q2 8.438112 10.337207" ] }, - "execution_count": 46, + "execution_count": 47, "metadata": {}, "output_type": "execute_result" } @@ -4861,7 +4929,7 @@ }, { "cell_type": "code", - "execution_count": 47, + "execution_count": 48, "id": "07deb7ec", "metadata": {}, "outputs": [], @@ -4871,7 +4939,7 @@ }, { "cell_type": "code", - "execution_count": 48, + "execution_count": 49, "id": "4c9e9228", "metadata": {}, "outputs": [ @@ -4937,7 +5005,7 @@ "2010Q2 8.438112 10.337207" ] }, - "execution_count": 48, + "execution_count": 49, "metadata": {}, "output_type": "execute_result" } @@ -4980,7 +5048,7 @@ }, { "cell_type": "code", - "execution_count": 49, + "execution_count": 50, "id": "59af90ce", "metadata": {}, "outputs": [], @@ -5006,7 +5074,7 @@ }, { "cell_type": "code", - "execution_count": 50, + "execution_count": 51, "id": "12cdfc8e", "metadata": {}, "outputs": [], @@ -5075,7 +5143,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.3" + "version": "3.11.5" } }, "nbformat": 4, diff --git a/examples/01c_forecasting_hierarchical_global.ipynb b/examples/01c_forecasting_hierarchical_global.ipynb index b486a2e165b..63c84a79939 100644 --- a/examples/01c_forecasting_hierarchical_global.ipynb +++ b/examples/01c_forecasting_hierarchical_global.ipynb @@ -2717,7 +2717,7 @@ "* focus on modeling individual products\n", "* hierarchical information is provided as exgoneous information. \n", "\n", - "For the M5 competition, winning solution used exogeneous features about the hierarchies like `\"dept_id\"`, `\"store_id\"` etc. to capture similarities and dissimilarities of the products. Other features include holiday events and snap days (specific assisstance program of US social security paid on certain days)." + "For the M5 competition, winning solution used exogeneous features about the hierarchies like `\"dept_id\"`, `\"store_id\"` etc. to capture similarities and dissimilarities of the products. Other features include holiday events and snap days (specific assistance program of US social security paid on certain days)." ] }, { diff --git a/examples/AA_datatypes_and_datasets.ipynb b/examples/AA_datatypes_and_datasets.ipynb index 867016e0fb5..8a17bb5b923 100644 --- a/examples/AA_datatypes_and_datasets.ipynb +++ b/examples/AA_datatypes_and_datasets.ipynb @@ -3166,7 +3166,7 @@ } ], "source": [ - "df_panel = df_panel.set_index([\"timepoints\", \"level_0\"])\n", + "df_panel = df_panel.set_index([\"level_0\", \"timepoints\"])\n", "type(df_panel.index)" ] }, diff --git a/examples/blog_posts/Overview_of_sktime_functionalities_for_forecasting.ipynb b/examples/blog_posts/Overview_of_sktime_functionalities_for_forecasting.ipynb new file mode 100644 index 00000000000..6fc534b219b --- /dev/null +++ b/examples/blog_posts/Overview_of_sktime_functionalities_for_forecasting.ipynb @@ -0,0 +1,4084 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "d7b47a21", + "metadata": { + "id": "d7b47a21" + }, + "source": [ + "# **Overview of sktime's forecasting functionality**\n", + "## by Joanna Lenczuk\n", + "\n", + "The code in this notebook is explained comprehensively in this article:\n", + "https://towardsdatascience.com/why-start-using-sktime-for-forecasting-8d6881c0a518" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "bd82fa45-b5ab-456d-93a8-aa04b3fbb04b", + "metadata": { + "id": "bd82fa45-b5ab-456d-93a8-aa04b3fbb04b" + }, + "outputs": [], + "source": [ + "# %%capture\n", + "# !pip install numpy\n", + "# !pip install pandas\n", + "# !pip install seaborn\n", + "# !pip install 'sktime[all_extras]'" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "fb6368a7", + "metadata": { + "id": "fb6368a7", + "scrolled": true + }, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "import seaborn as sns\n", + "import matplotlib.pyplot as plt\n", + "import statsmodels.api as sm\n", + "\n", + "from sklearn.linear_model import LinearRegression\n", + "from sklearn.ensemble import RandomForestRegressor\n", + "\n", + "from sktime.forecasting.base import ForecastingHorizon\n", + "from sktime.forecasting.model_selection import temporal_train_test_split\n", + "from sktime.forecasting.arima import AutoARIMA\n", + "from sktime.forecasting.exp_smoothing import ExponentialSmoothing\n", + "from sktime.forecasting.ets import AutoETS\n", + "from sktime.forecasting.naive import NaiveForecaster\n", + "from sktime.forecasting.tbats import TBATS\n", + "from sktime.forecasting.theta import ThetaForecaster\n", + "from sktime.forecasting.compose import (\n", + " EnsembleForecaster,\n", + " MultiplexForecaster,\n", + " TransformedTargetForecaster,\n", + " make_reduction,\n", + " ForecastingPipeline,\n", + " DirectTabularRegressionForecaster\n", + ")\n", + "from sktime.forecasting.model_selection import (\n", + " SlidingWindowSplitter,\n", + " ExpandingWindowSplitter,\n", + " ForecastingGridSearchCV,\n", + " ForecastingRandomizedSearchCV)\n", + "from sktime.forecasting.model_evaluation import evaluate\n", + "from sktime.transformations.series.detrend import Detrender, Deseasonalizer\n", + "from sktime.transformations.series.boxcox import BoxCoxTransformer, LogTransformer\n", + "from sktime.transformations.series.outlier_detection import HampelFilter\n", + "from sktime.transformations.compose import OptionalPassthrough\n", + "from sktime.performance_metrics.forecasting import (\n", + " mean_absolute_scaled_error,\n", + " median_absolute_percentage_error,\n", + " make_forecasting_scorer,\n", + " MeanAbsolutePercentageError,\n", + " MedianAbsolutePercentageError)\n", + "from sktime.utils.plotting import plot_series" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "ae03dede-08f1-404b-bd41-a68381ed34d6", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 36 + }, + "id": "ae03dede-08f1-404b-bd41-a68381ed34d6", + "outputId": "9ab4377a-cfc7-43e8-ad03-49a379c5eab9" + }, + "outputs": [ + { + "data": { + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "string" + }, + "text/plain": [ + "'0.24.1'" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import sktime\n", + "sktime.__version__" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "6be7e172", + "metadata": { + "id": "6be7e172" + }, + "outputs": [], + "source": [ + "import warnings\n", + "warnings.filterwarnings(\"ignore\")" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "f3acc607", + "metadata": { + "id": "f3acc607" + }, + "outputs": [], + "source": [ + "sns.set(rc={'figure.figsize':(12,6)})" + ] + }, + { + "cell_type": "markdown", + "id": "22d3683b", + "metadata": { + "id": "22d3683b" + }, + "source": [ + "### Importing data" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "59029473", + "metadata": { + "id": "59029473" + }, + "outputs": [], + "source": [ + "df = sm.datasets.macrodata.load_pandas()['data']" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "31a71346", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 214 + }, + "id": "31a71346", + "outputId": "6cb4d182-93cf-4cac-ff52-6aed528cb181" + }, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " <div id=\"df-ed3371a3-71a8-4953-84ff-ec4c49f66ebb\" class=\"colab-df-container\">\n", + " <div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>year</th>\n", + " <th>quarter</th>\n", + " <th>realgdp</th>\n", + " <th>realcons</th>\n", + " <th>realinv</th>\n", + " <th>realgovt</th>\n", + " <th>realdpi</th>\n", + " <th>cpi</th>\n", + " <th>m1</th>\n", + " <th>tbilrate</th>\n", + " <th>unemp</th>\n", + " <th>pop</th>\n", + " <th>infl</th>\n", + " <th>realint</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>1959.0</td>\n", + " <td>1.0</td>\n", + " <td>2710.349</td>\n", + " <td>1707.4</td>\n", + " <td>286.898</td>\n", + " <td>470.045</td>\n", + " <td>1886.9</td>\n", + " <td>28.98</td>\n", + " <td>139.7</td>\n", + " <td>2.82</td>\n", + " <td>5.8</td>\n", + " <td>177.146</td>\n", + " <td>0.00</td>\n", + " <td>0.00</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>1959.0</td>\n", + " <td>2.0</td>\n", + " <td>2778.801</td>\n", + " <td>1733.7</td>\n", + " <td>310.859</td>\n", + " <td>481.301</td>\n", + " <td>1919.7</td>\n", + " <td>29.15</td>\n", + " <td>141.7</td>\n", + " <td>3.08</td>\n", + " <td>5.1</td>\n", + " <td>177.830</td>\n", + " <td>2.34</td>\n", + " <td>0.74</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>1959.0</td>\n", + " <td>3.0</td>\n", + " <td>2775.488</td>\n", + " <td>1751.8</td>\n", + " <td>289.226</td>\n", + " <td>491.260</td>\n", + " <td>1916.4</td>\n", + " <td>29.35</td>\n", + " <td>140.5</td>\n", + " <td>3.82</td>\n", + " <td>5.3</td>\n", + " <td>178.657</td>\n", + " <td>2.74</td>\n", + " <td>1.09</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>1959.0</td>\n", + " <td>4.0</td>\n", + " <td>2785.204</td>\n", + " <td>1753.7</td>\n", + " <td>299.356</td>\n", + " <td>484.052</td>\n", + " <td>1931.3</td>\n", + " <td>29.37</td>\n", + " <td>140.0</td>\n", + " <td>4.33</td>\n", + " <td>5.6</td>\n", + " <td>179.386</td>\n", + " <td>0.27</td>\n", + " <td>4.06</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>1960.0</td>\n", + " <td>1.0</td>\n", + " <td>2847.699</td>\n", + " <td>1770.5</td>\n", + " <td>331.722</td>\n", + " <td>462.199</td>\n", + " <td>1955.5</td>\n", + " <td>29.54</td>\n", + " <td>139.6</td>\n", + " <td>3.50</td>\n", + " <td>5.2</td>\n", + " <td>180.007</td>\n", + " <td>2.31</td>\n", + " <td>1.19</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>\n", + " <div class=\"colab-df-buttons\">\n", + "\n", + " <div class=\"colab-df-container\">\n", + " <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-ed3371a3-71a8-4953-84ff-ec4c49f66ebb')\"\n", + " title=\"Convert this dataframe to an interactive table.\"\n", + " style=\"display:none;\">\n", + "\n", + " <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n", + " <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n", + " </svg>\n", + " </button>\n", + "\n", + " <style>\n", + " .colab-df-container {\n", + " display:flex;\n", + " gap: 12px;\n", + " }\n", + "\n", + " .colab-df-convert {\n", + " background-color: #E8F0FE;\n", + " border: none;\n", + " border-radius: 50%;\n", + " cursor: pointer;\n", + " display: none;\n", + " fill: #1967D2;\n", + " height: 32px;\n", + " padding: 0 0 0 0;\n", + " width: 32px;\n", + " }\n", + "\n", + " .colab-df-convert:hover {\n", + " background-color: #E2EBFA;\n", + " box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n", + " fill: #174EA6;\n", + " }\n", + "\n", + " .colab-df-buttons div {\n", + " margin-bottom: 4px;\n", + " }\n", + "\n", + " [theme=dark] .colab-df-convert {\n", + " background-color: #3B4455;\n", + " fill: #D2E3FC;\n", + " }\n", + "\n", + " [theme=dark] .colab-df-convert:hover {\n", + " background-color: #434B5C;\n", + " box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n", + " filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n", + " fill: #FFFFFF;\n", + " }\n", + " </style>\n", + "\n", + " <script>\n", + " const buttonEl =\n", + " document.querySelector('#df-ed3371a3-71a8-4953-84ff-ec4c49f66ebb button.colab-df-convert');\n", + " buttonEl.style.display =\n", + " google.colab.kernel.accessAllowed ? 'block' : 'none';\n", + "\n", + " async function convertToInteractive(key) {\n", + " const element = document.querySelector('#df-ed3371a3-71a8-4953-84ff-ec4c49f66ebb');\n", + " const dataTable =\n", + " await google.colab.kernel.invokeFunction('convertToInteractive',\n", + " [key], {});\n", + " if (!dataTable) return;\n", + "\n", + " const docLinkHtml = 'Like what you see? Visit the ' +\n", + " '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n", + " + ' to learn more about interactive tables.';\n", + " element.innerHTML = '';\n", + " dataTable['output_type'] = 'display_data';\n", + " await google.colab.output.renderOutput(dataTable, element);\n", + " const docLink = document.createElement('div');\n", + " docLink.innerHTML = docLinkHtml;\n", + " element.appendChild(docLink);\n", + " }\n", + " </script>\n", + " </div>\n", + "\n", + "\n", + "<div id=\"df-34af8841-7076-498a-9e0b-c0e2357031b6\">\n", + " <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-34af8841-7076-498a-9e0b-c0e2357031b6')\"\n", + " title=\"Suggest charts\"\n", + " style=\"display:none;\">\n", + "\n", + "<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n", + " width=\"24px\">\n", + " <g>\n", + " <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n", + " </g>\n", + "</svg>\n", + " </button>\n", + "\n", + "<style>\n", + " .colab-df-quickchart {\n", + " --bg-color: #E8F0FE;\n", + " --fill-color: #1967D2;\n", + " --hover-bg-color: #E2EBFA;\n", + " --hover-fill-color: #174EA6;\n", + " --disabled-fill-color: #AAA;\n", + " --disabled-bg-color: #DDD;\n", + " }\n", + "\n", + " [theme=dark] .colab-df-quickchart {\n", + " --bg-color: #3B4455;\n", + " --fill-color: #D2E3FC;\n", + " --hover-bg-color: #434B5C;\n", + " --hover-fill-color: #FFFFFF;\n", + " --disabled-bg-color: #3B4455;\n", + " --disabled-fill-color: #666;\n", + " }\n", + "\n", + " .colab-df-quickchart {\n", + " background-color: var(--bg-color);\n", + " border: none;\n", + " border-radius: 50%;\n", + " cursor: pointer;\n", + " display: none;\n", + " fill: var(--fill-color);\n", + " height: 32px;\n", + " padding: 0;\n", + " width: 32px;\n", + " }\n", + "\n", + " .colab-df-quickchart:hover {\n", + " background-color: var(--hover-bg-color);\n", + " box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n", + " fill: var(--button-hover-fill-color);\n", + " }\n", + "\n", + " .colab-df-quickchart-complete:disabled,\n", + " .colab-df-quickchart-complete:disabled:hover {\n", + " background-color: var(--disabled-bg-color);\n", + " fill: var(--disabled-fill-color);\n", + " box-shadow: none;\n", + " }\n", + "\n", + " .colab-df-spinner {\n", + " border: 2px solid var(--fill-color);\n", + " border-color: transparent;\n", + " border-bottom-color: var(--fill-color);\n", + " animation:\n", + " spin 1s steps(1) infinite;\n", + " }\n", + "\n", + " @keyframes spin {\n", + " 0% {\n", + " border-color: transparent;\n", + " border-bottom-color: var(--fill-color);\n", + " border-left-color: var(--fill-color);\n", + " }\n", + " 20% {\n", + " border-color: transparent;\n", + " border-left-color: var(--fill-color);\n", + " border-top-color: var(--fill-color);\n", + " }\n", + " 30% {\n", + " border-color: transparent;\n", + " border-left-color: var(--fill-color);\n", + " border-top-color: var(--fill-color);\n", + " border-right-color: var(--fill-color);\n", + " }\n", + " 40% {\n", + " border-color: transparent;\n", + " border-right-color: var(--fill-color);\n", + " border-top-color: var(--fill-color);\n", + " }\n", + " 60% {\n", + " border-color: transparent;\n", + " border-right-color: var(--fill-color);\n", + " }\n", + " 80% {\n", + " border-color: transparent;\n", + " border-right-color: var(--fill-color);\n", + " border-bottom-color: var(--fill-color);\n", + " }\n", + " 90% {\n", + " border-color: transparent;\n", + " border-bottom-color: var(--fill-color);\n", + " }\n", + " }\n", + "</style>\n", + "\n", + " <script>\n", + " async function quickchart(key) {\n", + " const quickchartButtonEl =\n", + " document.querySelector('#' + key + ' button');\n", + " quickchartButtonEl.disabled = true; // To prevent multiple clicks.\n", + " quickchartButtonEl.classList.add('colab-df-spinner');\n", + " try {\n", + " const charts = await google.colab.kernel.invokeFunction(\n", + " 'suggestCharts', [key], {});\n", + " } catch (error) {\n", + " console.error('Error during call to suggestCharts:', error);\n", + " }\n", + " quickchartButtonEl.classList.remove('colab-df-spinner');\n", + " quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n", + " }\n", + " (() => {\n", + " let quickchartButtonEl =\n", + " document.querySelector('#df-34af8841-7076-498a-9e0b-c0e2357031b6 button');\n", + " quickchartButtonEl.style.display =\n", + " google.colab.kernel.accessAllowed ? 'block' : 'none';\n", + " })();\n", + " </script>\n", + "</div>\n", + " </div>\n", + " </div>\n" + ], + "text/plain": [ + " year quarter realgdp realcons realinv realgovt realdpi cpi \\\n", + "0 1959.0 1.0 2710.349 1707.4 286.898 470.045 1886.9 28.98 \n", + "1 1959.0 2.0 2778.801 1733.7 310.859 481.301 1919.7 29.15 \n", + "2 1959.0 3.0 2775.488 1751.8 289.226 491.260 1916.4 29.35 \n", + "3 1959.0 4.0 2785.204 1753.7 299.356 484.052 1931.3 29.37 \n", + "4 1960.0 1.0 2847.699 1770.5 331.722 462.199 1955.5 29.54 \n", + "\n", + " m1 tbilrate unemp pop infl realint \n", + "0 139.7 2.82 5.8 177.146 0.00 0.00 \n", + "1 141.7 3.08 5.1 177.830 2.34 0.74 \n", + "2 140.5 3.82 5.3 178.657 2.74 1.09 \n", + "3 140.0 4.33 5.6 179.386 0.27 4.06 \n", + "4 139.6 3.50 5.2 180.007 2.31 1.19 " + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.head()" + ] + }, + { + "cell_type": "markdown", + "id": "cdcd575a", + "metadata": { + "id": "cdcd575a" + }, + "source": [ + "### Data overview\n", + "\n", + "Dataset contains USA quarterly macroeconomic data between 1959 and 2009.\n", + "\n", + "**Target variable - `realgdp`**" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "85619169", + "metadata": { + "id": "85619169" + }, + "outputs": [], + "source": [ + "df['q_date'] = df.apply(lambda x: str(int(x['year'])) + \"-\" + 'Q'+ str(int(x['quarter'])), axis=1)\n", + "df['date'] = pd.PeriodIndex(df['q_date'], freq='Q').to_timestamp()" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "d9529063", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 214 + }, + "id": "d9529063", + "outputId": "856234d4-5d6a-46bb-de59-091f49ea4b98" + }, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " <div id=\"df-77b0ca1d-7761-4c8c-981e-2efe12d93883\" class=\"colab-df-container\">\n", + " <div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>year</th>\n", + " <th>quarter</th>\n", + " <th>realgdp</th>\n", + " <th>realcons</th>\n", + " <th>realinv</th>\n", + " <th>realgovt</th>\n", + " <th>realdpi</th>\n", + " <th>cpi</th>\n", + " <th>m1</th>\n", + " <th>tbilrate</th>\n", + " <th>unemp</th>\n", + " <th>pop</th>\n", + " <th>infl</th>\n", + " <th>realint</th>\n", + " <th>q_date</th>\n", + " <th>date</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>1959.0</td>\n", + " <td>1.0</td>\n", + " <td>2710.349</td>\n", + " <td>1707.4</td>\n", + " <td>286.898</td>\n", + " <td>470.045</td>\n", + " <td>1886.9</td>\n", + " <td>28.98</td>\n", + " <td>139.7</td>\n", + " <td>2.82</td>\n", + " <td>5.8</td>\n", + " <td>177.146</td>\n", + " <td>0.00</td>\n", + " <td>0.00</td>\n", + " <td>1959-Q1</td>\n", + " <td>1959-01-01</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>1959.0</td>\n", + " <td>2.0</td>\n", + " <td>2778.801</td>\n", + " <td>1733.7</td>\n", + " <td>310.859</td>\n", + " <td>481.301</td>\n", + " <td>1919.7</td>\n", + " <td>29.15</td>\n", + " <td>141.7</td>\n", + " <td>3.08</td>\n", + " <td>5.1</td>\n", + " <td>177.830</td>\n", + " <td>2.34</td>\n", + " <td>0.74</td>\n", + " <td>1959-Q2</td>\n", + " <td>1959-04-01</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>1959.0</td>\n", + " <td>3.0</td>\n", + " <td>2775.488</td>\n", + " <td>1751.8</td>\n", + " <td>289.226</td>\n", + " <td>491.260</td>\n", + " <td>1916.4</td>\n", + " <td>29.35</td>\n", + " <td>140.5</td>\n", + " <td>3.82</td>\n", + " <td>5.3</td>\n", + " <td>178.657</td>\n", + " <td>2.74</td>\n", + " <td>1.09</td>\n", + " <td>1959-Q3</td>\n", + " <td>1959-07-01</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>1959.0</td>\n", + " <td>4.0</td>\n", + " <td>2785.204</td>\n", + " <td>1753.7</td>\n", + " <td>299.356</td>\n", + " <td>484.052</td>\n", + " <td>1931.3</td>\n", + " <td>29.37</td>\n", + " <td>140.0</td>\n", + " <td>4.33</td>\n", + " <td>5.6</td>\n", + " <td>179.386</td>\n", + " <td>0.27</td>\n", + " <td>4.06</td>\n", + " <td>1959-Q4</td>\n", + " <td>1959-10-01</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>1960.0</td>\n", + " <td>1.0</td>\n", + " <td>2847.699</td>\n", + " <td>1770.5</td>\n", + " <td>331.722</td>\n", + " <td>462.199</td>\n", + " <td>1955.5</td>\n", + " <td>29.54</td>\n", + " <td>139.6</td>\n", + " <td>3.50</td>\n", + " <td>5.2</td>\n", + " <td>180.007</td>\n", + " <td>2.31</td>\n", + " <td>1.19</td>\n", + " <td>1960-Q1</td>\n", + " <td>1960-01-01</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>\n", + " <div class=\"colab-df-buttons\">\n", + "\n", + " <div class=\"colab-df-container\">\n", + " <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-77b0ca1d-7761-4c8c-981e-2efe12d93883')\"\n", + " title=\"Convert this dataframe to an interactive table.\"\n", + " style=\"display:none;\">\n", + "\n", + " <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n", + " <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n", + " </svg>\n", + " </button>\n", + "\n", + " <style>\n", + " .colab-df-container {\n", + " display:flex;\n", + " gap: 12px;\n", + " }\n", + "\n", + " .colab-df-convert {\n", + " background-color: #E8F0FE;\n", + " border: none;\n", + " border-radius: 50%;\n", + " cursor: pointer;\n", + " display: none;\n", + " fill: #1967D2;\n", + " height: 32px;\n", + " padding: 0 0 0 0;\n", + " width: 32px;\n", + " }\n", + "\n", + " .colab-df-convert:hover {\n", + " background-color: #E2EBFA;\n", + " box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n", + " fill: #174EA6;\n", + " }\n", + "\n", + " .colab-df-buttons div {\n", + " margin-bottom: 4px;\n", + " }\n", + "\n", + " [theme=dark] .colab-df-convert {\n", + " background-color: #3B4455;\n", + " fill: #D2E3FC;\n", + " }\n", + "\n", + " [theme=dark] .colab-df-convert:hover {\n", + " background-color: #434B5C;\n", + " box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n", + " filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n", + " fill: #FFFFFF;\n", + " }\n", + " </style>\n", + "\n", + " <script>\n", + " const buttonEl =\n", + " document.querySelector('#df-77b0ca1d-7761-4c8c-981e-2efe12d93883 button.colab-df-convert');\n", + " buttonEl.style.display =\n", + " google.colab.kernel.accessAllowed ? 'block' : 'none';\n", + "\n", + " async function convertToInteractive(key) {\n", + " const element = document.querySelector('#df-77b0ca1d-7761-4c8c-981e-2efe12d93883');\n", + " const dataTable =\n", + " await google.colab.kernel.invokeFunction('convertToInteractive',\n", + " [key], {});\n", + " if (!dataTable) return;\n", + "\n", + " const docLinkHtml = 'Like what you see? Visit the ' +\n", + " '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n", + " + ' to learn more about interactive tables.';\n", + " element.innerHTML = '';\n", + " dataTable['output_type'] = 'display_data';\n", + " await google.colab.output.renderOutput(dataTable, element);\n", + " const docLink = document.createElement('div');\n", + " docLink.innerHTML = docLinkHtml;\n", + " element.appendChild(docLink);\n", + " }\n", + " </script>\n", + " </div>\n", + "\n", + "\n", + "<div id=\"df-0d62868d-3511-4048-98b6-b0d1d4cd3e1b\">\n", + " <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-0d62868d-3511-4048-98b6-b0d1d4cd3e1b')\"\n", + " title=\"Suggest charts\"\n", + " style=\"display:none;\">\n", + "\n", + "<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n", + " width=\"24px\">\n", + " <g>\n", + " <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n", + " </g>\n", + "</svg>\n", + " </button>\n", + "\n", + "<style>\n", + " .colab-df-quickchart {\n", + " --bg-color: #E8F0FE;\n", + " --fill-color: #1967D2;\n", + " --hover-bg-color: #E2EBFA;\n", + " --hover-fill-color: #174EA6;\n", + " --disabled-fill-color: #AAA;\n", + " --disabled-bg-color: #DDD;\n", + " }\n", + "\n", + " [theme=dark] .colab-df-quickchart {\n", + " --bg-color: #3B4455;\n", + " --fill-color: #D2E3FC;\n", + " --hover-bg-color: #434B5C;\n", + " --hover-fill-color: #FFFFFF;\n", + " --disabled-bg-color: #3B4455;\n", + " --disabled-fill-color: #666;\n", + " }\n", + "\n", + " .colab-df-quickchart {\n", + " background-color: var(--bg-color);\n", + " border: none;\n", + " border-radius: 50%;\n", + " cursor: pointer;\n", + " display: none;\n", + " fill: var(--fill-color);\n", + " height: 32px;\n", + " padding: 0;\n", + " width: 32px;\n", + " }\n", + "\n", + " .colab-df-quickchart:hover {\n", + " background-color: var(--hover-bg-color);\n", + " box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n", + " fill: var(--button-hover-fill-color);\n", + " }\n", + "\n", + " .colab-df-quickchart-complete:disabled,\n", + " .colab-df-quickchart-complete:disabled:hover {\n", + " background-color: var(--disabled-bg-color);\n", + " fill: var(--disabled-fill-color);\n", + " box-shadow: none;\n", + " }\n", + "\n", + " .colab-df-spinner {\n", + " border: 2px solid var(--fill-color);\n", + " border-color: transparent;\n", + " border-bottom-color: var(--fill-color);\n", + " animation:\n", + " spin 1s steps(1) infinite;\n", + " }\n", + "\n", + " @keyframes spin {\n", + " 0% {\n", + " border-color: transparent;\n", + " border-bottom-color: var(--fill-color);\n", + " border-left-color: var(--fill-color);\n", + " }\n", + " 20% {\n", + " border-color: transparent;\n", + " border-left-color: var(--fill-color);\n", + " border-top-color: var(--fill-color);\n", + " }\n", + " 30% {\n", + " border-color: transparent;\n", + " border-left-color: var(--fill-color);\n", + " border-top-color: var(--fill-color);\n", + " border-right-color: var(--fill-color);\n", + " }\n", + " 40% {\n", + " border-color: transparent;\n", + " border-right-color: var(--fill-color);\n", + " border-top-color: var(--fill-color);\n", + " }\n", + " 60% {\n", + " border-color: transparent;\n", + " border-right-color: var(--fill-color);\n", + " }\n", + " 80% {\n", + " border-color: transparent;\n", + " border-right-color: var(--fill-color);\n", + " border-bottom-color: var(--fill-color);\n", + " }\n", + " 90% {\n", + " border-color: transparent;\n", + " border-bottom-color: var(--fill-color);\n", + " }\n", + " }\n", + "</style>\n", + "\n", + " <script>\n", + " async function quickchart(key) {\n", + " const quickchartButtonEl =\n", + " document.querySelector('#' + key + ' button');\n", + " quickchartButtonEl.disabled = true; // To prevent multiple clicks.\n", + " quickchartButtonEl.classList.add('colab-df-spinner');\n", + " try {\n", + " const charts = await google.colab.kernel.invokeFunction(\n", + " 'suggestCharts', [key], {});\n", + " } catch (error) {\n", + " console.error('Error during call to suggestCharts:', error);\n", + " }\n", + " quickchartButtonEl.classList.remove('colab-df-spinner');\n", + " quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n", + " }\n", + " (() => {\n", + " let quickchartButtonEl =\n", + " document.querySelector('#df-0d62868d-3511-4048-98b6-b0d1d4cd3e1b button');\n", + " quickchartButtonEl.style.display =\n", + " google.colab.kernel.accessAllowed ? 'block' : 'none';\n", + " })();\n", + " </script>\n", + "</div>\n", + " </div>\n", + " </div>\n" + ], + "text/plain": [ + " year quarter realgdp realcons realinv realgovt realdpi cpi \\\n", + "0 1959.0 1.0 2710.349 1707.4 286.898 470.045 1886.9 28.98 \n", + "1 1959.0 2.0 2778.801 1733.7 310.859 481.301 1919.7 29.15 \n", + "2 1959.0 3.0 2775.488 1751.8 289.226 491.260 1916.4 29.35 \n", + "3 1959.0 4.0 2785.204 1753.7 299.356 484.052 1931.3 29.37 \n", + "4 1960.0 1.0 2847.699 1770.5 331.722 462.199 1955.5 29.54 \n", + "\n", + " m1 tbilrate unemp pop infl realint q_date date \n", + "0 139.7 2.82 5.8 177.146 0.00 0.00 1959-Q1 1959-01-01 \n", + "1 141.7 3.08 5.1 177.830 2.34 0.74 1959-Q2 1959-04-01 \n", + "2 140.5 3.82 5.3 178.657 2.74 1.09 1959-Q3 1959-07-01 \n", + "3 140.0 4.33 5.6 179.386 0.27 4.06 1959-Q4 1959-10-01 \n", + "4 139.6 3.50 5.2 180.007 2.31 1.19 1960-Q1 1960-01-01 " + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "b53d0557", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 530 + }, + "id": "b53d0557", + "outputId": "dfb17682-8a89-43c1-953a-b9c2319ff749" + }, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "<Figure size 1200x600 with 1 Axes>" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "for val in df.columns[2:14]:\n", + " plt.plot(df.date, df[val], label = val)\n", + "plt.legend();" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "c89a7230-33f7-42fa-9f31-90f6f38448a9", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "c89a7230-33f7-42fa-9f31-90f6f38448a9", + "outputId": "29f889a3-72ff-4c97-f7ce-2fd0235d826c" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['year', 'quarter', 'realgdp', 'realcons', 'realinv', 'realgovt',\n", + " 'realdpi', 'cpi', 'm1', 'tbilrate', 'unemp', 'pop', 'infl', 'realint',\n", + " 'q_date', 'date'],\n", + " dtype='object')" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.columns" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "8b17033d", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "8b17033d", + "outputId": "39adfd46-b976-4ebe-b9d1-bc3901b35d9f", + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "PeriodIndex(['1959Q1', '1959Q2', '1959Q3', '1959Q4', '1960Q1', '1960Q2',\n", + " '1960Q3', '1960Q4', '1961Q1', '1961Q2',\n", + " ...\n", + " '2007Q2', '2007Q3', '2007Q4', '2008Q1', '2008Q2', '2008Q3',\n", + " '2008Q4', '2009Q1', '2009Q2', '2009Q3'],\n", + " dtype='period[Q-DEC]', name='date', length=203)" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = df.set_index('date').to_period(\"Q\")\n", + "df.index" + ] + }, + { + "cell_type": "markdown", + "id": "746efe42", + "metadata": { + "id": "746efe42" + }, + "source": [ + "## 1. Unified API" + ] + }, + { + "cell_type": "markdown", + "id": "632599a2", + "metadata": { + "id": "632599a2" + }, + "source": [ + "### Basic forecasters interface" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "95bbf1c7", + "metadata": { + "id": "95bbf1c7" + }, + "outputs": [], + "source": [ + "y = df['realgdp']\n", + "y_train, y_test = temporal_train_test_split(y, test_size=30)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "59a99374", + "metadata": { + "id": "59a99374" + }, + "outputs": [], + "source": [ + "forecaster = NaiveForecaster()\n", + "forecaster.fit(y_train)\n", + "y_pred = forecaster.predict(fh=1) #forecasting one step ahead" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "831cf6a8", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "831cf6a8", + "outputId": "2ecaca4b-4109-4a19-a61a-b2101051c3da", + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "2002Q2 11477.868\n", + "Freq: Q-DEC, Name: realgdp, dtype: float64" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "y_pred" + ] + }, + { + "cell_type": "markdown", + "id": "28d6351b", + "metadata": { + "id": "28d6351b" + }, + "source": [ + "### Forecasting horizons" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "47684b1e", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "47684b1e", + "outputId": "4f72af3c-7bbd-4887-e231-b1b699ef774e", + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "ForecastingHorizon(['2002Q2', '2002Q3', '2002Q4', '2003Q1', '2003Q2', '2003Q3',\n", + " '2003Q4', '2004Q1', '2004Q2', '2004Q3', '2004Q4', '2005Q1',\n", + " '2005Q2', '2005Q3', '2005Q4', '2006Q1', '2006Q2', '2006Q3',\n", + " '2006Q4', '2007Q1', '2007Q2', '2007Q3', '2007Q4', '2008Q1',\n", + " '2008Q2', '2008Q3', '2008Q4', '2009Q1', '2009Q2', '2009Q3'],\n", + " dtype='period[Q-DEC]', name='date', is_relative=False)" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "fh_abs = ForecastingHorizon(y_test.index, is_relative=False) #specific data points\n", + "fh_abs" + ] + }, + { + "cell_type": "markdown", + "id": "63ed7cf0", + "metadata": { + "id": "63ed7cf0" + }, + "source": [ + "**Forecasts on the test set**" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "69dfa56d", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "69dfa56d", + "outputId": "50500432-938a-4d1a-e765-7d9f9d89d56a", + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "ForecastingHorizon([ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,\n", + " 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30],\n", + " dtype='int64', is_relative=True)" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#cutoff is the last point in the training series\n", + "cutoff = pd.Period(\"2002-01-01\", freq=\"Q\")\n", + "\n", + "#forecasting 1, 2, ..., 30 steps ahead of the last point in the training series\n", + "fh_rel = fh_abs.to_relative(cutoff)\n", + "fh_rel" + ] + }, + { + "cell_type": "markdown", + "id": "e8206c7c", + "metadata": { + "id": "e8206c7c" + }, + "source": [ + "**In-sample forecasts**" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "d8ec488c", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "d8ec488c", + "outputId": "9b7210f5-910d-44c4-e29e-2a62302dd447" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "ForecastingHorizon([-29, -28, -27, -26, -25, -24, -23, -22, -21, -20, -19, -18, -17,\n", + " -16, -15, -14, -13, -12, -11, -10, -9, -8, -7, -6, -5, -4,\n", + " -3, -2, -1, 0],\n", + " dtype='int64', is_relative=True)" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#cutoff is 29 quarters after the last point in the training series\n", + "cutoff_insample = pd.Period(\"2009-09-01\", freq=\"Q\")\n", + "\n", + "#forecasting 0, 1, ..., 29 steps before the last point in the training series\n", + "fh_rel_insample = fh_abs.to_relative(cutoff_insample)\n", + "fh_rel_insample" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "e724f098", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 346 + }, + "id": "e724f098", + "outputId": "32ca1c27-bbcc-4962-d733-adad9ee2adb2" + }, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "<Figure size 1600x400 with 1 Axes>" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "forecaster = NaiveForecaster()\n", + "forecaster.fit(y_train)\n", + "y_pred = forecaster.predict(fh=fh_rel)\n", + "y_pred_insample = forecaster.predict(fh=fh_rel_insample)\n", + "\n", + "plot_series(y_train, y_test, y_pred, y_pred_insample, labels=[\"y_train\", \"y_test\", \"y_pred\", \"y_pred_insample\"]);" + ] + }, + { + "cell_type": "markdown", + "id": "7b7017ad", + "metadata": { + "id": "7b7017ad" + }, + "source": [ + "**Forward forecasts**" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "5032d756", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "5032d756", + "outputId": "a8111b9e-c7b6-48b7-b4d8-9f0a2e89c3e9" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "ForecastingHorizon([13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,\n", + " 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42],\n", + " dtype='int64', is_relative=True)" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#cutoff is 12 quarters before the last point in the training series\n", + "cutoff_forward = pd.Period(\"1999-01-01\", freq=\"Q\")\n", + "\n", + "#forecasting 13, 14, ..., 42 steps after the last point in the training series\n", + "fh_rel_forward = fh_abs.to_relative(cutoff_forward)\n", + "fh_rel_forward" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "acdb1926", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 346 + }, + "id": "acdb1926", + "outputId": "b0f31fc8-b743-4804-8647-18fddc59ee12" + }, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "<Figure size 1600x400 with 1 Axes>" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "forecaster = NaiveForecaster()\n", + "forecaster.fit(y_train)\n", + "y_pred = forecaster.predict(fh=fh_rel)\n", + "y_pred_forward = forecaster.predict(fh=fh_rel_forward)\n", + "\n", + "plot_series(y_train, y_test, y_pred,y_pred_forward, labels=[\"y_train\", \"y_test\", \"y_pred\", \"y_pred_forward\"]);" + ] + }, + { + "cell_type": "markdown", + "id": "1b5f0813", + "metadata": { + "id": "1b5f0813" + }, + "source": [ + "### Univariate time series" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "f52e9196", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 365 + }, + "id": "f52e9196", + "outputId": "551a19a5-14dd-4387-d36c-16ca9bbacb38" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "MASE: 2.864\n" + ] + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAABUAAAAFkCAYAAADhdJEIAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAACsiklEQVR4nOzdeXhU5dk/8O85Z5bMJJlMEkBAWZJAABUBRdZhDaDgWuv6tlQlRfoia8ViABHUSqQuSJCCGNxxr69aqCUKQoOo9QdILSBLgqAQI8skk8w+c35/hDnMycwkM1nI9v1clxflzDMnZ+JjKl/v+7kFWZZlEBEREREREREREbVCYlM/ABEREREREREREVFjYQBKRERERERERERErRYDUCIiIiIiIiIiImq1GIASERERERERERFRq8UAlIiIiIiIiIiIiFotBqBERERERERERETUajEAJSIiIiIiIiIiolaLASgRERERERERERG1WgxAiYiIiIiIiIiIqNXSNPUDtGWyLMPvl5v6MRqNKAqt+vNRw+J+oWhxr1AsuF8oWtwrFAvuF4oW9wrFgvuFosW9cp4oChAEodZ1zSoA/eGHH5Cfn49vv/0Whw4dQnp6Ov7+979HXP/pp5/i/vvvR8+ePUPW2Ww2LFu2DJ9++ik8Hg9GjBiBRYsWoUOHDqp1u3btwpNPPon9+/cjNTUVd911F6ZOnar65smyjHXr1mHDhg04c+YM+vTpg5ycHPTv379en9fvl3HmTGW97tFcaTQikpPjUV5uh9frb+rHoWaO+4Wixb1CseB+oWhxr1AsuF8oWtwrFAvuF4oW94paSko8JKn2ALRZtcAfOnQI27ZtQ7du3ZCRkVHjWqfTiSeeeALt2rUL+/qcOXOwY8cOLFmyBE899RSKi4sxdepUeL1eZc0PP/yA7OxstG/fHmvXrsXdd9+NlStXYv369ap7rVu3DitXrsQ999yDtWvXon379pgyZQqOHz9e/w9NREREREREREREjaZZVYCOHTsW48aNAwA89NBD+O677yKuXbt2LTp37oxLLrkkZN3u3btRWFiI/Px8WCwWAEBaWhomTZqEzZs3Y9KkSQCA/Px8JCcn45lnnoFOp8PQoUNx5swZrFmzBpMnT4ZOp4PL5cLatWsxZcoU3HPPPQCAq666Ctdeey3y8/OxZMmShv9GEBERERERERERUYNoVhWgohjd4xw7dgwvvfQSFi1aFPb17du3w2QyYfjw4cq19PR09OnTB9u3b1ety8rKgk6nU65NmjQJ5eXl2L17N4CqFvmKigpMnDhRWaPT6TB+/HjVvYiIiIiIiIiIiKj5aVYBaLT+/Oc/46abbkLv3r3Dvl5UVIS0tLSQQ1DT09NRVFQEALDb7Th58iTS09ND1giCoKwL/Fp9XUZGBk6cOAGn09kgn4mIiIiIiIiIiIgaXrNqgY/Gli1bsHv3bnzyyScR15SXlyMxMTHkelJSktIub7PZAAAmk0m1RqfTwWAwoKysTLmXTqeDXq9XrTOZTJBlGWVlZYiLi6vz59Foas+g/X4fvF4fgJYz4cvnE+F0OuHzeeD3t9ZDeQVoNBJEUWrqB2nxJElU/UoUCfcKxYL7haLFvUKx4H6haHGvUCy4Xyha3Ct106ICUJfLhSeeeAIzZ85ESkpKUz9OvYmigOTk+Iivy7KMkydPwmotgyy3nPAz4Oefm/oJGp8gCDCbk9CpU6eQimOKnclkaOpHoBaCe4Viwf1C0eJeoVhwv1C0uFcoFtwvFC3uldi0qAD0lVdegSiKuO6661BeXg4A8HiqKgzLy8sRFxcHnU4Hk8mEkpKSkPeXlZUhKSkJAJQK0UAlaIDb7YbD4VDWmUwmuN1uuFwuVRVoeXk5BEFQ1tWF3y+jvNwe8fXKShvOnj2DhAQz9Po4AC0nYBOEqoDX75fRArPbKMlwuZw4ffoM/H4R8fGhVccUHUkSYTIZUF7ugM/XWiuGqSFwr1AsuF8oWtwrFAvuF4oW9wrFgvuFosW9omYyGaKqhm1RAWhRURF++OEHDB06NOS1q6++GkuWLMFdd92F9PR07Ny5E7Isq6ryiouLkZmZCQAwGo3o1KmTcsZn8BpZlpUzPwO/FhcXq84cLSoqQufOnevV/g4AXm/4zVrVXn8WcXHxSEioe8jalDQaMeLnay20Wj28Xg/Kys5CpzOyCrSefD5/q98z1DC4VygW3C8ULe4VigX3C0WLe4Viwf1C0Yp2rwiCEFSg1mor1GrVog4MmDp1Kl599VXVXxaLBRdffDFeffVVjB07FgAwcuRIlJWVYefOncp7i4uLsW/fPowcOVK5NnLkSHz22WfweDzKtU2bNsFkMmHAgAEAgCuvvBIJCQn4xz/+oazxeDzYvHmz6l4Nze/3w+/3IS7O2GhfgxpGXJwRfr+vFZ91SkREREREREQtiSSJ0MRLSEyOgyfOj8TkOGjipTZ7dmizqgB1OBzYtm0bAOCnn35CRUWFMuxo0KBByMjIQEZGhuo9H3zwAX7++WcMHjxYuTZgwABYLBYsWLAA8+fPh16vx7PPPotevXphwoQJyrrs7Gx8/PHHeOCBB3DXXXfh4MGDyM/Px9y5c6HT6QAAer0e06ZNQ15eHlJSUpCZmYk333wTVqsV2dnZjfa98Pt9AMABOy1A4O+R3++DJPHvFxERERERERFdWMGVnqIowJCow5P/2YK8/YWwuh0w6wyY2ceC+X3HwmFzt7n2+WYVgJ4+fRqzZ89WXQv8/tVXX1WFnLVZsWIFli1bhsWLF8Pr9cJisWDRokXQaM5/5G7duiE/Px+5ubm47777kJKSglmzZmHKlCmqe02dOhWyLGP9+vU4c+YM+vTpg/z8fHTp0qUenzY6bKlu/vj3iIiIiIiIiIiagiSJEOIEGPU6WF0OmPUGOL0ePPmfLXjs2wJlndXtUH4/u/cIoLKpnrhpCHJbPgCgifl8fpw5E37HeTxunD59EqmpnaDV6i7wkzWMtnAGKNA6/l41NY1GRHJyPM6erWwTe4bqjnuFYsH9QtHiXqFYcL9QtLhXKBbcLxQtrVaE2RwPq7USfj9CKj0zElPx7c3zcMnbj8LqdoS836wz4MSdj8B21tkqzgRNSYmPqq2/bTb+U4uSn78W//nPtw16z127voHFMhAHDuxr0PsSERERERERETUEQRCqKjzP/aqJl2BM1KPUUQFjoh5SvKhUegbCToNGixKHLWz4CVRVgpa5nRDFttXN2qxa4KlxtdTJXy+9tA4GgxF9+/ZrsHv26tUba9a8hG7d0hrsnkRERERERERE9VW9rT1Zb4BPlvHk3tBKz7z9har3ltht6BCXALPOELECNEkXB1ul80J9nGaBAWgbIEkiBJ0GxjgNrA4PzAYt7E4vZLe3VR1663I5odfHRbU2Pj4Bl1/et5GfiIiIiIiIiIjovHDFabUNMPp4XDa++uUYHg860zNSpecpVyU+O3kQ9/cZjj9/+2nI15/ZxwK7y92iCuMaAlvgWzlJEmFI0GPFjmJ0Wlqg/PXcjqMwJOijOiehPgoLt8NiGYjjx4+prpeXl2Ps2OH429/erfH9FstAAMDq1c/BYhkIi2Ugdu36RnnttddexurVK3Hjjdfg+usnAAC++24v5s+fi5tuuhbjxllwzz3/g08+2ai6b7gWeItlIN544xXk56/FDTdMwHXXZeGJJ5bC4QhfNk5EREREREREbVtwm3pNJElEolFGSrIOJn1l1a/xArQJGiQmx8ET50diclxIW3s7fTxGdczAqhoqPavL+WYTZl1qwcP9xiuvm3UGPNxvPOb3HQvZ2bbCT4AVoC2OLMuwe6Kv2kw0aZG79TAeKzikXLM6PHi04CAAYMbQbqh0eKK6l1Fb+z/Q1Q0dOhzt23fAxo0f4Q9/mKFcLyj4BAAwfvy1Nb5/zZqX8Ic/3Itbb70D48ZVrU1LO9+2/t57b+LSS/vioYcehs/nBQCUlJxE3779cPPNv4ZOp8d//vMtcnMfgyzLmDjx+hq/3vvvv4N+/QZg4cIlOH78GFavfg7JySn43/+dGdPnJiIiIiIiIqLWS5JEGPU+6OL08DutEOPMcDudsLsk+Hz+kKrOpEQR5d88hV8OvAVBY4AU3wnm69/Biu+219jW3tGYiFJnRUyVngfKSvFW0R7MvWwkcvplocztRJIuDnaXGw6bu1V1A0eLAWgLIssybnhjN/79U3lU69vF61C8MAt5hUfDvr6ysBgPjsnApU9/hVOV7lrvN+hiEz76zYCYQlBJkjBp0g3YuPEjTJ36v5AkCQCwceNHGDVqDBITE2t8f6BNvUOHjmFb1hMTk/DEE39RPdO4cdco/1uWZfTrNwClpT/jww//VmsAmpraDo888jgAYMiQYTh48AA+//wzBqBEREREREREbVwg1BQEwJRQFWj+vOd5+F1WiHozTP1nIGngn+D1+KHV65Rg1O9xouK/L8PXYRA6DPwTrI5yGONMePK77Xislrb2ms70zPlmE7ZNmg4RghKimnUGzOxjQXbPwXDY3LD5ndCIAmyVrWPqe10xAG1hYqm/7JioR2mFG9YIFZ5Whwe/VLjRMVEfVQBaV9dffxNefXU9vvpqJ4YNs+Dw4UM4ePAA7r9/dr3vPWTIsJBAtry8HOvXr8W//rUNp079Ap/PBwBISkqq9X5XXz1Y9fvu3dPw2Web6/2cRERERERERNQyVa/2FLRGlH+zHNav/qys8busqDz4HpKunA37tytQca7SU5Di0On2LdD2+h88u+9L5H35BDSCiKLbFkY1wKghKj19vrYbfAYwAG1BBEHAR78ZEHULvCgKuMikh9mgDRuCmg1adDLp8fffDIDfX/s/DHVpgQeATp064+qrB+Pvf/8Qw4ZZsHHjR+jU6WJceeXAmO9VXUpKSsi1J55Ygu++24t77vk90tIyEB8fjw8+eA9bthSEuYNaQoK6IlWr1cLtbrxwmIiIiIiIiIiaL0kSlfb1n/c8D4gadJlyCOV7ng9Zm2z5M2z7XlNVepoNSajw+fHM/i/x2N7PAACXJ3eMqa0955tN+HzSdAgQsIqVnnXCALSFEQQB8Top6vV2pxezLGnKmZ/BZlnSYHd6YdA0/iysG264GUuXLsIvv5SioOAfuPXWO+sUpoZS38PlcuGLLwoxY8Zc3Hrrncp1/sNPRERERERERNEIPr/TqPeh/JunlGpPberl8NlL4XdZAQCioR0kY0fIfjcMXbPg9LqUSk/1uZ5fKPePta29xGHD20V7MO/yUVjYLwtlHieStHGoZKVn1BiAtnKy24v5YzIAVJ35aXV4YDZoMcuShvljMuCocF2Q5xgxYjQSE01YunQRysvLaz2LM5hGo4HbHd1zejwe+P1+aLVa5ZrdXonCwu0xPzMRERERERERtR3hBhsJQFXl5zk+ewkkYwfoLroa+kGLkNh19PlKT7+60hMIf65nrW3tR3bhj5eHtrV7KnzwiT60NyfAaq2EN4Yh2W0dA9BWzufzw1Hhwuzh3ZGT1QNlDi+SDBrYnV44KlwXbPKXRqPBxInXYcOG1zBo0FBcdFHHqN/brVsaCgu3o1+/ATAYDOjatRuMxviwaxMSEtCnz6V4/fWXYTabIUkavP76y4iPT4DVeqahPg4RERERERERtQI1DTbSdxqKDpM2hFR7On/ejZRfb8aT/9leY6UnELnaM9DWDgDP799xvq2991BMybgCetkDARJSZBdE6GCQvbBDQqDBlY2usWn83mdqcj6fH16HGzarAxqPFzarA16H+4KFnwEjR44BAFx33Y0xve+Pf5wPv9+PefNm4fe//x0OHNhf4/pHHvkzLrmkC/785yVYseIvGD06C9deO6nOz01ERERERERELZsgCJCk87NNJElEolFGSrIOJn0lTPGyMtgoEHh6rIcgxpmhu+hqJN7wITpOOQLtTf+Ar8NVePK7Qjy29zMl1AxX6Qmoqz2DHSgrxehNq3Fr9ytw4s7FOHnrgzhx+wLMbGeEHjLKdz2L4y9cguMvdMLxFy6BY+8KJCWKEMWGOE6w7RFkHo7YZHw+P86cqQz7msfjxunTJ5Ga2glare4CP1nD0GhEeL3nQ9YXX1yDDz54Fx988A/odC3zM4XTGv5eNTWNRkRycjzOnq1U7Rmi6rhXKBbcLxQt7hWKBfcLRYt7hWLB/dJ4zre1xylt7R6XG1qthLJvllcNMzo32Oj4i2nwu6xKpafPXoL2E9+A0HFQVbXngS+UCe5d33lMFXa208eHvQ4AvZM6YNuk6Vi1b4dyrmfVEKPh+FOfQbB++Cv4PTb47CVoN24t3D//P1i/fiLks5gHL4Kx3xzojSbulXNSUuIhSbXXd7IFnhrdsWNHcezYD3jvvbdxyy23tarwk4iIiIiIiIiap+oT3P0uK0S9GR1v/Qxlh/8WMthIMnZE/IRXlHM92xtMcDjLseJctScQ+wR34Ny5nkV7MPcy9bmeHp8Hnn0vwXN2P/wuKzRJGTB0G4dTm7PDfp7yPauQNOihmL4HwQOd2nINJANQanTLlz+Bffu+w+DBQzF58r2q17xeb8T3VZWnRz/xnoiIiIiIiIjajkjhXuB6fJx6gjsAQNRAa06vqvxE1bmeYpwZkvEipNy+DcvDTnAvVN4e6wT3qkpPC7J7DobD5obN74RGFGCrdEIUBRh7/hZd+v4efmcZxDgz/K6zSgt+dX6XFX5XGURj+1q/N+EGOrmdTthd0gU/ErE5YABKjW7VqhfCXj958gRuuy3yeaD9+18Z8b1ERERERERE1DZUDzojhXtOjwZxWu+562WQ9EkoCQo6JWNHiHpz2GpPO0Q8tf/r+k9wD1PpaXe54bCdn8Xi88nKrza7AMHhhigaITtdSDabIerNYUNQUW+GqE869z2J/D0SRSFs5aup/wwkDXwQZTa0uRCUASg1mXbt2uPFF1+N+LrRaLyAT0NEREREREREzUm4oLPq/E4RZdXCPfPQJTBdfi/K/v0sft7zPKSES3DRjX8LCTrNhiR4ZCDljn9h+X+/QN6XTyjnegZXegJ1mOAeodKzttZzWZaVUNTtdMLUfwasXz2uWqNN7o32k96AAMBnL4UpMVzoW/U98nucIZWvfpdVuafxijmw2dvWMCUGoNRktFoteve+tKkfg4iIiIiIiIiamWjP7wSqwj1NYheUff0krF8/EdTW3iGkrd2sM+DzidPx/tG9dT7XMzDB/c3Rk7Go3zhYa6n0jIXdJSFp4IMAqs789Lus0F10NTrdsgllu1agpIbQN3CO6MWTdykt/tUFzhEVHO42dSYoA1AiIiIiIiIiImpWjPraz+9ULhvawdB1LKxf5yLxhg9rbGvXCCLSE1Prfa7nbd37oWdiO1SWu6CRo6v0jIbP50eZrapKM2nQQ/A7yyBoDSj7f0/XGPoGCBoDfJUlNZ8j6iyDKBrrFNC2VAxAiYiIiIiIiIioWagaiCxAF6fFz9WCTsnYET57qRLuqc71dJ5F8s0fY/m+r2psa+9oTAyp9myIcz0bks/nV84GlSQjkuK0EUPf6hPjffYSSMYONZ8jGpcEv8Pd4M/dnDEAJSIiIiIiIiKiJqU679Nlg+yqDAnwAuGe7qKroR+0SHWup0OW8cx3n9fa1n6hzvVsCLIsQ5YF+J3WkO9F9TA4wO84BcexLTD1m66qDA0w9Z8Bt9MJWeYZoERERERERERERI2mpqnlEDXoMuWQUsUYqPT02UvgPPk1Un69GU/+Z7tyrmdGYiq+vXke8vZ/odw/UtDZFOd61offL0OMC50MX1Ol59nCheh0+xZAEJVzRANT4E0DH0SZzQ+AU+CJiIiIiIiIiIgaXLjJ7uGmljuObYF5yGJ4TBlKpWd7gwkOZzlWfFeoOtfToNGixGGLuq0955tN+OL6WRfsXM/6kGU57GR4pdKz//3qc1IBeM4eQMX37yBxwGzlHFExLglupxNlNn+jtO03dwxAiYiIiIiIiIio0QSqPQUBMCWoJ7tXn1oeqPas/P5dmCa8iGf3bkHeO9UrPdXnesba1n5b937QCxLm9L5w53rWR7jJ8KLeDK/tOJKu/hMAIaTSM+Gye1Fm88Pvd0MUjfA73Ofa3pvXZ7tQGIBSs5efvxaDBg1B3779Gvzeu3Z9g+++24vf/W5Kg9+biIiIiIiIqC2rXu0paI0o/2a5qmIxMLVcMnZE/IRXlGpPQ5wJuXu34rGg6s1wlZ5A3dra7eeCTq/dd0HP9ayL6pPhZVcZBH1VRWd5hQxD0MT4cJWebWnaeyRiUz8AXThVk9RECELLOuj2pZfW4T//2dso9969+//htddeapR7ExEREREREbUlwbmDJIlIShTh2Pssjr9wCX56vT8EQVBVempTL4fsd0MyXoSU27dhlRXo/M4TuOLvqyEKUo2VntXlfLMJsy614OF+45XXQ9raHQJsZ53wVvqUcFCWZfh8/mYbfgYEJsOX2zyQjO1RbvPAZhfg8fhgsws4c9aNcpcRZ866YbMLza6KtamxArQNCHe+htvphN0l8R8IIiIiIiIiIqqXaM711KZeDp+9NKTS02wwocLnxTP7v6p1gntN53oeKCvFW0V7MPeyltHWXleBnLZ6XlsV5DbvELcpsQK0lav+X1yOr7sEx1+4BI69K5CUKEKSGncLFBZuh8UyEMePH1NdLy8vx9ixw/G3v71b4/stloEAgNWrn4PFMhAWy0Ds2vUNgKp/uDdseA133nkLxowZittuuwlvv/2G6v2lpT/j4Ycfwg03TMDYscNw2203YuXKpwFUtda/9NI6OBwO5d4zZtzXUB+diIiIiIiIqNULlzv89Go/iBqtqtpTjDOHVHp2fv8v6PvhCmg1ccjbv0O5Z10qPR/uNx7ZPQfDV+mH7awzbLUntV2sAG1hZFkGfPao1xsNepR/s1J1vobfZVUmhxn7zkS5zRXdzSRjzO3zQ4cOR/v2HbBx40f4wx9mKNcLCj4BAIwff22N71+z5iX84Q/34tZb78C4cVVr09LSAADPPfcUPv74//C7303BpZdeju++24u//jUPer0eN998KwDg8ccfwalTv2DOnHlITk7Bzz+X4Pvv9wMAbrjhZvzySykKCj7Bc8+tAQDEx8fH9PmIiIiIiIiI2prAUCO/X4ZR71MqPQNt7aLeHPZcTztEPLX/63pNcI+20pPVkBSMAWgLIssyHJsnwX/q66jWi4Z2aDflEH4+919cqivfswpJAx+A443L4Hecqv1+7QfDMH5jTCGoJEmYNOkGbNz4EaZO/V9IkgQA2LjxI4waNQaJiYk1vv/yy/sCADp06Kj8bwD46acf8f7772DevBzcdNMtAICrrx4Mp9OJl15ahxtvvAWiKGL//v9i2rT7kZU1QXnvxInXn7vnRWjfvgNEUVTdm4iIiIiIiIhChWt1FwCcProZiTd8GNTWngSPDKTc8S8s/+8XyPvyCWgEEUW3Laz3BPeZfSzI7jkYDpsbNr+z2Q8wouaBLfAtTSzho7EjfPZS+F3WsK/7XVb4HL9AMnZsoIcL7/rrb8Lp06fw1Vc7AQCHDx/CwYMHcP31N9X5nv/+91cAgNGjx8Lr9Sp/DRw4CKdPn0Zp6c8AgMzM3njzzdfxwQfv4ccfj9f/wxARERERERG1QcGt7j+92g8lf7sWpR/fCp/zLJJv/ljV1t75nT/jSMVZLN/3JR7b+xmsbgc6GhNrPdczWGCC+63d++HknY/g5J1LcOLORzC79wil0rOlDDCipscK0BZEEAQYxm+MugVeEAVI8WaIenPYEFTUmyEZO0E/4R/Q+aP4YVGHFngA6NSpM66+ejD+/vcPMWyYBRs3foROnS7GlVcOjPleAWVlVsiyjOuuGxf29Z9//hkdO3bC0qXL8MILz+OFF1bj6adz0bVrN0ybdj9GjRpb569NRERERERE1FYE2t3j43yo+O/L8HUYhA4D/6RUetplGc9897mqrV0jiEhPTFVVe0aq9ASqqj23TZoOEQLy9hcq1Z6qCe4yKz2p7hiAtjCCIACa6M+pdLtcMPWfoZz5GczUfwbcLhcgGiE0ci3wDTfcjKVLF507c/MfuPXWO+sUpgaYTEkQBAGrV78IrVYb8nrXrt0AAO3atcOCBY/A7/fj++/345VX8rF4cQ42bHgfF198SZ2/PhEREREREVFrpmp3d1VA0ifA1+t/8Oy+L5H35ROwuh3ISEzFtzfPQ97+L1TvDVftyQnu1JQYgLZydpeEpIEPAqg689PvskLUm2HqPwOmgQ+izOYH0Pg/SEaMGI3ERBOWLl2E8vJy5RzOaGg0Grjd6kFNV111NQCgrKwMFsvIWu8hiiL69LkMU6dOR2Hhdvz004+4+OJLoNVq4Xa7Y/swRERERERERK1M8GAjURSQlCihouw4BF13WH0+xPvlqAYYATzXk5ofBqCtnM/nR5kNMF4xB0mDHoLfWQYxLglupxNlNv8F+68oGo0GEydehw0bXsOgQUNx0UXRnzvarVsaCgu3o1+/ATAYDOjatRu6du2GW265DY8/vhh33TUZl156ObxeL44fP4bdu7/BsmVPo6KiAn/84wxcc80kdO3aDV6vB++99w4SEhKRmdlbubfP58M777yJvn2vQHx8PLp27d5I3wUiIiIiIiKi5kVd6WmDqE+E3+uGW/bh2R+KkXfgjZgHGEWq9gyc6/nm6MlY1G8crJzgThcIA9A2wOfzw2YXIDjcEEUj/A43ZFnAhaj8DDZy5Bhs2PAarrvuxpje98c/zsdzzz2FefNmweVyYeXKNbjyyoGYM+dBdO3aDR9++De8/PKLMBiM6Nq1G8aMyQIA6HQ6ZGT0wPvvv42ffy6BXh+H3r374NlnV8FsNgMAhg8fgV/96ja8/vrLOHv2DPr1G4BVq15o6I9ORERERERE1GwEqj0FATAlBFV6er0wa/1wCZLqXM/LkzvWOsCoelt7zjeb8MX1s3iuJzULDEDbkKrpaE33Q+XLL79AUlISRowYFdP7+vXrj/XrXw+5LggCfv3rO/DrX98R9n06nQ7z5y+q8d4ajQYPPDAfDzwwP6ZnIiIiIiIiImppqld7Cpo4VaVnpHM96zrASC9ImNOb53pS02MASo3u2LGjOHbsB7z33tu45ZbboNPpmvqRiIiIiIiIiFq9ms/19CJeq8FT//m81nM96zrAyH4u6PTafTzXk5pUswpAf/jhB+Tn5+Pbb7/FoUOHkJ6ejr///e/K6xUVFXjppZewbds2HD16FDqdDldccQXmzp2LXr16qe5ls9mwbNkyfPrpp/B4PBgxYgQWLVqEDh06qNbt2rULTz75JPbv34/U1FTcddddmDp1qmpCuSzLWLduHTZs2IAzZ86gT58+yMnJQf/+/Rv1+9FaLF/+BPbt+w6DBw/F5Mn3ql7zer0R3ycIAiRJauzHIyIiIiIiImpVGuNcz/oMMGrqjlSiZhWAHjp0CNu2bUO/fv3g9/tD/qvAiRMn8Pbbb+PXv/415syZA5fLhfXr1+OOO+7A+++/j4yMDGXtnDlzcPjwYSxZsgR6vR4rVqzA1KlT8f7770OjqfrYP/zwA7KzszF8+HDMmTMH33//PZ566ilIkoTs7GzlXuvWrcPKlSsxb9489OrVC2+88QamTJmCDz/8EF26dLkw35wWLNKZmidPnsBtt0U+D7R//yt5HicRERERERFRDCRJVFd6NtC5nhxgRC1ZswpAx44di3HjxgEAHnroIXz33Xeq1y+55BIUFBTAYDAo14YMGYKxY8diw4YNePjhhwEAu3fvRmFhIfLz82GxWAAAaWlpmDRpEjZv3oxJkyYBAPLz85GcnIxnnnkGOp0OQ4cOxZkzZ7BmzRpMnjwZOp0OLpcLa9euxZQpU3DPPfcAAK666ipce+21yM/Px5IlSxr5u9J6tWvXHi+++GrE141G4wV8GiIiIiIiIqKWSxQFaDQiDDq5Uc/17GniACNqeZpVACqKYo2vhwvE4uPj0bVrV5SWlirXtm/fDpPJhOHDhyvX0tPT0adPH2zfvl0JQLdv347x48erzqScNGkS1q5di927d2Pw4MHYtWsXKioqMHHiRGWNTqfD+PHjUVBQUOfPSoBWq0Xv3pc29WMQERERERERtVharQQpDtBrdbC6HZA0ukY919NRzgFG1PI0qwC0LsrLy3Ho0CEMGzZMuVZUVIS0tDTVOZ5AVQhaVFQEALDb7Th58iTS09ND1giCgKKiIgwePFhZX31dRkYGXnnlFTidTsTFxdX5+TWa8KGv3y+Evd5SBL71ggC0lf8YJElCxL+fVDNJElW/EkXCvUKx4H6haHGvUCy4Xyha3CsUi1j2iyBUzcyQZRmSJEJr0CB372fI27+jYc/1zBwMd6UHDtkFvSDC4XIBctXX5599mw5/ttRNiw9A//KXv0AQBNx1113KtfLyciQmJoasTUpKUtrqbTYbAMBkMqnW6HQ6GAwGlJWVKffS6XTQ6/WqdSaTCbIso6ysrM4BqCgKSE6OD/ua0ynh1CmxxYdqbeEfSL9fgCiKSEoy1isMJ8BkMtS+iAjcKxQb7heKFvcKxYL7haLFvUKxqG2/VHrc0IoirG4nzLo4OH0e5O79DI+dq+Cs77meZR4nkrRx8Pj9MGi0MCRpG/5DUoPgz5bYtOgA9P3338c777yD3NxcdOzYsakfJ2Z+v4zycnvY19xuF/x+P3w+GV5vyystF4Sq8NPn87f6ClCfT4bf70dZmR0Oh6+pH6dFkiQRJpMB5eUOtlJQjbhXKBbcLxQt7hWKBfcLRYt7hWIRbr8EV3oKggBdvBZ/+c9W5UzO8+d67lDuU59zPZ2VbuhkEXanC7IMuOG5oN8Dig5/tqiZTIaoiu9abAC6bds2LF68GNOnT8evfvUr1WsmkwklJSUh7ykrK0NSUhIAKBWigUrQALfbDYfDoawzmUxwu91wuVyqKtDy8nIIgqCsq6tI4WZLn5oWCD1be/gZrKWG1c2Jz+fn95Ciwr1CseB+oWhxr1AsuF8oWtwrFItAEZEQJ8Cg18HqcsCsN8Dp9eDJvVvw2LfnZ5HwXM+2jT9bYtMiA9A9e/Zg9uzZuPnmmzF79uyQ19PT07Fz507lv5IEFBcXIzMzE0DVQKVOnTopZ3wGr5FlWTnzM/BrcXExevfurawrKipC586d2fJMRERERERERPUSiC4kSYDWoMWT/9kSptKzYc71dJS7YfM7oRE5xZ3ajhZ3QOPhw4cxbdo0DBkyBEuXLg27ZuTIkSgrK8POnTuVa8XFxdi3bx9GjhypWvfZZ5/B4zlf1r1p0yaYTCYMGDAAAHDllVciISEB//jHP5Q1Ho8HmzdvVt2LiIiIiIiIiCgWkiRCEy/BmKhHqaMCokHCk/+pqvQMhJrhKj0BdbVnsMC5nrd2vwIn73wEJ+9cghN3PoLZvUcolZ6yLCu/ErUFzaoC1OFwYNu2bQCAn376CRUVFfjkk08AAIMGDYIsy8jOzoZer8fdd9+tDDQCgISEBPTo0QMAMGDAAFgsFixYsADz58+HXq/Hs88+i169emHChAnKe7Kzs/Hxxx/jgQcewF133YWDBw8iPz8fc+fOhU6nAwDo9XpMmzYNeXl5SElJQWZmJt58801YrVZkZ2dfqG8NNYFDh77Hvff+BitXrsGVVw5s6schIiIiIiKiFk4QBIiiAL9fhigKMCTqlGrPWCe4AzWd63kFMk3t4ahwQ+NjpSdRswpAT58+HdLSHvj9q6++CgDK2Z733HOPat2gQYPw2muvKb9fsWIFli1bhsWLF8Pr9cJisWDRokXQaM5/5G7duiE/Px+5ubm47777kJKSglmzZmHKlCmqe0+dOhWyLGP9+vU4c+YM+vTpg/z8fHTp0qXBPvuFEPyDlj/4iIiIiIiIiC4MSRIhxAkw6nVKSOn0epRqTyD2Ce5A4FzPXfjj5SOxoF8WytwOJOkMcHnccFZ44PFwUC8R0MwC0EsuuQTff/99jWtqez0gMTERTzzxBJ544oka11155ZV45513alwjCAKmTZuGadOmRfW1mxvVD9pzByjbXW7ITrlVH3Lscjmh1/OMViIiIiIiIrrwRFGARiNClmXoE3R4cm/N53rWpdJzZu+hmJJ2OXReByrsfugECTa7E34/i56IgrW4M0ApNpIkwpCow3MH/oXOby1F57eXovNbS/HcgX/BkKiDJDXuFigs3A6LZSCOHz+mul5eXo6xY4fjb397t9Z7WCwD8dprL2P16udw/fXjMH78SPz5z0tgt1cqa3bt+gYWy0B88UUhFi36EyZMGIWHH34IAGCz2fDUU7m46aZrMGbMUEyZ8lt8/fWXIV/n5ZdfxI03XoPx40dgwYIHcfbs2Xp+eiIiIiIiImprtFoJcYkSEs1xcOl80MRrlAnuNZ3rGelMT+BcpeeRXZjToy9O3L4AJ299ECduX4AZSTLO/t8NEPUJ8HpluN0+hp9EYTSrClCqnSzLsPs8tS88J9EYpyqpBwCr26H8fkYvCyrL3VHdyyhpIQRG00Vp6NDhaN++AzZu/Ah/+MMM5XpBQdXZruPHXxvVfd5//21kZvbGwoVLcOLECaxZswputwtLly5TrVu+/M+YMGEinnjiVoiiCI/Hg7lz78eZM6cxdep0tG/fAZs3b8KDD87G+vVvICOjh3L/F19cg7vumoyBAwfh3//+Crm5j8X0WYmIiIiIiKjtCT5uTqMREZegRe7ez5C3f0fM53rWOMG951U4/eZQlDlPQTJ2RIm9BH7HKZgHL4Lb6YQsx/bndaK2hAFoCyLLMm784iX8++zxqNa308eH/UEbkLe/EA/2HYPLti7HKVdl2DXBBiV3wYfD7o0pBJUkCZMm3YCNGz/C1Kn/C0mSAAAbN36EUaPGIDExMar7aLU6LFv2lPJ+vV6PJ598HFOmTEO3bt2VdRbLSEyfPkv5/caNH+HQoe/x8stvIi0tHQAwePBQHD9+HC+//CIeeywXPp8Pr732Mq65ZhLuv3+2subs2TP45z83Rf1ZiYiIiIiIqO2QJBGCTgNjnAZWhwdmgxZunwu5ez/DY+fO6oz1XM/ABPc3R/8WC/tlocx+BklxJlSc+Ao6vxvxmbeifM8qeE5/B1FvhnnwIpgGPogymx9A6z3ijqi+2ALfwsTy33M6GhPD/qANsLod+MVZgY7G6ELIurr++ptw+vQpfPXVTgDA4cOHcPDgAVx//U1R32P48BFK+AkAY8ZkQZZl7Nv3nWrd0KEW1e+//vpLZGT0QJcuXeH1epW/rr56MA4c2AcA+OWXUpw69QtGjhyjeu+YMVkxfU4iIiIiIiJqWQRBqAoyoyz0CazXaEQYEvRYsaMYnZYWoNPSAoxctQN6jQ55+3co64MrPavL+WYTZl1qwcP9xiuvBya49zDE4fS74+D+v4koWZ+B8g8moOLAm0gcMBtd7vsRXab+hC73/QjDFXNQZvO36vkeRA2BFaAtiCAI+HDYvVG3wIuigIuMprAHKANVP1g7GU34eOiUqM4IqUsLPAB06tQZV189GH//+4cYNsyCjRs/QqdOF+PKKwdGfY/k5GTV7+PjE6DT6XH69CnV9ZSUFNXvy8qsOHjwe4wePSTknoFA9dSpU2G/RnJyatTPR0RERERERM1DcEu6LIf/s2646k270wvZ7YXP5w+5R/X1Rp2E3K1H8FjBIeWel5ir/uwd6VzPsBPcz53rmdN3JMqc5UiKS4LH74Vn38vwnN0Pv8uqVHomXHYvymx++P1uiKIRfof7XNs7w0+i2jAAbWEEQUC8Rhf1ervLjZl9LKozQANm9rHA7nLDIGobvRb4hhtuxtKli/DLL6UoKPgHbr31zpjC1OoDiSorK+B2u5Ca2k51vfo9TaYkZGT0RE7OwxHv3a5du7Bf4+zZ01E/HxERERERETWtaENNQQDi4vXI3XoYeYVHlbWzLGl4aEwGnB4fDPrz93C6fNBpJeRuPYS8wqPQiAKKF2Yhr7BY9fX3/WyDWWeo97meUlw7tJ/0Brre9yN8zjKIcUlwO52qSk+fj4OOiGLBALSVk50y5vcdC6DqzM/gH7Tz+46FwxbdAKT6GjFiNBITTVi6dBHKy8sxceL1Mb1/x45/YebMuUrV5tatn0EQBPTpc1mN7xs4cBB27tyBdu3ao1279mHXtG/fAamp7bB9+1aMGnW+DX7r1s9iekYiIiIiIiJqHJGqOusSaoar3rQ6PHjn2xOYOyodzxYeRV5hsXKPbdOH4b29J5X1l3dMRGmFG1aHujvz+18q8bOtEjP7DFfOAAUineuZBKftR+hFKeRcz/jMW6Ex94C13ANZZqUnUUNgANrK+Xx+OGxuzO49Ajn9slDmdiJJFwe7yw2HzX3BzgnRaDSYOPE6bNjwGgYNGoqLLuoY0/s9HjdycubhV7+6FSdPnsBf/5qH0aOz0L17Wo3vu/ba6/Dhh3/DjBnTcNddv0WXLl1RUVGBQ4e+h8fjwR/+MAOSJOG3v70Hzz33FFJSUnH11YPx9ddfYvfu/1efj0xERERERET1FKmqU/D6IGukGlvSw4Wakao3AWDZpD54+vMjePzT8/fQiALSU42q9SU2Fzok6GA2aENC0P997wDevadqnkReULXnbd2vQKapPbSQkSJpIYoiZH0HlFf4YbhiDpIGPQTZVQZBH1rtSUT1xwC0DfD5/EAlYLM7oREF2CqdEc9BaUwjR47Bhg2v4brrboz5vb/+9R2wWs/isccWw+PxYOTI0fjjH/9U6/t0Oh1Wrvwr1q9/Aa++uh6nT59CUpIZmZm98Ktf3aasu/XWO1BRYcPf/vYuPvjgXQwcOAjz5y/CAw/MjPlZiYiIiIiIqP4kqWrQUPWqzkev6YXswV2Ru+VwzKFmpOrNdvE6jMtsh3vf3qO63jFRH7L+VKUbnx48hRnDu6vCUgDYdKAUf/v2Zzxw+Ugs6DcOZW4HknQGuDxuOCs8qPD6IYp6+O2Bqk4fPB4BWo8HZnN7WK2V8HhY7UnU0BiAtiGyLDfpOSFffvkFkpKSMGLEqJjfK0kSZs6ci5kz54Z9/corB6Kw8Juwr8XHJ2DmzD9i5sw/Rry/IAi4996puPfeqarrke5JREREREREjUvQaZC79XBIVWcXswHLthzC47W0pIcLNSNVb4YLOmtan7NpP7ZNHwYAWLVD3XJ/S9+OcFS44Jad0Gkk2OxO1eDhcH8uD9QoNUGtElGbwACUGt2xY0dx7NgPeO+9t3HLLbdBp4t+iBMRERERERG1PYIgwBinQV7hUdX1+oaakao3I90j0voDpRUYtfoLvDX5Kiwcl4kyhwdJBg3sTi8cFS6lfd3t9jXUt4SI6qGRZ38TAcuXP4HFi3MwYMCVmDz5XtVrXq834l8+H/+PgoiIiIiIqC0SRQFWhyekIrO2UDNYcKgZLGfTfsy0pGHRuJ7Ka16/jKLTdsyyhM6ZyNm0Hw+MzsDi8ZnKerNBi9v7dUbPVCPsNgc0Hi9sVge8jgs3a4OIoscKUGp0q1a9EPb6yZMncNttkc8D7d//Sqxa9QLb0ImIiIiIiNqIwFR3WZZhNmhDKjJjaUkPDjUfLTiorI1Uvel0+TB/TA8AwMqgKfC39+sMvShgjqU7crJ6oMzhDVvtSUTNFwNQajLt2rXHiy++GvF1o9F4AZ+GiIiIiIiILqRA2On3yxBFIWTa+6lKd0jreaDac6alu+ps0LqEmkr1pgzYXJ6quRmSiNnDQ4NO+7mg0+vwVA0XPreeiFoGBqDUZLRaLXr3vrSpH4OIiIiIiIguIEkSVWFnskELnwxlqnsgpFwyIRM5WT0hCFBNgT9udeChsT0hQKhXqBmuetPn8wMON2zO8EFnUw8XJqK6YQDazPG/KDV//HtEREREREQUHUkSYUjQI3frYSXU/Dh7EL46dlaZ6g5UTXuf8+F/IQjAH0ekY0FWT1V46ap0NVioGQ6DTqLWhUOQmilRlAAAfj8HATV3gb9Hgb9nREREREREFJ6g0yB362E8VnAIVocH7eJ1GJ2RilXVpr0HPPLPg4jTSagoUw8a8nh88DrcsFmjH0BUFWr6WcRC1AYxAG2mRFGEKEpwOu1N/ShUC6fTDlGUIIr8x4mIiIiIiKg6QRAgSSJEUYAxToO8oLAz3FT3YFaHB2UOLwRBCBteMtQkomiwBb6ZEgQBCQlmlJefRkWFFjpdHARBaOrHionfL7TqlgFZluF2O+F0VsJkSm1xf3+IiIiIiIgaU7izPs86vFFNdQ8wG7RIMmhgc4UPSImIosEAtBkzGOLh8bhQUVEGwNrUjxMzURTh90duP2gdBBgMCTAY4pv6QYiIiIiIiJqNcGd9ZqQasXfeaFXYGZjqXn3ae8AsSxrsTi8rPImoXhiANmOCICApKRWJiWb4fC3rLFBJEpCUZERZmb1VV4FKksSzP4mIiIiIiKoJPusz4MhpOzZ//wtmWrqrruds2o9t04eFTHufZUnD/DEZcFS4muIjEFErwgC0Bag6X7JlhWwajYi4uDg4HD54va29CpSIiIiIiIiAwHmfAoxxWtVZnwE5m/Zj+/RhECBgZWExrA4PSmwuvL3nBOaNysCCrEyUOTw1TnUnIooVA1AiIiIiIiIiqpfg8z4rXF6UOT1hz/Q8UFqBkau/wI4Zw5GT1VMVdnrsbrj8MjSiAJvLw7Z3ImowDECJiIiIiIiIKCaCIEAUBfj9MkRRUJ33qREFFC/MijjYqMTmglEnoaLMDo0QGna25mPUiKhpiE39AERERERERETUMkiSCI1Bh0SzAR6thESzAZJBi9ytR/BYwSFYHR7VYKNwAoON/H4ZPp+flZ5E1OhYAUpEREREREREEQWqPQUBiIsPP9k9r7BY9Z7AYCMAWLWDg42IqGkxACUiIiIiIiKiEMHnelodHhh1klLpGWDQSiixuUJa3Q+UVmDU6i/wxKQ+OPnIBA42IqImxRZ4IiIiIiIiIlKRJBGGBD1W7ChGp6UF6PvUNoiCEFLpWWJzoUOCDmaDNuQeB0orMOXtPQBkaL0+2KwOeB1uhp9EdMExACUiIiIiIiIiFUGnQe7Ww8q5nh0T9SitcIdUekZ73qfX6+NZn0TUZBiAEhEREREREZFCEAQY4zTIKzyqXKup0jNn037MsqRh8fhM5XWzQYvF4zMxf0wGZLf3Qj06EVFYPAOUiIiIiIiIiIKGHQmwOjyqas/gSs/HPz2ket+B0gq8tecE5o5IQ05WD5Q5vDzvk4iaFQagRERERERERG1Y9WFHZoMWRlRVcQaHoDVNds8e1AWOChdsdjc0ogCby8OWdyJqNhiAEhEREREREbVRgWFHuVsPI6/wfKj5+fRhmGVJw6MFB5W1gcnub02+CgvHZUac7O7zMfgkouaFASgRERERERFRGxJodff7ZdWwowCrw4M7X/t/2DnLAgBYWVisBKO39+uMnqlG2G0OaGSw0pOIWgQGoERERERERERtQLhWd0BQDTsKOFBagWte+BIF04YgJ6tnxGpPIqKWgAEoERERERERUSsXrtV9aPdkvP3bq1TnfAb7+pgVDrcfXq+b1Z5E1KKJTf0ARERERERERNQ4BEGAJIkQg1rdA4HnoV8qkWzUnqsEDWU2aJFk0MDnk+Hz+Rl+ElGLxQCUiIiIiIiIqJWRJBEagw6JZgO8OgnGOG1Iq/upSjc+PXgKM4Z3D3uPWZY02J1eBp9E1OI1qwD0hx9+wOLFi3HTTTfh0ksvxfXXXx923bvvvotrrrkGffv2xY033oitW7eGrLHZbFiwYAEGDRqEAQMGYNasWSgtLQ1Zt2vXLtxxxx244oorMGbMGLzwwgshP9xlWcYLL7yA0aNH44orrsAdd9yBPXv2NMhnJiIiIiIiIqqvQKVn4FdDgh4rdhSj09ICjFvzJU6UO8O2uuds2o+ZljQsHp+pVIKaDVosHp+J+WMyILu9F/qjEBE1uGYVgB46dAjbtm1Dt27dkJGREXbNxo0b8fDDD2PixIlYt24d+vfvjxkzZoQEknPmzMGOHTuwZMkSPPXUUyguLsbUqVPh9Z7/4f3DDz8gOzsb7du3x9q1a3H33Xdj5cqVWL9+vepe69atw8qVK3HPPfdg7dq1aN++PaZMmYLjx483+PeAiIiIiIiIKFrBlZ4erYREswGSQYvcrUeUdvcSmwvtE3RhW90PlFbghvVf44HR6Tj5yASUPDIBJx8Zj9nDu3PYERG1Gs1qCNLYsWMxbtw4AMBDDz2E7777LmTNypUrcd1112HOnDkAgCFDhuDgwYN4/vnnsW7dOgDA7t27UVhYiPz8fFgsFgBAWloaJk2ahM2bN2PSpEkAgPz8fCQnJ+OZZ56BTqfD0KFDcebMGaxZswaTJ0+GTqeDy+XC2rVrMWXKFNxzzz0AgKuuugrXXnst8vPzsWTJksb9phAREREREREFEQQBoihAEIC4ePVgo4xUI/bOG428wmJlfXCr++OfHgq537W9OsDr8cNlc0EjChx2REStTrOqABXFmh/n+PHjOHr0KCZOnKi6PmnSJOzcuRNutxsAsH37dphMJgwfPlxZk56ejj59+mD79u3Kte3btyMrKws6nU51r/LycuzevRtAVYt8RUWF6mvqdDqMHz9edS8iIiIiIiKixlS92lNj1KkqPQHAoJVQYnOFtLsHWt0XjesZsdVdljnsiIhap2ZVAVqboqIiAFXVnMEyMjLg8Xhw/PhxZGRkoKioCGlpaRAEQbUuPT1duYfdbsfJkyeRnp4eskYQBBQVFWHw4MHK+urrMjIy8Morr8DpdCIuLq7On0mjaVYZdIORJFH1K1FNuF8oWtwrFAvuF4oW9wrFgvuFotXQe0UUBegMgWrPYmhEAcULs1SVngBQYnOhw7l29+AQ9EBpBUat/gLLr78UJeMyYXV4kGTQwuHywO1wQxBa759PWwL+bKFoca/UTYsKQMvKygAAJpNJdT3w+8Dr5eXlSExMDHl/UlKS0lZvs9nC3kun08FgMKjupdPpoNfrQ76mLMsoKyurcwAqigKSk+Pr9N6WwmQyNPUjUAvC/ULR4l6hWHC/ULS4VygW3C8UrYbaK5VuL3K3HsZjBQcBAJd3TERphTuk0rOmdvcDpRX45rgVY3qkokNi1Z9x9Rr1n3WpafFnC0WLeyU2LSoAbW38fhnl5famfoxGIUkiTCYDyssdPDSbasX9QtHiXqFYcL9QtLhXKBbcLxSt+uwVURQAjYQ4vRZWhxtmgw5aUVRVe0aq9ASq2t23Tx8GURCwsrAYVocHZoMWsyxpmD+mB9wOF85Wuhrkc1LD4M8Wihb3iprJZIiqGrZFBaBJSUkAqqo327dvr1wvLy9XvW4ymVBSUhLy/rKyMmVNoEI0UAka4Ha74XA4VPdyu91wuVyqKtDy8nIIgqCsqyuvt3VvVp/P3+o/IzUc7heKFvcKxYL7haLFvUKx4H6haMW6VyRJhM6gUw02Gto9GW//9ipV0Flbpedbe05g7og05GT1QJnDiySDBnanF44KJ0OTZow/Wyha3CuxaVEHBgTO4QycyxlQVFQErVaLLl26KOuKi4tDDm4uLi5W7mE0GtGpU6eQewXeF1gX+LW4WH2uSlFRETp37lyv8z+JiIiIiIiIggk6zblW9/ODjQ79Uolko1YZXhRQ02Cj7EFd4HN6YLM6oPF4YbM64HW4GX4SUZvUogLQLl26oHv37vjkk09U1zdt2oShQ4cq09xHjhyJsrIy7Ny5U1lTXFyMffv2YeTIkcq1kSNH4rPPPoPH41Hdy2QyYcCAAQCAK6+8EgkJCfjHP/6hrPF4PNi8ebPqXkRERERERER1IQgCJEmEKAowxmmQV3hU9XpwtWewwGCjW/t1xslHJqDkkQk4+ch4zB7eHY4KlzLRnZPdiaita1Yt8A6HA9u2bQMA/PTTT6ioqFDCzkGDBiElJQUzZ87EvHnz0LVrVwwePBibNm3C3r178frrryv3GTBgACwWCxYsWID58+dDr9fj2WefRa9evTBhwgRlXXZ2Nj7++GM88MADuOuuu3Dw4EHk5+dj7ty5Spiq1+sxbdo05OXlISUlBZmZmXjzzTdhtVqRnZ19Ab87RERERERE1JpIkghBp4ExTgOrw4NkgxZnHd6QMz2BqmrPbWHO9by9X2f0TDXCbnNAIwM2l4dhJxFRNYLcjH4y/vjjj8jKygr72quvvorBgwcDAN59912sW7cOJ06cQFpaGv74xz9izJgxqvU2mw3Lli1DQUEBvF4vLBYLFi1ahIsuuki1bteuXcjNzcX+/fuRkpKC3/zmN5g6dSoEQVDWyLKMF154ARs2bMCZM2fQp08f5OTkKFWideXz+XHmTGW97tFcaTQikpPjcfZsJc+koFpxv1C0uFcoFtwvFC3uFYoF9wtFK9JeEQQBoihAEIC4eL3qrM+MVCP2zhuNix8tCBuCDupqRsG0IdBJEsocHuVcT9ntZWt7C8efLRQt7hW1lJT4qIYgNasAtK1hAEpUhfuFosW9QrHgfqFoca9QLLhfKFrV90r1ak+jTsJTnxfhsYKDqvd9cM/V2PWTFY8VHAq55+LxmZg9vDt8Tg9EUYDfL7Pas5XgzxaKFveKWrQBaLNqgSciIiIiIiJqDQJNhYJQ1epuSDhf7akRBRQvzEJeYXHI+3I27cf26cMgQN3qPsuShvljMuCocJ0715PBJxFRtBiAEhERERERETWQQKWnIU6L0goXkhIMcHq8yN16RKnqvLxjIkor3GHb3A+UVmDk6i+wY8Zw5GT1VLW6BwYbERFRbBiAEhEREREREdVDNOd6Bld7lthc6JCgg9mgDRuClthcMOokVJTZoREEDjYiIqqn2pvkiYiIiIiIiCiEJInQGHRINBvg0UrQGHVKpWcg2DRoJZTYXKqg81SlG58ePIUZw7uHve8sSxrsTi/8fhk+n5/hJxFRPTEAJSIiIiIiIopR4FzPFTuK0WlpAfo+tQ2iIISc6xlc7RksZ9N+zLSkYdG4nsprZoMWi8dnYv6YDMhu7wX7LERErR1b4ImIiIiIiIhiJOg0yN16uNZzPYOrPR//9Pxk9wOlFRi1+gu8NfkqLByXybM+iYgaEQNQIiIiIiIioigEzvqUZRnGOA3yCo8qr9V0rmdgsrsoqCe7396vM3qmGmG3OaCRwbM+iYgaCQNQIiIiIiIiohoEJrsb4zSwOjxINmhx1uGNeK5ncKUnUFXt+daeE5g7Ig05WT1Q5vCy2pOI6AJiAEpERERERERUTTST3atXe+Zs2o9t04cBAFbtOKpUes6ypCF7UBc4Klyw2d3QiJzsTkR0ITEAJSIiIiIiIjqnerWnUScpk90Djpy2Y/P3v2CmpbvqevC5novGZaLM6YEpTgu706Oq9PT5GHwSEV1IDRKAyrKMM2fOAABSUlIgCEJD3JaIiIiIiIioUQUqPf1+GaIowJBwvtpTIwooXpgVMtkdOH+up4Dw53o67U60NxlhtVbC62GLOxFRU6pXAHr48GGsXLkS//rXv+B0OgEAcXFxGDFiBGbMmIHMzMwGeUgiIiIiIiKihlS90tNs0MLp8amqPSNNdgeqqj1Hrv4CO2YMR05Wz5Ap7oG6IHa5ExE1vToHoN988w2mTp0Kv9+PrKwsdO/eHQBQXFyMLVu2YPv27XjxxRcxcODAhnpWIiIiIiIioloFV3UGztmsqdIz+FzP4GrPmia7B1436iRUlNmhEdTnemo04oX5sEREVKs6B6BPPPEEUlJS8Prrr6NTp06q106ePInf/OY3WLZsGd5///16PyQRERERERFRddWDzrBVnS4fZMgw6CNXegKAQSuhxOaKerI7AMyypMHu9MLvlwGw1JOIqLmq83+SOnz4MP7nf/4nJPwEgE6dOuGuu+7C4cOH6/VwRERERERERNVJkgiNQYdEswEerYREswE6ox6GhDis2FGMTksL0GlpAUat/gKQBDxbeFS5dvlfPodOI4Wc6xlc7RksZ9N+zLSkYdG4nsprZoMWi8dnYv6YDMhu7wX73EREVDd1rgDt3Lkz3G53xNc9Hg86duxY19sTERERERERKQLVnoIAxMWr29fNBi22TR+G9/aeVFV15oztiac/P6Kq3gxX6QlErvYMnuy+cFxmyFmfgcnuRETUfNW5AvT+++/Ha6+9hv3794e8tm/fPrz++uuYOXNmvR6OiIiIiIiI2rbq1Z4ao05pXw+EmBpRQHqqUVXV2S5eh3GZ7bBqx1HV/SJVegJV1Z6zLGlYPD5TVe0ZmOxutzmg8XhhszrgdbgZfhIRtRB1rgD99ttvkZqailtuuQUDBgxAt27dAABHjx7Fnj170LNnT+zZswd79uxRvW/RokX1emAiIiIiIiJqvWoaVqQRBRQvzAppX++YqA+Z1h7uGlDzuZ4HSivw1p4TmDsiDTlZPVDm8LLak4ioFahzAPr6668r/3vXrl3YtWuX6vWDBw/i4MGDqmuCIDAAJSIiIiIiohBhBxhVG1Z0ecfEsKFmuGntNU1wz9m0H9unD4MoCFhZWKx8vVmWNGQP6gJHhQs2uxsaUT3ZnYiIWqY6B6AHDhxoyOcgIiIiIiKiNkqSRFWlp9XhQUaqEXvnjVZVe0YKNcNVdTZEpafPx+CTiKg1qPMZoERERERERET1IQgCJEmEqNMgd+th1bme4YYVBYea1eVs2o8HRmeozu9ctuVQyLXABPfsQV3gc3pgs/JcTyKi1q7OFaBEREREREREdRHc7l7m9MAcp0Ve4VHVmkjVnjmb9mPb9GEAgFU7zk+Bv71fZ+hFAXMs3VVVnU6XL+QaKz2JiNqWqAPQ3r17QxCEmL9AuCnxRERERERE1HbUNNjokqQ4fDRlUNTDig6UVmDU6i/w1uSrsHBcJsocHiXUtJ8LNb0OT8j5neGuERFR2xB1AHr//feHBKAFBQU4fPgwLBYL0tLSAABFRUXYsWMHevbsiXHjxjXs0xIREREREVGLEc1gI40ooH2Mw4pu79cZPVONsNsc0MgICTVlWQ6p6gx3jYiI2oaoA9CZM2eqfv/222/j9OnT+Pjjj5Genq567ciRI7j77rvRoUOHhnlKIiIiIiIialGiHWzUEMOKiIiIalLnIUj5+fn47W9/GxJ+AkBGRgZ+85vf4MUXX6zXwxEREREREVHLERhqJAgChCgHGwFVlZ4zLWlYNK4nhxUREVGDq/MQpJKSEmg0kd+u0WhQUlJS19sTERERERFRM1b9XM/qre6AEPVgo8C5nsuvvxQl4zJhDTrXk8OKiIiovupcAdqzZ09s2LABP//8c8hrJSUlePPNN5GZmVmvhyMiIiIiIqKmFVzVCVS1tmsMOiSaDfBoJZjMBhgS47Bix1F0WlqATksLMHbNTvxSEVrpGdzuXt2B0gp8c9wKu9PDSk8iImpQda4AzcnJwe9//3tcc801GDduHLp16wYAOHr0KD777DPIsozly5c32IMSERERERFR46qtqtPp8kGnlZC79ZByrufH2YPw1bGzeLzg/Pmdh36pRLJRG9Ngo1mWNMwfk8GzPYmIqMHVOQAdOHAg3nnnHTz33HP49NNP4XQ6AQBxcXGwWCyYOXMmevXq1WAPSkRERERERI2j+rT2ZIMWPhnI3XIYeUEh5bbpw/De3pPKBPd28TqMzkjF5A27VffjYCMiImpO6hyAAkBmZiaef/55+P1+nDlzBgCQkpICUaxzZz0RERERERE1suqVntWntYer6tSIAtJTjaoJ7h0T9SitcIdUeQJVlZ7bIlR6Zg/qAkeFCza7GxpRgM3lgSzzfE8iImoc9QpAA0RRRLt27RriVkRERERERNRIqld6mg1aOD0+5G49UmtVZ7iwM9JQI6Cq0vOG9V+jYNoQ5GT1RBkHGxERUROpcwC6atWqGl8XBAF6vR4dO3bE1VdfjYsuuqiuX4qIiIiIiIjqoLZKz4xUI/bOGx1VVWe4sLOmVncAuLZXB3g9frhsLlZ6EhFRk6lXABqYAlj9/8CqX5ckCbfddhsWL17M9ngiIiIiIqJGFk2lJwAYtBJKbK6oqjojhZ2BVndBgBKsVh9qJMsyKz2JiKjJ1DkA3bZtG6ZNm4Y+ffpg8uTJ6Nq1KwDghx9+wOuvv47vv/8ezz77LOx2O1555RW8/fbb6NChA6ZPn95gD09ERERERERVAtWeggDExdde6QnEXtWZs2k/ds6yqM71LLG58PaeE5g3KgMLsjIjtroTERE1lTqXYy5duhTp6elYtmwZLr30UiQkJCAhIQGXXXYZli1bhm7duuHpp59Gnz59kJubC4vFgg8//LAhn52IiIiIiKjNkyQRGoMOiWYDPFoJGqNOqfQMhJrhKj0BddgZLGfTfsy0pOHh8T1hNmgBAGaDFrf36wy9KGCOpTtOPjIeJY9MwMlHxuM3/TvDY3fDZrVD4/HCZnXA63Az/CQiomahzgHol19+iauvvjri61dffTV27Nih/H7UqFE4ceJEXb8cERERERERoarSU5JE5VdDgh4rdhSj09IC9H1qG0RBqLHSs7qcTfsxy5KGxeMzldeDqzpPPjJBCTpnD+8Oe4XrXNjpCAk7q1rd/Tznk4iImpU6B6A6nQ579+6N+Pq3334Lrfb8/7l6vV4Yjca6fjmVzz77DLfddhsGDBgAi8WC2bNn4/jx4yHr3n33XVxzzTXo27cvbrzxRmzdujVkjc1mw4IFCzBo0CAMGDAAs2bNQmlpaci6Xbt24Y477sAVV1yBMWPG4IUXXuD/qRMRERERtVLBIaP6uvrXmtY29HNUr/RMNBsgGbSqas9IA4wiVXoCVdPa39pzAnNHpMVU1cmwk4iIWoo6B6DXX389/u///g9PPvkkjh07Br/fD7/fj2PHjiE3NxcfffQRrr/+emX9V199hR49etT7gb/66ivMmDEDPXr0wPPPP48FCxbgwIEDmDJlCpxOp7Ju48aNePjhhzFx4kSsW7cO/fv3x4wZM7Bnzx7V/ebMmYMdO3ZgyZIleOqpp1BcXIypU6fC6/Uqa3744QdkZ2ejffv2WLt2Le6++26sXLkS69evr/fnISIiIiKi5iNcyKgx6KDVStAYdDAkGFBa4YIhwQCdUQ+tMXStJFX9MSuWYLT62urPYTIbYEiMw4odR9FpaQE6LS3A5X/5HDqNpKr2jLXS02zQYvH4TGQP6gKf08OqTiIiapXqPATpwQcfxKlTp/DSSy/h5ZdfVqa7+/1V/8c4YcIEPPjggwAAl8uFyy67DFdeeWW9H3jjxo3o3LkznnjiCeVfDlJSUnD33Xfju+++w8CBAwEAK1euxHXXXYc5c+YAAIYMGYKDBw/i+eefx7p16wAAu3fvRmFhIfLz82GxWAAAaWlpmDRpEjZv3oxJkyYBAPLz85GcnIxnnnkGOp0OQ4cOxZkzZ7BmzRpMnjwZOp2u3p+LiIiIiIiaVqCdPHh4kNmgxaPX9EL24K7I3XIYeecG/wzqasY/7xuCZ7cVK9cCk88fGpMBp8cHg/78BHa70wvZ7YXP51eGFfn9ctXQourT2l0+6LQScrceUp7j4+xB+OrYWTxeywT3mgYYBVd65mT1QJnDG3ZYEae1ExFRa1PnAFSv12PFihXYt28f/vWvf+Gnn34CAFx88cWwWCy47LLLVGtnzJhR/6dFVSt9fHy86r+iJiYmAoDyXySPHz+Oo0ePKgFswKRJk7B8+XK43W7odDps374dJpMJw4cPV9akp6ejT58+2L59uxKAbt++HePHj1cFnZMmTcLatWuxe/duDB48uEE+GxERERERNZ7g4DG4mjFwXdRpkLv1MB4LChmtDg+6mA1YtuWQKnzMGdsTT39+RBUyWh0evPPtCcwdlY5nC4/WGowmG7TwyVAFq2aDFtumD8N7e08qz9EuXofRGamYvGG36vOEm+AOVFV6bps+DACwasdR1TNkD+oCR4ULNrsbGlGAzeVhZScREbV6dQ5AAy699FJceumlDfEsUbnlllvw4Ycf4o033sCNN94Iq9WKZ555BpdeeqlSYVpUVASgqpozWEZGBjweD44fP46MjAwUFRUhLS0tpCUlPT1duYfdbsfJkyeRnp4eskYQBBQVFTEAJSIiIiJqxiRJDKmytDu9ELw+yBoJxjgNypwemOO0yCs8qnpvu3gdxmW2w71v76nxWsCySX2iDkbDVXVqRAHpqUZVW3s053oGf70DpRUYtfoLvDX5Kiwcl4kyh4eVnkRE1KbVOwC90AYOHIhVq1bhgQcewKOPPgoA6NOnD1588UVIkgQAKCsrAwCYTCbVewO/D7xeXl6uVI8GS0pKwnfffQegakhSuHvpdDoYDAblXnWl0dT5GNZmLXDuUeBXoppwv1C0uFcoFtwvFC3uldZHEKqqOmVZhiAI0BkCbe3FEdvaL0mKw0dTBoWEjOHCx0iBZCzBaKSqznD3jlTpCVRVe26fPgyiIGBl0Oe7vV9n9EyNh8vuhF4GHBVeQK763rTWP4M0N/zZQrHgfqFoca/UTdQBaO/evWOeaigIAvbt2xfzQ9Vk165d+NOf/oTbb78do0ePhtVqxerVq3Hfffdhw4YNiIuLa9Cv15hEUUBycnxTP0ajMpkMTf0I1IJwv1C0uFcoFtwvFC3uldah0u2FVhRhPVfR6fT6zrW1H1TWhGtr14gC2ocJGcOFj5ECyViC0Uhrw907mnM9/zgqHQuyeqLM6UFSnBYevx8GnQSDzliP7yY1BP5soVhwv1C0uFdiE3UAev/998ccgDaGxx9/HEOGDMFDDz2kXOvfvz9Gjx6NDz/8EHfccQeSkpIAVFVvtm/fXllXXl4OAMrrJpMJJSUlIV+jrKxMWROoEA1Ugga43W44HA5lXV34/TLKy+11fn9zJkkiTCYDyssdSosNUSTcLxQt7hWKBfcLRYt7pWUSRQHQSIjTa2F1uJFs0MEny1i+9YhS6ZmRasTeeaNV7eRA+EAyUsgYuD7T0l05kzPS2liC0UhrI907cK6nIEA1oKnqXM+ucDtcsDvd0AkC7BVeyDLgrnTV/xtNdcafLRQL7heKFveKmslkiKoaNuoAdObMmfV6oIZy5MgRZGVlqa517NgRycnJOHbsGAAo53UWFRWpzu4sKiqCVqtFly5dlHU7d+5UWmMCiouLkZmZCQAwGo3o1KmTciZo8BpZlkPOBo2V19u6N6vP52/1n5EaDvcLRYt7hWLB/ULR4l5p3qpPTtcZdKpp7dFOSQciV19GGh503OrAQ2N7QsD5NvNlWw7hn/cNUbWee/0yik7bMcuShkeDKk5jrerM2bQfO2dZVPcusbnw9p4TmDcqAwuyqp/r6eQfgpsx/myhWHC/ULS4V2LT4s4A7dy5c0hb/U8//YSzZ8/i4osvBgB06dIF3bt3xyeffIJx48Yp6zZt2oShQ4cq09xHjhyJ1atXY+fOnRg2rOpfdIqLi7Fv3z78/ve/V943cuRIfPbZZ3jwwQeh1WqVe5lMJgwYMKBRPy8RERERUVsWboCR0+ND7tYjdZ6SHul6YHjQ8usvRcm4TFiDQkZXpQuzh3dHTlZPlDs9MMVp4XR5McfSHTlZPVDm8CLJoIHT5cP8MT0AoNZgNFJV5+39OkMvCiH3tju98NjdcPllTnAnIiKKQb0D0JKSEuzbtw82my3s//nefPPN9f0SKnfeeSeeeOIJPP744xg7diysViv++te/IjU1FRMnTlTWzZw5E/PmzUPXrl0xePBgbNq0CXv37sXrr7+urBkwYAAsFgsWLFiA+fPnQ6/X49lnn0WvXr0wYcIEZV12djY+/vhjPPDAA7jrrrtw8OBB5OfnY+7cuUqYSkREREREDUuSRBgS9KpKz3Bt7bFOSQ/X1h5woLQC3xy3wtLVDI3Hqw4ZPT44vF60N8fDaq2Ex1NVeeN1eFSBpE8Sz4WlNQejNVV12s9Na69+7wBOcCciIopenQNQl8uF+fPnY/PmzfD7/cqURQCqdvKGDkB/97vfQafT4c0338T777+P+Ph49O/fHytWrEBycrKy7vrrr4fD4cC6devwwgsvIC0tDatWrQqp2FyxYgWWLVuGxYsXw+v1wmKxYNGiRdBozn9runXrhvz8fOTm5uK+++5DSkoKZs2ahSlTpjToZyMiIiIiovPt7qJOc26AUc1t7XWZkh6urT1wpub8MRlwnAsgqwtkkMG1H7IsqwJJn88PONywOWsPRmur6qx+byIiIoqdINexZ2LZsmV4/fXXMXv2bAwYMACTJ09Gbm4uOnTogFdeeQWlpaV48sknlbM0KZTP58eZM5VN/RiNQqMRkZwcj7NnK3kmBdWK+4Wixb1CseB+oWhxrzSd4HM9ZVlWtbuXOT0wx+nQcelmVajZLl6H4oVZ6PLYp6rrH9xzNf7fj9aQ8zQBYOXNl2PylRcjTiepgkfB64Oskaq+XtB12e2NeKZmQ+yX6p+bWif+bKFYcL9QtLhX1FJS4qMaglT7igj++c9/4pZbbsF9992HHj2qWjkuuugiDBs2DGvXrkViYiLeeOONut6eiIiIiIhaKUkSoTHokGg2wKOVkGg2QGfUw5AQhxU7itFpaQHGrfkSJ8qdNba1B8vZtB8zLWl4eHxPmA1V5/abDVosHp+J7EFd4HN6YLM6qtrarQ54HW54PD54He6Q6409UKiqqtPP8JOIiOgCqXML/OnTp3HFFVcAAOLi4gAADodDef2aa67B888/j6VLl9bzEYmIiIiIqKULVD0KAhAXrz7X02zQYtv0YXhv70ml3V0jCmgfQ1t7zVPSz7ezh2snZ5s5ERFR61bnALRdu3Y4e/YsAMBgMCApKQnFxecPIq+oqIDL5ar/ExIRERERUYtRU1u71eGBUSepJrgDVWFneqpRNdgo0gAjoGpQ0Vt7TmDuiDROSSciIqJa1TkAveKKK7Br1y7l92PGjEF+fj7at28Pv9+Pl19+Gf3792+IZyQiIiIiomauetBpNmjhdPmg00rI3XoIeYVHoREFFC/MUgWdQOQp7jmb9mPb9GEAgFU7jqoGFWUP6gJHhQs2u5tT0omIiKhGdQ5AJ0+ejE8++QRutxs6nQ6zZ8/G7t278ac//QkA0LVrVyxcuLDBHpSIiIiIiJonSRJhSKi9rf3yjolhg85IU9wPlFZg1OovsPz6S1EyLhPWGNraiYiIiALqHIAOHDgQAwcOVH7fqVMn/OMf/8DBgwchiiLS09Oh0dT59kRERERE1IwFt7oLOg1ytx6uta09UtBZW7v7N8etsHQ1Vw0qYls7ERERxajOU+DD3kwU0bt3b2RmZjL8JCIiIiJqhcJNcDfGaZFXeFS1Llxbe6QJ7kBVu/sDozOweHxmyBT3+WMy4Hd7OTmdiIiI6qReKWVFRQU2bNiAr776CqdPn8ajjz6KK664AlarFR988AHGjh2Lbt26NdSzEhERERFRE6hpgvvQ7sl4+7dXRd3WHulcz9v7dYZeFDDH0j1ksFFwuzsRERFRrOocgJaUlOC3v/0tSkpK0K1bNxQVFaGyshIAYDab8dZbb+Gnn37CokWLGuxhiYiIiIjowolmgvuhXyqRbNRG3dYeONfzrclXYeG4TJQFnetpPxd0eh0eTnEnIiKiBlPnFvjly5ejsrIS//d//4fXXnst5F9Mxo0bh507d9b7AYmIiIiI6MIQBKEq9Dz3qyFBjxU7itFpaQH6PrUNoiCETHCvS1v77f06o2eqEXabo+pcT6sDXodbqfKUZZnt7kRERNRg6lwBumPHDtx9993o0aMHzp49G/J6ly5dcPLkyXo9HBERERERNY7gIUaiKKgqPc0GLZwen6raM9IEd+B8W7soCFhZWMy2diIiImpW6hyAOp1OpKSkRHw90A5PRERERERNJzjolGU5pK092aCFTwZytxxG3rnwMiPViL3zRkc1wR2oamu/Yf3XKJg2BDlZPdnWTkRERM1KnVvgMzIy8O9//zvi659++ikuvfTSut6eiIiIiIhiVL2Fvfq0dp1RD0NCnNLW3mlpAQoOncKyLYfwWMFBJdg0aCWU2FxRT3AHgGt7dYDX44fNamdbOxERETUrda4Avfvuu/HQQw+hV69emDhxIoCqf6n54YcfsGrVKuzZswd5eXkN9qBERERERFSlLlWdZoMW26YPw3t7Typt7e3idRidkYrJG3ar7h/rBPdZljTMH5MBR4XrXNDJkJOIiIiajzoHoDfddBNOnDiB5557DitWrAAA/P73v4csyxBFEXPnzsW4ceMa6jmJiIiIiNq86kGn2aCF0+WDTishd+sh5BVWhZIfZw/CV8fO4vGgae0aUUB6qlHV1t4xUR/2XM+6THDnmZ5ERETUXNUpAHU4HPjNb36D2267DQUFBdi8eTN++OEH+P1+dO3aFRMmTECXLl0a+lmJiIiIiNqU6oOKDAl65G49rASdsVR1hgs7azrXM2fTfmyPMNhImeAug2d6EhERUbNXpwDUYDDgxx9/hCAI6Ny5M+65554GfiwiIiIiorYl1qnsQGxVneHCzkiVnkBVtedbe05g7og0TnAnIiKiFq3OQ5BGjBiBwsLChnwWIiIiIqI2p/qwIpPZAENiHFbsOKoMKrr8L59Dp5FUQSdQe1VnsEhDjHI27cdMSxoeHt9TeY/ZoMXi8ZnIHtQFPqcHNqsj7GAjIiIiopagzmeATp8+HbNnz8aDDz6IO+64A126dIFerw9ZZzab6/N8REREREStliSJIW3t4c7vDDeVHYi9qjNn037snGVRtbWX2Fx4e88JzBuVgQVZkc/15GAjIiIiaqnqHIBed911AIDDhw/j73//e8R1+/fvr+uXICIiIiJqdYJb3QWdBrlbD9d5KnuksDMwrV0QoDov9PZ+naEXBcyxdA9pa/fY3XD5ZWhEged6EhERUatS5wD0/vvvhyAIDfksREREREStSm3negIC8gqPKutjncoOxF7VaT9X1el1eMKGnaz0JCIiotamzgHozJkzG/I5iIiIiIhaDUkSVWFnskELnwzkbjmMvHMh5dDuyXj7t1c1yFT2ulR1yrLMsJOIiIjahDoHoEREREREVKV6pWc053oe+qUSyUZtg0xlZ1UnERERUWR1ngJPRERERNTWCIJQVd157iio6hPcE80GSAYtcrcewWMFh2B1eJRzPVcFtboDjTOVvaqq08/zO4mIiIiCsAKUiIiIiKgW1VvazQYtnC4fdFoJuVsPKZWeGalG7J03GnmFxcp7I53rCZwfVsSp7ERERESNhwEoEREREVEYgbZ2QQDi4tUt7WaDFtumD8N7e08qE9wBwKCVUGJzRX2u54HSCtyw/msUTBuCnKyeqrCTU9mJiIiIGgZb4ImIiIiIglRva9cYdaqWdgDQiALSU42qSk9AHXYGRGp1D7i2Vwd4PX7YrPaQtna2tBMRERHVHwNQIiIiIqJzJEmEIUGPFTuK0WlpAfo+tQ2iIIQEnZHa2utyruf8MRmQ3V6GnURERESNhC3wRERERNSmnZtnVPWrVoPcrYeVtvbLOyaGDTpramvP2bQf2+t4ricRERERNTwGoERERETUJgUGGxnitCitcCEpwQAAyAua1h4p6Ayu9Hz800Oq+x4orcBbe05g7og05GT1QJnDy3M9iYiIiJoQW+CJiIiIqM1Rt7pvRsclmzF2zRf4pcIVMeisLmfTfjwwOgOLx2eGtLVnD+oCn9MDm9XBcz2JiIiImhgrQImIiIiozQhMdhd16lZ3ADj0SyWSjdqQas+cTfuxbfowAMCqHeenwN/erzP0ooA5lu4hlZ7Bbe0+H4NOIiIioqbEAJSIiIiIWr1Au7sxToMypwfmOK2q1R2I3NZ+oLQCo1Z/gbcmX4WF49Tnd9rPBZ1eh4dt7URERETNFANQIiIiImrVAu3uuVsPI6/wKC5JisNHUwaFDC8Czld7Bg8wClR79kw1wm5zQCMjJOisamtn8ElERETUHDEAJSIiIqJWJ9Dq7vfLEKq1u2tEAe0jTHA/UFqBG9Z/jYJpQ5CT1ZPT2omIiIhaAQ5BIiIiIqJWQ5JEaAw6JJoN8GglJJoNMFZrd69psBEAXNurA7weP2xWe8gAIyIiIiJqeVgBSkREREStQvVWd6vDg6Hdk/H2b68KqfSMNNholiUN88dkwFHhYls7ERERUSvBAJSIiIiIWrS6THYPDDZafv2lKBmXiTKnB6Y4LexOD1vdiYiIiFqZFtsC/8EHH+Dmm29G3759MXjwYPz+97+H0+lUXt+yZQtuvPFG9O3bF9dccw3ef//9kHu43W48+eSTGD58OPr37497770XRUVFIeuOHDmCe++9F/3798fw4cOxfPlyuN3uRv18RERERM2FIAhVU9QFoakfRSW43d2rk0Ja3YGa290PlFbgm+NWOF0etE/Qw1HBVnciIiKi1qhFVoD+9a9/xbp16/CHP/wB/fv3x9mzZ7Fz5074fD4AwDfffIMZM2bg1ltvxYIFC/Dll19i4cKFiI+Px7XXXqvc5/HHH8emTZvw0EMP4aKLLsKaNWtwzz33YOPGjUhMTAQAlJWV4e6770b37t2Rl5eHn3/+Gbm5uXA6nVi8eHGTfH4iIiKi+goeEhQ8zTyYJIkQdBoY4zRKi7jd6YXs9jZ5SNgQk90D7e5uR9V/2I7wbSAiIiKiFq7FBaBFRUVYtWoVVq9ejVGjRinXr7nmGuV///Wvf8UVV1yBRx99FAAwZMgQHD9+HCtXrlQC0JKSErz33nt45JFHcOuttwIA+vbtizFjxuCtt97C1KlTAQBvvfUWKisrsWrVKpjNZgCAz+fD0qVLMW3aNFx00UUX4mMTERERNYjaQs1AMCoIQFy8+jzN6mdk+v1yrSFqY2nIye7NrLCViIiIiBpYi2uB/9vf/oZLLrlEFX4Gc7vd+Oqrr1SVngAwadIkHDlyBD/++CMAoLCwEH6/X7XObDZj+PDh2L59u3Jt+/btGDp0qBJ+AsDEiRPh9/uxY8eOBvxkRERERKEasv08UDW5YkcxOi0tUP56bsdRGBP00BrPT0/XGHXI3XoEjxUcUgJFq8ODRwsOIv/r45AMWtWkdY1BB0lq3H+1DHwvRFGAMU7Dye5EREREFJUWF4B+++23yMzMxOrVqzF06FBcfvnluPPOO/Htt98CAI4dOwaPx4P09HTV+zIyMgBAOeOzqKgIqampSEpKClkXfA5oUVFRyL1MJhPat28f9rxQIiIiotqECzWrXws+37K+IaMSHAZVTQaHmu98ewIuv4xnC4+i09IC9H1qG0RBQF5hcci9endIwJ39O+OZ7aEhqiFBH/H5IgW5dfpeJBlgdXjCTnafaUnDonE9YTZoAQBmgxaLx2di/pgMyG7vucnu/gtesUpERERETafFtcD/8ssv+O6773Dw4EE88sgjMBgMWLNmDaZMmYLNmzejrKwMQFVIGSzw+8Dr5eXlyjmf1dcF1gTWVb8XACQlJanW1ZVG0+Iy6KgE/vDT2JUg1Dpwv1C0uFcoFs1hvwhCVZgnyzJkGRBFAdBIiNNrYXW4YTbo4HZ74QcQp9Oormk00rn28+pnVvaA21HVfl6b4K9X5nTDHGZIEAAsm9QHT39+BI9/WtVOfnnHRJRWuMOep7lsUh+sLCxW1gLnK0MBYK6lO0SPV/ncghD6mZ0uD0S/H35RrNP3IiPViL3zRtc62d3q8CDJoIXD5YHb4YYghP93r+awV6jl4H6haHGvUCy4Xyha3Ct10+ICUFmWYbfb8dxzz6F3794AgH79+mHs2LF4/fXXYbFYmvgJoyeKApKT45v6MRqVyWRo6kegFoT7haLFvUKxaKr9Uun2QiuKsDo9MMdp4fX7IQoClm05H+QN6mrGP+8bgme2FamCzm3Th+G9vSfx2LlQEVCHjA+OyUC8rvZ/jXN6fMrXizQkqF28DuMy2+Het/co10psLnQIc55muLXBPvm+FA+MzoBer1U+t0+WkbtFHeQ+ek0vZA/uqroey/fiyGk7Nn//C2ZauitngAYEJruP6ZGKDol6AIBeo6/1ewXwZwvFhvuFosW9QrHgfqFoca/EpsUFoCaTCWazWQk/gaqzOy+99FIcPnwY1113HQDAZrOp3ldeXg4ASsu7yWRCRUVFyP3Ly8tVbfEmkynkXkBVJWn19vlY+f0yysvt9bpHcyVJIkwmA8rLHTxbi2rF/ULR4l6hWDT0fglXvRmukjHZoINPlrF865Fag7ycsT1VlZdA1TCf9FRj2PZzAFhZWFw1yKesEsD56lJAXXEqaDV4pvCo8vUiDQnqmKgPqfYMPk8z+NnCrQ3o3SEBH08ZhKc+P/+5P84ehK+OncXjBepq0S5mA5ZtOaS6Huv3ImfTfmyfPgwCwk12r6qSPVvpCvs9rI4/WygW3C8ULe4VigX3C0WLe0XNZDJEVQ3b4gLQHj164NixY2Ffc7lc6Nq1K7RaLYqKijBixAjltcB5nYHzPNPT03Hq1KmQILP6mZ/p6ekhZ33abDb88ssvIWeD1oXX27o3q8/nb/WfkRoO9wtFi3uFYlGf/RI8EV1n0IVMRA9XyRgu9AsX5EWqpqwpZAy8bnd7YYw3nAtitXC6fJAhw6A/P9kdUJ/hGSnUjFTtmbNpP7ZNHwYAWLWj6jM7PD50TNSHnbS+bFIf5AW1xreL12F0Riomb9itWhfuc9fle3GgtAIjV3+BHTOGh5ns7qzTHwj4s4Viwf1C0eJeoVhwv1C0uFdi0+IODBgzZgysViv279+vXDt79iz++9//4rLLLoNOp8PgwYPxz3/+U/W+TZs2ISMjA5dccgkAwGKxQBRFbN68WVlTVlaGwsJCjBw5Urk2cuRIfPHFF0oFKQB88sknEEURw4cPb6yPSURERBdQbUN3Ik1ED1QyPlZwEFaHRwn9VlU7ZzNckBcp3AsOJKvr3SEB26cPw4p/FaPT0s3otLQAo1Z/AUiCMsCo09ICjF2zE79UuKIaEuT1yyg6bccsS5pqbeA8zVv7dcbJRyag5JEJ+O7B0XB7fSFrAwHmqh3nP3ekz9dQ34vA60adhIoyTnYnIiIioshaXAXouHHj0LdvX8yaNQtz586FXq/HCy+8AJ1Oh//5n/8BAPzv//4vfve732HJkiWYOHEivvrqK/z973/Hs88+q9ynY8eOuPXWW7F8+XKIooiLLroIa9euRWJiIu68805l3Z133onXXnsN999/P6ZNm4aff/4Zy5cvx5133omLLrrogn9+IiIiajiSJELQaWCMO1856XT5oNNKyN16CHmFR6ERBRQvzAppww5XtRhNkBd4LVLlZaRKTeDcAKIdxbW2jh/6pRLJRm2UQ4I0cLp8mD+mBwCo2slv79cZPVONsNsc0MiAzeWBKAqYPyZDtbZn+3ictaunskf6fA31vQCAWZY02J3ecwOhONWdiIiIiMJrcQGoKIp44YUXsGzZMixevBgejwcDBw7EG2+8gfbt2wMABg4ciLy8PKxYsQLvvfceOnfujMcffxwTJ05U3WvRokWIj4/H008/jcrKSlx55ZV46aWXVNPhk5KS8Morr+Cxxx7D/fffj/j4eNx6662YO3fuBf3cRERE1LAkSYQhQR/S1n7+rM6aJ6KHCztjCfJqCvdyNu3HzlkWiML58y0zUo2Y0Kt9VK3jNd07MCTI0tVcVTXp8kCWZfgkEbOHd0dOVg+UObxB7eQuVUWlzyfDUeEKWQsIqs8d6RkC14MHGMX6vTh/1mcGHBXRnfNJRERERG1XiwtAASAlJQV/+ctfalyTlZWFrKysGtfodDrMnz8f8+fPr3FdRkYGXn755Vgfk4iIiJoxQadB7tbDqini4c7qjKWSMdYgb9mWQ/jnfUNCwr3b+3WGXhQwx3I+ZDQbNDjr8EbVOh74etumD6sxOFQHm37A4YbN6YFGFJRgNJxwa6W4qns/Gjzk6dwzCAJUIfNxqwMPje2pGmAUy/ciUjhLRERERBROiwxAiYiIiOpDEAQY4zTIi+KszlgqGYHIoV+kIM/p8oUN9+znwj2v41zI6PbAnGSIqnUcqKr0vGH91yiYNiTMkKDIwaEsy/D5omsnD14ru70hrfElNhfe3nMC80ZlYEFWpuoZXJWhVaRRfy9qCGeJiIiIiKpjAEpERERtjigKsDo8IaFhLBPRI1Uy1hT61RTkRQr3gkNGu9OrqrKs7YzMa3t1gNfjh8vmavTg0Ofzh22Ntzu98NjdcPnl0Gfw+MJWnEbzvSAiIiIiihYDUCIiImrRBEGAKArw+2VVWHZuoLvya/BaWZZhNoQOCYoUKAaGB701+SosHFd7JWONoR/CB3nRhHvhqiwjtY4Ht7pfqOCwtjb6cM9Q1+8FEREREVG0GIASERFRixRugrvd6YXg9UHWSDDEaVFa4UJSggGCywsZMgz682t/trliGroTbiJ6bZWMQPjQr64iVVlGah1vqjMyGWASERERUXPCAJSIiIhajEAFpyAAcfGhE9wfvaYXsgd3Re6Ww8g7F14O6mrGP+8bgme3FSvXzAYtlkzIRE5WzwYbutMcqix5RiYRERERUSgGoERERNTsVa/2NOok5G49oho+ZHV40MVswLIth/B48FCisT3x9OdHVJWeVocHcz78L0RBwNwRaS1y6A5bx4mIiIiIoiM29QMQERER1USSRBgS9Fixoxidlhag71PbIAoC8gqLVevaxeswLrMdVgVNdleu7TiKcBb/83vE6SRUlDmg8XhhszrgdbiVKs+qQNHf7MJPIiIiIiKKHgNQIiIianYEQaiq+hQECDoNcrcexmMFh2B1eNAxUY/SCnfIBPdw1yOtDbA6PChzeCEIAoNOIiIiIqJWigEoERERNRuSJEJj0CHRbIBHKyHRbIAxTou8oKrOEpsLHRJ0MBu0qveGux5pbYDZoEWSQQO/n8EnEREREVFrxQCUiIiImkxwpWf1VvdOSwswds1O/FLhUlVwnqp049ODpzBjeHfVvQLXZ1q617o2YJYlDXanl5WfREREREStGIcgERERUYMKTGr3+2VVsBh8XRQF1VAjs0ELp8cXMtjo0C+VSDZqYTZoVSFozqb92DZ9GABg1Y7zU+CPWx14aGxPCDg/2X3ZlkP4531DQqa9z7KkYf6YDDgqXBfum0NERERERBecILPkocn4fH6cOVPZ1I/RKDQaEcnJ8Th7thJer7+pH4eaOe4Xihb3SvNTW6hpd3oheH2QNZJyPdmghU8GcrccRt65QDIj1Yi980bj4kcLQs7r/OCeq/H/frSqprgDQO8OCXhr8lXo1T4BZQ6PMsH9/NfTotzpgSlOC6fLCxkyDHqNatq77PYqA4+o7eLPFooF9wtFi3uFYsH9QtHiXlFLSYmHJNXe4M4KUCIiIoqZJImqsDNcqGk2aPHoNb2QPbir6vrH2YPw1bGzeDyo0tOglVBic4UdVhSo9qxewXl7v87omWqE3eaARgZsLs/5ilOPDw6vF+3N8bBaK+HxVP3LodfhgUYU1GuJiIiIiKhVYwBKREREMQmc1Zm79TDyCo9GDDWtDg+6mA1YtuWQcr1dvA6jM1IxecNu1T2DhxVVD0EPlFbghvVfo2DaEORk9VRVezoqXBErOAP5ZnDOKcsyfD4Gn0REREREbQmHIBEREVGtgocVCToNcrcexmMFh2B1eJRQc1XQpHagKuwcl9lOdb1joh6lFe6QkLO2YUXX9uoAr8cPm9UOjccLm9UBr8PN9nUiIiIiIqoVK0CJiIgaQKTBP/Vd29Sqt7qbDVoAAvKiCDXDXa+p0jNn035sD9PqHjysiBWcREREREQUKwagRERE9RAuIAwerhPNkKBwayNNT78QgWng6wkCEBevbnUf2j0Zb//2qqhCzXDXgys9qw81OlBagbf2nMDcEWnIyeqhGlZUU6s7ERERERFRTRiAEhERxaimgDBQsfjQmAw4PT4Y9DUPCQq3NtL09ODANBCoNmRYWj3MNeok5G49gseCzvU89Eslko3aqELNwPWZlu6qewSGGgkCQr5v2YO6wFHhgs3u5rAiIiIiIiJqEAxAiYiIohRNQGh1ePDOtycwd1Q6ni08WuPk80hrI01PD1y/e+Al0GkkWB3uqMLSSJWT1atTgwcbaUQBxQuzkFdYrHpPpLAzUqh53OrAQ2N7QsD5tvYSmwtv7zmBeaMysCArM+JQI7a6ExERERFRQ2AASkREFIXqk88jBYQAsGxSHzz9+RElIIw0+TzcWiD89HSg6kzNO/p3xl+2HcGqoJAxUlgaODvTWemCLEOpCg3Xtu/0+FRh7uUdE8Oe6wmcDzuDz+qsKdR0Vbowe3j3kLZ2j90Nl19mpScRERERETUqBqBEREQRBFdIBk8+ByIHhIHJ5/e+vUe5FmlIULi1NV1fNqkP8gqLowpLA9Wlv76iEzLbJyjVok6XDzqthNyth5RKzYxUI/bOG60Kc2saVnSgtAI3rP8aBdOGICerpyrsjBhqenywOT1hw05WehIRERERUWNiAEpERFRNNJPPIwWEsUw+j2V6eqxhae8OCdg2fRjyCouxasf5atFt04fhvb0nVW37Bq2EEptL9fVqGlYEANf26gCvxw+XzRV1qMkJ7kRERERE1BTEpn4AIiKi5kAQBEiSCI2mqtV9xY5idFpagE5LCzB2zU78UhE5IAwWHHbWZW2k67GEpYC6WjTwmkYUkJ5qDGnbj/QcOZv2Y6YlDYvG9VReMxu0WDw+E/PHZEB2e8+Fmn62rxMRERERUbPFAJSIiNqUQNApCAKAqmpPjUGHRLMBHq0EjVGnnIUZCA6DJ58HCxcQev0yik7bMcuSFnbtw+NrXxs8PT0glrA0UBW6asdR1dpIYWmkgPZAaQVGrf4Ct/brjJOPTEDJIxNw8pHxmD28u2pYERERERERUXPGFngiImoTwg7+qXYeZqyTzwMB4VuTr8LCcecH/zhdPswf0wMAah0SFG5tuOnppyrd+PzIacy0dFe1rweHpYHrkYLOms71zNm0H9urDTYyG7S4vV9n9Ew1wm5zQCODw4qIiIiIiKjFYQBKREStXvUJ7pHOw4x18nlNAaFPEqOefB5pbfXp6WaDBmN7tFNC0UhhaaSgs6ZzPQ+UVuCtPScwd0RayHOw2pOIiIiIiFoyBqBERNQq1TTBHQh/HmZdJ5+HCwh9Pj/gcEc1+bzGtUHT08tdHoiiEFVY6pdlzLKk4dGCg6rnytm0HztnWUKC3FmWNGQP6gJHhQs2uzvsMxMREREREbVEDECJiKhViWaCOxC+Tby+k8/DiWXyeaS1wdd9PjmqsNTrBeaPyQCAkIpVvShgjiU0RA0OcjmtnYiIiIiIWgsGoERE1GKcm1uk/Hr+elW1pyAAcfHqVveh3ZPx9m+vivo8zECrOwCs2nFUVSE5f0wGHBWumELNxhJNWOqocIWtFrWfCzq9jvDVqURERERERK0JA1AiImpSwa3qwSFc8HVRFCDoNDDEaVFa4UJSggGy0wPB64OskZRqT6NOUia4BwRPcI+m2jPSYKOWeBZmbW34zSHIJSIiIiIiamwMQImIqEmEa1W3O70hoWayQQufDORuOYy8oFbuR6/phezBXZXrsU5wByKfh9naJp8z6CQiIiIioraMASgRETW4SFWdAZGmslcPNa0ODz7OHoSvjp3F40FVnVaHB13MBizbcki5XtcJ7tGch0lEREREREQtFwNQIqI2IJo288D1+qyNVNUpu73w+fzKejHMVPZwoWa7eB1GZ6Ri8obdqs/TLl6HcZntcO/be5RrdZ3gzvMwiYiIiIiIWjcGoERErUi0gWT1NnOzQQunywcZMgz6uq11unzQaSXkbj2kquqcZUnDQ2My4PT4YNBrUOb0wBynDZnKHi7UDDepPdL1+k5wZ5s4ERERERFR68QAlIiohQo3JKh6SBkukAzXZj6oqxn/vG8Int1WXOM5mzWt3TZ9GN7bezKkqvOdb09g7qh0PFt4FHmFxbgkKQ4fTRkUVagZqaqzpU9wJyIiIiIioguHASgRUTNXW1VnpCFBkQLJ6m3mAJAztiee/vyIqnIylrUaUUB6qjFkABEALJvUR7VeIwpoH2WoGamqM3B9pqW76vO1pgnuRERERERE1DDEpn4AIiI6TxCEqoDz3K8agw6JZgM8WgmJZgN0Rj0MCXFYsaMYnZYWoNPSAhQcOoVlWw7hsYKDSnAYKZAMtJmvCmo/V67tOFrntZFa1cOtDw41gwWHmsFyNu3HTEsaHh7fE2aDFgBgNmhx3OrAQ2N7YvH4TNV11QR3jxc2qwNeh5vhJxERERERURvFClAioibQUFWdkYYExXJ2ZkOsjdSSHml9pFb1QKgp4Py09hKbC2/vOYF5ozKwMCsTZU4PTHFa2J0euCpdmD2cE9yJiIiIiIgoshYfgFZWVmLixIn4+eef8d5776Fv377Ka++++y5efPFFnDhxAmlpaZg7dy7GjBmjer/NZsOyZcvw6aefwuPxYMSIEVi0aBE6dOigWrdr1y48+eST2L9/P1JTU3HXXXdh6tSpEAThgnxOImodwg0lCndW58fZg/DVsbOq1vNwVZ2xBpLhrjfE2kit6pHWB1rVl19/KUrGZcIa1KoeKdT02N3wiUB7czys1kp4PecCTo8PNicnuBMREREREVF4Lb4FfvXq1fD5fCHXN27ciIcffhgTJ07EunXr0L9/f8yYMQN79uxRrZszZw527NiBJUuW4KmnnkJxcTGmTp0Kr9errPnhhx+QnZ2N9u3bY+3atbj77ruxcuVKrF+/vrE/HhG1Ako7u0aEIUGval/vtLQAx8qdyN16GI8VHILV4VGqOldVm5Je25CgYLG0mTfEWqCqqvOB0RmqlnSvX0bRaTtmWdJC1h8orcA3x62wOz2qVnWPxwevww2bNbSFPZBtVs84qwYb+Rl+EhERERERUYgWXQF65MgRbNiwAfPnz8cjjzyiem3lypW47rrrMGfOHADAkCFDcPDgQTz//PNYt24dAGD37t0oLCxEfn4+LBYLACAtLQ2TJk3C5s2bMWnSJABAfn4+kpOT8cwzz0Cn02Ho0KE4c+YM1qxZg8mTJ0On0124D01EzVZtbe1GnYTcrUdUQ3vqW9UZqfISqAokd86yQBTOt5NHajNftuUQ/nnfkHqtvb1fZ+hFAXMs6upNp8uH+WN6AIBqffBk9nDt6pzWTkRERERERA2hRQegjz/+OO68806kpakri44fP46jR4/iwQcfVF2fNGkSli9fDrfbDZ1Oh+3bt8NkMmH48OHKmvT0dPTp0wfbt29XAtDt27dj/PjxqqBz0qRJWLt2LXbv3o3Bgwc34qckoqZUPdQMdy2atnaNKKB4YVbIUKLaqjqjaTMPnKcpCFBa6GsKJCO1mTtdvnqvtZ8LM70OdUu6TxJ5VicRERERERE1iRYbgH7yySc4ePAg8vLy8N///lf1WlFREQCEBKMZGRnweDw4fvw4MjIyUFRUhLS0tJBzPNPT05V72O12nDx5Eunp6SFrBEFAUVERA1CiViDaUFOGDIO+5vM7ww0rurxjYqNVdQYPCVqQlYmyoPM0IwWSACKenVnvtQit3vT5/IDDzbM6iYiIiIiI6IJrkQGow+FAbm4u5s6di4SEhJDXy8rKAAAmk0l1PfD7wOvl5eVITEwMeX9SUhK+++47AFVDksLdS6fTwWAwKPeqK42mxR/DGpYkiapfiWrSWPtFEKqCTVmWVWdGBl8XBAHQSIjTa2F1uGE26OB2e6HRSMjden4C+6CuZvzzviF4dltxjVPZgfBt7Q1b1Sli7og05GT1PBd2auFweeB3eeBwuqEXBDgqvIBc9VmDf85IkgAgdHhbuOsNsTaSWNeffx9/tlD0uF8oWtwrFAvuF4oW9wrFgvuFosW9UjctMgD961//itTUVPz6179u6kepF1EUkJwc39SP0ahMJkNTPwK1IA25XyrdXmhFEVanB+Y4LTx+P+J1mpDrPllG7pbDEULNg8r9csb2xNOfH1GFlOGCTiB8W3t9qjoXZmWizOlB0rnPEaeTEKeTAAAdEvUAAL1G32Dfu5aAP1soFtwvFC3uFYoF9wtFi3uFYsH9QtHiXolNiwtAf/rpJ6xfvx7PP/+8Up1pt9uVXysrK5GUlASgqnqzffv2ynvLy8sBQHndZDKhpKQk5GuUlZUpawIVooGvFeB2u+FwOJR1deH3yygvt9f5/c2ZJIkwmQwoL3fwfD+qVV33iyhGrt5cvvWIKtR89JpeyB7cVXX94+xB+OrYWTxeS/Vmu3gdxmW2w71v71F9/ViGFQHnqzoBYNWO6Ks67U43dIIAe4UXsgy4K12xfHtbFf5soVhwv1C0uFcoFtwvFC3uFYoF9wtFi3tFzWQyRFUN2+IC0B9//BEejwf33XdfyGu/+93v0K9fPzz99NMAqs4CDT67s6ioCFqtFl26dAFQdY7nzp07z7fBnlNcXIzMzEwAgNFoRKdOnZQzQYPXyLIccjZorLze1r1ZfT5/q/+M1HCi2S+BszoFAdAZdOfa1MOdvXm+etPq8KCL2YBlWw4pYWe7eB1GZ6Ri8obdqvuHCzVjDTojVXseKK3AqNVf4K3JV2HhuOpndTrh8/nhsbt5RmYU+LOFYsH9QtHiXqFYcL9QtLhXKBbcLxQt7pXYtLgDA/r06YNXX31V9VdOTg4AYOnSpXjkkUfQpUsXdO/eHZ988onqvZs2bcLQoUOVae4jR45EWVkZdu7cqawpLi7Gvn37MHLkSOXayJEj8dlnn8Hj8ajuZTKZMGDAgMb8uER0jiSJ0Bh0SDQb4NFK0Bh1yN16BI8VHFLCx0gt6YEKzlWFR5Vr0YSaNV0D1EFndTmb9uOB0RlYPD5TeV+g2rNnqhF2mwMajxc2qwNeh1v5L3dVw4P8DD+JiIiIiIiIGkiLqwA1mUwRp65fdtlluOyyywAAM2fOxLx589C1a1cMHjwYmzZtwt69e/H6668r6wcMGACLxYIFCxZg/vz50Ov1ePbZZ9GrVy9MmDBBWZednY2PP/4YDzzwAO666y4cPHgQ+fn5mDt3rhKmElHDCp7KLooCDAl6pdpTIwooXpgV1dmbka7HUr0Z6/md59vaBcyxdEdOVg+UObxKtafj3GR2IiIiIiIiImp8LS4Ajdb1118Ph8OBdevW4YUXXkBaWhpWrVoVUrG5YsUKLFu2DIsXL4bX64XFYsGiRYug0Zz/1nTr1g35+fnIzc3Ffffdh5SUFMyaNQtTpky50B+LqNWTJBGCTgNjnEYJE50en1LtCQCXd0yMqSU93PVYQ81lWw7hn/cNiSnotJ8LOr0OD9vaiYiIiIiIiJqIIPNP403G5/PjzJnKpn6MRqHRiEhOjsfZs5U8k4JqpdWKMJvjUV5uhzZOfa5nRqoRe+eNxsWPFijhZbt4HYoXZqHLY5+GhKAf3HM1/t+P1pBQ84N7rsaun6xKiAoAvTskYNv0YVi1o1h1jugsSxoeGpMBp8cHg16jhJpOlw8yZNU1u9ML2e2Fz+dXVa3yR2vj4M8WigX3C0WLe4Viwf1C0eJeoVhwv1C0uFfUUlLiW+cQJCJquaoHhIFqT0OcFqUVLhj0OlWlJwAYtBJKbK6ohgwBkVvSj1sdeGhsTwg4f73E5sLbe05g3qgMLMiqPpQocvVmpIrOqvM7GXwSERERERERNScMQImo0YVta3f5oNNKyN16qMZzPSO1teds2o9t04cBAFbtOBpVS7qr0oXZw0Ove+xuuPxy1KEmg04iIiIiIiKiloMBKBE1ikC1pyAAcfF6VVu72aDFtunD8N7ek7We6xmp2vNAaQVGrf4Cb02+CgvHRV+9CY8PNmf4Ck6GmkREREREREStDwNQIqqXSG3tgWpPo04KaWvXiALSU42qas9IlZ5AVbXn9unDwg4g6plqhN3mgEZG1C3prOAkIiIiIiIiajsYgBJRVGoLOmNpa++YqA+p9qzpXM8DpRV4a88JzB2RFtK+7jhX7UlEREREREREFA4DUCIKERx2iqJQa9AZa1t7rOd6zrKkIXtQFzgqXLDZ3WHb14mIiIiIiIiIwmEASkSK6lWdyQYtfDKQu+Uw8oJaz6sHnUBsbe11OdczuNKT7etEREREREREFC0GoERtRPUW9urXRFGAIUE9rOjj7EH46thZPF5L0AnE3taes2k/ds6yxHyuJxERERERERFRLBiAErVykc7qlCHDoA+65vGphhW1i9dhdEYqJm/YrbpfuKATiL2t/fZ+naEXBcyxdEdOVs//396dx0VV9X8A/wybgoi4ZYoWSIKACyiLJCKapKU9opZr4ApuoeaSSS65i5aaS0q4hEs9lZr+BNI0VNxzJ82NxV0UM/YZmBnu7w+fuXGZAWdwBMPP+/XypZ57uOfe4cudw3fOgmyFEjbVzZGvUHJdTyIiIiIiIiIyGiZAiaqYp43q9H7NFnvD2mHZoTRxWrtjXSskTQ6QjOo0NNFZnmnt+ZpEp1KF+rY1kJmZB5WSiU8iIiIiIiIiMh6Tyr4AIio/mUz2ZITn//42s7RATVtLKM1NUdPWEqaW5uKoTk2yclrnZvjyYArm7rsmllmamyI9p0CS0Cye6CyueKKzpGnxlzEpwBEzA53Er9Oa1q5UISdTDpW8UBzlqZnhzpnuRERERERERGRsHAFK9ALRtU6nrnJ9NivSNaqzXg0LdHGqh6E/nJe0q2tUZ3nX7/xnWvsbyJKrdG5iRERERERERERUUZgAJaokJaeql1ynM1+hgkylhmBmqrV+p4W5KRYduF7mZkW6RnWWNq29tGSnZv1OmQxiW09LdGqmtavkSpiZyLiJERERERERERFVKiZAiZ6z8ozetLU0x5yuzhju85pW+aExb2Jb0v2nblaka1Rnaet3Ak+SnYlj3pSM6kzPKcAP5+9hckdHRLyle/3O0hKdgiBArWbik4iIiIiIiIgqFxOgREb0tFGd+o7ezJQr0cTWEgsTrkvKzUxkaFrXSq/NinSN6ixrWvuVh7n47/l7+LiDg9aoTmV+IQqKBCY6iYiIiIiIiOhfhwlQIiPQd1SnvqM3S1urU1ey09BRnQsTrmNvWDut9TvH+TlguHcTyHMLkJNfqDPZyUQnEREREREREf3bMAFKVA4lR3paWlfDogPJZY7qNGT0Zmnlhm5WVNqoTkWB+qkbFTHZSURERERERERVgUllXwCRoWQy2ZMRlzJZhbdlamoCM0sL1LS1hNLcFDVtLWFqaY5FB1Iwd991ZMqV4qjOVUduSM71tNGbxZVWXjzZWdy0+MsI93PAjMBm4tfYWppjZqAThns3gVqhRE6mHGZKFXIy5SjML4Ayv1BSppIXcpd2IiIiIiIiIqpyOAKU/jVKTjPX7JQuFKrEkZiajYY0Sm5AVFpZyXJ91+90rGuFpMkBeo3qNGT0pqY83M9enC6vMS3+Mo6P89N7s6KyRnVy/U4iIiIiIiIiquqYAKUK9SwJyZLTzDU7pQ/2bAwLM1NkygvFpKhMpYZgZqqVwBQgwLKaNIFasq6+63cCgKW5KdJzCvRak7O0ZOe0+Ms4NOZNyGSQ3NvtTDk+7dwMMkjX6uzbuhGqmch0TmEva7MiIiIiIiIiIqKXEROgVCF0jd7UNyFpa2kOhVItTjPXeLVmNfRzb4Qlh1KwqkRSdLjPa5IEpvdrttgb1g7LDqVJkpq66uq7fidg+Jqcho7eLMgrwPj22onO/P+N6lTJldysiIiIiIiIiIioDEyA0nOhWZ5TJnuS/Cw5etOQhKSuaeYAsPBdF6w8kiZJMmbKlWhia4mFCdclCcxpnZvhy4MpT61b2q7spU1rN3RUZ7lGbyrVyFHoTnRyCjsRERERERERUdmYACWj0oz0tKxujoe5BahlbQmFUqU1elPfhCSge5p5vRoW6OJUD0N/OC9pX1e5IXUNWb9TvJf4y0gc86beozrLM3qTiU4iIiIiIiIiovJhApSMRtdIT12jNw1JSAK6k4+lJSp1lRtS19D1OwHgysNc/Pf8PXzcwcGgNTmZ1CQiIiIiIiIiev5MKvsCqOqQWZhh0YFkzN13XUwe6hq9aUhCEpAmHzWKJyqL01VuSF1dbWlMi7+MSQGOmBnoJH6NraU5ZgY6Ybh3E6gVSuRkymGmVCEnUw6VvBBqddH/Ep1F3JCIiIiIiIiIiKgSMAFKRiGTyWBV3Qwrj9yQlD9rQlJjWvxljPNzEJOPj/IKcTDlL4T72UvqaRKYxctLS2rqqqtpK9zPATMCm0kSncXX77w/KxDps97G/VmBGN/eHvL/TWtnspOIiIiIiIiI6MXCKfBkFCYmMmTKlXptElTadPLiCcni64UCuqeZ21qaofMb9SDDP2tv2lqa43amHJ92biYpX5hwHXvD2knW6Syt7rOs30lERERERERERC8WmcDsTaVRq4vw+HFeZV+GUchkMtS0tUTD2fu0kqDNX7FG4pg3seroDTHJ+M8u8KmShKRmF/jIhGRJ+Tg/B0zt5Ah5bgGKigSYmMjEv2UWZrCqbiZZe1OmUkMwM5WUKwrUECDAstrT6+YrVBAKVZK2+KPy/JiZmaB27Rr4++88qFRFlX059AJjrJAhGC+kL8YKGYLxQvpirJAhGC+kL8aKVJ06NWBq+vQJ7kyAVqKqlAAFADNLC3x19Abm7LumdWxFUAsEt7FDdQvTZ0pIqtW6f7hlMpnORKWuckPqUsXgA5z0xVghQzBeSF+MFTIE44X0xVghQzBeSF+MFSl9E6CcAk9GIxSqMLWTIwBojd4c7t0E8twC5OQXak0d1zmdXKlGjkL/aeal7aiuq9yQukRERERERERE9O/GBCgZjVpdBHluAca3t8e0t5ohW6GETXVz5CuU4iZBT+oxIUlERERERERERBWDCVAyKrW6CJAXQq5Sob5tDWRm5kGl5JBsIiIiIiIiIiKqHE+fJE9UDpoZ61xKk4iIiIiIiIiIKhMToERERERERERERFRlMQFKREREREREREREVRYToERERERERERERFRlMQFKREREREREREREVRYToERERERERERERFRlMQFKREREREREREREVZZMEAShsi/iZSUIAoqKqu7Lb2pqArW6qLIvg/4lGC+kL8YKGYLxQvpirJAhGC+kL8YKGYLxQvpirPzDxEQGmUz21HpMgBIREREREREREVGVxSnwREREREREREREVGUxAUpERERERERERERVFhOgREREREREREREVGUxAUpERERERERERERVFhOgREREREREREREVGUxAUpERERERERERERVFhOgREREREREREREVGUxAUpERERERERERERVFhOgREREREREREREVGUxAUpERERERERERERVFhOgREREREREREREVGUxAUpERERERERERERVFhOgL5GbN29i5syZ6NmzJ1xdXdGjRw+tOoWFhViyZAn8/PzQqlUrvP/++zh+/LhWPWdnZ60/7du316p34MAB9OrVCy1atEDHjh2xYsUKqNVqrXoKhQJff/013n33XbRs2RI+Pj4YPXo0kpKStOoePXoUkyZNQpcuXeDs7Iw5c+aU8xWh0hgzVgAgJSUFY8eOhZeXF9zd3REUFISjR4+Kx0+ePKkzpj7++GOtc+kbK2q1GtHR0Rg0aBB8fHzg7e2N4OBgnD59+hlfHSrJWPGyY8cOnXHg7OyM4cOHG9SeRlZWFiIjI9GlSxe0aNECb775JiZNmoS0tDStupGRkejevTs8PDzQpk0b9OnTB3Fxcc/wylBJFR0rwcHBpdYr+b01JFaK279/P5ydncuMQyofY74X3b17FxMnToSfnx88PDzQp08f7N27t9S28/Ly4O/vD2dnZ/zxxx9axw2Jl/j4eISHh4vnW79+vYGvBD2NMWPlwYMHmDBhAtq2bQsPDw+MGjUKt2/f1lkvPDwcHh4e8Pb2xmeffYbc3Fyteuy3vHgqI14A4ODBg+jfvz/c3d3h5eWF4OBgpKenS+qo1Wps2bIFvXr1QuvWreHp6YkhQ4bgyJEjWuf7+eef0bdvX3h7e6Nly5bo2rUrVq9ejcLCwnK+MlTSL7/8gtGjR8Pf3x/u7u7o2bMntm3bBkEQJPV++ukndO3aFS1btsR//vMfHDhwQOtcOTk5iIiIgLe3Nzw8PDBu3Dg8fPhQUue///0vhg0bhvbt26NNmzbo27cv9u/fr/PaHjx4gJkzZ6Jjx47i79ozZ87EgwcPyrynb7/9Fs7Ozhg5cqSBrwaVpaJjBQC2b9+Obt26oUWLFggMDMTmzZt1Xhv7LP8wq+wLoIpz/fp1HDp0CK1bt0ZRUZHWDyMALFiwALt27cKECRPg4OCAHTt2IDQ0FD/88APc3NwkdYODgyUdBnNzc8nx8+fPY8yYMejevTsmTpyI5ORkLF++HHK5HFOnThXr5efnY8iQIbh+/TpGjBgBT09PZGZmYsuWLRg4cCCWLVuGwMBAsf7hw4dx5coVeHl5ISsry1gvDxVjzFi5fv06BgwYAD8/PyxZsgTm5ua4dOkS5HK51jkXLlyIpk2biv+vXbu25LghsaJQKPDNN9+gV69eCA0NhYmJCX788UeEhIRg/fr18PX1NdbL9dIzVrwEBATghx9+kHzdjRs3MHXqVPj7+xvUHgBkZGTgww8/RFZWFkaNGgVXV1ekp6djw4YN6NOnD9avXw8PDw+xfl5eHj744AM0bdoUMpkMe/fuxcSJE1FUVIT33nvPGC/VS6+iY2XWrFlaCYmYmBj8+uuvkmeAobGioVAosGDBAtSrV++ZXhfSzVjxUlhYiBEjRgAAIiIiUKtWLezatQvjx49HdHQ0OnTooHXer7/+WucHtoDh8bJnzx7cvn1bZ9yScRgrVtRqNUaMGAG5XI65c+fCwsICq1atwuDBg7F7927UqFEDAKBUKsWY+vLLL6FQKBAZGYlJkyYhKipKbJP9lhdTRccLAOzatQufffYZhg0bhgkTJiAvLw+nT59GQUGBWKeoqAjh4eFITExEcHAwPvnkE8jlcvz8888YPnw4ZsyYgQ8//FCsn5WVhQ4dOiAsLAzW1tZISkrCqlWrkJ6ejrlz5z7HV/Dl8e2338LOzg6ffvopateujWPHjmHGjBlIT0/HRx99BACIi4vDjBkzMGrUKLRr1w7x8fH46KOPsHXrVri7u4vnmjBhApKTk/H555+jWrVqWL58OUJDQ7F9+3aYmT1Jy6xduxZ+fn4YMGAArKyssGfPHowdOxaLFi1Cr169xHOlpKQgODgY1atXx9ixY2Fvb4+bN29i7dq1SEhIwJYtW2Bvb691PxkZGVi9ejXq1q37XF+3l1FFx0p8fDwiIiIQEhKCgIAAnD59GgsXLoRMJpM8J9hnKUGgl4ZarRb/PXXqVKF79+6S4+np6YKLi4uwadMmsayoqEjo0aOHMGrUKEldJycnYd26dWW2N2zYMKFXr16SsvXr1wtubm5CRkaGWLZgwQLByclJOH78uKSuSqUSQkJChLZt2wqPHj3SeR+dOnUSZs+eXeZ1kOGMGSsDBgwQxo8fX2Z7J06cEJycnISkpKQy6xkSKyqVSsjMzNSq161bN2HkyJFltkOGMWa8lLRixQrBxcVFePjwod7taYwdO1Zo0aKFkJycLCnPy8sT3nnnHaFTp05CQUFBme3369dPGDp0aJl1SH8VHSu6dO7cWQgNDZWUlTdWli9fLgwaNKjMOKTyM1a8nDt3TnBychJOnDghOXenTp2EadOmabWbnJwsuLu7C99//73O9yZD46X4fejTfyLDGStWYmNjBScnJ+Hy5cuSr23RooWwceNGsWz37t2Cs7OzkJKSIpYdPnxYcHJyEi5cuCCWsd/yYqroePn777+FNm3aCFu3bi3zumJiYgQnJydh+/btWsc++eQTwdXVVbh69WqZ51i6dKnQqlUrQaVSlVmP9PPXX39plU2fPl1o06aNGEdvv/22MHHiREmdfv36CSNGjBD/f/bsWcHJyUk4fPiwWJaSkiI4OzsLcXFxZbY3dOhQoUePHpKyXr16Cb6+vpLfqQVBEDIyMgRfX1+hb9++Ou9nypQpwieffCJ8+OGHQlhYWGm3TeVQ0bHStWtX4aOPPpKca86cOYK3t7dQWFgolrHPIsUp8C8RE5Oyv91XrlyBWq2WTGWXyWTw8/PDkSNHDJ5OcfnyZa1p8X5+flAqleI0DoVCgR9//BHt27dHu3btJHVNTU0xbtw45OTkYNu2bXrfBz07Y8VKSkoKzpw5g+Dg4Ge+JkNjxdTUFLVq1dKq5+zsrHMKAZXf83y2xMbGol27dqhfv77e7QFPprvu378fQUFBcHR0lByzsrLCqFGjcPfuXfz6669lnsfW1hZKpfKp7ZF+KjpWSjp79izu3LkjGdFb3li5desWNm7ciOnTp5d5T1R+xooXlUoFAKhZs6bk3DVq1NA58mvevHno378/HBwctI6VJ17Yb3n+jBUrf/75J+rXr4/mzZuL9Ro0aIBmzZohISFBLEtMTISzs7Nk1kr79u1ha2uLQ4cOAWC/5UVW0fHyyy+/oKioCO+//36Z7cbExMDBwQFBQUFax8aNGwcA2LJlS5nnsLW1hUqlQlFRUZn1SD916tTRKnNxcUFubi7y8/Nx+/Zt3LhxA++8846kzrvvvovjx4+LsZKYmAgbGxtJTDVt2hQuLi5ITEx8anvFnwGnTp3CpUuXEBISojUDpV69eggODsb58+dx7tw5ybHTp09j//79mDRpkgGvAOmrImNFLpfjxo0bOnMtmZmZOH/+PAD2WXSp2ndHBtH80FlYWEjKLSwsUFhYiDt37kjKv/nmG7i5ucHT0xMTJkzAvXv3JMcLCgp0ngt4khgDgIsXLyI/Px+dOnXSeU1t27aFra0tfv/99/LfGBmdvrFy4cIFAE+mgPXq1Quurq4ICAgodS2RsLAwuLi4wN/fH5GRkVAoFOIxY8SKSqXChQsXJL+w0PNn6LNF448//sCNGzfKtbbiqVOnIAhCqfHSuXNnANCKF0EQoFKpkJ2djZ07d+Lo0aMYNGiQwe1T+TzvWImNjYWVlRXeeustsay8sTJ//nz07NlT8osvVSx948Xd3R3NmjXDsmXLcPv2bWRnZ2Pz5s24ceMG+vbtK/naPXv24Nq1axg7dqzONssbL1S59I0VXX1XTb3U1FTx/6mpqVp9CZlMBgcHB7Ee+y3/XsaOlwsXLsDBwQE7d+5Ep06d4Orqip49e4rJcgC4f/8+7ty5g4CAAJ0JCDs7Ozg7O+PkyZNax1QqFeRyOU6fPo2YmBgMGDBAa2kyMp4zZ86gQYMGsLa2Fr/PJT8wc3R0hFKpFNeDTU1NhYODA2QymaRe06ZNJbFSWnvFnwGaZ4Yh70NqtRpz587FqFGj8Morr+hzm2QEzytWCgsLIQjCU3Mt7LNo4xqgJHr99dcBAElJSWjcuLFYrvkEofh6m0FBQQgICEC9evVw7do1rFmzBgMHDsSuXbvET69ff/11rQXeS55Ls0hzw4YNS72uhg0bai0QTpVL31h59OgRAGDy5MkYMmQIpk6diiNHjmDJkiWoUaMG+vfvD+DJqJwRI0bAy8sL1apVw4kTJ7BhwwakpqaKa2kZI1bWrVuHBw8eYMiQIeW7cSoXQ54txcXGxqJatWp4++23DW5T80l5o0aNdB63traGjY2NVrwcP34cQ4cOBQCYmZlhxowZ6Natm8HtU/k8z1hRqVT45Zdf0LlzZ1hZWYnl5YmVhIQEnDt3Dnv27NHvxui50DdezMzMEBMTg9GjR6NLly4AgOrVq2PZsmWSda/kcjkWLVqEjz/+GNbW1jrbLO+zhSqXvrFib2+P9PR0PHjwAA0aNADwZH3o5ORkyYey2dnZkhHFGrVq1TJqH5f9lsph7HjJyMhAWloavvrqK0yZMgX169fH1q1bMWbMGOzcuRPNmjXTO16KbyIKPHlvK772fq9evRAREfEMd09lOX36NOLj48X9LDSxYGNjI6mn+b/meFnPjIsXL5ba3u7du3Hu3DmsXr1aLNPESmnvQ5ry4s+W7777DnK5nM+SCvQ8Y6VWrVqwtbVFUlISevfuLdYp+Yxin0UbE6AkcnJygqenJ7744gs0bNgQ9vb22LFjB06dOgUAkk8hIiMjxX97eXmhbdu26N27N3788UeEhoYCAAYOHIjPPvsMMTEx6Nmzp7gJkqmpqcHXVvITEKpc+saKZvpNUFAQRo8eDQBo164d0tPTsXbtWjEB6urqCldXV/H8vr6+eOWVVzBnzhwkJSWhVatWel9babFy9OhRrFy5EmPGjEGLFi0Mv2kqN0OeLRpFRUWIi4tDQEBAqYkIYyjZdqtWrbBt2zbk5uYiMTER8+bNg6mpKT744IPndg30j+cZK0ePHsXjx4/LvVu7pu2CggIsWLAA4eHhOqc7UcXRN14UCgXGjRsHQRCwevVq1KhRA3v27MGkSZMQHR0Nb29vAMCaNWtQt25d9OnT55mvjf2WF4u+sdKjRw989dVXiIiIwOeffw5zc3NERkYiPz+/XP1XfbDf8uIxdrwIgoD8/Hx88cUX4gwEb29vdO3aFdHR0Vi8eLHe11YyXszMzLBt2zYUFBTg4sWLWLNmDaZNmyb5XY2MIz09HR9//DF8fHwQEhLy3Nu7cuUKZs2ahd69e4sf3hlCEyt//fUXVqxYgcjISJ0jlsn4KiJWBg4ciPXr16Nt27bw9/fH2bNnsWnTJgCG90Fepj4Lp8CTxKJFi1C7dm30798f7dq1Ez+dBFDmumrNmzeHg4MDLl26JJb17t0bgwcPxuLFi+Hj44MhQ4agf//+qFWrljj0XvNp6f3790s99/3798V69OLQJ1Y0n2iVXPvK19cX9+/f19qduTjN+iiaT7qeJVYuXbqE8PBw9OjRQ9yFjyqWoc+WkydPIiMjo9y7r2vioOTSHBq5ubnIzs7Wihdra2u0bNkSvr6+mDp1KgYOHIhFixaVuhs0Gd/zipXY2FjY2trCz89PUm5orMTExMDExATdu3dHdnY2srOzoVQqUVRUhOzsbIPXy6Zno0+8bNu2DUlJSYiKikKXLl3g6+uL2bNno02bNli6dCmAJ+tkbdiwQVyXMTs7G/n5+QCeLOOSl5cHoPzPFqp8+sSKra0tli5dimvXrqFLly7o2LEjMjIyEBQUJHn+2NjY6OzDZGVliTOh2G/5dzN2vADS/rC5uTm8vLyQnJwMAHj11VcBPD1edE1fbtmyJTw9PTFkyBDMnz8fO3fuxB9//PGMrwAVl52djdDQUNja2mLlypXiMgWan/ecnByt+sWP6/PMKO7u3bsIDQ1Fq1atMGfOHMkxTayU9j6kKdc8W7766is4OzvD09NT7LeoVCpxySfNOtlkHBUVKyNHjkRgYCCmTJkCb29vTJw4UVwrWPP8YZ9FGxOgJNGkSRNs374dv/32G+Li4rBv3z5Ur14d9evXh52dnUHnMjExQUREBE6cOIFdu3bh2LFj6Nu3Lx4/fozWrVsDAFq0aAErKyscPHhQ5znOnTuHzMxMeHp6PuutkZHpEyvNmjUr8xyGJArKGys3b95EaGgoPDw8MG/ePL3bI+My9Nmye/du2NjYoGPHjuVqz9PTEzKZrNR40ZQ/7dni5uaG3NxcPH78uFzXQYZ7HrGiUCiwf/9+dOvWTWtdNENjJTU1FTdv3oSvry+8vLzg5eWF2NhYpKSkwMvLC9u3by/fjVO56BMvycnJaNCggdaIXRcXF9y6dQsAcOfOHSiVSoSFhYnf11GjRgEAQkJCxKUxjPVsoYqn77OlQ4cOOHjwIOLj47F//35s2bIFjx49gru7u1hH17p9giAgLS1NXKuP/ZZ/N2PGyxtvvFFqOwUFBQCeJLWaNGmCxMREnZuz3bt3D1evXoWXl1eZ160ZLax5ttGzUygUGDlyJHJycrBu3TrJ9GTNz3vJ50FqairMzc3RpEkTsV5aWprW97b4M0Pj8ePHGD58OOrWrYtVq1Zp9Vs0sxaKryFbXMn3obS0NJw6dUp8b/Py8sLZs2dx5MgReHl54dixY4a8HFSGioyV6tWr48svv8SxY8fwf//3fzh69ChatmwJAGKuhX0WbUyAkk6NGzfGG2+8AaVSiW3btj11+ufly5eRlpYm/tAVV7NmTTRv3hw2NjbYvHkzGjdujDfffBPAkx/cfv364ciRI+K0Eo2ioiKsWLEC1atX17kbIr0YyooVd3d32Nraar2xHjt2DI0aNSpz+mhcXBwAiDFVnlh5+PAhhg0bhoYNG2LFihVcEP4FoM+zpbCwEPv27UNgYGC5p+rY2dkhMDAQO3fuRFpamuSYXC7H2rVrUadOHQQGBpZ5njNnzsDa2hq1a9cu13VQ+RkzVhISEpCfn69zlKihsRIaGopNmzZJ/vj5+cHOzg6bNm0SF5SnilVWvDRq1Ajp6elaH2RcunRJTGS4uLhofV+nTZsGAJg9ezZmzZoFwHjPFqo8+jxbTE1N4ejoiCZNmiAlJQXHjh2T1PP398eVK1dw48YNsez48ePIzMwUP4xhv6VqMEa8aDYgOX78uFhWWFiIU6dOSdbvHDx4MFJSUrBr1y6tNlauXAm1Wq21cVtJZ86cAQAxmULPRqVSYcKECUhNTcW6deu0Rso1adIE9vb2WmuCx8fHw9fXV+yb+Pv7IysrSxIDaWlp+PPPP+Hv7y+W5eXlITQ0FEqlEt98843OpX08PT3h5uaGmJgYrfe1x48fY9OmTXByckLbtm0BABEREVrvb82bN4e7uzs2bdpk0FJjVLqKjhWNOnXqwNnZGVZWVti6dSs8PT3FRCn7LNq4BuhLRC6Xi58U3b17F7m5ueIPoLe3N+rUqYMtW7bA2toaDRs2xN27d7Fx40ZUq1ZNXNcTANavX49bt27Bx8cHderUwfXr17F27Vq8+uqrkjf7pKQk/P7773BxcYFCoUBCQgJ27dqF6Ohoybo448aNw9mzZxEWFobQ0FB4enoiMzMTW7duxcmTJzF//nzJguB3794Vp3XI5XLcunVLvA9uVmIcxooVc3NzhIeHY+HChahVqxbatGmDw4cPIy4uDnPnzhXrTZ48Ga+//jpcXV3FTZC+/fZbdOnSRZJUNyRWFAoFQkND8ffff+Ozzz7D9evXxfNYWFhI1hylZ2OseNE4dOgQsrOzS53SrE97ADBz5kwMGjQIgwYNwsiRI+Hq6ooHDx5gw4YNSEtLQ3R0NCwtLQE8WWfpiy++QLdu3WBnZ4f8/HwcPHgQP/30EyZOnAgzM75dGkNFx4rG7t270ahRI/GXgZIMiRVHR0c4OjpKvv7nn3/GgwcP4OPjY/BrQqUzVry89957iIqKQmhoKMLCwsQ1QE+cOCGuvWdjY1Pq98/NzU2SpDAkXoAnI1A101wB4Nq1a9izZw8sLS3LPcqdpIz5bFmyZAnc3d1hbW2Nq1evYs2aNQgKCoKvr69Yp2vXroiKikJ4eDgmTpwIuVyOxYsXIyAgQJJMYL/lxVTR8eLm5oauXbtixowZyMzMRP369fHdd9/h0aNHGD58uFhv0KBBOH78OKZPn46rV6/C398fBQUF2LFjB/bu3Yvw8HBJfA0aNAiBgYFo2rQpTExMcOHCBWzYsAEdOnRgUstIZs+ejQMHDuDTTz9Fbm6uuMkM8GQPAwsLC4SHh2Py5Ml47bXX4OPjg/j4eCQlJWHLli1iXQ8PD/j5+SEiIgJTp05FtWrVsGzZMjg7O0s2cAwPD8eVK1cwf/583Lt3TzJ1ufio4iVLliA4OBh9+/bFyJEjYW9vj5s3byIqKgr5+fnYuHGjWNfFxUXrvmxsbGBlZcV+ixFVdKwcOnQIt27dwhtvvIGsrCzs3r0bJ0+exPfffy+5LvZZpGSCrjH2VCXduXNHXHi7pE2bNsHHxwcbNmzAd999h/T0dNja2uLtt9/G+PHjJetNJCQkICoqCmlpacjLy0Pt2rXh7++PCRMmSNaluXz5MmbNmiV24Fq3bo3x48dLdlzVUCgUWL9+PWJjY3H79m0olUpUq1YNUVFRkg4EAOzYsUMclVHS1atXDX5dSJuxYkVj8+bNiImJQXp6Ouzs7DBixAhJsjwqKgq7d+/G3bt3oVQqYWdnh/feew9hYWFao7r0jZWy7sHOzg4JCQnleWlIB2PHi+YXxsTERHHdHEPb08jKysLatWvx66+/Ij09HSqVCnXr1sW6deskv0w+evQICxYswPnz55GRkYGaNWuiadOmGDJkSLkWnifdKjpWgCcx0L59ewwePBhTpkwp9dr0jRVdPv30U1y8eBGxsbFl1iPDGDNeLl26hOXLl+PSpUtQKBSwt7fH4MGD0bNnz1LbP3nyJEJCQrBt2zatGS6GxMvKlSuxatUqrfPzvch4jBkrkyZNwokTJ5CVlYXGjRujX79+CAkJ0doE6cGDB5g3bx6OHDkCMzMzBAYGIiIiQmvEFvstL57KiJf8/HwsXboUcXFxyM3NhZubG6ZMmaL1wZxKpcL333+P7du3IzU1VZwiHxkZqTUjbuHChTh8+DDu3bsHMzMzNG7cGEFBQRg4cCA3uzGSzp074+7duzqP/fbbb2jcuDEA4KeffkJ0dDTu3bsHBwcHTJw4URz5q5GTk4OFCxdi3759UKlU8PPzw/Tp0yUjBZ2dnUu9lpK/56anp2P16tU4dOgQMjIyUFRUBHt7e0RHR+O1114r876Cg4NhZWWFqKioMuuR/io6Vo4ePYrFixfj5s2bMDMzg7e3NyZNmqT1IT3APktxTIDSC+nUqVMYOnQoRo8ejbFjx1b25dALjLFChoiNjcXkyZOxaNEiLq1BZWKskCEYL6Qv9lvIECkpKejbty+6deuG+fPnV/bl0AssKioKK1euRHR0tNYAIqLiXuY+C9cApReSl5cXpk2bhhUrVmDnzp2VfTn0AmOskCF69OiBoUOHYvr06ZK1dYhKYqyQIRgvpC/2W8gQjo6OWLJkCbZv346vv/66si+HXmAjR47EW2+9hfDwcMkSGkQlvcx9Fo4AJSIiIiIiIiIioiqLI0CJiIiIiIiIiIioymIClIiIiIiIiIiIiKosJkCJiIiIiIiIiIioymIClIiIiIiIiIiIiKosJkCJiIiIiIiIiIioymIClIiIiIiIiIiIiKosJkCJiIiIiIiIiIioymIClIiIiIiIiIiIiKosJkCJiIiIiIiIiIioymIClIiIiIiIiIiIiKqs/wePhIyV9fbicgAAAABJRU5ErkJggg==", + "text/plain": [ + "<Figure size 1600x400 with 1 Axes>" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "y = df['realgdp']\n", + "y_train, y_test = temporal_train_test_split(y, test_size=30)\n", + "\n", + "forecaster = AutoARIMA(sp=4)\n", + "forecaster.fit(y_train)\n", + "y_pred = forecaster.predict(fh=fh_abs)\n", + "\n", + "print(f\"MASE: {round(mean_absolute_scaled_error(y_true=y_test, y_pred=y_pred, y_train=y_train), 3)}\")\n", + "plot_series(y_train, y_test, y_pred, labels=[\"y_train\", \"y_test\", \"y_pred\"]);" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "196a3a90", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 365 + }, + "id": "196a3a90", + "outputId": "31da0272-9c99-48ed-cfa7-49b4a10e32d9" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "MASE: 5.496\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "<Figure size 1600x400 with 1 Axes>" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "y = df['realgdp']\n", + "y_train, y_test = temporal_train_test_split(y, test_size=30)\n", + "\n", + "forecaster = DirectTabularRegressionForecaster(LinearRegression())\n", + "forecaster.fit(y_train, fh=fh_abs) #forecasting horizon necessary in the fit() method for this forecaster\n", + "y_pred = forecaster.predict()\n", + "\n", + "print(f\"MASE: {round(mean_absolute_scaled_error(y_true=y_test, y_pred=y_pred, y_train=y_train), 3)}\")\n", + "plot_series(y_train, y_test, y_pred, labels=[\"y_train\", \"y_test\", \"y_pred\"]);" + ] + }, + { + "cell_type": "markdown", + "id": "c6a0a392", + "metadata": { + "id": "c6a0a392" + }, + "source": [ + "### Multivariate exogenous time series\n", + "Forecasting with exogenous variables (i.e. multivariate X)" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "e1f3f9a1", + "metadata": { + "id": "e1f3f9a1" + }, + "outputs": [], + "source": [ + "#creating lagged values to avoid data leakage\n", + "df.loc[:, 'realinv_lagged'] = df.loc[:, 'realinv'].shift()\n", + "df[['realinv_lagged']] = df[['realinv_lagged']].fillna(method='backfill')" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "f6fd09dd", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 365 + }, + "id": "f6fd09dd", + "outputId": "0b48e343-f29d-45ea-bb6f-c3f2960569a4" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "MASE: 6.708\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "<Figure size 1600x400 with 1 Axes>" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "y = df['realgdp']\n", + "X = df[['realinv_lagged']] #could be more features\n", + "y_train, y_test, X_train, X_test = temporal_train_test_split(y, X, fh=fh_abs)\n", + "\n", + "forecaster = AutoARIMA(sp=4)\n", + "forecaster.fit(y_train, X_train)\n", + "y_pred = forecaster.predict(X=X_test, fh=fh_abs)\n", + "\n", + "print(f\"MASE: {round(mean_absolute_scaled_error(y_true=y_test, y_pred=y_pred, y_train=y_train), 3)}\")\n", + "plot_series(y_train, y_test, y_pred, labels=[\"y_train\", \"y_test\", \"y_pred\"]);" + ] + }, + { + "cell_type": "markdown", + "id": "006a6c6c", + "metadata": { + "id": "006a6c6c" + }, + "source": [ + "### Updating a forecaster" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "id": "e43675b4", + "metadata": { + "id": "e43675b4" + }, + "outputs": [], + "source": [ + "y_old_data = y_train[:-4]\n", + "forecaster = AutoARIMA(sp=4)\n", + "forecaster.fit(y_old_data)\n", + "y_old_pred = forecaster.predict(fh_rel)" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "id": "2443261e", + "metadata": { + "id": "2443261e" + }, + "outputs": [], + "source": [ + "#updating the model with new data (full year) and generating predictions for the next period\n", + "y_new_data = y_train[-4:]\n", + "forecaster.update(y_new_data)\n", + "y_new_pred = forecaster.predict(fh_rel)" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "id": "93f0812b", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 346 + }, + "id": "93f0812b", + "outputId": "903e5276-4226-4259-caca-5673836b059c" + }, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "<Figure size 1600x400 with 1 Axes>" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "plot_series(y_train, y_old_pred, y_new_pred,\n", + " labels=[\"y_train\", \"y_old_pred\", 'y_new_pred']);" + ] + }, + { + "cell_type": "markdown", + "id": "3a0d9d7d", + "metadata": { + "id": "3a0d9d7d" + }, + "source": [ + "## 2. Reducing a forecasting task to a regression problem" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "id": "d0c35fb5", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 365 + }, + "id": "d0c35fb5", + "outputId": "ccf76d17-c15e-4af6-f5a9-cba35cdbb4ca" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "MASE: 4.104\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "<Figure size 1600x400 with 1 Axes>" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "regressor = LinearRegression()\n", + "forecaster = make_reduction(estimator=regressor, strategy='recursive', window_length=4)\n", + "forecaster.fit(y_train)\n", + "y_pred = forecaster.predict(fh_abs)\n", + "\n", + "print(f\"MASE: {round(mean_absolute_scaled_error(y_true=y_test, y_pred=y_pred, y_train=y_train), 3)}\")\n", + "plot_series(y_train, y_test, y_pred, labels=[\"y_train\", \"y_test\", \"y_pred\"]);" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "id": "a50fc8aa", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 365 + }, + "id": "a50fc8aa", + "outputId": "7200c19f-85d0-4252-90bf-2792546a3f36" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "MASE: 4.984\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "<Figure size 1600x400 with 1 Axes>" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "#direct reduction strategy\n", + "regressor = LinearRegression()\n", + "forecaster = DirectTabularRegressionForecaster(estimator=regressor, window_length=4)\n", + "forecaster.fit(y=y_train, fh=fh_abs)\n", + "y_pred = forecaster.predict()\n", + "\n", + "print(f\"MASE: {round(mean_absolute_scaled_error(y_true=y_test, y_pred=y_pred, y_train=y_train), 3)}\")\n", + "plot_series(y_train, y_test, y_pred, labels=[\"y_train\", \"y_test\", \"y_pred\"]);" + ] + }, + { + "cell_type": "markdown", + "id": "64ad7c5a", + "metadata": { + "id": "64ad7c5a" + }, + "source": [ + "## 3. Evaluation of forecasting models" + ] + }, + { + "cell_type": "markdown", + "id": "718f8427", + "metadata": { + "id": "718f8427" + }, + "source": [ + "### Performance metrics" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "id": "34cb830f", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 346 + }, + "id": "34cb830f", + "outputId": "8a07532e-bf01-4af5-c08b-02476922ec44" + }, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "<Figure size 1600x400 with 1 Axes>" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "forecaster = ThetaForecaster()\n", + "forecaster.fit(y_train)\n", + "y_pred = forecaster.predict(fh=fh_abs)\n", + "plot_series(y_train, y_test, y_pred, labels=[\"y_train\", \"y_test\", \"y_pred\"]);" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "id": "80e7ce1d", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "80e7ce1d", + "outputId": "b392187a-6656-448f-a567-e9b71d33beed" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0.06012960019008075" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#MdAPE function\n", + "median_absolute_percentage_error(y_test, y_pred)" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "id": "73ab70a0", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "73ab70a0", + "outputId": "cb44e270-14d8-46f5-f72b-4405cc7f905c", + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0.06012960019008075" + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#MdAPE class\n", + "mdape = MedianAbsolutePercentageError()\n", + "mdape(y_true=y_test, y_pred=y_pred)" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "id": "64b12088-669f-44f6-9b16-9d75954e2054", + "metadata": { + "id": "64b12088-669f-44f6-9b16-9d75954e2054" + }, + "outputs": [], + "source": [ + "def custom_mape(y_true: np.array, y_pred: np.array, multioutput: str):\n", + " metrics_dict = {'uniform_average': np.mean(np.abs((y_true - np.ceil(y_pred)) / y_true)),\n", + " 'raw_values': np.abs((y_true - np.ceil(y_pred)) / y_true)}\n", + " try:\n", + " return metrics_dict[multioutput]\n", + " except KeyError:\n", + " print(\"multioutput not specified correctly - pick `raw_values` or `uniform_average`\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "id": "51724bfe", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "51724bfe", + "outputId": "c4df6ca0-e400-4be1-a20a-14b7a140109d", + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0.05751249071487726" + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mape_changed = make_forecasting_scorer(func=custom_mape, multioutput = 'uniform_average')\n", + "mape_changed(y_test, y_pred)" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "id": "8108bc34", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "8108bc34", + "outputId": "919734a2-4414-4775-eef0-22b03d64a066" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([0.00102004, 0.00391758, 0.002054 , 0.00402024, 0.00977161,\n", + " 0.02408462, 0.03079155, 0.03550762, 0.04034922, 0.04539615,\n", + " 0.05159286, 0.0590479 , 0.06113469, 0.0663205 , 0.06915992,\n", + " 0.07935452, 0.08080733, 0.07912407, 0.08396799, 0.08487858,\n", + " 0.09019835, 0.09639655, 0.09933719, 0.0958987 , 0.09729706,\n", + " 0.08935353, 0.07486882, 0.05743802, 0.05383124, 0.05845428])" + ] + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mape_changed = make_forecasting_scorer(func=custom_mape, multioutput = 'raw_values')\n", + "mape_changed(y_test, y_pred)" + ] + }, + { + "cell_type": "markdown", + "id": "3114fb2a", + "metadata": { + "id": "3114fb2a" + }, + "source": [ + "### Time-based cross-validation" + ] + }, + { + "cell_type": "markdown", + "id": "2e5ed2da", + "metadata": { + "id": "2e5ed2da" + }, + "source": [ + "- **Expanding Window**" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "id": "a05ce4c6", + "metadata": { + "id": "a05ce4c6" + }, + "outputs": [], + "source": [ + "forecaster = AutoARIMA(sp=4)\n", + "cv = ExpandingWindowSplitter(step_length=30, fh=list(range(1,31)), initial_window=40)\n", + "cv_df = evaluate(forecaster=forecaster, y=y, cv=cv, strategy=\"update\", return_data=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "id": "02aae2ca", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 307 + }, + "id": "02aae2ca", + "outputId": "85ea11c2-7b68-4daf-e5b2-dc5d86e2634a", + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " <div id=\"df-820beac7-72a3-4b11-b33b-7b9555a3f8fd\" class=\"colab-df-container\">\n", + " <div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>test_MeanAbsolutePercentageError</th>\n", + " <th>fit_time</th>\n", + " <th>pred_time</th>\n", + " <th>len_train_window</th>\n", + " <th>cutoff</th>\n", + " <th>y_train</th>\n", + " <th>y_test</th>\n", + " <th>y_pred</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>0.027279</td>\n", + " <td>2.230224</td>\n", + " <td>0.024685</td>\n", + " <td>40</td>\n", + " <td>1968Q4</td>\n", + " <td>date\n", + "1959Q1 2710.349\n", + "1959Q2 2778.801\n", + "195...</td>\n", + " <td>date\n", + "1969Q1 4244.100\n", + "1969Q2 4256.460\n", + "196...</td>\n", + " <td>1969Q1 4211.384583\n", + "1969Q2 4248.007673\n", + "19...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>0.022458</td>\n", + " <td>4.483943</td>\n", + " <td>0.045663</td>\n", + " <td>70</td>\n", + " <td>1976Q2</td>\n", + " <td>date\n", + "1959Q1 2710.349\n", + "1959Q2 2778.801\n", + "195...</td>\n", + " <td>date\n", + "1976Q3 5154.072\n", + "1976Q4 5191.499\n", + "197...</td>\n", + " <td>1976Q3 5183.195992\n", + "1976Q4 5222.693348\n", + "19...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>0.059587</td>\n", + " <td>3.957826</td>\n", + " <td>0.027062</td>\n", + " <td>100</td>\n", + " <td>1983Q4</td>\n", + " <td>date\n", + "1959Q1 2710.349\n", + "1959Q2 2778.801\n", + "195...</td>\n", + " <td>date\n", + "1984Q1 6448.264\n", + "1984Q2 6559.594\n", + "198...</td>\n", + " <td>1984Q1 6385.598862\n", + "1984Q2 6428.366855\n", + "19...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>0.049360</td>\n", + " <td>2.459181</td>\n", + " <td>0.020844</td>\n", + " <td>130</td>\n", + " <td>1991Q2</td>\n", + " <td>date\n", + "1959Q1 2710.349\n", + "1959Q2 2778.801\n", + "195...</td>\n", + " <td>date\n", + "1991Q3 8037.538\n", + "1991Q4 8069.046\n", + "1...</td>\n", + " <td>1991Q3 8048.706257\n", + "1991Q4 8090.890623\n", + "19...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>0.009310</td>\n", + " <td>9.126039</td>\n", + " <td>0.040927</td>\n", + " <td>160</td>\n", + " <td>1998Q4</td>\n", + " <td>date\n", + "1959Q1 2710.349\n", + "1959Q2 2778.801\n", + "1...</td>\n", + " <td>date\n", + "1999Q1 10601.179\n", + "1999Q2 10684.049\n", + "1...</td>\n", + " <td>1999Q1 10623.174768\n", + "1999Q2 10727.402350\n", + "...</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>\n", + " <div class=\"colab-df-buttons\">\n", + "\n", + " <div class=\"colab-df-container\">\n", + " <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-820beac7-72a3-4b11-b33b-7b9555a3f8fd')\"\n", + " title=\"Convert this dataframe to an interactive table.\"\n", + " style=\"display:none;\">\n", + "\n", + " <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n", + " <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n", + " </svg>\n", + " </button>\n", + "\n", + " <style>\n", + " .colab-df-container {\n", + " display:flex;\n", + " gap: 12px;\n", + " }\n", + "\n", + " .colab-df-convert {\n", + " background-color: #E8F0FE;\n", + " border: none;\n", + " border-radius: 50%;\n", + " cursor: pointer;\n", + " display: none;\n", + " fill: #1967D2;\n", + " height: 32px;\n", + " padding: 0 0 0 0;\n", + " width: 32px;\n", + " }\n", + "\n", + " .colab-df-convert:hover {\n", + " background-color: #E2EBFA;\n", + " box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n", + " fill: #174EA6;\n", + " }\n", + "\n", + " .colab-df-buttons div {\n", + " margin-bottom: 4px;\n", + " }\n", + "\n", + " [theme=dark] .colab-df-convert {\n", + " background-color: #3B4455;\n", + " fill: #D2E3FC;\n", + " }\n", + "\n", + " [theme=dark] .colab-df-convert:hover {\n", + " background-color: #434B5C;\n", + " box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n", + " filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n", + " fill: #FFFFFF;\n", + " }\n", + " </style>\n", + "\n", + " <script>\n", + " const buttonEl =\n", + " document.querySelector('#df-820beac7-72a3-4b11-b33b-7b9555a3f8fd button.colab-df-convert');\n", + " buttonEl.style.display =\n", + " google.colab.kernel.accessAllowed ? 'block' : 'none';\n", + "\n", + " async function convertToInteractive(key) {\n", + " const element = document.querySelector('#df-820beac7-72a3-4b11-b33b-7b9555a3f8fd');\n", + " const dataTable =\n", + " await google.colab.kernel.invokeFunction('convertToInteractive',\n", + " [key], {});\n", + " if (!dataTable) return;\n", + "\n", + " const docLinkHtml = 'Like what you see? Visit the ' +\n", + " '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n", + " + ' to learn more about interactive tables.';\n", + " element.innerHTML = '';\n", + " dataTable['output_type'] = 'display_data';\n", + " await google.colab.output.renderOutput(dataTable, element);\n", + " const docLink = document.createElement('div');\n", + " docLink.innerHTML = docLinkHtml;\n", + " element.appendChild(docLink);\n", + " }\n", + " </script>\n", + " </div>\n", + "\n", + "\n", + "<div id=\"df-c6f18150-71a1-4246-8b2d-0a9136c354d4\">\n", + " <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-c6f18150-71a1-4246-8b2d-0a9136c354d4')\"\n", + " title=\"Suggest charts\"\n", + " style=\"display:none;\">\n", + "\n", + "<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n", + " width=\"24px\">\n", + " <g>\n", + " <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n", + " </g>\n", + "</svg>\n", + " </button>\n", + "\n", + "<style>\n", + " .colab-df-quickchart {\n", + " --bg-color: #E8F0FE;\n", + " --fill-color: #1967D2;\n", + " --hover-bg-color: #E2EBFA;\n", + " --hover-fill-color: #174EA6;\n", + " --disabled-fill-color: #AAA;\n", + " --disabled-bg-color: #DDD;\n", + " }\n", + "\n", + " [theme=dark] .colab-df-quickchart {\n", + " --bg-color: #3B4455;\n", + " --fill-color: #D2E3FC;\n", + " --hover-bg-color: #434B5C;\n", + " --hover-fill-color: #FFFFFF;\n", + " --disabled-bg-color: #3B4455;\n", + " --disabled-fill-color: #666;\n", + " }\n", + "\n", + " .colab-df-quickchart {\n", + " background-color: var(--bg-color);\n", + " border: none;\n", + " border-radius: 50%;\n", + " cursor: pointer;\n", + " display: none;\n", + " fill: var(--fill-color);\n", + " height: 32px;\n", + " padding: 0;\n", + " width: 32px;\n", + " }\n", + "\n", + " .colab-df-quickchart:hover {\n", + " background-color: var(--hover-bg-color);\n", + " box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n", + " fill: var(--button-hover-fill-color);\n", + " }\n", + "\n", + " .colab-df-quickchart-complete:disabled,\n", + " .colab-df-quickchart-complete:disabled:hover {\n", + " background-color: var(--disabled-bg-color);\n", + " fill: var(--disabled-fill-color);\n", + " box-shadow: none;\n", + " }\n", + "\n", + " .colab-df-spinner {\n", + " border: 2px solid var(--fill-color);\n", + " border-color: transparent;\n", + " border-bottom-color: var(--fill-color);\n", + " animation:\n", + " spin 1s steps(1) infinite;\n", + " }\n", + "\n", + " @keyframes spin {\n", + " 0% {\n", + " border-color: transparent;\n", + " border-bottom-color: var(--fill-color);\n", + " border-left-color: var(--fill-color);\n", + " }\n", + " 20% {\n", + " border-color: transparent;\n", + " border-left-color: var(--fill-color);\n", + " border-top-color: var(--fill-color);\n", + " }\n", + " 30% {\n", + " border-color: transparent;\n", + " border-left-color: var(--fill-color);\n", + " border-top-color: var(--fill-color);\n", + " border-right-color: var(--fill-color);\n", + " }\n", + " 40% {\n", + " border-color: transparent;\n", + " border-right-color: var(--fill-color);\n", + " border-top-color: var(--fill-color);\n", + " }\n", + " 60% {\n", + " border-color: transparent;\n", + " border-right-color: var(--fill-color);\n", + " }\n", + " 80% {\n", + " border-color: transparent;\n", + " border-right-color: var(--fill-color);\n", + " border-bottom-color: var(--fill-color);\n", + " }\n", + " 90% {\n", + " border-color: transparent;\n", + " border-bottom-color: var(--fill-color);\n", + " }\n", + " }\n", + "</style>\n", + "\n", + " <script>\n", + " async function quickchart(key) {\n", + " const quickchartButtonEl =\n", + " document.querySelector('#' + key + ' button');\n", + " quickchartButtonEl.disabled = true; // To prevent multiple clicks.\n", + " quickchartButtonEl.classList.add('colab-df-spinner');\n", + " try {\n", + " const charts = await google.colab.kernel.invokeFunction(\n", + " 'suggestCharts', [key], {});\n", + " } catch (error) {\n", + " console.error('Error during call to suggestCharts:', error);\n", + " }\n", + " quickchartButtonEl.classList.remove('colab-df-spinner');\n", + " quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n", + " }\n", + " (() => {\n", + " let quickchartButtonEl =\n", + " document.querySelector('#df-c6f18150-71a1-4246-8b2d-0a9136c354d4 button');\n", + " quickchartButtonEl.style.display =\n", + " google.colab.kernel.accessAllowed ? 'block' : 'none';\n", + " })();\n", + " </script>\n", + "</div>\n", + " </div>\n", + " </div>\n" + ], + "text/plain": [ + " test_MeanAbsolutePercentageError fit_time pred_time len_train_window \\\n", + "0 0.027279 2.230224 0.024685 40 \n", + "1 0.022458 4.483943 0.045663 70 \n", + "2 0.059587 3.957826 0.027062 100 \n", + "3 0.049360 2.459181 0.020844 130 \n", + "4 0.009310 9.126039 0.040927 160 \n", + "\n", + " cutoff y_train \\\n", + "0 1968Q4 date\n", + "1959Q1 2710.349\n", + "1959Q2 2778.801\n", + "195... \n", + "1 1976Q2 date\n", + "1959Q1 2710.349\n", + "1959Q2 2778.801\n", + "195... \n", + "2 1983Q4 date\n", + "1959Q1 2710.349\n", + "1959Q2 2778.801\n", + "195... \n", + "3 1991Q2 date\n", + "1959Q1 2710.349\n", + "1959Q2 2778.801\n", + "195... \n", + "4 1998Q4 date\n", + "1959Q1 2710.349\n", + "1959Q2 2778.801\n", + "1... \n", + "\n", + " y_test \\\n", + "0 date\n", + "1969Q1 4244.100\n", + "1969Q2 4256.460\n", + "196... \n", + "1 date\n", + "1976Q3 5154.072\n", + "1976Q4 5191.499\n", + "197... \n", + "2 date\n", + "1984Q1 6448.264\n", + "1984Q2 6559.594\n", + "198... \n", + "3 date\n", + "1991Q3 8037.538\n", + "1991Q4 8069.046\n", + "1... \n", + "4 date\n", + "1999Q1 10601.179\n", + "1999Q2 10684.049\n", + "1... \n", + "\n", + " y_pred \n", + "0 1969Q1 4211.384583\n", + "1969Q2 4248.007673\n", + "19... \n", + "1 1976Q3 5183.195992\n", + "1976Q4 5222.693348\n", + "19... \n", + "2 1984Q1 6385.598862\n", + "1984Q2 6428.366855\n", + "19... \n", + "3 1991Q3 8048.706257\n", + "1991Q4 8090.890623\n", + "19... \n", + "4 1999Q1 10623.174768\n", + "1999Q2 10727.402350\n", + "... " + ] + }, + "execution_count": 38, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cv_df" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "id": "9447bd22", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 385 + }, + "id": "9447bd22", + "outputId": "d6cdb1bb-cc22-46de-cafb-edc55b24bcef", + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Average MAPE with Expanding Window: 0.034\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "<Figure size 1600x400 with 1 Axes>" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "plot_series(\n", + " y, *[cv_df[\"y_pred\"].iloc[x] for x in range(5)],\n", + " markers=[\"o\", *[\".\"]*5],\n", + " labels=[\"y_true\"] + [f\"cv: {x}\" for x in range(5)]\n", + ")\n", + "print(f\"Average MAPE with Expanding Window: {round(cv_df['test_MeanAbsolutePercentageError'].mean(), 3)}\")\n", + "plt.title('Cross-validation with Expanding Window');" + ] + }, + { + "cell_type": "markdown", + "id": "4afb8360", + "metadata": { + "id": "4afb8360" + }, + "source": [ + "- **Sliding Window**" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "id": "5c9ed552", + "metadata": { + "id": "5c9ed552" + }, + "outputs": [], + "source": [ + "forecaster = AutoARIMA(sp=4)\n", + "cv = SlidingWindowSplitter(step_length=30, fh=list(range(1,31)), initial_window=40)\n", + "cv_df = evaluate(forecaster=forecaster, y=y, cv=cv, strategy=\"update\", return_data=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "id": "c4b80504", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 400 + }, + "id": "c4b80504", + "outputId": "4f6137a9-6724-428c-c69e-05b40f9c9c52" + }, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " <div id=\"df-ab05c56f-8a70-4c11-b119-079e5dbb1d8a\" class=\"colab-df-container\">\n", + " <div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>test_MeanAbsolutePercentageError</th>\n", + " <th>fit_time</th>\n", + " <th>pred_time</th>\n", + " <th>len_train_window</th>\n", + " <th>cutoff</th>\n", + " <th>y_train</th>\n", + " <th>y_test</th>\n", + " <th>y_pred</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>0.027279</td>\n", + " <td>2.242027</td>\n", + " <td>0.024409</td>\n", + " <td>40</td>\n", + " <td>1968Q4</td>\n", + " <td>1959Q1 2710.349\n", + "1959Q2 2778.801\n", + "1959Q3 ...</td>\n", + " <td>1969Q1 4244.100\n", + "1969Q2 4256.460\n", + "1969Q3 ...</td>\n", + " <td>1969Q1 4211.384583\n", + "1969Q2 4248.007673\n", + "19...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>0.032508</td>\n", + " <td>3.777926</td>\n", + " <td>0.056402</td>\n", + " <td>10</td>\n", + " <td>1976Q2</td>\n", + " <td>1974Q1 4909.617\n", + "1974Q2 4922.188\n", + "1974Q3 ...</td>\n", + " <td>1976Q3 5154.072\n", + "1976Q4 5191.499\n", + "1977Q1 ...</td>\n", + " <td>1976Q3 5178.306143\n", + "1976Q4 5227.665286\n", + "19...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>0.017365</td>\n", + " <td>2.365881</td>\n", + " <td>0.022176</td>\n", + " <td>10</td>\n", + " <td>1983Q4</td>\n", + " <td>1981Q3 6030.184\n", + "1981Q4 5955.062\n", + "1982Q1 ...</td>\n", + " <td>1984Q1 6448.264\n", + "1984Q2 6559.594\n", + "1984Q3 ...</td>\n", + " <td>1984Q1 6386.849\n", + "1984Q2 6448.124\n", + "1984Q3 ...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>0.011343</td>\n", + " <td>1.619386</td>\n", + " <td>0.020145</td>\n", + " <td>10</td>\n", + " <td>1991Q2</td>\n", + " <td>1989Q1 7806.603\n", + "1989Q2 7865.016\n", + "1989Q3 ...</td>\n", + " <td>1991Q3 8037.538\n", + "1991Q4 8069.046\n", + "1992Q1...</td>\n", + " <td>1991Q3 8080.539\n", + "1991Q4 8157.256\n", + "1992Q1...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>0.052317</td>\n", + " <td>13.451261</td>\n", + " <td>0.017559</td>\n", + " <td>10</td>\n", + " <td>1998Q4</td>\n", + " <td>1996Q3 9488.879\n", + "1996Q4 9592.458\n", + "1997Q1...</td>\n", + " <td>1999Q1 10601.179\n", + "1999Q2 10684.049\n", + "1999Q3...</td>\n", + " <td>1999Q1 10749.487249\n", + "1999Q2 10779.693065\n", + "...</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>\n", + " <div class=\"colab-df-buttons\">\n", + "\n", + " <div class=\"colab-df-container\">\n", + " <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-ab05c56f-8a70-4c11-b119-079e5dbb1d8a')\"\n", + " title=\"Convert this dataframe to an interactive table.\"\n", + " style=\"display:none;\">\n", + "\n", + " <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n", + " <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n", + " </svg>\n", + " </button>\n", + "\n", + " <style>\n", + " .colab-df-container {\n", + " display:flex;\n", + " gap: 12px;\n", + " }\n", + "\n", + " .colab-df-convert {\n", + " background-color: #E8F0FE;\n", + " border: none;\n", + " border-radius: 50%;\n", + " cursor: pointer;\n", + " display: none;\n", + " fill: #1967D2;\n", + " height: 32px;\n", + " padding: 0 0 0 0;\n", + " width: 32px;\n", + " }\n", + "\n", + " .colab-df-convert:hover {\n", + " background-color: #E2EBFA;\n", + " box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n", + " fill: #174EA6;\n", + " }\n", + "\n", + " .colab-df-buttons div {\n", + " margin-bottom: 4px;\n", + " }\n", + "\n", + " [theme=dark] .colab-df-convert {\n", + " background-color: #3B4455;\n", + " fill: #D2E3FC;\n", + " }\n", + "\n", + " [theme=dark] .colab-df-convert:hover {\n", + " background-color: #434B5C;\n", + " box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n", + " filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n", + " fill: #FFFFFF;\n", + " }\n", + " </style>\n", + "\n", + " <script>\n", + " const buttonEl =\n", + " document.querySelector('#df-ab05c56f-8a70-4c11-b119-079e5dbb1d8a button.colab-df-convert');\n", + " buttonEl.style.display =\n", + " google.colab.kernel.accessAllowed ? 'block' : 'none';\n", + "\n", + " async function convertToInteractive(key) {\n", + " const element = document.querySelector('#df-ab05c56f-8a70-4c11-b119-079e5dbb1d8a');\n", + " const dataTable =\n", + " await google.colab.kernel.invokeFunction('convertToInteractive',\n", + " [key], {});\n", + " if (!dataTable) return;\n", + "\n", + " const docLinkHtml = 'Like what you see? Visit the ' +\n", + " '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n", + " + ' to learn more about interactive tables.';\n", + " element.innerHTML = '';\n", + " dataTable['output_type'] = 'display_data';\n", + " await google.colab.output.renderOutput(dataTable, element);\n", + " const docLink = document.createElement('div');\n", + " docLink.innerHTML = docLinkHtml;\n", + " element.appendChild(docLink);\n", + " }\n", + " </script>\n", + " </div>\n", + "\n", + "\n", + "<div id=\"df-1ed4b4f8-1a2b-466f-8273-ec61ad86e0c8\">\n", + " <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-1ed4b4f8-1a2b-466f-8273-ec61ad86e0c8')\"\n", + " title=\"Suggest charts\"\n", + " style=\"display:none;\">\n", + "\n", + "<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n", + " width=\"24px\">\n", + " <g>\n", + " <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n", + " </g>\n", + "</svg>\n", + " </button>\n", + "\n", + "<style>\n", + " .colab-df-quickchart {\n", + " --bg-color: #E8F0FE;\n", + " --fill-color: #1967D2;\n", + " --hover-bg-color: #E2EBFA;\n", + " --hover-fill-color: #174EA6;\n", + " --disabled-fill-color: #AAA;\n", + " --disabled-bg-color: #DDD;\n", + " }\n", + "\n", + " [theme=dark] .colab-df-quickchart {\n", + " --bg-color: #3B4455;\n", + " --fill-color: #D2E3FC;\n", + " --hover-bg-color: #434B5C;\n", + " --hover-fill-color: #FFFFFF;\n", + " --disabled-bg-color: #3B4455;\n", + " --disabled-fill-color: #666;\n", + " }\n", + "\n", + " .colab-df-quickchart {\n", + " background-color: var(--bg-color);\n", + " border: none;\n", + " border-radius: 50%;\n", + " cursor: pointer;\n", + " display: none;\n", + " fill: var(--fill-color);\n", + " height: 32px;\n", + " padding: 0;\n", + " width: 32px;\n", + " }\n", + "\n", + " .colab-df-quickchart:hover {\n", + " background-color: var(--hover-bg-color);\n", + " box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n", + " fill: var(--button-hover-fill-color);\n", + " }\n", + "\n", + " .colab-df-quickchart-complete:disabled,\n", + " .colab-df-quickchart-complete:disabled:hover {\n", + " background-color: var(--disabled-bg-color);\n", + " fill: var(--disabled-fill-color);\n", + " box-shadow: none;\n", + " }\n", + "\n", + " .colab-df-spinner {\n", + " border: 2px solid var(--fill-color);\n", + " border-color: transparent;\n", + " border-bottom-color: var(--fill-color);\n", + " animation:\n", + " spin 1s steps(1) infinite;\n", + " }\n", + "\n", + " @keyframes spin {\n", + " 0% {\n", + " border-color: transparent;\n", + " border-bottom-color: var(--fill-color);\n", + " border-left-color: var(--fill-color);\n", + " }\n", + " 20% {\n", + " border-color: transparent;\n", + " border-left-color: var(--fill-color);\n", + " border-top-color: var(--fill-color);\n", + " }\n", + " 30% {\n", + " border-color: transparent;\n", + " border-left-color: var(--fill-color);\n", + " border-top-color: var(--fill-color);\n", + " border-right-color: var(--fill-color);\n", + " }\n", + " 40% {\n", + " border-color: transparent;\n", + " border-right-color: var(--fill-color);\n", + " border-top-color: var(--fill-color);\n", + " }\n", + " 60% {\n", + " border-color: transparent;\n", + " border-right-color: var(--fill-color);\n", + " }\n", + " 80% {\n", + " border-color: transparent;\n", + " border-right-color: var(--fill-color);\n", + " border-bottom-color: var(--fill-color);\n", + " }\n", + " 90% {\n", + " border-color: transparent;\n", + " border-bottom-color: var(--fill-color);\n", + " }\n", + " }\n", + "</style>\n", + "\n", + " <script>\n", + " async function quickchart(key) {\n", + " const quickchartButtonEl =\n", + " document.querySelector('#' + key + ' button');\n", + " quickchartButtonEl.disabled = true; // To prevent multiple clicks.\n", + " quickchartButtonEl.classList.add('colab-df-spinner');\n", + " try {\n", + " const charts = await google.colab.kernel.invokeFunction(\n", + " 'suggestCharts', [key], {});\n", + " } catch (error) {\n", + " console.error('Error during call to suggestCharts:', error);\n", + " }\n", + " quickchartButtonEl.classList.remove('colab-df-spinner');\n", + " quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n", + " }\n", + " (() => {\n", + " let quickchartButtonEl =\n", + " document.querySelector('#df-1ed4b4f8-1a2b-466f-8273-ec61ad86e0c8 button');\n", + " quickchartButtonEl.style.display =\n", + " google.colab.kernel.accessAllowed ? 'block' : 'none';\n", + " })();\n", + " </script>\n", + "</div>\n", + " </div>\n", + " </div>\n" + ], + "text/plain": [ + " test_MeanAbsolutePercentageError fit_time pred_time len_train_window \\\n", + "0 0.027279 2.242027 0.024409 40 \n", + "1 0.032508 3.777926 0.056402 10 \n", + "2 0.017365 2.365881 0.022176 10 \n", + "3 0.011343 1.619386 0.020145 10 \n", + "4 0.052317 13.451261 0.017559 10 \n", + "\n", + " cutoff y_train \\\n", + "0 1968Q4 1959Q1 2710.349\n", + "1959Q2 2778.801\n", + "1959Q3 ... \n", + "1 1976Q2 1974Q1 4909.617\n", + "1974Q2 4922.188\n", + "1974Q3 ... \n", + "2 1983Q4 1981Q3 6030.184\n", + "1981Q4 5955.062\n", + "1982Q1 ... \n", + "3 1991Q2 1989Q1 7806.603\n", + "1989Q2 7865.016\n", + "1989Q3 ... \n", + "4 1998Q4 1996Q3 9488.879\n", + "1996Q4 9592.458\n", + "1997Q1... \n", + "\n", + " y_test \\\n", + "0 1969Q1 4244.100\n", + "1969Q2 4256.460\n", + "1969Q3 ... \n", + "1 1976Q3 5154.072\n", + "1976Q4 5191.499\n", + "1977Q1 ... \n", + "2 1984Q1 6448.264\n", + "1984Q2 6559.594\n", + "1984Q3 ... \n", + "3 1991Q3 8037.538\n", + "1991Q4 8069.046\n", + "1992Q1... \n", + "4 1999Q1 10601.179\n", + "1999Q2 10684.049\n", + "1999Q3... \n", + "\n", + " y_pred \n", + "0 1969Q1 4211.384583\n", + "1969Q2 4248.007673\n", + "19... \n", + "1 1976Q3 5178.306143\n", + "1976Q4 5227.665286\n", + "19... \n", + "2 1984Q1 6386.849\n", + "1984Q2 6448.124\n", + "1984Q3 ... \n", + "3 1991Q3 8080.539\n", + "1991Q4 8157.256\n", + "1992Q1... \n", + "4 1999Q1 10749.487249\n", + "1999Q2 10779.693065\n", + "... " + ] + }, + "execution_count": 41, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cv_df" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "id": "beb1d16e", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 385 + }, + "id": "beb1d16e", + "outputId": "93bac828-f0cd-4e16-87a4-456a00178f38", + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Average MAPE with Sliding Window: 0.028\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "<Figure size 1600x400 with 1 Axes>" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "plot_series(\n", + " y, *[cv_df[\"y_pred\"].iloc[x] for x in range(5)],\n", + " markers=[\"o\", *[\".\"]*5],\n", + " labels=[\"y_true\"] + [f\"cv: {x}\" for x in range(5)],\n", + ")\n", + "print(f\"Average MAPE with Sliding Window: {round(cv_df['test_MeanAbsolutePercentageError'].mean(), 3)}\")\n", + "plt.title('Cross-validation with Sliding Window');" + ] + }, + { + "cell_type": "markdown", + "id": "126aa812", + "metadata": { + "id": "126aa812" + }, + "source": [ + "### Tuning" + ] + }, + { + "cell_type": "markdown", + "id": "78da0181", + "metadata": { + "id": "78da0181" + }, + "source": [ + "- **Grid Search with a statistical model**" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "id": "9165746c", + "metadata": { + "id": "9165746c" + }, + "outputs": [], + "source": [ + "forecaster = AutoETS(sp=4, allow_multiplicative_trend=True)\n", + "params = {\"trend\": [\"add\", \"mul\"],\n", + " \"damped_trend\": [True, False]}" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "id": "dc753968", + "metadata": { + "id": "dc753968" + }, + "outputs": [], + "source": [ + "cv = SlidingWindowSplitter(initial_window=60, window_length=30)\n", + "fgs = ForecastingGridSearchCV(forecaster, strategy=\"refit\", cv=cv, param_grid=params)" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "id": "123a1fea", + "metadata": { + "id": "123a1fea" + }, + "outputs": [], + "source": [ + "fgs.fit(y_train)\n", + "y_pred = fgs.predict(fh_abs)" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "id": "0f5023e0", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "0f5023e0", + "outputId": "67db60f5-1665-4a16-d457-e26061d0511d" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'damped_trend': False, 'trend': 'add'}" + ] + }, + "execution_count": 46, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "fgs.best_params_" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "id": "9108e55a", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "9108e55a", + "outputId": "c6a611bd-44c4-44c9-fb3d-7ddc83f5eec1" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0.00674066038278309" + ] + }, + "execution_count": 47, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "fgs.best_score_" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "id": "dea23dd5", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 346 + }, + "id": "dea23dd5", + "outputId": "af52c734-2a7b-4b56-d34c-cfe78e52733a" + }, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "<Figure size 1600x400 with 1 Axes>" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "plot_series(y_train, y_test, y_pred, labels=[\"y_train\", \"y_test\", \"y_pred\"]);" + ] + }, + { + "cell_type": "markdown", + "id": "85dcd148", + "metadata": { + "id": "85dcd148" + }, + "source": [ + "- **Randomized Search with a scikit-learn estimator**" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "id": "4ed06658", + "metadata": { + "id": "4ed06658" + }, + "outputs": [], + "source": [ + "regressor = LinearRegression()\n", + "forecaster = make_reduction(regressor, strategy=\"recursive\")\n", + "params = {\"window_length\": list(range(2, 21))}" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "id": "87b8df0e", + "metadata": { + "id": "87b8df0e" + }, + "outputs": [], + "source": [ + "cv = SlidingWindowSplitter(initial_window=60, window_length=30)\n", + "frs = ForecastingRandomizedSearchCV(forecaster, strategy=\"refit\", cv=cv,\n", + " param_distributions=params, n_iter=5, random_state=42)" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "id": "34cfb79f", + "metadata": { + "id": "34cfb79f" + }, + "outputs": [], + "source": [ + "frs.fit(y_train)\n", + "y_pred = frs.predict(fh_abs)" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "id": "cf7a209d", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "cf7a209d", + "outputId": "03d364a3-46de-4193-b81f-6298c3b53e34" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'window_length': 2}" + ] + }, + "execution_count": 52, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "frs.best_params_" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "id": "32cb3203", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "32cb3203", + "outputId": "6509ceed-5f58-4934-87c1-af3d52e38e91" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0.006820844800594237" + ] + }, + "execution_count": 53, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "frs.best_score_" + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "id": "a662382e", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 346 + }, + "id": "a662382e", + "outputId": "ac982554-f149-455b-bf3e-df7ec13fd8fc", + "scrolled": true + }, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "<Figure size 1600x400 with 1 Axes>" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "plot_series(y_train, y_test, y_pred, labels=[\"y_train\", \"y_test\", \"y_pred\"]);" + ] + }, + { + "cell_type": "markdown", + "id": "de0b8af4", + "metadata": { + "id": "de0b8af4" + }, + "source": [ + "- **Tuning nested parameters**\n", + "\n", + "Random Forest Regressor" + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "id": "fc74f40f", + "metadata": { + "id": "fc74f40f" + }, + "outputs": [], + "source": [ + "regressor = RandomForestRegressor(n_estimators=20)\n", + "forecaster = make_reduction(regressor)" + ] + }, + { + "cell_type": "code", + "execution_count": 56, + "id": "266739e0", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "266739e0", + "outputId": "fd7f9966-1055-4487-fc06-0981446719af" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'estimator': RandomForestRegressor(n_estimators=20),\n", + " 'pooling': 'local',\n", + " 'transformers': None,\n", + " 'window_length': 10,\n", + " 'estimator__bootstrap': True,\n", + " 'estimator__ccp_alpha': 0.0,\n", + " 'estimator__criterion': 'squared_error',\n", + " 'estimator__max_depth': None,\n", + " 'estimator__max_features': 1.0,\n", + " 'estimator__max_leaf_nodes': None,\n", + " 'estimator__max_samples': None,\n", + " 'estimator__min_impurity_decrease': 0.0,\n", + " 'estimator__min_samples_leaf': 1,\n", + " 'estimator__min_samples_split': 2,\n", + " 'estimator__min_weight_fraction_leaf': 0.0,\n", + " 'estimator__n_estimators': 20,\n", + " 'estimator__n_jobs': None,\n", + " 'estimator__oob_score': False,\n", + " 'estimator__random_state': None,\n", + " 'estimator__verbose': 0,\n", + " 'estimator__warm_start': False}" + ] + }, + "execution_count": 56, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "forecaster.get_params()" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "id": "b98459bb", + "metadata": { + "id": "b98459bb" + }, + "outputs": [], + "source": [ + "nested_params = {\"window_length\": list(range(2,21)),\n", + " \"estimator__max_depth\": list(range(5,16))}" + ] + }, + { + "cell_type": "code", + "execution_count": 58, + "id": "8f5abf2b", + "metadata": { + "id": "8f5abf2b" + }, + "outputs": [], + "source": [ + "cv = SlidingWindowSplitter(initial_window=60, window_length=30)\n", + "nrcv = ForecastingRandomizedSearchCV(forecaster, strategy=\"refit\", cv=cv,\n", + " param_distributions=nested_params,\n", + " n_iter = 5, random_state=42)" + ] + }, + { + "cell_type": "code", + "execution_count": 59, + "id": "7f6653a1", + "metadata": { + "id": "7f6653a1" + }, + "outputs": [], + "source": [ + "nrcv.fit(y_train)\n", + "y_pred = nrcv.predict(fh_abs)" + ] + }, + { + "cell_type": "code", + "execution_count": 60, + "id": "b8822ac3", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "b8822ac3", + "outputId": "186ef5d8-03b0-4177-f336-64df3e6b1d42" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'window_length': 5, 'estimator__max_depth': 8}" + ] + }, + "execution_count": 60, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "nrcv.best_params_" + ] + }, + { + "cell_type": "code", + "execution_count": 61, + "id": "9125c788", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "9125c788", + "outputId": "da1e94f8-f8bb-4f30-b70c-db62fc086120" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0.01392739416525403" + ] + }, + "execution_count": 61, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "nrcv.best_score_" + ] + }, + { + "cell_type": "code", + "execution_count": 62, + "id": "1adc9ae3", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 346 + }, + "id": "1adc9ae3", + "outputId": "3df3413f-592a-40e1-9a8d-312c2e831182" + }, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "<Figure size 1600x400 with 1 Axes>" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "plot_series(y_train, y_test, y_pred, labels=[\"y_train\", \"y_test\", \"y_pred\"]);" + ] + }, + { + "cell_type": "markdown", + "id": "d145a0d6", + "metadata": { + "id": "d145a0d6" + }, + "source": [ + "## 4. New forecasting functionalities" + ] + }, + { + "cell_type": "markdown", + "id": "ca9f4d37", + "metadata": { + "id": "ca9f4d37" + }, + "source": [ + "### Transformers" + ] + }, + { + "cell_type": "markdown", + "id": "39f1096f", + "metadata": { + "id": "39f1096f" + }, + "source": [ + "**`Deseasonalizer`**" + ] + }, + { + "cell_type": "code", + "execution_count": 63, + "id": "29b339f5", + "metadata": { + "id": "29b339f5" + }, + "outputs": [], + "source": [ + "transformer = Deseasonalizer(sp=4) #removing seasonality\n", + "y_train_transformed = transformer.fit_transform(y_train)" + ] + }, + { + "cell_type": "code", + "execution_count": 64, + "id": "c1e883b4", + "metadata": { + "id": "c1e883b4" + }, + "outputs": [], + "source": [ + "y_train_inversed = transformer.inverse_transform(y_train_transformed) #inversing the seasonality removal" + ] + }, + { + "cell_type": "code", + "execution_count": 65, + "id": "35ccf61b", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "35ccf61b", + "outputId": "e614eadc-8549-4377-e5b3-59a9e79a2f4d", + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 65, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#after inversing the transformation, values are again on the same scale as they were before\n", + "(np.round(y_train.values, 3) == np.round(y_train_inversed.values, 3)).all()" + ] + }, + { + "cell_type": "markdown", + "id": "206c9fe5", + "metadata": { + "id": "206c9fe5" + }, + "source": [ + "**`BoxCoxTransformer`**" + ] + }, + { + "cell_type": "code", + "execution_count": 66, + "id": "3e14f992", + "metadata": { + "id": "3e14f992" + }, + "outputs": [], + "source": [ + "forecaster = ThetaForecaster(sp=4)\n", + "transformer = BoxCoxTransformer(sp=4) #Box-Cox power transformation\n", + "y_train_transformed = transformer.fit_transform(y_train)" + ] + }, + { + "cell_type": "code", + "execution_count": 67, + "id": "2aca2946", + "metadata": { + "id": "2aca2946" + }, + "outputs": [], + "source": [ + "forecaster.fit(y_train_transformed)\n", + "y_pred = forecaster.predict(fh=fh_abs)\n", + "y_pred_inversed = transformer.inverse_transform(y_pred)" + ] + }, + { + "cell_type": "code", + "execution_count": 68, + "id": "a22cbb57", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 350 + }, + "id": "a22cbb57", + "outputId": "8c3dd787-a46f-4761-a080-2e86e7b53634" + }, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "<Figure size 1600x400 with 1 Axes>" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "plot_series(y_train, y_test, y_pred, labels=[\"y_train\", \"y_test\", \"y_pred_transformed\"]);" + ] + }, + { + "cell_type": "code", + "execution_count": 69, + "id": "470037d2", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 346 + }, + "id": "470037d2", + "outputId": "6922ba2a-6eaf-4813-ba7c-5478fd1f4821" + }, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "<Figure size 1600x400 with 1 Axes>" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "plot_series(y_train, y_test, y_pred_inversed, labels=[\"y_train\", \"y_test\", \"y_pred_inversed_transformation\"]);" + ] + }, + { + "cell_type": "markdown", + "id": "90d4d899", + "metadata": { + "id": "90d4d899" + }, + "source": [ + "### Pipelines" + ] + }, + { + "cell_type": "markdown", + "id": "4affacfa", + "metadata": { + "id": "4affacfa" + }, + "source": [ + "- **`TransformedTargetForecaster` - simple pipeline**" + ] + }, + { + "cell_type": "code", + "execution_count": 70, + "id": "02dc7cdd", + "metadata": { + "id": "02dc7cdd" + }, + "outputs": [], + "source": [ + "#two transformers and a forecaster\n", + "forecaster = TransformedTargetForecaster(\n", + " [(\"deseasonalize\", Deseasonalizer(sp=4)),\n", + " (\"box_cox\", BoxCoxTransformer(sp=4)),\n", + " (\"forecast\", ThetaForecaster(sp=4))]\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 71, + "id": "db4c544e", + "metadata": { + "id": "db4c544e" + }, + "outputs": [], + "source": [ + "forecaster.fit(y_train)\n", + "y_pred = forecaster.predict(fh_abs)" + ] + }, + { + "cell_type": "code", + "execution_count": 72, + "id": "244d1c58", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 346 + }, + "id": "244d1c58", + "outputId": "b04b9da7-de05-4942-b49a-e6d5644d7843" + }, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "<Figure size 1600x400 with 1 Axes>" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "plot_series(y_train, y_test, y_pred, labels=[\"y_train\", \"y_test\", \"y_pred\"]);" + ] + }, + { + "cell_type": "markdown", + "id": "ee3b3b82", + "metadata": { + "id": "ee3b3b82" + }, + "source": [ + "- **ForecastingPipeline - pipeline with exogenous variables**" + ] + }, + { + "cell_type": "code", + "execution_count": 73, + "id": "084fc9ad", + "metadata": { + "id": "084fc9ad" + }, + "outputs": [], + "source": [ + "forecaster = ForecastingPipeline(\n", + " [(\"hampel\", HampelFilter()),\n", + " (\"log\", LogTransformer()),\n", + " (\"forecaster\", ThetaForecaster(sp=4))])" + ] + }, + { + "cell_type": "code", + "execution_count": 74, + "id": "44be0974", + "metadata": { + "id": "44be0974" + }, + "outputs": [], + "source": [ + "forecaster.fit(y_train, X_train)\n", + "y_pred = forecaster.predict(fh=fh_abs, X=X_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 75, + "id": "fc1cd3f2", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 346 + }, + "id": "fc1cd3f2", + "outputId": "967df069-c71f-4095-85a5-03a6e13305b7", + "scrolled": true + }, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "<Figure size 1600x400 with 1 Axes>" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "plot_series(y_train, y_test, y_pred, labels=[\"y_train\", \"y_test\", \"y_pred\"]);" + ] + }, + { + "cell_type": "markdown", + "id": "72ff0f21", + "metadata": { + "id": "72ff0f21" + }, + "source": [ + "### Automated model selection" + ] + }, + { + "cell_type": "markdown", + "id": "d4d6422c", + "metadata": { + "id": "d4d6422c" + }, + "source": [ + "- **`MultiplexForecaster` - finding the best forecaster**" + ] + }, + { + "cell_type": "code", + "execution_count": 76, + "id": "f6e4a51f", + "metadata": { + "id": "f6e4a51f" + }, + "outputs": [], + "source": [ + "forecaster = MultiplexForecaster(\n", + " forecasters=[\n", + " (\"theta\", ThetaForecaster(sp=4)),\n", + " (\"ets\", ExponentialSmoothing(trend=\"add\", sp=4)),\n", + " (\"autoarima\", AutoARIMA(sp=4)),\n", + " ],\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 77, + "id": "688b8fd9", + "metadata": { + "id": "688b8fd9" + }, + "outputs": [], + "source": [ + "cv = SlidingWindowSplitter(initial_window=60, window_length=30)\n", + "forecaster_param_grid = {\"selected_forecaster\": [\"theta\", \"ets\", \"autoarima\"]}\n", + "gscv = ForecastingGridSearchCV(forecaster, cv=cv, param_grid=forecaster_param_grid)" + ] + }, + { + "cell_type": "code", + "execution_count": 78, + "id": "010e1f5c", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 109 + }, + "id": "010e1f5c", + "outputId": "16d55de5-07e2-4136-a0b2-a3adfa4d4dc9" + }, + "outputs": [ + { + "data": { + "text/html": [ + "<style>#sk-e74645bf-5326-44da-8525-5bcee84bf3fb {color: black;background-color: white;}#sk-e74645bf-5326-44da-8525-5bcee84bf3fb pre{padding: 0;}#sk-e74645bf-5326-44da-8525-5bcee84bf3fb div.sk-toggleable {background-color: white;}#sk-e74645bf-5326-44da-8525-5bcee84bf3fb label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-e74645bf-5326-44da-8525-5bcee84bf3fb label.sk-toggleable__label-arrow:before {content: \"βΈ\";float: left;margin-right: 0.25em;color: #696969;}#sk-e74645bf-5326-44da-8525-5bcee84bf3fb label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-e74645bf-5326-44da-8525-5bcee84bf3fb div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-e74645bf-5326-44da-8525-5bcee84bf3fb div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-e74645bf-5326-44da-8525-5bcee84bf3fb div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-e74645bf-5326-44da-8525-5bcee84bf3fb input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-e74645bf-5326-44da-8525-5bcee84bf3fb input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"βΎ\";}#sk-e74645bf-5326-44da-8525-5bcee84bf3fb div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-e74645bf-5326-44da-8525-5bcee84bf3fb div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-e74645bf-5326-44da-8525-5bcee84bf3fb input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-e74645bf-5326-44da-8525-5bcee84bf3fb div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-e74645bf-5326-44da-8525-5bcee84bf3fb div.sk-estimator:hover {background-color: #d4ebff;}#sk-e74645bf-5326-44da-8525-5bcee84bf3fb div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-e74645bf-5326-44da-8525-5bcee84bf3fb div.sk-label:hover label.sk-toggleable__label {background-color: #d4ebff;}#sk-e74645bf-5326-44da-8525-5bcee84bf3fb div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 2em;bottom: 0;left: 50%;}#sk-e74645bf-5326-44da-8525-5bcee84bf3fb div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;}#sk-e74645bf-5326-44da-8525-5bcee84bf3fb div.sk-item {z-index: 1;}#sk-e74645bf-5326-44da-8525-5bcee84bf3fb div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;}#sk-e74645bf-5326-44da-8525-5bcee84bf3fb div.sk-parallel::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 2em;bottom: 0;left: 50%;}#sk-e74645bf-5326-44da-8525-5bcee84bf3fb div.sk-parallel-item {display: flex;flex-direction: column;position: relative;background-color: white;}#sk-e74645bf-5326-44da-8525-5bcee84bf3fb div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-e74645bf-5326-44da-8525-5bcee84bf3fb div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-e74645bf-5326-44da-8525-5bcee84bf3fb div.sk-parallel-item:only-child::after {width: 0;}#sk-e74645bf-5326-44da-8525-5bcee84bf3fb div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;position: relative;}#sk-e74645bf-5326-44da-8525-5bcee84bf3fb div.sk-label label {font-family: monospace;font-weight: bold;background-color: white;display: inline-block;line-height: 1.2em;}#sk-e74645bf-5326-44da-8525-5bcee84bf3fb div.sk-label-container {position: relative;z-index: 2;text-align: center;}#sk-e74645bf-5326-44da-8525-5bcee84bf3fb div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-e74645bf-5326-44da-8525-5bcee84bf3fb div.sk-text-repr-fallback {display: none;}</style><div id='sk-e74645bf-5326-44da-8525-5bcee84bf3fb' class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>ForecastingGridSearchCV(cv=SlidingWindowSplitter(initial_window=60,\n", + " window_length=30),\n", + " forecaster=MultiplexForecaster(forecasters=[('theta',\n", + " ThetaForecaster(sp=4)),\n", + " ('ets',\n", + " ExponentialSmoothing(sp=4,\n", + " trend='add')),\n", + " ('autoarima',\n", + " AutoARIMA(sp=4))]),\n", + " param_grid={'selected_forecaster': ['theta', 'ets',\n", + " 'autoarima']})</pre><b>Please rerun this cell to show the HTML repr or trust the notebook.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item sk-dashed-wrapped\"><div class='sk-label-container'><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=UUID('79a611c5-a2d7-439b-8146-8e7d5fba8d02') type=\"checkbox\" ><label for=UUID('79a611c5-a2d7-439b-8146-8e7d5fba8d02') class='sk-toggleable__label sk-toggleable__label-arrow'>ForecastingGridSearchCV</label><div class=\"sk-toggleable__content\"><pre>ForecastingGridSearchCV(cv=SlidingWindowSplitter(initial_window=60,\n", + " window_length=30),\n", + " forecaster=MultiplexForecaster(forecasters=[('theta',\n", + " ThetaForecaster(sp=4)),\n", + " ('ets',\n", + " ExponentialSmoothing(sp=4,\n", + " trend='add')),\n", + " ('autoarima',\n", + " AutoARIMA(sp=4))]),\n", + " param_grid={'selected_forecaster': ['theta', 'ets',\n", + " 'autoarima']})</pre></div></div></div><div class=\"sk-parallel\"><div class=\"sk-parallel-item\"><div class=\"sk-item\"><div class=\"sk-serial\"><div class='sk-item'><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=UUID('23b5532d-08fe-4e23-a723-d54c83f0985d') type=\"checkbox\" ><label for=UUID('23b5532d-08fe-4e23-a723-d54c83f0985d') class='sk-toggleable__label sk-toggleable__label-arrow'>SlidingWindowSplitter</label><div class=\"sk-toggleable__content\"><pre>SlidingWindowSplitter(initial_window=60, window_length=30)</pre></div></div></div></div></div></div><div class=\"sk-parallel-item\"><div class=\"sk-item\"><div class=\"sk-serial\"><div class=\"sk-item sk-dashed-wrapped\"><div class=\"sk-parallel\"><div class=\"sk-parallel-item\"><div class=\"sk-item\"><div class=\"sk-serial\"><div class='sk-item'><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=UUID('1f0258b3-6771-4626-b5a3-b68b294da6d4') type=\"checkbox\" ><label for=UUID('1f0258b3-6771-4626-b5a3-b68b294da6d4') class='sk-toggleable__label sk-toggleable__label-arrow'>ThetaForecaster</label><div class=\"sk-toggleable__content\"><pre>ThetaForecaster(sp=4)</pre></div></div></div></div></div></div><div class=\"sk-parallel-item\"><div class=\"sk-item\"><div class=\"sk-serial\"><div class='sk-item'><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=UUID('b6334082-f211-4343-ab1a-1c994183edf6') type=\"checkbox\" ><label for=UUID('b6334082-f211-4343-ab1a-1c994183edf6') class='sk-toggleable__label sk-toggleable__label-arrow'>ExponentialSmoothing</label><div class=\"sk-toggleable__content\"><pre>ExponentialSmoothing(sp=4, trend='add')</pre></div></div></div></div></div></div><div class=\"sk-parallel-item\"><div class=\"sk-item\"><div class=\"sk-serial\"><div class='sk-item'><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=UUID('eb63c10a-471f-40e3-9a55-b4322a91b6b9') type=\"checkbox\" ><label for=UUID('eb63c10a-471f-40e3-9a55-b4322a91b6b9') class='sk-toggleable__label sk-toggleable__label-arrow'>AutoARIMA</label><div class=\"sk-toggleable__content\"><pre>AutoARIMA(sp=4)</pre></div></div></div></div></div></div></div></div></div></div></div></div></div></div></div>" + ], + "text/plain": [ + "ForecastingGridSearchCV(cv=SlidingWindowSplitter(initial_window=60,\n", + " window_length=30),\n", + " forecaster=MultiplexForecaster(forecasters=[('theta',\n", + " ThetaForecaster(sp=4)),\n", + " ('ets',\n", + " ExponentialSmoothing(sp=4,\n", + " trend='add')),\n", + " ('autoarima',\n", + " AutoARIMA(sp=4))]),\n", + " param_grid={'selected_forecaster': ['theta', 'ets',\n", + " 'autoarima']})" + ] + }, + "execution_count": 78, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gscv.fit(y_train)" + ] + }, + { + "cell_type": "code", + "execution_count": 79, + "id": "c2839de5", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "c2839de5", + "outputId": "02387375-cf9e-4eb7-d022-d7ad434bbb0e" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'selected_forecaster': 'ets'}" + ] + }, + "execution_count": 79, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gscv.best_params_" + ] + }, + { + "cell_type": "code", + "execution_count": 80, + "id": "4f1d6928", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 94 + }, + "id": "4f1d6928", + "outputId": "4d970c4d-f7ac-46a3-c258-460d92f3fa48" + }, + "outputs": [ + { + "data": { + "text/html": [ + "<style>#sk-2c671cf3-e0c0-47fa-8287-d8e10422d592 {color: black;background-color: white;}#sk-2c671cf3-e0c0-47fa-8287-d8e10422d592 pre{padding: 0;}#sk-2c671cf3-e0c0-47fa-8287-d8e10422d592 div.sk-toggleable {background-color: white;}#sk-2c671cf3-e0c0-47fa-8287-d8e10422d592 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-2c671cf3-e0c0-47fa-8287-d8e10422d592 label.sk-toggleable__label-arrow:before {content: \"βΈ\";float: left;margin-right: 0.25em;color: #696969;}#sk-2c671cf3-e0c0-47fa-8287-d8e10422d592 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-2c671cf3-e0c0-47fa-8287-d8e10422d592 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-2c671cf3-e0c0-47fa-8287-d8e10422d592 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-2c671cf3-e0c0-47fa-8287-d8e10422d592 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-2c671cf3-e0c0-47fa-8287-d8e10422d592 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-2c671cf3-e0c0-47fa-8287-d8e10422d592 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"βΎ\";}#sk-2c671cf3-e0c0-47fa-8287-d8e10422d592 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-2c671cf3-e0c0-47fa-8287-d8e10422d592 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-2c671cf3-e0c0-47fa-8287-d8e10422d592 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-2c671cf3-e0c0-47fa-8287-d8e10422d592 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-2c671cf3-e0c0-47fa-8287-d8e10422d592 div.sk-estimator:hover {background-color: #d4ebff;}#sk-2c671cf3-e0c0-47fa-8287-d8e10422d592 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-2c671cf3-e0c0-47fa-8287-d8e10422d592 div.sk-label:hover label.sk-toggleable__label {background-color: #d4ebff;}#sk-2c671cf3-e0c0-47fa-8287-d8e10422d592 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 2em;bottom: 0;left: 50%;}#sk-2c671cf3-e0c0-47fa-8287-d8e10422d592 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;}#sk-2c671cf3-e0c0-47fa-8287-d8e10422d592 div.sk-item {z-index: 1;}#sk-2c671cf3-e0c0-47fa-8287-d8e10422d592 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;}#sk-2c671cf3-e0c0-47fa-8287-d8e10422d592 div.sk-parallel::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 2em;bottom: 0;left: 50%;}#sk-2c671cf3-e0c0-47fa-8287-d8e10422d592 div.sk-parallel-item {display: flex;flex-direction: column;position: relative;background-color: white;}#sk-2c671cf3-e0c0-47fa-8287-d8e10422d592 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-2c671cf3-e0c0-47fa-8287-d8e10422d592 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-2c671cf3-e0c0-47fa-8287-d8e10422d592 div.sk-parallel-item:only-child::after {width: 0;}#sk-2c671cf3-e0c0-47fa-8287-d8e10422d592 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;position: relative;}#sk-2c671cf3-e0c0-47fa-8287-d8e10422d592 div.sk-label label {font-family: monospace;font-weight: bold;background-color: white;display: inline-block;line-height: 1.2em;}#sk-2c671cf3-e0c0-47fa-8287-d8e10422d592 div.sk-label-container {position: relative;z-index: 2;text-align: center;}#sk-2c671cf3-e0c0-47fa-8287-d8e10422d592 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-2c671cf3-e0c0-47fa-8287-d8e10422d592 div.sk-text-repr-fallback {display: none;}</style><div id='sk-2c671cf3-e0c0-47fa-8287-d8e10422d592' class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>MultiplexForecaster(forecasters=[('theta', ThetaForecaster(sp=4)),\n", + " ('ets',\n", + " ExponentialSmoothing(sp=4, trend='add')),\n", + " ('autoarima', AutoARIMA(sp=4))],\n", + " selected_forecaster='ets')</pre><b>Please rerun this cell to show the HTML repr or trust the notebook.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item sk-dashed-wrapped\"><div class='sk-label-container'><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=UUID('cf93a545-84b5-4df2-8433-9e8e7e8d432a') type=\"checkbox\" ><label for=UUID('cf93a545-84b5-4df2-8433-9e8e7e8d432a') class='sk-toggleable__label sk-toggleable__label-arrow'>MultiplexForecaster</label><div class=\"sk-toggleable__content\"><pre>MultiplexForecaster(forecasters=[('theta', ThetaForecaster(sp=4)),\n", + " ('ets',\n", + " ExponentialSmoothing(sp=4, trend='add')),\n", + " ('autoarima', AutoARIMA(sp=4))],\n", + " selected_forecaster='ets')</pre></div></div></div><div class=\"sk-parallel\"><div class=\"sk-parallel-item\"><div class=\"sk-item\"><div class=\"sk-serial\"><div class='sk-item'><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=UUID('cd38cb2a-45a2-49f6-8d78-9a9eb88487b8') type=\"checkbox\" ><label for=UUID('cd38cb2a-45a2-49f6-8d78-9a9eb88487b8') class='sk-toggleable__label sk-toggleable__label-arrow'>ThetaForecaster</label><div class=\"sk-toggleable__content\"><pre>ThetaForecaster(sp=4)</pre></div></div></div></div></div></div><div class=\"sk-parallel-item\"><div class=\"sk-item\"><div class=\"sk-serial\"><div class='sk-item'><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=UUID('a1c840ac-5601-46aa-8b32-f3b4ccca1706') type=\"checkbox\" ><label for=UUID('a1c840ac-5601-46aa-8b32-f3b4ccca1706') class='sk-toggleable__label sk-toggleable__label-arrow'>ExponentialSmoothing</label><div class=\"sk-toggleable__content\"><pre>ExponentialSmoothing(sp=4, trend='add')</pre></div></div></div></div></div></div><div class=\"sk-parallel-item\"><div class=\"sk-item\"><div class=\"sk-serial\"><div class='sk-item'><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=UUID('68576ee3-7378-4704-b31b-0c3798129e47') type=\"checkbox\" ><label for=UUID('68576ee3-7378-4704-b31b-0c3798129e47') class='sk-toggleable__label sk-toggleable__label-arrow'>AutoARIMA</label><div class=\"sk-toggleable__content\"><pre>AutoARIMA(sp=4)</pre></div></div></div></div></div></div></div></div></div></div>" + ], + "text/plain": [ + "MultiplexForecaster(forecasters=[('theta', ThetaForecaster(sp=4)),\n", + " ('ets',\n", + " ExponentialSmoothing(sp=4, trend='add')),\n", + " ('autoarima', AutoARIMA(sp=4))],\n", + " selected_forecaster='ets')" + ] + }, + "execution_count": 80, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gscv.best_forecaster_" + ] + }, + { + "cell_type": "markdown", + "id": "1f853084", + "metadata": { + "id": "1f853084" + }, + "source": [ + "- **`OptionalPassthrough` - finding the best transformations**" + ] + }, + { + "cell_type": "code", + "execution_count": 81, + "id": "c638819e", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "c638819e", + "outputId": "37b97487-c5e2-43a9-9c89-92db3c0e8f6f" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'steps': [('deseasonalizer',\n", + " OptionalPassthrough(transformer=Deseasonalizer())),\n", + " ('forecaster', ExponentialSmoothing())],\n", + " 'deseasonalizer': OptionalPassthrough(transformer=Deseasonalizer()),\n", + " 'forecaster': ExponentialSmoothing(),\n", + " 'deseasonalizer__passthrough': False,\n", + " 'deseasonalizer__transformer': Deseasonalizer(),\n", + " 'deseasonalizer__transformer__model': 'additive',\n", + " 'deseasonalizer__transformer__sp': 1,\n", + " 'forecaster__damped_trend': False,\n", + " 'forecaster__damping_trend': None,\n", + " 'forecaster__initial_level': None,\n", + " 'forecaster__initial_seasonal': None,\n", + " 'forecaster__initial_trend': None,\n", + " 'forecaster__initialization_method': 'estimated',\n", + " 'forecaster__method': None,\n", + " 'forecaster__minimize_kwargs': None,\n", + " 'forecaster__optimized': True,\n", + " 'forecaster__random_state': None,\n", + " 'forecaster__remove_bias': False,\n", + " 'forecaster__seasonal': None,\n", + " 'forecaster__smoothing_level': None,\n", + " 'forecaster__smoothing_seasonal': None,\n", + " 'forecaster__smoothing_trend': None,\n", + " 'forecaster__sp': None,\n", + " 'forecaster__start_params': None,\n", + " 'forecaster__trend': None,\n", + " 'forecaster__use_boxcox': None,\n", + " 'forecaster__use_brute': True}" + ] + }, + "execution_count": 81, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "forecaster = TransformedTargetForecaster([(\"deseasonalizer\", OptionalPassthrough(Deseasonalizer())),\n", + " (\"forecaster\", ExponentialSmoothing())])\n", + "forecaster.get_params()" + ] + }, + { + "cell_type": "code", + "execution_count": 82, + "id": "b99feefe", + "metadata": { + "id": "b99feefe" + }, + "outputs": [], + "source": [ + "cv = SlidingWindowSplitter(initial_window=60, window_length=30)\n", + "params = {\"deseasonalizer__passthrough\": [True, False],\n", + " \"deseasonalizer__transformer__model\": [\"additive\", \"multiplicative\"]}\n", + "\n", + "gscv = ForecastingGridSearchCV(forecaster=forecaster,\n", + " param_grid=params,\n", + " cv=cv)" + ] + }, + { + "cell_type": "code", + "execution_count": 83, + "id": "bad88162", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "bad88162", + "outputId": "31628311-7aa2-42a2-a188-c6cbf00439cf", + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'deseasonalizer__passthrough': True,\n", + " 'deseasonalizer__transformer__model': 'additive'}" + ] + }, + "execution_count": 83, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gscv.fit(y_train)\n", + "gscv.best_params_ #model performs better with the transformation" + ] + }, + { + "cell_type": "markdown", + "id": "7a1eea65", + "metadata": { + "id": "7a1eea65" + }, + "source": [ + "### Ensemble forecasting" + ] + }, + { + "cell_type": "code", + "execution_count": 84, + "id": "afb59acb", + "metadata": { + "id": "afb59acb" + }, + "outputs": [], + "source": [ + "ensemble = EnsembleForecaster(forecasters = [(\"tbats\", TBATS()),\n", + " (\"autoarima\", AutoARIMA())],\n", + " aggfunc = \"min\")" + ] + }, + { + "cell_type": "code", + "execution_count": 85, + "id": "b0d6b016", + "metadata": { + "id": "b0d6b016" + }, + "outputs": [], + "source": [ + "ensemble.fit(y_train)\n", + "y_pred = ensemble.predict(fh_abs) #averaged predictions of both models" + ] + }, + { + "cell_type": "code", + "execution_count": 86, + "id": "cecb99be", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "cecb99be", + "outputId": "162bbee6-b205-40eb-9249-cadb95d6193c" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "[TBATS(), AutoARIMA()]" + ] + }, + "execution_count": 86, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ensemble.forecasters_" + ] + }, + { + "cell_type": "code", + "execution_count": 87, + "id": "85b0566e", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 348 + }, + "id": "85b0566e", + "outputId": "0681c436-6388-49ca-fda9-a0d2bcd51a6a", + "scrolled": true + }, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "<Figure size 1600x400 with 1 Axes>" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "plot_series(y_train, y_test, y_pred, labels=[\"y_train\", \"y_test\", \"y_pred\"]);" + ] + } + ], + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.17" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/examples/blog_posts/README.md b/examples/blog_posts/README.md new file mode 100644 index 00000000000..f52d1cc7d87 --- /dev/null +++ b/examples/blog_posts/README.md @@ -0,0 +1,10 @@ +### Notebooks from blog posts and tutorials + +This folder contains notebooks and tutorials from past blog posts and past tutorials. + +Notebooks in this folder are included in `sktime` continuous integration, +so we can check whether new releases break past code. + +#### Contents + +* Joanna Lenczuk - Why start using `sktime` for forecasting diff --git a/examples/interpolation.ipynb b/examples/interpolation.ipynb index d55caa3f570..eeb36d28b82 100644 --- a/examples/interpolation.ipynb +++ b/examples/interpolation.ipynb @@ -124,6 +124,8 @@ ], "source": [ "# randomly cut the data series in-place\n", + "\n", + "\n", "def random_cut(df):\n", " for row_i in range(df.shape[0]):\n", " for dim_i in range(df.shape[1]):\n", diff --git a/examples/interval_based_classification.ipynb b/examples/interval_based_classification.ipynb index 2f28b2494cf..45cb61fed14 100644 --- a/examples/interval_based_classification.ipynb +++ b/examples/interval_based_classification.ipynb @@ -27,25 +27,22 @@ }, { "cell_type": "markdown", - "metadata": {}, "source": [ "## 1. Imports" - ] + ], + "metadata": { + "collapsed": false + } }, { "cell_type": "code", - "execution_count": 1, - "metadata": { - "execution": { - "iopub.execute_input": "2020-12-19T14:32:05.163967Z", - "iopub.status.busy": "2020-12-19T14:32:05.163440Z", - "iopub.status.idle": "2020-12-19T14:32:05.914752Z", - "shell.execute_reply": "2020-12-19T14:32:05.915264Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ + "import numpy as np\n", "from sklearn import metrics\n", + "from sklearn.pipeline import Pipeline\n", "\n", "from sktime.classification.interval_based import (\n", " CanonicalIntervalForest,\n", @@ -54,7 +51,8 @@ " SupervisedTimeSeriesForest,\n", " TimeSeriesForestClassifier,\n", ")\n", - "from sktime.datasets import load_basic_motions, load_italy_power_demand" + "from sktime.datasets import load_basic_motions, load_italy_power_demand\n", + "from sktime.transformations.panel.compose import ColumnConcatenator" ] }, { @@ -66,25 +64,9 @@ }, { "cell_type": "code", - "execution_count": 2, - "metadata": { - "execution": { - "iopub.execute_input": "2020-12-19T14:32:05.919120Z", - "iopub.status.busy": "2020-12-19T14:32:05.918629Z", - "iopub.status.idle": "2020-12-19T14:32:06.041420Z", - "shell.execute_reply": "2020-12-19T14:32:06.040742Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "(67, 1) (67,) (50, 1) (50,)\n", - "(40, 6) (40,) (40, 6) (40,)\n" - ] - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "X_train, y_train = load_italy_power_demand(split=\"train\", return_X_y=True)\n", "X_test, y_test = load_italy_power_demand(split=\"test\", return_X_y=True)\n", @@ -118,24 +100,9 @@ }, { "cell_type": "code", - "execution_count": 3, - "metadata": { - "execution": { - "iopub.execute_input": "2020-12-19T14:32:06.045197Z", - "iopub.status.busy": "2020-12-19T14:32:06.044696Z", - "iopub.status.idle": "2020-12-19T14:32:06.460714Z", - "shell.execute_reply": "2020-12-19T14:32:06.461260Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "TSF Accuracy: 0.98\n" - ] - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "tsf = TimeSeriesForestClassifier(n_estimators=50, random_state=47)\n", "tsf.fit(X_train, y_train)\n", @@ -144,6 +111,230 @@ "print(\"TSF Accuracy: \" + str(metrics.accuracy_score(y_test, tsf_preds)))" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "tsf = Pipeline(\n", + " [\n", + " (\"column_concatenar\", ColumnConcatenator()),\n", + " (\"classify\", TimeSeriesForestClassifier(n_estimators=50, random_state=47)),\n", + " ]\n", + ")\n", + "tsf.fit(X_train_mv, y_train_mv)\n", + "\n", + "tsf_preds = tsf.predict(X_test_mv)\n", + "print(\"TSF Accuracy: \" + str(metrics.accuracy_score(y_test_mv, tsf_preds)))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "temporal_feature_importance = tsf[\"classify\"].feature_importances_\n", + "separators = range(0, tsf[\"classify\"].series_length, len(X_train_mv.iloc[0, 0]))\n", + "\n", + "ax = temporal_feature_importance.plot(figsize=(20, 10))\n", + "for index, separator in enumerate(separators):\n", + " ax.vlines(\n", + " separator,\n", + " temporal_feature_importance.min().min(),\n", + " temporal_feature_importance.max().max(),\n", + " color=\"r\",\n", + " alpha=0.3,\n", + " )\n", + " ax.text(\n", + " separator, temporal_feature_importance.max().max(), X_train_mv.columns[index]\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "X_train_mv_columns = list(X_train_mv.columns)\n", + "np.random.shuffle(X_train_mv_columns)\n", + "\n", + "X_train_shuffled = X_train_mv[X_train_mv_columns]\n", + "X_train_shuffled.columns = X_train_mv.columns\n", + "\n", + "X_test_shuffled = X_test_mv[X_train_mv_columns]\n", + "X_test_shuffled.columns = X_test_mv.columns\n", + "\n", + "tsf = Pipeline(\n", + " [\n", + " (\"column_concatenator\", ColumnConcatenator()),\n", + " (\"classify\", TimeSeriesForestClassifier(n_estimators=50, random_state=47)),\n", + " ]\n", + ")\n", + "tsf.fit(X_train_shuffled, y_train_mv)\n", + "\n", + "tsf_preds = tsf.predict(X_test_shuffled)\n", + "print(\"TSF Accuracy: \" + str(metrics.accuracy_score(y_test_mv, tsf_preds)))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "temporal_feature_importance = tsf[\"classify\"].feature_importances_\n", + "separators = range(0, tsf[\"classify\"].series_length, len(X_train_mv.iloc[0, 0]))\n", + "\n", + "ax = temporal_feature_importance.plot(figsize=(20, 10))\n", + "for index, separator in enumerate(separators):\n", + " ax.vlines(\n", + " separator,\n", + " temporal_feature_importance.min().min(),\n", + " temporal_feature_importance.max().max(),\n", + " color=\"r\",\n", + " alpha=0.3,\n", + " )\n", + " ax.text(\n", + " separator, temporal_feature_importance.max().max(), X_train_mv_columns[index]\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "tsf = Pipeline(\n", + " [\n", + " (\"column_concatenator\", ColumnConcatenator()),\n", + " (\n", + " \"classify\",\n", + " TimeSeriesForestClassifier(\n", + " n_estimators=50, random_state=47, inner_series_length=100\n", + " ),\n", + " ),\n", + " ]\n", + ")\n", + "tsf.fit(X_train_mv, y_train_mv)\n", + "\n", + "tsf_preds = tsf.predict(X_test_mv)\n", + "print(\"TSF Accuracy: \" + str(metrics.accuracy_score(y_test_mv, tsf_preds)))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "temporal_feature_importance = tsf[\"classify\"].feature_importances_\n", + "separators = range(0, tsf[\"classify\"].series_length, len(X_train_mv.iloc[0, 0]))\n", + "\n", + "ax = temporal_feature_importance.plot(figsize=(20, 10))\n", + "for index, separator in enumerate(separators):\n", + " ax.vlines(\n", + " separator,\n", + " temporal_feature_importance.min().min(),\n", + " temporal_feature_importance.max().max(),\n", + " color=\"r\",\n", + " alpha=0.3,\n", + " )\n", + " ax.text(\n", + " separator, temporal_feature_importance.max().max(), X_train_mv.columns[index]\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "X_train_mv_columns = list(X_train_mv.columns)\n", + "np.random.shuffle(X_train_mv_columns)\n", + "\n", + "X_train_shuffled = X_train_mv[X_train_mv_columns]\n", + "X_train_shuffled.columns = X_train_mv.columns\n", + "\n", + "X_test_shuffled = X_test_mv[X_train_mv_columns]\n", + "X_test_shuffled.columns = X_test_mv.columns\n", + "\n", + "tsf = Pipeline(\n", + " [\n", + " (\"column_concatenator\", ColumnConcatenator()),\n", + " (\n", + " \"classify\",\n", + " TimeSeriesForestClassifier(\n", + " n_estimators=50, random_state=47, inner_series_length=100\n", + " ),\n", + " ),\n", + " ]\n", + ")\n", + "tsf.fit(X_train_shuffled, y_train_mv)\n", + "\n", + "tsf_preds = tsf.predict(X_test_shuffled)\n", + "print(\"TSF Accuracy: \" + str(metrics.accuracy_score(y_test_mv, tsf_preds)))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "temporal_feature_importance = tsf[\"classify\"].feature_importances_\n", + "separators = range(0, tsf[\"classify\"].series_length, len(X_train_mv.iloc[0, 0]))\n", + "\n", + "ax = temporal_feature_importance.plot(figsize=(20, 10))\n", + "for index, separator in enumerate(separators):\n", + " ax.vlines(\n", + " separator,\n", + " temporal_feature_importance.min().min(),\n", + " temporal_feature_importance.max().max(),\n", + " color=\"r\",\n", + " alpha=0.3,\n", + " )\n", + " ax.text(\n", + " separator, temporal_feature_importance.max().max(), X_train_mv_columns[index]\n", + " )" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -155,21 +346,13 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": { "pycharm": { "name": "#%%\n" } }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "RISE Accuracy: 1.0\n" - ] - } - ], + "outputs": [], "source": [ "rise = RandomIntervalSpectralEnsemble(n_estimators=50, random_state=47)\n", "rise.fit(X_train, y_train)\n", @@ -189,21 +372,13 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": { "pycharm": { "name": "#%%\n" } }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "STSF Accuracy: 0.96\n" - ] - } - ], + "outputs": [], "source": [ "stsf = SupervisedTimeSeriesForest(n_estimators=50, random_state=47)\n", "stsf.fit(X_train, y_train)\n", @@ -226,7 +401,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": { "execution": { "iopub.execute_input": "2020-12-19T14:32:06.471294Z", @@ -238,15 +413,7 @@ "name": "#%%\n" } }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "CIF Accuracy: 0.98\n" - ] - } - ], + "outputs": [], "source": [ "cif = CanonicalIntervalForest(n_estimators=50, att_subsample_size=8, random_state=47)\n", "cif.fit(X_train, y_train)\n", @@ -264,21 +431,13 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "metadata": { "pycharm": { "name": "#%%\n" } }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "CIF Accuracy: 1.0\n" - ] - } - ], + "outputs": [], "source": [ "cif_m = CanonicalIntervalForest(n_estimators=50, att_subsample_size=8, random_state=47)\n", "cif_m.fit(X_train_mv, y_train_mv)\n", @@ -300,21 +459,13 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "metadata": { "pycharm": { "name": "#%%\n" } }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "DrCIF Accuracy: 0.98\n" - ] - } - ], + "outputs": [], "source": [ "drcif = DrCIF(n_estimators=5, att_subsample_size=10, random_state=47)\n", "drcif.fit(X_train, y_train)\n", @@ -336,21 +487,13 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "metadata": { "pycharm": { "name": "#%%\n" } }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "DrCIF Accuracy: 1.0\n" - ] - } - ], + "outputs": [], "source": [ "drcif_m = DrCIF(n_estimators=5, att_subsample_size=10, random_state=47)\n", "drcif_m.fit(X_train_mv, y_train_mv)\n", diff --git a/examples/mlflow.ipynb b/examples/mlflow.ipynb index 79189d2029f..fd91ca73aa1 100644 --- a/examples/mlflow.ipynb +++ b/examples/mlflow.ipynb @@ -133,7 +133,6 @@ ], "source": [ "with mlflow.start_run():\n", - "\n", " forecaster = NaiveForecaster()\n", " forecaster.fit(\n", " y_train,\n", @@ -691,7 +690,6 @@ "mlflow.set_experiment(\"Test Sktime\")\n", "\n", "with mlflow.start_run() as run:\n", - "\n", " forecaster = NaiveForecaster()\n", " forecaster.fit(y_train, X=X_train, fh=[1, 2, 3])\n", " forecaster.pyfunc_predict_conf = pyfunc_predict_conf\n", diff --git a/examples/partition_based_clustering.ipynb b/examples/partition_based_clustering.ipynb index 4b802232f44..4aea2cbfc47 100644 --- a/examples/partition_based_clustering.ipynb +++ b/examples/partition_based_clustering.ipynb @@ -134,7 +134,7 @@ " <br>\n", " These three cluster initialisation algorithms have been implemented and can\n", " be chosen to use when constructing either k-means or k-medoids partitioning\n", - " algorithms by parsing the string values 'random' for random iniitialisation,\n", + " algorithms by parsing the string values 'random' for random initialisation,\n", " 'forgy' for forgy and 'k-means++' for k-means++.\n", "\n", "### Assignment (distance measure)\n", diff --git a/extension_templates/alignment.py b/extension_templates/alignment.py index e50caa38429..3fbe2c8ddba 100644 --- a/extension_templates/alignment.py +++ b/extension_templates/alignment.py @@ -69,10 +69,25 @@ class MyAligner(BaseAligner): # optional todo: override base class estimator default tags here if necessary # these are the default values, only add if different to these. _tags = { + # packaging info + # -------------- + "authors": ["author1", "author2"], # authors, GitHub handles + "maintainers": ["maintainer1", "maintainer2"], # maintainers, GitHub handles + # author = significant contribution to code at some point + # maintainer = algorithm maintainer role, "owner" + # specify one or multiple authors and maintainers, only for sktime contribution + # remove maintainer tag if maintained by sktime core team + # + "python_version": None, # PEP 440 python version specifier to limit versions + "python_dependencies": None, # PEP 440 python dependencies specifier, + # e.g., "numba>0.53", or a list, e.g., ["numba>0.53", "numpy>=1.19.0"] + # delete if no python dependencies or version limitations + # + # estimator tags + # -------------- "capability:multiple-alignment": False, # can align more than two sequences? "capability:distance": False, # does compute/return overall distance? "capability:distance-matrix": False, # does compute/return distance matrix? - "python_version": None, # PEP 440 python version specifier to limit versions } # todo: add any hyper-parameters and components to constructor diff --git a/extension_templates/classification.py b/extension_templates/classification.py index 532743cd52b..2a8cbdc9a3e 100644 --- a/extension_templates/classification.py +++ b/extension_templates/classification.py @@ -70,16 +70,33 @@ class MyTimeSeriesClassifier(BaseClassifier): # optional todo: override base class estimator default tags here if necessary # these are the default values, only add if different to these. _tags = { + # packaging info + # -------------- + "authors": ["author1", "author2"], # authors, GitHub handles + "maintainers": ["maintainer1", "maintainer2"], # maintainers, GitHub handles + # author = significant contribution to code at some point + # maintainer = algorithm maintainer role, "owner" + # specify one or multiple authors and maintainers, only for sktime contribution + # remove maintainer tag if maintained by sktime core team + # + "python_version": None, # PEP 440 python version specifier to limit versions + "python_dependencies": None, # PEP 440 python dependencies specifier, + # e.g., "numba>0.53", or a list, e.g., ["numba>0.53", "numpy>=1.19.0"] + # delete if no python dependencies or version limitations + # + # estimator tags + # -------------- "X_inner_mtype": "numpy3D", # which type do _fit/_predict accept, usually - # this is either "numpy3D" or "nested_univ" (nested pd.DataFrame). Other + "y_inner_mtype": "numpy1D", # which type do _fit/_predict return, usually + # this is either "numpy3D", "pd-multiindex" or "nested_univ" (nested df). Other # types are allowable, see datatypes/panel/_registry.py for options. - "capability:multivariate": False, + "capability:multivariate": False, # ability to handle multivariate X + "capability:multioutput": False, # ability to predict multiple columns in y "capability:unequal_length": False, "capability:missing_values": False, "capability:train_estimate": False, "capability:contractable": False, "capability:multithreading": False, - "python_version": None, # PEP 440 python version specifier to limit versions } # todo: add any hyper-parameters and components to constructor @@ -129,13 +146,19 @@ def _fit(self, X, y): ---------- X : guaranteed to be of a type in self.get_tag("X_inner_mtype") if self.get_tag("X_inner_mtype") = "numpy3D": - 3D np.ndarray of shape = [n_instances, n_dimensions, series_length] - if self.get_tag("X_inner_mtype") = "nested_univ": - pd.DataFrame with each column a dimension, each cell a pd.Series + 3D np.ndarray of shape = [n_instances, n_dimensions, series_length] + if self.get_tag("X_inner_mtype") = "pd-multiindex:": + pd.DataFrame with columns = variables, + index = pd.MultiIndex with first level = instance indices, + second level = time indices for list of other mtypes, see datatypes.SCITYPE_REGISTER for specifications, see examples/AA_datatypes_and_datasets.ipynb - y : 1D np.array of int, of shape [n_instances] - class labels for fitting - indices correspond to instance indices in X + y : guaranteed to be of a type in self.get_tag("y_inner_mtype") + 1D iterable, of shape [n_instances] + or 2D iterable, of shape [n_instances, n_dimensions] + class labels for fitting + if self.get_tag("capaility:multioutput") = False, guaranteed to be 1D + if self.get_tag("capaility:multioutput") = True, guaranteed to be 2D Returns ------- @@ -152,31 +175,32 @@ def _fit(self, X, y): # 3. read from self in _fit, 4. pass to interfaced_model.fit in _fit # todo: implement this, mandatory - def _predict(self, X) -> np.ndarray: + def _predict(self, X): """Predict labels for sequences in X. private _predict containing the core logic, called from predict - State required: - Requires state to be "fitted". - - Accesses in self: - Fitted model attributes ending in "_" - Parameters ---------- X : guaranteed to be of a type in self.get_tag("X_inner_mtype") if self.get_tag("X_inner_mtype") = "numpy3D": - 3D np.ndarray of shape = [n_instances, n_dimensions, series_length] - if self.get_tag("X_inner_mtype") = "nested_univ": - pd.DataFrame with each column a dimension, each cell a pd.Series + 3D np.ndarray of shape = [n_instances, n_dimensions, series_length] + if self.get_tag("X_inner_mtype") = "pd-multiindex:": + pd.DataFrame with columns = variables, + index = pd.MultiIndex with first level = instance indices, + second level = time indices for list of other mtypes, see datatypes.SCITYPE_REGISTER for specifications, see examples/AA_datatypes_and_datasets.ipynb Returns ------- - y : 1D np.array of int, of shape [n_instances] - predicted class labels + y : should be of mtype in self.get_tag("y_inner_mtype") + 1D iterable, of shape [n_instances] + or 2D iterable, of shape [n_instances, n_dimensions] + predicted class labels indices correspond to instance indices in X + if self.get_tag("capaility:multioutput") = False, should be 1D + if self.get_tag("capaility:multioutput") = True, should be 2D """ # implement here @@ -200,9 +224,11 @@ def _predict_proba(self, X) -> np.ndarray: ---------- X : guaranteed to be of a type in self.get_tag("X_inner_mtype") if self.get_tag("X_inner_mtype") = "numpy3D": - 3D np.ndarray of shape = [n_instances, n_dimensions, series_length] - if self.get_tag("X_inner_mtype") = "nested_univ": - pd.DataFrame with each column a dimension, each cell a pd.Series + 3D np.ndarray of shape = [n_instances, n_dimensions, series_length] + if self.get_tag("X_inner_mtype") = "pd-multiindex:": + pd.DataFrame with columns = variables, + index = pd.MultiIndex with first level = instance indices, + second level = time indices for list of other mtypes, see datatypes.SCITYPE_REGISTER for specifications, see examples/AA_datatypes_and_datasets.ipynb diff --git a/extension_templates/clustering.py b/extension_templates/clustering.py index 7c3cc2966cd..4e9936de1b3 100644 --- a/extension_templates/clustering.py +++ b/extension_templates/clustering.py @@ -65,6 +65,22 @@ class MyClusterer(BaseClusterer): # optional todo: override base class estimator default tags here if necessary # these are the default values, only add if different to these. _tags = { + # packaging info + # -------------- + "authors": ["author1", "author2"], # authors, GitHub handles + "maintainers": ["maintainer1", "maintainer2"], # maintainers, GitHub handles + # author = significant contribution to code at some point + # maintainer = algorithm maintainer role, "owner" + # specify one or multiple authors and maintainers, only for sktime contribution + # remove maintainer tag if maintained by sktime core team + # + "python_version": None, # PEP 440 python version specifier to limit versions + "python_dependencies": None, # PEP 440 python dependencies specifier, + # e.g., "numba>0.53", or a list, e.g., ["numba>0.53", "numpy>=1.19.0"] + # delete if no python dependencies or version limitations + # + # estimator tags + # -------------- "X_inner_mtype": "numpy3D", # which type do _fit/_predict accept, usually # this is either "numpy3D" or "nested_univ" (nested pd.DataFrame). Other # types are allowable, see datatypes/panel/_registry.py for options. diff --git a/extension_templates/dist_kern_panel.py b/extension_templates/dist_kern_panel.py index 5bd714daa18..4586d798618 100644 --- a/extension_templates/dist_kern_panel.py +++ b/extension_templates/dist_kern_panel.py @@ -57,9 +57,14 @@ class MyTrafoPwPanel(BasePairwiseTransformerPanel): # todo: fill out transformer tags here # delete the tags that you *didn't* change - these defaults are inherited - # _tags = { - # currently there are no tags for pairwise transformers - # } + _tags = { + # specify one or multiple authors and maintainers, only for sktime contribution + "authors": ["author1", "author2"], # authors, GitHub handles + "maintainers": ["maintainer1", "maintainer2"], # maintainers, GitHub handles + # author = significant contribution to code at some point + # maintainer = algorithm maintainer role, "owner" + # remove maintainer tag if maintained by sktime core team + } # in case of inheritance, concrete class should typically set tags # alternatively, descendants can set tags in __init__ (avoid this if possible) diff --git a/extension_templates/dist_kern_tab.py b/extension_templates/dist_kern_tab.py index 926449789fd..7c5cbc3e657 100644 --- a/extension_templates/dist_kern_tab.py +++ b/extension_templates/dist_kern_tab.py @@ -57,9 +57,14 @@ class MyTrafoPw(BasePairwiseTransformer): # todo: fill out transformer tags here # delete the tags that you *didn't* change - these defaults are inherited - # _tags = { - # currently there are no tags for pairwise transformers - # } + _tags = { + # specify one or multiple authors and maintainers, only for sktime contribution + "authors": ["author1", "author2"], # authors, GitHub handles + "maintainers": ["maintainer1", "maintainer2"], # maintainers, GitHub handles + # author = significant contribution to code at some point + # maintainer = algorithm maintainer role, "owner" + # remove maintainer tag if maintained by sktime core team + } # in case of inheritance, concrete class should typically set tags # alternatively, descendants can set tags in __init__ (avoid this if possible) diff --git a/extension_templates/forecasting.py b/extension_templates/forecasting.py index 525499c4b10..532210ac275 100644 --- a/extension_templates/forecasting.py +++ b/extension_templates/forecasting.py @@ -152,6 +152,27 @@ class MyForecaster(BaseForecaster): # only needs to be set if capability:pred_int is True # if False, exception raised if proba methods are called with in-sample fh # + # ---------------------------------------------------------------------------- + # packaging info - only required for sktime contribution or 3rd party packages + # ---------------------------------------------------------------------------- + # + # ownership and contribution tags + # ------------------------------- + # + # author = author(s) of th estimator + # an author is anyone with significant contribution to the code at some point + "authors": ["author1", "author2"], + # valid values: str or list of str, should be GitHub handles + # this should follow best scientific contribution practices + # scope is the code, not the methodology (method is per paper citation) + # + # maintainer = current maintainer(s) of the estimator + # per algorithm maintainer role, see governance document + # this is an "owner" type role, with rights and maintenance duties + "maintainers": ["maintainer1", "maintainer2"], + # valid values: str or list of str, should be GitHub handles + # remove tag if maintained by sktime core team + # # dependency tags: python version and soft dependencies # ----------------------------------------------------- # @@ -161,8 +182,8 @@ class MyForecaster(BaseForecaster): # raises exception at construction if local python version is incompatible # # soft dependency requirement - "python_dependencies": None - # valid values: str or list of str + "python_dependencies": None, + # valid values: str or list of str, PEP 440 valid package version specifiers # raises exception at construction if modules at strings cannot be imported } # in case of inheritance, concrete class should typically set tags diff --git a/extension_templates/forecasting_simple.py b/extension_templates/forecasting_simple.py index 54ecbd9ea78..7fe6210ca34 100644 --- a/extension_templates/forecasting_simple.py +++ b/extension_templates/forecasting_simple.py @@ -107,6 +107,23 @@ class MyForecaster(BaseForecaster): "requires-fh-in-fit": True, # valid values: boolean True (yes), False (no) # if True, raises exception in fit if fh has not been passed + # + # ownership and contribution tags + # ------------------------------- + # + # author = author(s) of th estimator + # an author is anyone with significant contribution to the code at some point + "authors": ["author1", "author2"], + # valid values: str or list of str, should be GitHub handles + # this should follow best scientific contribution practices + # scope is the code, not the methodology (method is per paper citation) + # + # maintainer = current maintainer(s) of the estimator + # per algorithm maintainer role, see governance document + # this is an "owner" type role, with rights and maintenance duties + "maintainers": ["maintainer1", "maintainer2"], + # valid values: str or list of str, should be GitHub handles + # remove tag if maintained by sktime core team } # todo: add any hyper-parameters and components to constructor diff --git a/extension_templates/forecasting_supersimple.py b/extension_templates/forecasting_supersimple.py index 83c6c2ad736..d944cc30595 100644 --- a/extension_templates/forecasting_supersimple.py +++ b/extension_templates/forecasting_supersimple.py @@ -72,6 +72,13 @@ class MyForecaster(BaseForecaster): # "univariate": inner _fit, _predict, receives only single-column DataFrame # "both": inner _predict gets pd.DataFrame series with any number of columns # + # specify one or multiple authors and maintainers, only for sktime contribution + "authors": ["author1", "author2"], # authors, GitHub handles + "maintainers": ["maintainer1", "maintainer2"], # maintainers, GitHub handles + # author = significant contribution to code at some point + # maintainer = algorithm maintainer role, "owner" + # remove maintainer tag if maintained by sktime core team + # # do not change these: # (look at advanced templates if you think these should change) "y_inner_mtype": "pd.DataFrame", diff --git a/extension_templates/param_est.py b/extension_templates/param_est.py index a8dffbd923f..975a87e9cac 100644 --- a/extension_templates/param_est.py +++ b/extension_templates/param_est.py @@ -102,6 +102,26 @@ class MyTimeSeriesParamFitter(BaseParamFitter): # valid values: boolean True (yes), False (no) # if False, raises exception if X passed has more than one variable # + # ---------------------------------------------------------------------------- + # packaging info - only required for sktime contribution or 3rd party packages + # ---------------------------------------------------------------------------- + # + # ownership and contribution tags + # ------------------------------- + # + # author = author(s) of th estimator + # an author is anyone with significant contribution to the code at some point + "authors": ["author1", "author2"], + # valid values: str or list of str, should be GitHub handles + # this should follow best scientific contribution practices + # scope is the code, not the methodology (method is per paper citation) + # + # maintainer = current maintainer(s) of the estimator + # per algorithm maintainer role, see governance document + # this is an "owner" type role, with rights and maintenance duties + "maintainers": ["maintainer1", "maintainer2"], + # valid values: str or list of str, should be GitHub handles + # remove tag if maintained by sktime core team # # dependency tags: python version and soft dependencies # ----------------------------------------------------- @@ -112,8 +132,8 @@ class MyTimeSeriesParamFitter(BaseParamFitter): # raises exception at construction if local python version is incompatible # # soft dependency requirement - "python_dependencies": None - # valid values: str or list of str + "python_dependencies": None, + # valid values: str or list of str, PEP 440 valid package version specifiers # raises exception at construction if modules at strings cannot be imported } # in case of inheritance, concrete class should typically set tags diff --git a/extension_templates/split.py b/extension_templates/split.py new file mode 100644 index 00000000000..a3679c8062a --- /dev/null +++ b/extension_templates/split.py @@ -0,0 +1,290 @@ +"""Extension template for parameter estimators. + +Purpose of this implementation template: + quick implementation of new estimators following the template + NOT a concrete class to import! This is NOT a base class or concrete class! + This is to be used as a "fill-in" coding template. + +How to use this implementation template to implement a new estimator: +- make a copy of the template in a suitable location, give it a descriptive name. +- work through all the "todo" comments below +- fill in code for mandatory methods, and optionally for optional methods +- do not write to reserved variables: _tags, _tags_dynamic +- you can add more private methods, but do not override BaseEstimator's private methods + an easy way to be safe is to prefix your methods with "_custom" +- change docstrings for functions and the file +- ensure interface compatibility by sktime.utils.estimator_checks.check_estimator +- once complete: use as a local library, or contribute to sktime via PR +- more details: + https://www.sktime.net/en/stable/developer_guide/add_estimators.html + +Mandatory implements: + splitting (iloc reference) - _split(self, y) + +Optional implements: + splitting (loc reference) - _split_loc(self, y) + get number of splits - get_n_splits(self, y) + +Testing - required for sktime test framework and check_estimator usage: + get default parameters for test instance(s) - get_test_params(cls) + +copyright: sktime developers, BSD-3-Clause License (see LICENSE file) +""" +# todo: write an informative docstring for the file or module, remove the above +# todo: add an appropriate copyright notice for your estimator +# estimators contributed to sktime should have the copyright notice at the top +# estimators of your own do not need to have permissive or BSD-3 copyright + +# todo: uncomment the following line, enter authors' GitHub IDs +# __author__ = [authorGitHubID, anotherAuthorGitHubID] + + +from sktime.split.base import BaseSplitter + +# todo: add any necessary imports here + +# todo: if any imports are sktime soft dependencies: +# make sure to fill in the "python_dependencies" tag with the package import name + + +class MySplitter(BaseSplitter): + """Custom splitter. todo: write docstring. + + todo: describe your custom splitter here + + Parameters + ---------- + parama : int + descriptive explanation of parama + paramb : string, optional (default='default') + descriptive explanation of paramb + paramc : boolean, optional (default= whether paramb is not the default) + descriptive explanation of paramc + and so on + est : sktime.estimator, BaseEstimator descendant + descriptive explanation of est + est2: another estimator + descriptive explanation of est2 + and so on + """ + + # todo: fill out estimator tags here + # tags are inherited from parent class if they are not set + _tags = { + # + # behavioural tags + # ---------------- + # + # internal support for hierarchical and panel data + "split_hierarchical": False, + # valid values: True, False + # if False, splitter broadcasts over instances for hierarchical data + # if True, internal _split must support pd.MultiIndex + # + # which of _split and _split_loc is called in split_series by default + "split_series_uses": "iloc", + # valid values: "iloc" or "loc" + # determines whether split_series under the hood + # calls split ("iloc") or split_loc ("loc"). Setting this can give + # performance advantages, e.g., if "loc" is faster to obtain. + # + # ---------------------------------------------------------------------------- + # packaging info - only required for sktime contribution or 3rd party packages + # ---------------------------------------------------------------------------- + # + # ownership and contribution tags + # ------------------------------- + # + # author = author(s) of th estimator + # an author is anyone with significant contribution to the code at some point + "authors": ["author1", "author2"], + # valid values: str or list of str, should be GitHub handles + # this should follow best scientific contribution practices + # scope is the code, not the methodology (method is per paper citation) + # + # maintainer = current maintainer(s) of the estimator + # per algorithm maintainer role, see governance document + # this is an "owner" type role, with rights and maintenance duties + "maintainers": ["maintainer1", "maintainer2"], + # valid values: str or list of str, should be GitHub handles + # remove tag if maintained by sktime core team + # + # dependency tags: python version and soft dependencies + # ----------------------------------------------------- + # + # python version requirement + "python_version": None, + # valid values: str, PEP 440 valid python version specifiers + # raises exception at construction if local python version is incompatible + # + # soft dependency requirement + "python_dependencies": None, + # valid values: str or list of str, PEP 440 valid package version specifiers + # raises exception at construction if modules at strings cannot be imported + } + + # todo: add any hyper-parameters and components to constructor + def __init__(self, est, parama, est2=None, paramb="default", paramc=None): + # estimators should precede parameters + # if estimators have default values, set None and initialize below + + # todo: write any hyper-parameters and components to self + self.est = est + self.parama = parama + self.paramb = paramb + self.paramc = paramc + + # leave this as is + super().__init__() + + # todo: optional, parameter checking logic (if applicable) should happen here + # if writes derived values to self, should *not* overwrite self.parama etc + # instead, write to self._parama, self._newparam (starting with _) + + # todo: default estimators should have None arg defaults + # and be initialized here + # do this only with default estimators, not with parameters + # if est2 is None: + # self.estimator = MyDefaultEstimator() + + # todo: if tags of estimator depend on component tags, set these here + # only needed if estimator is a composite + # tags set in the constructor apply to the object and override the class + # + # example 1: conditional setting of a tag + # if est.foo == 42: + # self.set_tags(handles-missing-data=True) + # example 2: cloning tags from component + # self.clone_tags(est2, ["enforce_index_type", "handles-missing-data"]) + + # todo: implement this, mandatory + def _split(self, y): + """Get iloc references to train/test splits of `y`. + + private _split containing the core logic, called from split + + Parameters + ---------- + y : pd.Index + Index of time series to split + + Yields + ------ + train : 1D np.ndarray of dtype int + Training window indices, iloc references to training indices in y + test : 1D np.ndarray of dtype int + Test window indices, iloc references to test indices in y + """ + # todo: implement the core logic of your splitter here + # ensure to avoid side effects to self or y + # + # example: + # for train, test in some_logic(y): + # yield train, test + + # todo: consider implementing this, optional + # if not implementing, delete this - default is as below and present in base class + def _split_loc(self, y): + """Get loc references to train/test splits of `y`. + + private _split containing the core logic, called from split_loc + + Default implements using split and y.index to look up the loc indices. + Can be overridden for faster implementation. + + Parameters + ---------- + y : pd.Index + index of time series to split + + Yields + ------ + train : pd.Index + Training window indices, loc references to training indices in y + test : pd.Index + Test window indices, loc references to test indices in y + """ + for train, test in self.split(y): + # default gets loc index from iloc index + yield y[train], y[test] + + # todo: consider implementing this, optional + # only implement if the result does not depend on y + # if not implementing, delete this - default is as below and present in base class + def get_n_splits(self, y) -> int: + """Return the number of splits. + + Parameters + ---------- + y : pd.Series or pd.Index, optional (default=None) + Time series to split + + Returns + ------- + n_splits : int + The number of splits. + """ + return len(list(self.split(y))) + + @classmethod + def get_test_params(cls, parameter_set="default"): + """Return testing parameter settings for the splitter. + + Parameters + ---------- + parameter_set : str, default="default" + Name of the set of test parameters to return, for use in tests. If no + special parameters are defined for a value, will return `"default"` set. + + Returns + ------- + params : dict or list of dict, default = {} + Parameters to create testing instances of the class + Each dict are parameters to construct an "interesting" test instance, i.e., + `MyClass(**params)` or `MyClass(**params[i])` creates a valid test instance. + `create_test_instance` uses the first (or only) dictionary in `params` + """ + + # todo: set the testing parameters for the object + # Testing parameters can be dictionary or list of dictionaries + # Testing parameter choice should cover internal cases well. + # + # this method can, if required, use: + # class properties (e.g., inherited); parent class test case + # imported objects such as estimators from sktime or sklearn + # important: all such imports should be *inside get_test_params*, not at the top + # since imports are used only at testing time + # + # The parameter_set argument is not used for automated, module level tests. + # It can be used in custom, estimator specific tests, for "special" settings. + # A parameter dictionary must be returned *for all values* of parameter_set, + # i.e., "parameter_set not available" errors should never be raised. + # + # A good parameter set should primarily satisfy two criteria, + # 1. Chosen set of parameters should have a low testing time, + # ideally in the magnitude of few seconds for the entire test suite. + # This is vital for the cases where default values result in + # "big" models which not only increases test time but also + # run into the risk of test workers crashing. + # 2. There should be a minimum two such parameter sets with different + # sets of values to ensure a wide range of code coverage is provided. + # + # example 1: specify params as dictionary + # any number of params can be specified + # params = {"est": value0, "parama": value1, "paramb": value2} + # + # example 2: specify params as list of dictionary + # note: Only first dictionary will be used by create_test_instance + # params = [{"est": value1, "parama": value2}, + # {"est": value3, "parama": value4}] + # return params + # + # example 3: parameter set depending on param_set value + # note: only needed if a separate parameter set is needed in tests + # if parameter_set == "special_param_set": + # params = {"est": value1, "parama": value2} + # return params + # + # # "default" params - always returned except for "special_param_set" value + # params = {"est": value3, "parama": value4} + # return params diff --git a/extension_templates/transformer.py b/extension_templates/transformer.py index f330db6dee8..5fccc728890 100644 --- a/extension_templates/transformer.py +++ b/extension_templates/transformer.py @@ -239,6 +239,26 @@ class MyTransformer(BaseTransformer): # valid values: boolean True (yes), False (no) # used for search index and validity checking, does not raise direct exception # + # ---------------------------------------------------------------------------- + # packaging info - only required for sktime contribution or 3rd party packages + # ---------------------------------------------------------------------------- + # + # ownership and contribution tags + # ------------------------------- + # + # author = author(s) of th estimator + # an author is anyone with significant contribution to the code at some point + "authors": ["author1", "author2"], + # valid values: str or list of str, should be GitHub handles + # this should follow best scientific contribution practices + # scope is the code, not the methodology (method is per paper citation) + # + # maintainer = current maintainer(s) of the estimator + # per algorithm maintainer role, see governance document + # this is an "owner" type role, with rights and maintenance duties + "maintainers": ["maintainer1", "maintainer2"], + # valid values: str or list of str, should be GitHub handles + # remove tag if maintained by sktime core team # # dependency tags: python version and soft dependencies # ----------------------------------------------------- @@ -249,8 +269,8 @@ class MyTransformer(BaseTransformer): # raises exception at construction if local python version is incompatible # # soft dependency requirement - "python_dependencies": None - # valid values: str or list of str + "python_dependencies": None, + # valid values: str or list of str, PEP 440 valid package version specifiers # raises exception at construction if modules at strings cannot be imported } # in case of inheritance, concrete class should typically set tags diff --git a/extension_templates/transformer_simple.py b/extension_templates/transformer_simple.py index b1e37755320..e5b29d76dae 100644 --- a/extension_templates/transformer_simple.py +++ b/extension_templates/transformer_simple.py @@ -172,6 +172,23 @@ class MyTransformer(BaseTransformer): "handles-missing-data": False, # can estimator handle missing data? # valid values: boolean True (yes), False (no) # if False, may raise exception when passed time series with missing values + # + # ownership and contribution tags + # ------------------------------- + # + # author = author(s) of th estimator + # an author is anyone with significant contribution to the code at some point + "authors": ["author1", "author2"], + # valid values: str or list of str, should be GitHub handles + # this should follow best scientific contribution practices + # scope is the code, not the methodology (method is per paper citation) + # + # maintainer = current maintainer(s) of the estimator + # per algorithm maintainer role, see governance document + # this is an "owner" type role, with rights and maintenance duties + "maintainers": ["maintainer1", "maintainer2"], + # valid values: str or list of str, should be GitHub handles + # remove tag if maintained by sktime core team } # todo: add any hyper-parameters and components to constructor diff --git a/extension_templates/transformer_supersimple.py b/extension_templates/transformer_supersimple.py index 405f8beabb9..a321336506f 100644 --- a/extension_templates/transformer_supersimple.py +++ b/extension_templates/transformer_supersimple.py @@ -75,6 +75,13 @@ class MyTransformer(BaseTransformer): # True: inner _fit, _predict, receives only single-column DataFrame # False: inner _predict gets pd.DataFrame series with any number of columns # + # specify one or multiple authors and maintainers, only for sktime contribution + "authors": ["author1", "author2"], # authors, GitHub handles + "maintainers": ["maintainer1", "maintainer2"], # maintainers, GitHub handles + # author = significant contribution to code at some point + # maintainer = algorithm maintainer role, "owner" + # remove maintainer tag if maintained by sktime core team + # # do not change these: # (look at advanced templates if you think these should change) "scitype:transform-input": "Series", diff --git a/pyproject.toml b/pyproject.toml index c0a5dbd9124..c1b5c1683b0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "sktime" -version = "0.24.0" +version = "0.26.0" description = "A unified framework for machine learning with time series" readme = "README.md" keywords = [ @@ -56,8 +56,8 @@ dependencies = [ "numpy<1.27,>=1.21", # required for framework layer and base class logic "packaging", # for estimator specific dependency parsing "pandas<2.2.0,>=1.1", # pandas is the main in-memory data container - "scikit-base<0.7.0", # base module for sklearn compatible base API - "scikit-learn<1.4.0,>=0.24", # required for estimators and framework layer + "scikit-base<0.8.0", # base module for sklearn compatible base API + "scikit-learn>=0.24,<1.5.0", # required for estimators and framework layer "scipy<2.0.0,>=1.2", # required for estimators and framework layer ] @@ -77,10 +77,10 @@ dependencies = [ # or "pip install sktime[all_extras_pandas2]", to install only pandas 2 compatible deps # all_extras = [ - "arch<6.3.0,>=5.6", + "arch>=5.6,<6.4.0", "cloudpickle", "dash!=2.9.0", - "dask", + "dask<2024.1.1", "dtw-python", 'esig==0.9.7; python_version < "3.10"', 'filterpy>=1.4.5; python_version < "3.11"', @@ -107,16 +107,16 @@ all_extras = [ "statsmodels>=0.12.1", 'stumpy>=1.5.1; python_version < "3.11"', 'tbats>=1.1; python_version < "3.12"', - 'tensorflow; python_version < "3.11"', + 'tensorflow; python_version < "3.12"', 'tsfresh>=0.17; python_version < "3.12"', - 'tslearn<0.6.0,>=0.5.2; python_version < "3.11"', + 'tslearn<0.7.0,!=0.6.0,>=0.5.2; python_version < "3.11"', "xarray", ] all_extras_pandas2 = [ - "arch<6.3.0,>=5.6", + "arch>=5.6,<6.4.0", "cloudpickle", "dash!=2.9.0", - "dask<2023.7.1", + "dask<2024.1.1", "dtw-python", 'esig==0.9.7; python_version < "3.10"', 'filterpy>=1.4.5; python_version < "3.11"', @@ -142,14 +142,14 @@ all_extras_pandas2 = [ "statsmodels>=0.12.1", 'stumpy>=1.5.1; python_version < "3.11"', 'tbats>=1.1; python_version < "3.12"', - 'tensorflow; python_version < "3.11"', + 'tensorflow; python_version < "3.12"', 'tsfresh>=0.17; python_version < "3.12"', - 'tslearn<0.6.0,>=0.5.2; python_version < "3.11"', + 'tslearn<0.7.0,!=0.6.0,>=0.5.2; python_version < "3.11"', "xarray", ] # single-task dependencies, e.g., forecasting, classification, etc. -# manually curated and intentionally smaller to avoid dependeny conflicts +# manually curated and intentionally smaller to avoid dependency conflicts # names are identical with the names of the modules and estimator type strings # dependency sets are selected to cover the most popular estimators in each module # (this is a subjective choice, and may change over time as the ecosystem evolves, @@ -174,10 +174,10 @@ classification = [ ] clustering = [ 'numba<0.59,>=0.53; python_version < "3.12"', - 'tslearn<0.6.3,>=0.5.2; python_version < "3.12"', + 'tslearn<0.7.0,!=0.6.0,>=0.5.2; python_version < "3.12"', ] forecasting = [ - "arch<6.3,>=5.6", + "arch>=5.6,<6.4", 'pmdarima!=1.8.1,<2.1,>=1.8; python_version < "3.12"', "prophet<1.2,>=1.1", "skpro<2.2,>=2", @@ -200,10 +200,10 @@ regression = [ transformations = [ 'esig<0.10,>=0.9.7; python_version < "3.11"', "filterpy<1.5,>=1.4.5", - "holidays<0.35,>=0.29", - "mne<1.6,>=1.5", + "holidays>=0.29,<0.43", + "mne>=1.5,<1.7", 'numba<0.59,>=0.53; python_version < "3.12"', - "pycatch22<0.4.4,>=0.4", + "pycatch22>=0.4,<0.4.4", "pykalman-bardo<0.10,>=0.9.7", "statsmodels<0.15,>=0.12.1", 'stumpy<1.13,>=1.5.1; python_version < "3.12"', @@ -236,16 +236,16 @@ docs = [ "Sphinx!=7.2.0,<8.0.0", "sphinx-copybutton", "sphinx-design<0.6.0", - "sphinx-gallery<0.15.0", - "sphinx-issues<4.0.0", + "sphinx-gallery<0.16.0", + "sphinx-issues<5.0.0", "tabulate", ] tests = [ - "pytest<7.5,>=7.4", + "pytest>=7.4,<8.1", "pytest-cov<4.2,>=4.1", "pytest-randomly<3.16,>=3.15", "pytest-timeout>=2.1,<2.3", - "pytest-xdist<3.4,>=3.3", + "pytest-xdist>=3.3,<3.6", ] # CI related soft dependency sets - not for users of sktime, only for developers @@ -260,7 +260,8 @@ cython_extras = [ "numba<0.59", ] dl = [ - "tensorflow", + 'tensorflow<=2.14,>=2; python_version < "3.12"', + 'torch; python_version < "3.12"', ] mlflow = [ "mlflow", diff --git a/sktime/__init__.py b/sktime/__init__.py index c30032c0c59..0e1131d015a 100644 --- a/sktime/__init__.py +++ b/sktime/__init__.py @@ -1,6 +1,6 @@ """sktime.""" -__version__ = "0.24.0" +__version__ = "0.26.0" __all__ = ["show_versions"] diff --git a/sktime/_contrib/notebooks/windows_installation.ipynb b/sktime/_contrib/notebooks/windows_installation.ipynb index fc31c29689d..53f743b917c 100644 --- a/sktime/_contrib/notebooks/windows_installation.ipynb +++ b/sktime/_contrib/notebooks/windows_installation.ipynb @@ -340,7 +340,7 @@ "source": [ "# Section 4: Configuring `pre-commit` with PyCharm\n", "\n", - "`pre-commit` is a very useful package for checking your code for simple stye errors at the commit stage. This is very useful when working on large collaborative projects as it allows code reviewers to focus on the function of new code rather than conformity to style. For example, consider the following code: \n", + "`pre-commit` is a very useful package for checking your code for simple style errors at the commit stage. This is very useful when working on large collaborative projects as it allows code reviewers to focus on the function of new code rather than conformity to style. For example, consider the following code: \n", "\n", "![21_pre-commit_examples.png](img/windows_installation/21_pre-commit_examples.png)\n", "\n", diff --git a/sktime/alignment/dtw_numba.py b/sktime/alignment/dtw_numba.py index b466cec72e0..bf806b6538b 100644 --- a/sktime/alignment/dtw_numba.py +++ b/sktime/alignment/dtw_numba.py @@ -117,11 +117,16 @@ class AlignerDtwNumba(BaseAligner): """ _tags = { + # packaging info + # -------------- + "authors": ["chrisholder", "TonyBagnall", "fkiraly"], + "python_dependencies": "numba", + # estimator type + # -------------- "capability:multiple-alignment": False, # can align more than two sequences? "capability:distance": True, # does compute/return overall distance? "capability:distance-matrix": True, # does compute/return distance matrix? "X_inner_mtype": "numpy3D", - "python_dependencies": "numba", } def __init__( diff --git a/sktime/alignment/dtw_python.py b/sktime/alignment/dtw_python.py index 7334c2b73de..fbd211f858d 100644 --- a/sktime/alignment/dtw_python.py +++ b/sktime/alignment/dtw_python.py @@ -47,12 +47,17 @@ class AlignerDTW(BaseAligner): """ _tags = { + # packaging info + # -------------- + "authors": ["fkiraly"], + "python_dependencies": "dtw-python", + "python_dependencies_alias": {"dtw-python": "dtw"}, + # estimator type + # -------------- "capability:multiple-alignment": False, # can align more than two sequences? "capability:distance": True, # does compute/return overall distance? "capability:distance-matrix": True, # does compute/return distance matrix? "alignment_type": "partial", - "python_dependencies": "dtw-python", - "python_dependencies_alias": {"dtw-python": "dtw"}, } def __init__( diff --git a/sktime/alignment/edit_numba.py b/sktime/alignment/edit_numba.py index 5af0c3a39f2..6b012872e87 100644 --- a/sktime/alignment/edit_numba.py +++ b/sktime/alignment/edit_numba.py @@ -112,13 +112,18 @@ class AlignerEditNumba(BaseAligner): """ _tags = { + # packaging info + # -------------- + "authors": ["chrisholder", "TonyBagnall", "fkiraly"], + "python_dependencies": "numba", + # estimator type + # -------------- "symmetric": True, # all the distances are symmetric "capability:multiple-alignment": False, # can align more than two sequences? "capability:distance": True, # does compute/return overall distance? "capability:distance-matrix": True, # does compute/return distance matrix? "alignment_type": "partial", "X_inner_mtype": "numpy3D", - "python_dependencies": "numba", } ALLOWED_DISTANCE_STR = ["lcss", "edr", "erp", "twe"] diff --git a/sktime/alignment/lucky.py b/sktime/alignment/lucky.py index 5fb25afcd14..ddba6d70704 100644 --- a/sktime/alignment/lucky.py +++ b/sktime/alignment/lucky.py @@ -30,6 +30,11 @@ class AlignerLuckyDtw(BaseAligner): """ _tags = { + # packaging info + # -------------- + "authors": ["fkiraly", "Krisztian A Buza"], + # estimator type + # -------------- "capability:multiple-alignment": False, # can align more than two sequences? "capability:distance": True, # does compute/return overall distance? "capability:distance-matrix": True, # does compute/return distance matrix? diff --git a/sktime/alignment/naive.py b/sktime/alignment/naive.py index d78f5f6caca..9a04f59a7f1 100644 --- a/sktime/alignment/naive.py +++ b/sktime/alignment/naive.py @@ -25,6 +25,11 @@ class AlignerNaive(BaseAligner): """ _tags = { + # packaging info + # -------------- + "authors": ["fkiraly"], + # estimator type + # -------------- "capability:multiple-alignment": True, # can align more than two sequences? } diff --git a/sktime/alignment/utils/utils_align.py b/sktime/alignment/utils/utils_align.py index e88848830fa..a0db08d78ec 100644 --- a/sktime/alignment/utils/utils_align.py +++ b/sktime/alignment/utils/utils_align.py @@ -67,7 +67,13 @@ def convert_align_to_align_loc(align, X, align_name="align", df_name="X", copy=T """ from sktime.datatypes import check_is_mtype - check_is_mtype(align, "alignment", scitype="Alignment", var_name=align_name) + check_is_mtype( + align, + "alignment", + scitype="Alignment", + var_name=align_name, + msg_return_dict="list", + ) if not isinstance(X, list): raise ValueError(f"{df_name} must be a list of pandas.DataFrame") diff --git a/sktime/annotation/clasp.py b/sktime/annotation/clasp.py index c49306062a6..0e07ab0677f 100644 --- a/sktime/annotation/clasp.py +++ b/sktime/annotation/clasp.py @@ -191,7 +191,7 @@ class ClaSPSegmentation(BaseSeriesAnnotator): fmt : str {"dense", "sparse"}, optional (default="sparse") Annotation output format: * If "sparse", a pd.Series of the found Change Points is returned - * If "dense", a pd.IndexSeries with the Segmenation of X is returned + * If "dense", a pd.IndexSeries with the Segmentation of X is returned exclusion_radius : int Exclusion Radius for change points to be non-trivial matches diff --git a/sktime/annotation/ggs.py b/sktime/annotation/ggs.py index 29ec206a8c8..2b8262061a2 100644 --- a/sktime/annotation/ggs.py +++ b/sktime/annotation/ggs.py @@ -34,14 +34,15 @@ import logging import math -from dataclasses import asdict, dataclass, field -from typing import Dict, List, Tuple +from dataclasses import dataclass, field +from typing import List, Tuple import numpy as np import numpy.typing as npt +import pandas as pd from sklearn.utils.validation import check_random_state -from sktime.base import BaseEstimator +from sktime.annotation.base._base import BaseSeriesAnnotator from sktime.utils.validation._dependencies import _check_estimator_deps logger = logging.getLogger(__name__) @@ -367,7 +368,7 @@ def find_change_points(self, data: npt.ArrayLike) -> List[int]: return change_points -class GreedyGaussianSegmentation(BaseEstimator): +class GreedyGaussianSegmentation(BaseSeriesAnnotator): """Greedy Gaussian Segmentation Estimator. The method approximates solutions for the problem of breaking a @@ -408,10 +409,6 @@ class GreedyGaussianSegmentation(BaseEstimator): change_points_: array_like, default=[] Locations of change points as integer indexes. By convention change points include the identity segmentation, i.e. first and last index + 1 values. - _intermediate_change_points: List[List[int]], default=[] - Intermediate values of change points for each value of k = 1...k_max - _intermediate_ll: List[float], default=[] - Intermediate values for log-likelihood for each value of k = 1...k_max Notes ----- @@ -428,6 +425,8 @@ class GreedyGaussianSegmentation(BaseEstimator): https://doi.org/10.1007/s11634-018-0335-0 """ + _tags = {"fit_is_empty": True} + def __init__( self, k_max: int = 10, @@ -444,7 +443,7 @@ def __init__( self.random_state = random_state _check_estimator_deps(self) - super().__init__() + super().__init__(fmt="dense", labels="int_label") self._adaptee = GGS( k_max=k_max, @@ -454,33 +453,47 @@ def __init__( random_state=random_state, ) - def fit(self, X: npt.ArrayLike, y: npt.ArrayLike = None): - """Fit method for compatibility with sklearn-type estimator interface. + @property + def _intermediate_change_points(self) -> List[List[int]]: + """Intermediate values of change points for each value of k = 1...k_max. + + Default value is an empty list. + """ + return self._adaptee._intermediate_change_points - It sets the internal state of the estimator and returns the initialized - instance. + @property + def _intermediate_ll(self) -> List[float]: + """Intermediate values for log-likelihood for each value of k = 1...k_max. + + Default value is an empty list. + """ + return self._adaptee._intermediate_ll + + def _fit(self, X, Y=None): + """Fit method for compatibility with sklearn-type estimator interface. Parameters ---------- - X: array_like - 2D `array_like` representing time series with sequence index along - the first dimension and value series as columns. + X: array_like (1D or 2D), pd.Series, or pd.DataFrame + 1D array of timeseries values, or 2D array with index along the first + dimension and columns representing features of the timeseries. If pd.Series, + the values of the timeseries are the values of the series. If pd.DataFrame, + each column represents a feature of the timeseries. y: array_like Placeholder for compatibility with sklearn-api, not used, default=None. """ - self._adaptee.initialize_intermediates() return self - def predict(self, X: npt.ArrayLike, y: npt.ArrayLike = None) -> npt.ArrayLike: + def _predict(self, X) -> npt.ArrayLike: """Perform segmentation. Parameters ---------- - X: array_like - 2D `array_like` representing time series with sequence index along - the first dimension and value series as columns. - y: array_like - Placeholder for compatibility with sklearn-api, not used, default=None. + X: array_like (1D or 2D), pd.Series, or pd.DataFrame + 1D array of timeseries values, or 2D array with index along the first + dimension and columns representing features of the timeseries. If pd.Series, + the values of the timeseries are the values of the series. If pd.DataFrame, + each column represents a feature of the timeseries. Returns ------- @@ -489,6 +502,15 @@ def predict(self, X: npt.ArrayLike, y: npt.ArrayLike = None) -> npt.ArrayLike: dimension of X. The numerical values represent distinct segments labels for each of the data points. """ + if isinstance(X, pd.Series): + X = X.values[:, np.newaxis] + elif isinstance(X, pd.DataFrame): + X = X.values + elif len(X.shape) == 1: + X = X[:, np.newaxis] + elif len(X.shape) > 2: + raise ValueError("X must not have more than two dimensions.") + self._adaptee.initialize_intermediates() self.change_points_ = self._adaptee.find_change_points(X) labels = np.zeros(X.shape[0], dtype=np.int32) @@ -498,16 +520,16 @@ def predict(self, X: npt.ArrayLike, y: npt.ArrayLike = None) -> npt.ArrayLike: labels[start:stop] = i return labels - def fit_predict(self, X: npt.ArrayLike, y: npt.ArrayLike = None) -> npt.ArrayLike: + def fit_predict(self, X) -> npt.ArrayLike: """Perform segmentation. Parameters ---------- - X: array_like - 2D `array_like` representing time series with sequence index along - the first dimension and value series as columns. - y: array_like - Placeholder for compatibility with sklearn-api, not used, default=None. + X: array_like (1D or 2D), pd.Series, or pd.DataFrame + 1D array of timeseries values, or 2D array with index along the first + dimension and columns representing features of the timeseries. If pd.Series, + the values of the timeseries are the values of the series. If pd.DataFrame, + each column represents a feature of the timeseries. Returns ------- @@ -516,49 +538,21 @@ def fit_predict(self, X: npt.ArrayLike, y: npt.ArrayLike = None) -> npt.ArrayLik dimension of X. The numerical values represent distinct segments labels for each of the data points. """ - return self.fit(X, y).predict(X, y) + return self.fit(X, None).predict(X) - def get_params(self, deep: bool = True) -> Dict: - """Return initialization parameters. + @classmethod + def get_test_params(cls, parameter_set="default"): + """Return testing parameter settings for the estimator. Parameters ---------- - deep: bool - Dummy argument for compatibility with sklearn-api, not used. + parameter_set : str, default="default" + Name of the set of test parameters to return, for use in tests. If no + special parameters are defined for a value, will return `"default"` set. Returns ------- - params: dict - Dictionary with the estimator's initialization parameters, with - keys being argument names and values being argument values. + params : dict or list of dict """ - attrs_to_ignore = [ - "change_points_", - "_intermediate_change_points", - "_intermediate_ll", - ] - params = asdict(self._adaptee) - params = { - key: value for key, value in params.items() if key not in attrs_to_ignore - } + params = {"k_max": 10, "lamb": 1.0} return params - - def set_params(self, **parameters): - """Set the parameters of this object. - - Parameters - ---------- - parameters : dict - Initialization parameters for th estimator. - - Returns - ------- - self : reference to self (after parameters have been set) - """ - for key, value in parameters.items(): - setattr(self._adaptee, key, value) - return self - - def __repr__(self) -> str: - """Return a string representation of the estimator.""" - return self._adaptee.__repr__() diff --git a/sktime/annotation/tests/test_plotting.py b/sktime/annotation/tests/test_plotting.py new file mode 100644 index 00000000000..6474229d392 --- /dev/null +++ b/sktime/annotation/tests/test_plotting.py @@ -0,0 +1,75 @@ +import numpy as np +import pandas as pd +import pytest + +from sktime.annotation.plotting.utils import ( + plot_time_series_with_change_points, + plot_time_series_with_profiles, +) +from sktime.utils.validation._dependencies import _check_soft_dependencies + + +@pytest.fixture +def time_series_data(): + ts_data = np.random.rand(100) + ts = pd.DataFrame({"Data": ts_data}) + true_cps = [4, 8] + font_size = 12 + ts_name = "Test Time Series" + profiles = np.array([np.random.rand(100) for _ in range(20)]) + found_cps = [35, 65] + score_name = "Custom Score" + return { + "ts_name": ts_name, + "ts": ts, + "true_cps": true_cps, + "font_size": font_size, + "profiles": profiles, + "found_cps": found_cps, + "score_name": score_name, + } + + +@pytest.mark.skipif( + not _check_soft_dependencies("matplotlib", "seaborn", severity="none"), + reason="Seaborn is required as a dependency for this plot.", +) +def test_plot_time_series_with_change_points(time_series_data): + import matplotlib.pyplot as plt + + # Access data from the fixture + ts_name = time_series_data["ts_name"] + ts = time_series_data["ts"] + true_cps = time_series_data["true_cps"] + font_size = time_series_data["font_size"] + + fig, ax = plot_time_series_with_change_points(ts_name, ts, true_cps, font_size) + + assert isinstance(fig, plt.Figure) + assert isinstance(ax, plt.Axes) + assert ax.get_title() == ts_name + + +@pytest.mark.skipif( + not _check_soft_dependencies("seaborn", "matplotlib", severity="none"), + reason="Seaborn is required as a dependency for this plot.", +) +def test_plot_time_series_with_profiles(time_series_data): + import matplotlib.pyplot as plt + + # Access data from the fixture + ts_name = time_series_data["ts_name"] + ts = time_series_data["ts"] + true_cps = time_series_data["true_cps"] + font_size = time_series_data["font_size"] + profiles = time_series_data["profiles"] + found_cps = time_series_data["found_cps"] + score_name = time_series_data["score_name"] + + fig, ax = plot_time_series_with_profiles( + ts_name, ts, profiles, true_cps, found_cps, score_name, font_size + ) + + assert isinstance(fig, plt.Figure) + assert isinstance(ax, np.ndarray) + assert ax[0].get_title() == ts_name diff --git a/sktime/base/__init__.py b/sktime/base/__init__.py index 4a30db2d045..6cf431c8361 100644 --- a/sktime/base/__init__.py +++ b/sktime/base/__init__.py @@ -6,10 +6,12 @@ __all__ = [ "BaseObject", "BaseEstimator", + "BasePanelMixin", "_HeterogenousMetaEstimator", "load", ] from sktime.base._base import BaseEstimator, BaseObject +from sktime.base._base_panel import BasePanelMixin from sktime.base._meta import _HeterogenousMetaEstimator from sktime.base._serialize import load diff --git a/sktime/base/_base.py b/sktime/base/_base.py index 6c89b5ed34e..bac9533a719 100644 --- a/sktime/base/_base.py +++ b/sktime/base/_base.py @@ -66,6 +66,11 @@ class name: BaseEstimator from sktime.exceptions import NotFittedError from sktime.utils.random_state import set_random_state +SERIALIZATION_FORMATS = { + "pickle", + "cloudpickle", +} + class BaseObject(_BaseObject): """Base class for parametric objects with tags in sktime. @@ -73,7 +78,15 @@ class BaseObject(_BaseObject): Extends skbase BaseObject with additional features. """ - _config = {"warnings": "on"} + _config = { + "warnings": "on", + "backend:parallel": None, # parallelization backend for broadcasting + # {None, "dask", "loky", "multiprocessing", "threading"} + # None: no parallelization + # "loky", "multiprocessing" and "threading": uses `joblib` Parallel loops + # "dask": uses `dask`, requires `dask` package in environment + "backend:parallel:params": None, # params for parallelization backend, + } _config_doc = { "display": """ @@ -86,8 +99,8 @@ class BaseObject(_BaseObject): "print_changed_only": """ print_changed_only : bool, default=True whether printing of self lists only self-parameters that differ - from defaults (False), or all parameter names and values (False) - does not nest, i.e., only affects self and not component estimators + from defaults (False), or all parameter names and values (False). + Does not nest, i.e., only affects self and not component estimators. """, "warnings": """ warnings : str, "on" (default), or "off" @@ -96,12 +109,38 @@ class BaseObject(_BaseObject): * "on" = will raise warnings from sktime * "off" = will not raise warnings from sktime """, + "backend:parallel": """ + backend:parallel : str, optional, default="None" + backend to use for parallelization when broadcasting/vectorizing, one of + + - "None": executes loop sequentally, simple list comprehension + - "loky", "multiprocessing" and "threading": uses ``joblib.Parallel`` + - "joblib": custom and 3rd party ``joblib`` backends, e.g., ``spark`` + - "dask": uses ``dask``, requires ``dask`` package in environment + """, + "backend:parallel:params": """ + backend:parallel:params : dict, optional, default={} (no parameters passed) + additional parameters passed to the parallelization backend as config. + Valid keys depend on the value of ``backend:parallel``: + + - "None": no additional parameters, ``backend_params`` is ignored + - "loky", "multiprocessing" and "threading": default ``joblib`` backends + any valid keys for ``joblib.Parallel`` can be passed here, e.g., + ``n_jobs``, with the exception of ``backend`` which is directly + controlled by ``backend``. + If ``n_jobs`` is not passed, it will default to ``-1``, other parameters + will default to ``joblib`` defaults. + - "joblib": custom and 3rd party ``joblib`` backends, + e.g., ``spark``. Any valid keys for ``joblib.Parallel`` + can be passed here, e.g., ``n_jobs``, + ``backend`` must be passed as a key of ``backend_params`` in this case. + If ``n_jobs`` is not passed, it will default to ``-1``, other parameters + will default to ``joblib`` defaults. + - "dask": any valid keys for ``dask.compute`` can be passed, + e.g., ``scheduler`` + """, } - def __init__(self): - super().__init__() - self.__class__.set_config.__doc__ = self._get_set_config_doc() - def __eq__(self, other): """Equality dunder. Checks equal class and parameters. @@ -110,7 +149,7 @@ def __eq__(self, other): Nested BaseObject descendants from get_params are compared via __eq__ as well. """ - from sktime.utils._testing.deep_equals import deep_equals + from sktime.utils.deep_equals import deep_equals if not isinstance(other, BaseObject): return False @@ -157,7 +196,16 @@ def _get_set_config_doc(cls): doc += doc_end return doc - def save(self, path=None): + @classmethod + def _init_dynamic_doc(cls): + """Set docstring for set_config from self._config_doc.""" + try: # try/except to avoid unexpected failures + cls.set_config = deepcopy_func(cls.set_config) + cls.set_config.__doc__ = cls._get_set_config_doc() + except Exception: + pass + + def save(self, path=None, serialization_format="pickle"): """Save serialized self to bytes-like object or to (.zip) file. Behaviour: @@ -177,6 +225,12 @@ def save(self, path=None): path="/home/stored/estimator" then a zip file `estimator.zip` will be stored in `/home/stored/`. + serialization_format: str, default = "pickle" + Module to use for serialization. + The available options are "pickle" and "cloudpickle". + Note that non-default formats might require + installation of other soft dependencies. + Returns ------- if `path` is None - in-memory serialized self @@ -187,21 +241,44 @@ def save(self, path=None): from pathlib import Path from zipfile import ZipFile - if path is None: - return (type(self), pickle.dumps(self)) - if not isinstance(path, (str, Path)): + from sktime.utils.validation._dependencies import _check_soft_dependencies + + if serialization_format not in SERIALIZATION_FORMATS: + raise ValueError( + f"The provided `serialization_format`='{serialization_format}' " + "is not yet supported. The possible formats are: " + f"{SERIALIZATION_FORMATS}." + ) + + if path is not None and not isinstance(path, (str, Path)): raise TypeError( "`path` is expected to either be a string or a Path object " f"but found of type:{type(path)}." ) + if path is not None: + path = Path(path) if isinstance(path, str) else path + path.mkdir() + + if serialization_format == "cloudpickle": + _check_soft_dependencies("cloudpickle", severity="error") + import cloudpickle + + if path is None: + return (type(self), cloudpickle.dumps(self)) - path = Path(path) if isinstance(path, str) else path - path.mkdir() + with open(path / "_metadata", "wb") as file: + cloudpickle.dump(type(self), file) + with open(path / "_obj", "wb") as file: + cloudpickle.dump(self, file) - with open(path / "_metadata", "wb") as file: - pickle.dump(type(self), file) - with open(path / "_obj", "wb") as file: - pickle.dump(self, file) + elif serialization_format == "pickle": + if path is None: + return (type(self), pickle.dumps(self)) + + with open(path / "_metadata", "wb") as file: + pickle.dump(type(self), file) + with open(path / "_obj", "wb") as file: + pickle.dump(self, file) shutil.make_archive(base_name=path, format="zip", root_dir=path) shutil.rmtree(path) @@ -550,12 +627,12 @@ def _get_fitted_params_default(self, obj=None): # default retrieves all self attributes ending in "_" # and returns them with keys that have the "_" removed - fitted_params = [attr for attr in dir(obj) if attr.endswith("_")] - fitted_params = [x for x in fitted_params if not x.startswith("_")] - fitted_params = [x for x in fitted_params if hasattr(obj, x)] - fitted_param_dict = {p[:-1]: getattr(obj, p) for p in fitted_params} - - return fitted_param_dict + fitted_params = { + attr[:-1]: getattr(obj, attr) + for attr in dir(obj) + if attr.endswith("_") and not attr.startswith("_") and hasattr(obj, attr) + } + return fitted_params def _get_fitted_params(self): """Get fitted parameters. @@ -580,3 +657,20 @@ def _clone_estimator(base_estimator, random_state=None): set_random_state(estimator, random_state) return estimator + + +def deepcopy_func(f, name=None): + """Deepcopy of a function.""" + import types + + return types.FunctionType( + f.__code__, + f.__globals__, + name or f.__name__, + f.__defaults__, + f.__closure__, + ) + + +# initialize dynamic docstrings +BaseObject._init_dynamic_doc() diff --git a/sktime/base/_base_panel.py b/sktime/base/_base_panel.py new file mode 100644 index 00000000000..c6c9008a2c4 --- /dev/null +++ b/sktime/base/_base_panel.py @@ -0,0 +1,485 @@ +# copyright: sktime developers, BSD-3-Clause License (see LICENSE file) +"""Intermediate base class mixin with common functionality for panel tasks. + +Inherits from BaseEstimator, descendents are: + +BaseClassifier +BaseRegressor +""" + +__author__ = ["fkiraly"] +__all__ = ["BasePanelMixin"] + + +import numpy as np +import pandas as pd + +from sktime.base import BaseEstimator +from sktime.datatypes import ( + MTYPE_LIST_PANEL, + MTYPE_LIST_TABLE, + VectorizedDF, + check_is_error_msg, + check_is_scitype, + convert, +) +from sktime.utils.warnings import warn + + +class BasePanelMixin(BaseEstimator): + """Abstract base class for time series panel tasks, e.g., classifiers, regressors. + + The class contains boilerplate for checks and data conversions, + which are common to multiple descendants, most importantly, + BaseClassifier and BaseRegressor. + """ + + # convenience constant to control which metadata of input data + # are regularly retrieved in input checks + METADATA_REQ_IN_CHECKS = [ + "n_instances", + "has_nans", + "is_univariate", + "is_equal_length", + ] + + # attribute name where vectorized estimators are stored + VECTORIZATION_ATTR = "estimators_" # e.g., classifiers_, regressors_ + + # used in error messages + TASK = "panel data tasks" # e.g., classification, regression + EST_TYPE = "estimator" # e.g., classifier, regressor + EST_TYPE_PLURAL = "estimators" # e.g., classifiers, regressors + + def _vectorize(self, methodname, **kwargs): + """Vectorized/iterated loop over methods of BaseClassifier, BaseRegressor. + + Stores one estimator per loop index. + """ + # retrieve data arguments + X = kwargs.pop("X", None) + y = kwargs.pop("y", None) + + # add some common arguments to kwargs + kwargs["rowname_default"] = self.EST_TYPE_PLURAL + kwargs["colname_default"] = self.EST_TYPE_PLURAL + kwargs["backend"] = self.get_config()["backend:parallel"] + kwargs["backend_params"] = self.get_config()["backend:parallel:params"] + + if methodname == "fit": + self._yvec = y + + ests_ = self._yvec.vectorize_est(self, method="clone", **kwargs) + ests_fit = self._yvec.vectorize_est( + ests_, + method=methodname, + args={"y": y}, + X=X, + **kwargs, + ) + setattr(self, self.VECTORIZATION_ATTR, ests_fit) + return self + else: # methodname == "predict" or methodname == "predict_proba": + ests_ = getattr(self, self.VECTORIZATION_ATTR) + y_pred = self._yvec.vectorize_est( + ests_, + method=methodname, + X=X, + **kwargs, + ) + y_pred = pd.DataFrame( + {str(i): y_pred[col].values[0] for i, col in enumerate(y_pred.columns)} + ) + return y_pred + + def _fit_predict_boilerplate(self, X, y, cv, change_state, method): + """Boilerplate logic for fit_predict and fit_predict_proba.""" + from sklearn.model_selection import KFold + + if isinstance(cv, int): + random_state = getattr(self, "random_state", None) + cv = KFold(cv, random_state=random_state, shuffle=True) + + if change_state: + self.reset() + est = self + else: + est = self.clone() + + if cv is None: + return getattr(est.fit(X, y), method)(X) + elif change_state: + self.fit(X, y) + + # we now know that cv is an sklearn splitter + X, y = self._internal_convert(X, y) + X_metadata = self._check_input( + X, y, return_metadata=self.METADATA_REQ_IN_CHECKS + ) + X_mtype = X_metadata["mtype"] + # Check this estimator can handle characteristics + self._check_capabilities(X_metadata) + + # handle single class case + if len(self._class_dictionary) == 1: + return self._single_class_y_pred(X) + + # Convert data to format easily usable for applying cv + if isinstance(X, np.ndarray): + X = convert( + X, + from_type=X_mtype, + to_type="numpy3D", + as_scitype="Panel", + store_behaviour="freeze", + ) + else: + X = convert( + X, + from_type=X_mtype, + to_type="nested_univ", + as_scitype="Panel", + store_behaviour="freeze", + ) + + if method == "predict_proba": + y_pred = np.empty([len(y), len(np.unique(y))]) + else: + y_pred = np.empty_like(y) + y_pred[:] = -1 + if isinstance(X, np.ndarray): + for tr_idx, tt_idx in cv.split(X): + X_train = X[tr_idx] + X_test = X[tt_idx] + y_train = y[tr_idx] + fitted_est = self.clone().fit(X_train, y_train) + y_pred[tt_idx] = getattr(fitted_est, method)(X_test) + else: + for tr_idx, tt_idx in cv.split(X): + X_train = X.iloc[tr_idx] + X_test = X.iloc[tt_idx] + y_train = y[tr_idx] + fitted_est = self.clone().fit(X_train, y_train) + y_pred[tt_idx] = getattr(fitted_est, method)(X_test) + + return y_pred + + def _check_convert_X_for_predict(self, X): + """Input checks, capability checks, repeated in all predict/score methods. + + Parameters + ---------- + X : any object (to check/convert) + should be of a supported Panel mtype or 2D numpy.ndarray + + Returns + ------- + X: an object of a supported Panel mtype, numpy3D if X was a 2D numpy.ndarray + + Raises + ------ + ValueError if X is of invalid input data type, or there is not enough data + ValueError if the capabilities in self._tags do not handle the data. + """ + X = self._internal_convert(X) + X_metadata = self._check_input(X, return_metadata=self.METADATA_REQ_IN_CHECKS) + X_mtype = X_metadata["mtype"] + # Check that estimator can handle characteristics + self._check_capabilities(X_metadata) + # Convert data as dictated by the estimator tags + X = self._convert_X(X, X_mtype=X_mtype) + + return X + + def _check_capabilities(self, X_metadata): + """Check whether this estimator can handle the data characteristics. + + Parameters + ---------- + missing : boolean, does the data passed to fit contain missing values? + multivariate : boolean, does the data passed to fit contain missing values? + unequal : boolea, do the time series passed to fit have variable lengths? + + Raises + ------ + ValueError if the capabilities in self._tags do not handle the data. + """ + missing = X_metadata["has_nans"] + multivariate = not X_metadata["is_univariate"] + unequal = not X_metadata["is_equal_length"] + + allow_multivariate = self.get_tag("capability:multivariate") + allow_missing = self.get_tag("capability:missing_values") + allow_unequal = self.get_tag("capability:unequal_length") + + self_name = type(self).__name__ + + # identify problems, mismatch of capability and inputs + problems = [] + if missing and not allow_missing: + problems += ["missing values"] + if multivariate and not allow_multivariate: + problems += ["multivariate series"] + if unequal and not allow_unequal: + problems += ["unequal length series"] + + # construct error message + problems_and = " and ".join(problems) + problems_or = " or ".join(problems) + msg = ( + f"Data seen by {self_name} instance has {problems_and}, " + f"but this {self_name} instance cannot handle {problems_or}. " + f"Calls with {problems_or} may result in error or unreliable results." + ) + + # raise exception or warning with message + # if self is composite, raise a warning, since passing could be fine + # see discussion in PR 2366 why + if len(problems) > 0: + if self.is_composite(): + warn(msg, obj=self) + else: + raise ValueError(msg) + + def _convert_X(self, X, X_mtype): + """Convert equal length series from DataFrame to numpy array or vice versa. + + Parameters + ---------- + X : input data for the estimator, any Panel mtype + X_mtype : str, a Panel mtype string, e.g., "pd_multiindex", "numpy3D" + + Returns + ------- + X : input X converted to type in "X_inner_mtype" tag + usually a pd.DataFrame (nested) or 3D np.ndarray + Checked and possibly converted input data + """ + inner_type = self.get_tag("X_inner_mtype") + # convert pd.DataFrame + X = convert( + X, + from_type=X_mtype, + to_type=inner_type, + as_scitype="Panel", + ) + return X + + def _check_y(self, y=None, return_to_mtype=False): + """Check and coerce X/y for fit/transform functions. + + Parameters + ---------- + y : pd.DataFrame, pd.Series or np.ndarray + return_to_mtype : bool + whether to return the mtype of y output + + Returns + ------- + y_inner : object of sktime compatible time series type + can be Series, Panel, Hierarchical + y_metadata : dict + metadata of y, returned by check_is_scitype + y_mtype : str, only returned if return_to_mtype=True + mtype of y_inner, after convert + """ + if y is None: + if return_to_mtype: + return None, None, None + else: + return None, None + + capa_multioutput = self.get_tag("capability:multioutput") + y_inner_mtype = self.get_tag("y_inner_mtype") + + y_valid, y_msg, y_metadata = check_is_scitype( + y, "Table", return_metadata=["is_univariate"] + ) + + if not y_valid: + allowed_msg = ( + f"In {self.TASK}, y must be of a supported type, " + f"for instance 1D or 2D numpy arrays, pd.DataFrame, or pd.Series. " + f"Allowed compatible mtype format specifications are:" + f" {MTYPE_LIST_TABLE} ." + ) + check_is_error_msg( + y_msg, var_name="y", allowed_msg=allowed_msg, raise_exception=True + ) + + y_uni = y_metadata["is_univariate"] + y_mtype = y_metadata["mtype"] + + requires_vectorization = not capa_multioutput and not y_uni + + if requires_vectorization: + y_df = convert( + y, + from_type=y_mtype, + to_type="pd_DataFrame_Table", + as_scitype="Table", + store=self._converter_store_y, + ) + y_vec = VectorizedDF([y_df], iterate_cols=True) + if return_to_mtype: + return y_vec, y_metadata, "pd_DataFrame_Table" + else: + return y_vec, y_metadata + + y_inner, y_inner_mtype = convert( + y, + from_type=y_mtype, + to_type=y_inner_mtype, + as_scitype="Table", + store=self._converter_store_y, + return_to_mtype=True, + ) + + if return_to_mtype: + return y_inner, y_metadata, y_inner_mtype + else: + return y_inner, y_metadata + + def _convert_output_y(self, y): + """Convert output y to original format. + + Parameters + ---------- + y : np.ndarray or pd.DataFrame + output to convert + + Returns + ------- + y : np.ndarray or pd.DataFrame + """ + # for consistency with legacy behaviour: + # output is coerced to numpy1D in case of univariate output + if not self._y_metadata["is_univariate"]: + output_mtype = self._y_metadata["mtype"] + converter_store = self._converter_store_y + else: + output_mtype = "numpy1D" + converter_store = None + + # inner return mtype is what we convert from + # special treatment for 1D numpy array + # this can be returned in composites due to + # current downwards compatible choice "1D return is always numpy" + if isinstance(y, np.ndarray) and y.ndim == 1: + inner_return_mtype = "numpy1D" + else: + inner_return_mtype = self._y_inner_mtype + + y = convert( + y, + from_type=inner_return_mtype, + to_type=output_mtype, + as_scitype="Table", + store=converter_store, + store_behaviour="freeze", + ) + return y + + def _check_input(self, X, y=None, enforce_min_instances=1, return_metadata=True): + """Check whether input X and y are valid formats with minimum data. + + Raises a ValueError if the input is not valid. + + Parameters + ---------- + X : check whether conformant with any sktime Panel mtype specification + y : check whether a pd.Series or np.array + enforce_min_instances : int, optional (default=1) + check there are a minimum number of instances. + return_metadata : bool, str, or list of str + metadata fields to return with X_metadata, input to check_is_scitype + + Returns + ------- + metadata : dict with metadata for X returned by datatypes.check_is_scitype + + Raises + ------ + ValueError + If y or X is invalid input data type, or there is not enough data + """ + # Check X is valid input type and recover the data characteristics + X_valid, msg, X_metadata = check_is_scitype( + X, scitype="Panel", return_metadata=return_metadata + ) + # raise informative error message if X is in wrong format + allowed_msg = ( + f"Allowed scitypes for {self.EST_TYPE_PLURAL} are Panel mtypes, " + f"for instance a pandas.DataFrame with MultiIndex and last(-1) " + f"level an sktime compatible time index. " + f"Allowed compatible mtype format specifications are: {MTYPE_LIST_PANEL} ." + ) + if not X_valid: + check_is_error_msg( + msg, var_name="X", allowed_msg=allowed_msg, raise_exception=True + ) + + n_cases = X_metadata["n_instances"] + if n_cases < enforce_min_instances: + raise ValueError( + f"Minimum number of cases required is {enforce_min_instances} but X " + f"has : {n_cases}" + ) + + # Check y if passed + if y is not None: + # Check y valid input + if not isinstance(y, (pd.Series, pd.DataFrame, np.ndarray)): + raise ValueError( + "y must be a np.array or a pd.Series or pd.DataFrame, but found ", + f"type: {type(y)}", + ) + # Check matching number of labels + n_labels = y.shape[0] + if n_cases != n_labels: + raise ValueError( + f"Mismatch in number of cases. Number in X = {n_cases} nos in y = " + f"{n_labels}" + ) + if isinstance(y, np.ndarray): + if y.ndim > 2: + raise ValueError( + f"np.ndarray y must be 1-dimensional or 2-dimensional, " + f"but found {y.ndim} dimensions" + ) + # warn if only a single class label is seen + # this should not raise exception since this can occur by train subsampling + if len(np.unique(y)) == 1: + warn( + "only single label seen in y passed to " + f"fit of {self.EST_TYPE} {type(self).__name__}", + obj=self, + ) + + return X_metadata + + def _internal_convert(self, X, y=None): + """Convert X and y if necessary as a user convenience. + + Convert X to a 3D numpy array if already a 2D and convert y into an 1D numpy + array if passed as a Series. + + Parameters + ---------- + X : an object of a supported Panel mtype, or 2D numpy.ndarray + y : np.ndarray or pd.Series + + Returns + ------- + X: an object of a supported Panel mtype, numpy3D if X was a 2D numpy.ndarray + y: np.ndarray + """ + if isinstance(X, np.ndarray): + # Temporary fix to insist on 3D numpy. For univariate problems, + # most panel estimators simply convert back to 2D. This squeezing should be + # done here, but touches a lot of files, so will get this to work first. + if X.ndim == 2: + X = X.reshape(X.shape[0], 1, X.shape[1]) + if y is None: + return X + return X, y diff --git a/sktime/base/_meta.py b/sktime/base/_meta.py index 78901df761d..6e49c88e68a 100644 --- a/sktime/base/_meta.py +++ b/sktime/base/_meta.py @@ -97,29 +97,31 @@ def is_composite(self): def _get_params(self, attr, deep=True, fitted=False): if fitted: - method = "_get_fitted_params" - methodd = "get_fitted_params" + private_method = "_get_fitted_params" + public_method = "get_fitted_params" deepkw = {} else: - method = "get_params" - methodd = "get_params" + private_method = "get_params" + public_method = "get_params" deepkw = {"deep": deep} - out = getattr(super(), method)(**deepkw) + out = getattr(super(), private_method)(**deepkw) if deep and hasattr(self, attr): estimators = getattr(self, attr) estimators = [(x[0], x[1]) for x in estimators] out.update(estimators) for name, estimator in estimators: # checks estimator has the method we want to call - cond1 = hasattr(estimator, methodd) + cond1 = hasattr(estimator, public_method) # checks estimator is fitted if calling get_fitted_params is_fitted = hasattr(estimator, "is_fitted") and estimator.is_fitted # if we call get_params and not get_fitted_params, this is True cond2 = not fitted or is_fitted # check both conditions together if cond1 and cond2: - for key, value in getattr(estimator, methodd)(**deepkw).items(): + for key, value in getattr(estimator, public_method)( + **deepkw + ).items(): out[f"{name}__{key}"] = value return out diff --git a/sktime/base/_panel/__init__.py b/sktime/base/_panel/__init__.py new file mode 100644 index 00000000000..a4a06b1b7fa --- /dev/null +++ b/sktime/base/_panel/__init__.py @@ -0,0 +1,2 @@ +# copyright: sktime developers, BSD-3-Clause License (see LICENSE file) +"""Base classes for specific panel estimators, e.g., classifier and regressor.""" diff --git a/sktime/base/_panel/forest/__init__.py b/sktime/base/_panel/forest/__init__.py new file mode 100644 index 00000000000..73964dae484 --- /dev/null +++ b/sktime/base/_panel/forest/__init__.py @@ -0,0 +1,2 @@ +# copyright: sktime developers, BSD-3-Clause License (see LICENSE file) +"""Base classes for time series forests.""" diff --git a/sktime/series_as_features/base/estimators/_ensemble.py b/sktime/base/_panel/forest/_composable.py similarity index 99% rename from sktime/series_as_features/base/estimators/_ensemble.py rename to sktime/base/_panel/forest/_composable.py index 4ebce281df0..232b9270a7e 100644 --- a/sktime/series_as_features/base/estimators/_ensemble.py +++ b/sktime/base/_panel/forest/_composable.py @@ -22,7 +22,6 @@ from sklearn.exceptions import DataConversionWarning from sklearn.utils import check_array, check_random_state, compute_sample_weight -from sktime.transformations.panel.summarize import RandomIntervalFeatureExtractor from sktime.utils.random_state import set_random_state from sktime.utils.warnings import warn @@ -292,6 +291,9 @@ def feature_importances_(self, normalise_time_points=False): # assumes particular structure of clf, # with each tree consisting of a particular pipeline, # as in modular tsf + from sktime.transformations.panel.summarize import ( + RandomIntervalFeatureExtractor, + ) if not isinstance( self.estimators_[0].steps[0][1], RandomIntervalFeatureExtractor diff --git a/sktime/series_as_features/base/estimators/interval_based/_tsf.py b/sktime/base/_panel/forest/_tsf.py similarity index 53% rename from sktime/series_as_features/base/estimators/interval_based/_tsf.py rename to sktime/base/_panel/forest/_tsf.py index c7892df8bcc..cb2ed3c59d2 100644 --- a/sktime/series_as_features/base/estimators/interval_based/_tsf.py +++ b/sktime/base/_panel/forest/_tsf.py @@ -1,4 +1,4 @@ -"""Time Series Forest (TSF) Classifier.""" +"""Time Series Forest (TSF) classifier base class.""" __author__ = [ "TonyBagnall", @@ -16,13 +16,14 @@ ] import math +from typing import Optional import numpy as np from joblib import Parallel, delayed +from numpy.random import RandomState from sklearn.utils.multiclass import class_distribution from sklearn.utils.validation import check_random_state -from sktime.base._base import _clone_estimator from sktime.utils.slope_and_trend import _slope from sktime.utils.validation import check_n_jobs @@ -35,6 +36,7 @@ def __init__( min_interval=3, n_estimators=200, n_jobs=1, + inner_series_length: Optional[int] = None, random_state=None, ): super().__init__( @@ -46,6 +48,7 @@ def __init__( self.n_estimators = n_estimators self.min_interval = min_interval self.n_jobs = n_jobs + self.inner_series_length = inner_series_length # The following set in method fit self.n_classes = 0 self.series_length = 0 @@ -88,6 +91,8 @@ def _fit(self, X, y): self : object An fitted instance of the classifier """ + from sktime.base._base import _clone_estimator + X = X.squeeze(1) n_instances, self.series_length = X.shape @@ -105,7 +110,13 @@ def _fit(self, X, y): self.min_interval = self.series_length self.intervals_ = [ - _get_intervals(self.n_intervals, self.min_interval, self.series_length, rng) + _get_intervals( + self.n_intervals, + self.min_interval, + self.series_length, + rng, + self.inner_series_length, + ) for _ in range(self.n_estimators) ] @@ -127,6 +138,102 @@ def _get_fitted_params(self): } +def _get_intervals( + n_intervals: int, + min_interval: int, + series_length: int, + rng: RandomState, + inner_series_length: Optional[int] = None, +) -> np.ndarray: + """Generate random intervals for given parameters. + + Parameters + ---------- + n_intervals : int + Number of intervals to generate. + min_interval : int + Minimum length of an interval. + series_length : int + Length of the series. + rng : RandomState + Random number generator. + inner_series_length : int, optional (default=None) + Length of the inner series, define the maximum of an interval + and forces intervals to be contained in disjoint segments of + length inner_series_length. If None, defaults to series_length. + + Returns + ------- + intervals_starts_and_end_matrix : np.ndarray + Matrix of shape (n_intervals, 2) where each row represents an + interval and contains its start and end. + """ + interval_max_length = ( + series_length if inner_series_length is None else inner_series_length + ) + capped_min_interval = ( + interval_max_length if min_interval >= interval_max_length else min_interval + ) + number_of_inner_intervals = series_length // interval_max_length + intervals_starts_and_end_matrix = np.zeros((n_intervals, 2), dtype=int) + for interval_index in range(n_intervals): + inner_intervals_step = rng.randint(number_of_inner_intervals) + current_interval_start = ( + inner_intervals_step * interval_max_length + + rng.randint(max(1, interval_max_length - capped_min_interval)) + ) + current_interval_length = compute_interval_length( + capped_min_interval, + current_interval_start, + inner_intervals_step, + interval_max_length, + rng, + ) + current_interval_end = current_interval_start + current_interval_length + intervals_starts_and_end_matrix[interval_index, :] = [ + current_interval_start, + current_interval_end, + ] + return intervals_starts_and_end_matrix + + +def compute_interval_length( + capped_min_interval: int, + current_interval_start: int, + inner_intervals_step: int, + interval_max_length: int, + rng: RandomState, +) -> int: + if ( + capped_min_interval + < interval_max_length * (inner_intervals_step + 1) - current_interval_start + ): + current_interval_length = max( + capped_min_interval, + rng.randint( + interval_max_length * (inner_intervals_step + 1) + - current_interval_start + - 1, + ), + ) + elif ( + capped_min_interval + == interval_max_length * (inner_intervals_step + 1) - current_interval_start + ): + current_interval_length = capped_min_interval + else: + highest_possible_interval_length = ( + interval_max_length * (inner_intervals_step + 1) - current_interval_start + ) + raise ValueError( + f"low({capped_min_interval}) > " + f"high({highest_possible_interval_length}): " + f"Decrease capped_min_interval({capped_min_interval}) " + f"or increase interval_max_length({interval_max_length})" + ) + return current_interval_length + + def _transform(X, intervals): """Transform X for given intervals. @@ -159,18 +266,6 @@ def _transform(X, intervals): return transformed_x.T -def _get_intervals(n_intervals, min_interval, series_length, rng): - """Generate random intervals for given parameters.""" - intervals = np.zeros((n_intervals, 2), dtype=int) - for j in range(n_intervals): - intervals[j][0] = rng.randint(series_length - min_interval) - length = rng.randint(series_length - intervals[j][0] - 1) - if length < min_interval: - length = min_interval - intervals[j][1] = intervals[j][0] + length - return intervals - - def _fit_estimator(estimator, X, y, intervals): """Fit an estimator on input data (X, y).""" transformed_x = _transform(X, intervals) diff --git a/sktime/base/_panel/forest/tests/__init__.py b/sktime/base/_panel/forest/tests/__init__.py new file mode 100644 index 00000000000..cd2e2f3c149 --- /dev/null +++ b/sktime/base/_panel/forest/tests/__init__.py @@ -0,0 +1 @@ +"""Tests for time series forests.""" diff --git a/sktime/series_as_features/base/estimators/tests/test_feature_importances_.py b/sktime/base/_panel/forest/tests/test_feature_importances_.py similarity index 98% rename from sktime/series_as_features/base/estimators/tests/test_feature_importances_.py rename to sktime/base/_panel/forest/tests/test_feature_importances_.py index 8ec6c26f094..2983c6249c9 100644 --- a/sktime/series_as_features/base/estimators/tests/test_feature_importances_.py +++ b/sktime/base/_panel/forest/tests/test_feature_importances_.py @@ -14,8 +14,6 @@ from sktime.transformations.series.adapt import TabularToSeriesAdaptor from sktime.utils._testing.panel import make_classification_problem -X_train, y_train = make_classification_problem() - @pytest.mark.xfail(reason="array dimension mismatch since 1.2.0, see #3930") def test_feature_importances_single_feature_interval_and_estimator(): @@ -41,6 +39,8 @@ def test_feature_importances_single_feature_interval_and_estimator(): clf1 = ComposableTimeSeriesForestClassifier( estimator=base_estimator, random_state=random_state, n_estimators=1 ) + + X_train, y_train = make_classification_problem() clf1.fit(X_train, y_train) # Extract the interval and the estimator, and compute using pipelines @@ -100,6 +100,8 @@ def test_feature_importances_multi_intervals_estimators(n_intervals, n_estimator clf1 = ComposableTimeSeriesForestClassifier( estimator=base_estimator, random_state=random_state, n_estimators=n_estimators ) + + X_train, y_train = make_classification_problem() clf1.fit(X_train, y_train) fi_expected = np.zeros([n_estimators, n_intervals * n_features]) diff --git a/sktime/base/_panel/forest/tests/test_tsf.py b/sktime/base/_panel/forest/tests/test_tsf.py new file mode 100644 index 00000000000..6620120c42d --- /dev/null +++ b/sktime/base/_panel/forest/tests/test_tsf.py @@ -0,0 +1,199 @@ +"""Tests for get intervals in time series forests.""" +from typing import Optional + +import numpy as np +import pytest +from numpy.random import RandomState + +from sktime.base._panel.forest._tsf import _get_intervals + + +@pytest.mark.parametrize( + "number_of_intervals, min_interval, " + "number_of_series, inner_series_length, expected_intervals", + [(4, 3, 4, 6, np.array([[12, 17], [0, 3], [3, 10], [9, 12]]))], +) +def test_get_intervals( + number_of_intervals: int, + min_interval: int, + number_of_series: int, + inner_series_length: int, + expected_intervals: np.ndarray, +): + """Test get intervals.""" + # given + given_n_intervals = number_of_intervals + given_min_interval = min_interval + given_series_length = inner_series_length * number_of_series + given_rng: RandomState = RandomState(0) + + # When + intervals = _get_intervals( + n_intervals=given_n_intervals, + min_interval=given_min_interval, + series_length=given_series_length, + rng=given_rng, + ) + + # Then + assert np.array_equal(intervals, expected_intervals) + + +@pytest.mark.parametrize("number_of_intervals", [2, 5]) +@pytest.mark.parametrize("min_interval", [3, 10, 30]) +@pytest.mark.parametrize("inner_series_length", [10, 30, 100]) +@pytest.mark.parametrize("number_of_series", [3, 4, 10]) +def test_get_intervals_should_produce_as_much_interval_as_given( + number_of_intervals: int, + min_interval: int, + inner_series_length: int, + number_of_series: int, +): + """Test get_intervals should produce as much interval as given.""" + # given + given_n_intervals = number_of_intervals + given_min_interval = min_interval + given_series_length = inner_series_length * number_of_series + given_rng: RandomState = RandomState(42) + given_inner_series_length: Optional[int] = inner_series_length + + # When + intervals = _get_intervals( + n_intervals=given_n_intervals, + min_interval=given_min_interval, + series_length=given_series_length, + rng=given_rng, + inner_series_length=given_inner_series_length, + ) + + # Then + assert len(intervals) == given_n_intervals + + +@pytest.mark.parametrize("number_of_intervals", [2, 5]) +@pytest.mark.parametrize("min_interval", [3, 10, 30]) +@pytest.mark.parametrize("inner_series_length", [100]) +@pytest.mark.parametrize("number_of_series", [3, 4, 10]) +def test_get_intervals_at_least_greater_than_min_interval_given( + number_of_intervals: int, + min_interval: int, + inner_series_length: int, + number_of_series: int, +): + """Test get_intervals should at least greater than min interval given.""" + # given + given_n_intervals = number_of_intervals + given_min_interval = min_interval + given_series_length = inner_series_length * number_of_series + given_rng: RandomState = RandomState(42) + given_inner_series_length: Optional[int] = inner_series_length + + # When + intervals = _get_intervals( + n_intervals=given_n_intervals, + min_interval=given_min_interval, + series_length=given_series_length, + rng=given_rng, + inner_series_length=given_inner_series_length, + ) + + # Then + assert all((intervals[:, 1] - intervals[:, 0]) >= given_min_interval) + + +@pytest.mark.parametrize("number_of_intervals", [2, 5]) +@pytest.mark.parametrize("min_interval", [30, 50]) +@pytest.mark.parametrize("inner_series_length", [10, 20]) +@pytest.mark.parametrize("number_of_series", [3, 4, 10]) +def test_get_intervals_equals_to_inner_series_length_given_too_high_min_interval( + number_of_intervals: int, + min_interval: int, + inner_series_length: int, + number_of_series: int, +): + """Test get_intervals equals to inner series length given.""" + # given + given_n_intervals = number_of_intervals + given_min_interval = min_interval + given_series_length = inner_series_length * number_of_series + given_rng: RandomState = RandomState(42) + given_inner_series_length: Optional[int] = inner_series_length + + # When + intervals = _get_intervals( + n_intervals=given_n_intervals, + min_interval=given_min_interval, + series_length=given_series_length, + rng=given_rng, + inner_series_length=given_inner_series_length, + ) + + # Then + assert all((intervals[:, 1] - intervals[:, 0]) == given_inner_series_length) + + +@pytest.mark.parametrize("number_of_intervals", [2, 5]) +@pytest.mark.parametrize("min_interval", [3, 10, 30]) +@pytest.mark.parametrize("inner_series_length", [10, 30, 100]) +@pytest.mark.parametrize("number_of_series", [3, 4, 10]) +def test_get_intervals_should_produce_valid_intervals( + number_of_intervals: int, + min_interval: int, + inner_series_length: int, + number_of_series: int, +): + """Tests get_intervals should produce valid intervals.""" + # given + given_n_intervals = number_of_intervals + given_min_interval = min_interval + given_series_length = inner_series_length * number_of_series + given_rng: RandomState = RandomState(42) + given_inner_series_length: Optional[int] = inner_series_length + + # When + intervals = _get_intervals( + n_intervals=given_n_intervals, + min_interval=given_min_interval, + series_length=given_series_length, + rng=given_rng, + inner_series_length=given_inner_series_length, + ) + + # Then + assert np.min(intervals) >= 0 + assert np.max(intervals) <= given_series_length + + +@pytest.mark.parametrize("number_of_intervals", [2, 5]) +@pytest.mark.parametrize("min_interval", [3, 10, 30]) +@pytest.mark.parametrize("inner_series_length", [10, 30, 100]) +@pytest.mark.parametrize("number_of_series", [3, 4, 10]) +def test_get_intervals_should_produce_intervals_contained_in_inner_series_bins( + number_of_intervals: int, + min_interval: int, + inner_series_length: int, + number_of_series: int, +): + """Tests get_intervals should produce intervals contained in inner series bins.""" + # given + given_n_intervals = number_of_intervals + given_min_interval = min_interval + given_series_length = inner_series_length * number_of_series + given_rng: RandomState = RandomState(42) + given_inner_series_length: Optional[int] = inner_series_length + + # When + intervals = _get_intervals( + n_intervals=given_n_intervals, + min_interval=given_min_interval, + series_length=given_series_length, + rng=given_rng, + inner_series_length=given_inner_series_length, + ) + + # Then + assert all( + intervals[:, 0] + <= (intervals[:, 0] // given_inner_series_length + 1) + * given_inner_series_length + ) diff --git a/sktime/base/_serialize.py b/sktime/base/_serialize.py index 5f2da84c4f4..b0aee1cd5d8 100644 --- a/sktime/base/_serialize.py +++ b/sktime/base/_serialize.py @@ -34,7 +34,7 @@ def load(serial): Examples -------- - Example 1: saving an estimator as pickle and loading + Example 1: saving an estimator in-memory and loading it back >>> from sktime.datasets import load_airline >>> from sktime.forecasting.naive import NaiveForecaster @@ -79,6 +79,30 @@ def load(serial): >>> # 4. continue using the loaded estimator >>> pred = cnn.predict(X=sample_test_X) # doctest: +SKIP >>> loaded_pred = loaded_cnn.predict(X=sample_test_X) # doctest: +SKIP + + Example 3: saving an estimator using cloudpickle's serialization functionality + and loading it back + Note: `cloudpickle` is a soft dependency and is not present + with the base-installation. + + >>> from sktime.classification.feature_based import Catch22Classifier + >>> from sktime.datasets import load_basic_motions # doctest: +SKIP + >>> + >>> # 1. Fit the estimator + >>> X_train, y_train = load_basic_motions(split="TRAIN") # doctest: +SKIP + >>> X_test, y_test = load_basic_motions(split="TEST") # doctest: +SKIP + >>> est = Catch22Classifier().fit(X_train, y_train) # doctest: +SKIP + >>> + >>> # 2. save the fitted estimator + >>> cpkl_serialized = est.save(serialization_format="cloudpickle") # doctest: +SKIP + >>> + >>> # 3. load the saved estimator (possibly after sending it across a stream) + >>> from sktime.base import load # doctest: +SKIP + >>> loaded_est = load(cpkl_serialized) # doctest: +SKIP + >>> + >>> # 4. continue using the estimator as normal + >>> pred = loaded_est.predict(X_test) # doctest: +SKIP + >>> loaded_pred = loaded_est.predict(X_test) # doctest: +SKIP """ import pickle from pathlib import Path diff --git a/sktime/base/tests/test_base.py b/sktime/base/tests/test_base.py index 487d9d2663e..ca27a92327d 100644 --- a/sktime/base/tests/test_base.py +++ b/sktime/base/tests/test_base.py @@ -29,6 +29,8 @@ "test_reset", "test_reset_composite", "test_components", + "test_param_alias", + "test_nested_set_params_and_alias", "test_get_fitted_params", "test_eq_dunder", ] @@ -38,6 +40,7 @@ import pytest from sktime.base import BaseEstimator, BaseObject +from sktime.utils.validation._dependencies import _check_soft_dependencies # Fixture class for testing tag system @@ -280,6 +283,109 @@ def test_components(): assert comp_comps["foo_"] is not composite.foo +class AliasTester(BaseObject): + def __init__(self, a, bar=42): + self.a = a + self.bar = bar + + +@pytest.mark.skipif( + _check_soft_dependencies("skbase<0.6.1", severity="none"), + reason="aliasing was introduced in skbase 0.6.1", +) +def test_param_alias(): + """Tests parameter aliasing with parameter string shorthands. + + Raises + ------ + AssertionError if parameters that should be set via __ are not set + AssertionError if error that should be raised is not raised + """ + non_composite = AliasTester(a=42, bar=4242) + composite = CompositionDummy(foo=non_composite) + + # this should write to a of foo, because there is only one suffix called a + composite.set_params(**{"a": 424242}) + assert composite.get_params()["foo__a"] == 424242 + + # this should write to bar of composite, because "bar" is a full parameter string + # there is a suffix in foo, but if the full string is there, it writes to that + composite.set_params(**{"bar": 424243}) + assert composite.get_params()["bar"] == 424243 + + # trying to write to bad_param should raise an exception + # since bad_param is neither a suffix nor a full parameter string + with pytest.raises(ValueError, match=r"Invalid parameter keys provided to"): + composite.set_params(**{"bad_param": 424242}) + + # new example: highly nested composite with identical suffixes + non_composite1 = composite + non_composite2 = AliasTester(a=42, bar=4242) + uber_composite = CompositionDummy(foo=non_composite1, bar=non_composite2) + + # trying to write to a should raise an exception + # since there are two suffix a, and a is not a full parameter string + with pytest.raises(ValueError, match=r"does not uniquely determine parameter key"): + uber_composite.set_params(**{"a": 424242}) + + # same as above, should overwrite "bar" of uber_composite + uber_composite.set_params(**{"bar": 424243}) + assert uber_composite.get_params()["bar"] == 424243 + + +@pytest.mark.skipif( + _check_soft_dependencies("skbase<0.6.1", severity="none"), + reason="aliasing was introduced in skbase 0.6.1", +) +def test_nested_set_params_and_alias(): + """Tests that nested param setting works correctly. + + This specifically tests that parameters of components can be provided, + even if that component is not present in the object that set_params is called on, + but is also being set in the same set_params call. + + Also tests alias resolution, using recursive end state after set_params. + + Raises + ------ + AssertionError if parameters that should be set via __ are not set + AssertionError if error that should be raised is not raised + """ + non_composite = AliasTester(a=42, bar=4242) + composite = CompositionDummy(foo=0) + + # this should write to a of foo + # potential error here is that composite does not have foo__a to start with + # so error catching or writing foo__a to early could cause an exception + composite.set_params(**{"foo": non_composite, "foo__a": 424242}) + assert composite.get_params()["foo__a"] == 424242 + + non_composite = AliasTester(a=42, bar=4242) + composite = CompositionDummy(foo=0) + + # same, and recognizing that foo__a is the only matching suffix in the end state + composite.set_params(**{"foo": non_composite, "a": 424242}) + assert composite.get_params()["foo__a"] == 424242 + + # new example: highly nested composite with identical suffixes + non_composite1 = composite + non_composite2 = AliasTester(a=42, bar=4242) + uber_composite = CompositionDummy(foo=42, bar=42) + + # trying to write to a should raise an exception + # since there are two suffix a, and a is not a full parameter string + with pytest.raises(ValueError, match=r"does not uniquely determine parameter key"): + uber_composite.set_params( + **{"a": 424242, "foo": non_composite1, "bar": non_composite2} + ) + + uber_composite = CompositionDummy(foo=non_composite1, bar=42) + + # same as above, should overwrite "bar" of uber_composite + uber_composite.set_params(**{"bar": 424243}) + assert uber_composite.get_params()["bar"] == 424243 + + class FittableCompositionDummy(BaseEstimator): """Potentially composite object, for testing.""" diff --git a/sktime/benchmarking/evaluation.py b/sktime/benchmarking/evaluation.py index 60e5f6cf87e..30dac6b1776 100644 --- a/sktime/benchmarking/evaluation.py +++ b/sktime/benchmarking/evaluation.py @@ -111,7 +111,7 @@ def evaluate(self, metric, train_or_test="test", cv_fold="all"): # aggregate over cv folds metrics_by_strategy_dataset = ( self._metrics.groupby(["dataset", "strategy"], as_index=False) - .agg(np.mean) + .agg("mean") .drop(columns="cv_fold") ) self._metrics_by_strategy_dataset = self._metrics_by_strategy_dataset.merge( @@ -123,7 +123,7 @@ def evaluate(self, metric, train_or_test="test", cv_fold="all"): ) metrics_by_strategy = metrics_by_strategy_dataset_wo_ds.groupby( ["strategy"], as_index=False - ).agg(np.mean) + ).agg("mean") self._metrics_by_strategy = self._metrics_by_strategy.merge( metrics_by_strategy, how="outer" ) @@ -452,15 +452,7 @@ def fit_runtime(self, unit="s", train_or_test="test", cv_fold="all"): ) # load all predictions - run_times = pd.DataFrame( - columns=[ - "strategy_name", - "dataset_name", - "fit_estimator_start_time", - "fit_estimator_end_time", - "cv_fold", - ] - ) + run_times_frames = [] for cv_fold in cv_folds: for result in self.results.load_predictions( cv_fold=cv_fold, train_or_test=train_or_test @@ -483,7 +475,8 @@ def fit_runtime(self, unit="s", train_or_test="test", cv_fold="all"): "cv_fold": [cv_fold], } ) - run_times = pd.concat([run_times, unwrapped], ignore_index=True) + run_times_frames.append(unwrapped) + run_times = pd.concat(run_times_frames, ignore_index=True) # calculate run time difference run_times["fit_runtime"] = ( @@ -521,7 +514,7 @@ def fit_runtime(self, unit="s", train_or_test="test", cv_fold="all"): # # aggregate over cv folds # metrics_by_strategy_dataset = ( # self._metrics.groupby(["dataset", "strategy"], as_index=False) - # .agg(np.mean) + # .agg("mean") # .drop(columns="cv_fold") # ) # self._metrics_by_strategy_dataset = self._metrics_by_strategy_dataset.merge( @@ -530,7 +523,7 @@ def fit_runtime(self, unit="s", train_or_test="test", cv_fold="all"): # # aggregate over cv folds and datasets # metrics_by_strategy = metrics_by_strategy_dataset.groupby( # ["strategy"], as_index=False - # ).agg(np.mean) + # ).agg("mean") # self._metrics_by_strategy = self._metrics_by_strategy.merge( # metrics_by_strategy, how="outer" # ) diff --git a/sktime/benchmarking/tests/test_orchestration.py b/sktime/benchmarking/tests/test_orchestration.py index e091273c229..02f75331975 100644 --- a/sktime/benchmarking/tests/test_orchestration.py +++ b/sktime/benchmarking/tests/test_orchestration.py @@ -81,6 +81,7 @@ def test_automated_orchestration_vs_manual(data_loader): # extensive tests of orchestration and metric evaluation against sklearn +@pytest.mark.skip(reason="failures since sklearn 1.4.0, see #5797") @pytest.mark.parametrize( "dataset", [ @@ -193,8 +194,8 @@ def test_stat(): _, sign_test_df = analyse.sign_test() sign_array = [ - [sign_test_df["pf"][0], sign_test_df["pf"][1]], - [sign_test_df["tsf"][0], sign_test_df["tsf"][1]], + [sign_test_df["pf"].iloc[0], sign_test_df["pf"].iloc[1]], + [sign_test_df["tsf"].iloc[0], sign_test_df["tsf"].iloc[1]], ] sign_array_test = [[1, 1], [1, 1]] np.testing.assert_equal( diff --git a/sktime/classification/base.py b/sktime/classification/base.py index b5167b993e9..f853a7c4c4a 100644 --- a/sktime/classification/base.py +++ b/sktime/classification/base.py @@ -23,20 +23,17 @@ class name: BaseClassifier __author__ = ["mloning", "fkiraly", "TonyBagnall", "MatthewMiddlehurst"] import time -from abc import ABC, abstractmethod import numpy as np -import pandas as pd -from sktime.base import BaseEstimator -from sktime.datatypes import check_is_scitype, convert_to +from sktime.base import BasePanelMixin +from sktime.datatypes import VectorizedDF, check_is_scitype, convert from sktime.utils.sklearn import is_sklearn_transformer from sktime.utils.validation import check_n_jobs from sktime.utils.validation._dependencies import _check_estimator_deps -from sktime.utils.warnings import warn -class BaseClassifier(BaseEstimator, ABC): +class BaseClassifier(BasePanelMixin): """Abstract base class for time series classifiers. The base classifier specifies the methods and method signatures that all @@ -55,7 +52,9 @@ class BaseClassifier(BaseEstimator, ABC): _tags = { "object_type": "classifier", # type of object "X_inner_mtype": "numpy3D", # which type do _fit/_predict, support for X? + "y_inner_mtype": "numpy1D", # which type do _fit/_predict, support for y? # it should be either "numpy3D" or "nested_univ" (nested pd.DataFrame) + "capability:multioutput": False, # whether classifier supports multioutput "capability:multivariate": False, "capability:unequal_length": False, "capability:missing_values": False, @@ -65,6 +64,8 @@ class BaseClassifier(BaseEstimator, ABC): "capability:predict_proba": False, "python_version": None, # PEP 440 python version specifier to limit versions "requires_cython": False, # whether C compiler is required in env, e.g., gcc + "authors": "sktime developers", # author(s) of the object + "maintainers": "sktime developers", # current maintainer(s) of the object } # convenience constant to control which metadata of input data @@ -76,6 +77,14 @@ class BaseClassifier(BaseEstimator, ABC): "is_equal_length", ] + # attribute name where vectorized estimators are stored + VECTORIZATION_ATTR = "classifiers_" # e.g., classifiers_, regressors_ + + # used in error messages + TASK = "classification" # e.g., classification, regression + EST_TYPE = "classifier" # e.g., classifier, regressor + EST_TYPE_PLURAL = "classifiers" # e.g., classifiers, regressors + def __init__(self): # reserved attributes written to in fit self.classes_ = [] # classes seen in y, unique labels @@ -88,6 +97,8 @@ def __init__(self): # required for compatibility with some sklearn interfaces # i.e. CalibratedClassifierCV self._estimator_type = "classifier" + self._is_vectorized = False + self._converter_store_y = {} super().__init__() _check_estimator_deps(self) @@ -131,48 +142,72 @@ def __rmul__(self, other): return NotImplemented def fit(self, X, y): - """Fit time series classifier to training data. + """ + Fit time series classifier to training data. + + State change: + Changes state to "fitted". + + Writes to self: + Sets self.is_fitted to True. + Sets fitted model attributes ending in "_". Parameters ---------- - X : 3D np.array (any number of dimensions, equal length series) - of shape [n_instances, n_dimensions, series_length] - or 2D np.array (univariate, equal length series) - of shape [n_instances, series_length] - or pd.DataFrame with each column a dimension, each cell a pd.Series - (any number of dimensions, equal or unequal length series) + X : sktime compatible time series panel data container, Panel scitype, e.g., + pd-multiindex: pd.DataFrame with columns = variables, + index = pd.MultiIndex with first level = instance indices, + second level = time indices + numpy3D: 3D np.array (any number of dimensions, equal length series) + of shape [n_instances, n_dimensions, series_length] or of any other supported Panel mtype - for list of mtypes, see datatypes.SCITYPE_REGISTER - for specifications, see examples/AA_datatypes_and_datasets.ipynb - y : 1D np.array of int, of shape [n_instances] - class labels for fitting - indices correspond to instance indices in X + for list of mtypes, see datatypes.SCITYPE_REGISTER + for specifications, see examples/AA_datatypes_and_datasets.ipynb + y : sktime compatible tabular data container, Table scitype + 1D iterable, of shape [n_instances] + or 2D iterable, of shape [n_instances, n_dimensions] + class labels for fitting + 0-th indices correspond to instance indices in X + 1-st indices (if applicable) correspond to multioutput vector indices in X + supported sktime types: np.ndarray (1D, 2D), pd.Series, pd.DataFrame Returns ------- self : Reference to self. - - Notes - ----- - Changes state by creating a fitted model that updates attributes - ending in "_" and sets is_fitted flag to True. """ # reset estimator at the start of fit self.reset() + # fit timer start start = int(round(time.time() * 1000)) + + # check and convert y for multioutput vectorization + y, y_metadata, y_inner_mtype = self._check_y(y, return_to_mtype=True) + self._y_metadata = y_metadata + self._y_inner_mtype = y_inner_mtype + self._is_vectorized = isinstance(y, VectorizedDF) + + if self._is_vectorized: + self._vectorize("fit", X=X, y=y) + # fit timer end + self.fit_time_ = int(round(time.time() * 1000)) - start + # this should happen last: fitted state is set to True + self._is_fitted = True + return self + + # no vectorization needed, proceed with normal fit + # convenience conversions to allow user flexibility: # if X is 2D array, convert to 3D, if y is Series, convert to numpy X, y = self._internal_convert(X, y) - X_metadata = self._check_classifier_input( + X_metadata = self._check_input( X, y, return_metadata=self.METADATA_REQ_IN_CHECKS ) - missing = X_metadata["has_nans"] - multivariate = not X_metadata["is_univariate"] - unequal = not X_metadata["is_equal_length"] + X_mtype = X_metadata["mtype"] self._X_metadata = X_metadata # Check this classifier can handle characteristics - self._check_capabilities(missing, multivariate, unequal) + self._check_capabilities(X_metadata) # remember class labels self.classes_ = np.unique(y) @@ -189,7 +224,7 @@ def fit(self, X, y): return self # Convert data as dictated by the classifier tags - X = self._convert_X(X) + X = self._convert_X(X, X_mtype) multithread = self.get_tag("capability:multithreading") if multithread: try: @@ -203,32 +238,42 @@ def fit(self, X, y): self._fit(X, y) self.fit_time_ = int(round(time.time() * 1000)) - start - # this should happen last + # this should happen last: fitted state is set to True self._is_fitted = True return self - def predict(self, X) -> np.ndarray: + def predict(self, X): """Predicts labels for sequences in X. Parameters ---------- - X : 3D np.array (any number of dimensions, equal length series) - of shape [n_instances, n_dimensions, series_length] - or 2D np.array (univariate, equal length series) - of shape [n_instances, series_length] - or pd.DataFrame with each column a dimension, each cell a pd.Series - (any number of dimensions, equal or unequal length series) + X : sktime compatible time series panel data container, Panel scitype, e.g., + pd-multiindex: pd.DataFrame with columns = variables, + index = pd.MultiIndex with first level = instance indices, + second level = time indices + numpy3D: 3D np.array (any number of dimensions, equal length series) + of shape [n_instances, n_dimensions, series_length] or of any other supported Panel mtype - for list of mtypes, see datatypes.SCITYPE_REGISTER - for specifications, see examples/AA_datatypes_and_datasets.ipynb + for list of mtypes, see datatypes.SCITYPE_REGISTER + for specifications, see examples/AA_datatypes_and_datasets.ipynb Returns ------- - y : 1D np.array of int, of shape [n_instances] - predicted class labels - indices correspond to instance indices in X + y_pred : sktime compatible tabular data container, Table scitype + 1D iterable, of shape [n_instances] + or 2D iterable, of shape [n_instances, n_dimensions] + predicted class labels + 0-th indices correspond to instance indices in X + 1-st indices (if applicable) correspond to multioutput vector indices in X + 1D np.npdarray, if y univariate (one dimension) + otherwise, same type as y passed in fit """ self.check_is_fitted() + # enter vectorized mode if needed + if self._is_vectorized: + return self._vectorize("predict", X=X) + # boilerplate input checks for predict-like methods X = self._check_convert_X_for_predict(X) @@ -236,33 +281,42 @@ def predict(self, X) -> np.ndarray: if len(self._class_dictionary) == 1: return self._single_class_y_pred(X, method="predict") - # call internal _predict_proba - return self._predict(X) + # call internal _predict, convert output + y_pred_inner = self._predict(X) + y_pred = self._convert_output_y(y_pred_inner) + return y_pred - def predict_proba(self, X) -> np.ndarray: + def predict_proba(self, X): """Predicts labels probabilities for sequences in X. Parameters ---------- - X : 3D np.array (any number of dimensions, equal length series) - of shape [n_instances, n_dimensions, series_length] - or 2D np.array (univariate, equal length series) - of shape [n_instances, series_length] - or pd.DataFrame with each column a dimension, each cell a pd.Series - (any number of dimensions, equal or unequal length series) + X : sktime compatible time series panel data container, Panel scitype, e.g., + pd-multiindex: pd.DataFrame with columns = variables, + index = pd.MultiIndex with first level = instance indices, + second level = time indices + numpy3D: 3D np.array (any number of dimensions, equal length series) + of shape [n_instances, n_dimensions, series_length] or of any other supported Panel mtype - for list of mtypes, see datatypes.SCITYPE_REGISTER - for specifications, see examples/AA_datatypes_and_datasets.ipynb + for list of mtypes, see datatypes.SCITYPE_REGISTER + for specifications, see examples/AA_datatypes_and_datasets.ipynb Returns ------- - y : 2D array of shape [n_instances, n_classes] - predicted class probabilities - 1st dimension indices correspond to instance indices in X - 2nd dimension indices correspond to possible labels (integers) - (i, j)-th entry is predictive probability that i-th instance is of class j + y_pred : 2D np.array of int, of shape [n_instances, n_classes] + predicted class label probabilities + 0-th indices correspond to instance indices in X + 1-st indices correspond to class index, in same order as in self.classes_ + entries are predictive class probabilities, summing to 1 """ self.check_is_fitted() + # enter vectorized mode if needed + if self._is_vectorized: + return self._vectorize("predict_proba", X=X) + + self.check_is_fitted() + # boilerplate input checks for predict-like methods X = self._check_convert_X_for_predict(X) @@ -273,7 +327,7 @@ def predict_proba(self, X) -> np.ndarray: # call internal _predict_proba return self._predict_proba(X) - def fit_predict(self, X, y, cv=None, change_state=True) -> np.ndarray: + def fit_predict(self, X, y, cv=None, change_state=True): """Fit and predict labels for sequences in X. Convenience method to produce in-sample predictions and @@ -287,17 +341,22 @@ def fit_predict(self, X, y, cv=None, change_state=True) -> np.ndarray: Parameters ---------- - X : 3D np.array (any number of dimensions, equal length series) - of shape [n_instances, n_dimensions, series_length] - or 2D np.array (univariate, equal length series) - of shape [n_instances, series_length] - or pd.DataFrame with each column a dimension, each cell a pd.Series - (any number of dimensions, equal or unequal length series) + X : sktime compatible time series panel data container, Panel scitype, e.g., + pd-multiindex: pd.DataFrame with columns = variables, + index = pd.MultiIndex with first level = instance indices, + second level = time indices + numpy3D: 3D np.array (any number of dimensions, equal length series) + of shape [n_instances, n_dimensions, series_length] or of any other supported Panel mtype - for list of mtypes, see datatypes.SCITYPE_REGISTER - for specifications, see examples/AA_datatypes_and_datasets.ipynb - y : 1D np.array of int, of shape [n_instances] - class labels for fitting - indices correspond to instance indices in X + for list of mtypes, see datatypes.SCITYPE_REGISTER + for specifications, see examples/AA_datatypes_and_datasets.ipynb + y : sktime compatible tabular data container, Table scitype + 1D iterable, of shape [n_instances] + or 2D iterable, of shape [n_instances, n_dimensions] + class labels for fitting + 0-th indices correspond to instance indices in X + 1-st indices (if applicable) correspond to multioutput vector indices in X + supported sktime types: np.ndarray (1D, 2D), pd.Series, pd.DataFrame cv : None, int, or sklearn cross-validation object, optional, default=None None : predictions are in-sample, equivalent to fit(X, y).predict(X) cv : predictions are equivalent to fit(X_train, y_train).predict(X_test) @@ -315,9 +374,14 @@ def fit_predict(self, X, y, cv=None, change_state=True) -> np.ndarray: Returns ------- - y : 1D np.array of int, of shape [n_instances] - predicted class labels - indices correspond to instance indices in X - if cv is passed, -1 indicates entries not seen in union of test sets + y_pred : sktime compatible tabular data container, Table scitype + 1D iterable, of shape [n_instances] + or 2D iterable, of shape [n_instances, n_dimensions] + predicted class labels + 0-th indices correspond to instance indices in X + 1-st indices (if applicable) correspond to multioutput vector indices in X + 1D np.npdarray, if y univariate (one dimension) + otherwise, same type as y passed in fit """ return self._fit_predict_boilerplate( X=X, y=y, cv=cv, change_state=change_state, method="predict" @@ -344,14 +408,12 @@ def _fit_predict_boilerplate(self, X, y, cv, change_state, method): # we now know that cv is an sklearn splitter X, y = self._internal_convert(X, y) - X_metadata = self._check_classifier_input( + X_metadata = self._check_input( X, y, return_metadata=self.METADATA_REQ_IN_CHECKS ) - missing = X_metadata["has_nans"] - multivariate = not X_metadata["is_univariate"] - unequal = not X_metadata["is_equal_length"] + X_mtype = X_metadata["mtype"] # Check this classifier can handle characteristics - self._check_capabilities(missing, multivariate, unequal) + self._check_capabilities(X_metadata) # handle single class case if len(self._class_dictionary) == 1: @@ -359,15 +421,17 @@ def _fit_predict_boilerplate(self, X, y, cv, change_state, method): # Convert data to format easily usable for applying cv if isinstance(X, np.ndarray): - X = convert_to( + X = convert( X, + from_type=X_mtype, to_type="numpy3D", as_scitype="Panel", store_behaviour="freeze", ) else: - X = convert_to( + X = convert( X, + from_type=X_mtype, to_type="nested_univ", as_scitype="Panel", store_behaviour="freeze", @@ -395,25 +459,36 @@ def _fit_predict_boilerplate(self, X, y, cv, change_state, method): return y_pred - def fit_predict_proba(self, X, y, cv=None, change_state=True) -> np.ndarray: + def fit_predict_proba(self, X, y, cv=None, change_state=True): """Fit and predict labels probabilities for sequences in X. Convenience method to produce in-sample predictions and cross-validated out-of-sample predictions. + Writes to self, if change_state=True: + Sets self.is_fitted to True. + Sets fitted model attributes ending in "_". + + Does not update state if change_state=False. + Parameters ---------- - X : 3D np.array (any number of dimensions, equal length series) - of shape [n_instances, n_dimensions, series_length] - or 2D np.array (univariate, equal length series) - of shape [n_instances, series_length] - or pd.DataFrame with each column a dimension, each cell a pd.Series - (any number of dimensions, equal or unequal length series) + X : sktime compatible time series panel data container, Panel scitype, e.g., + pd-multiindex: pd.DataFrame with columns = variables, + index = pd.MultiIndex with first level = instance indices, + second level = time indices + numpy3D: 3D np.array (any number of dimensions, equal length series) + of shape [n_instances, n_dimensions, series_length] or of any other supported Panel mtype - for list of mtypes, see datatypes.SCITYPE_REGISTER - for specifications, see examples/AA_datatypes_and_datasets.ipynb - y : 1D np.array of int, of shape [n_instances] - class labels for fitting - indices correspond to instance indices in X + for list of mtypes, see datatypes.SCITYPE_REGISTER + for specifications, see examples/AA_datatypes_and_datasets.ipynb + y : sktime compatible tabular data container, Table scitype + 1D iterable, of shape [n_instances] + or 2D iterable, of shape [n_instances, n_dimensions] + class labels for fitting + 0-th indices correspond to instance indices in X + 1-st indices (if applicable) correspond to multioutput vector indices in X + supported sktime types: np.ndarray (1D, 2D), pd.Series, pd.DataFrame cv : None, int, or sklearn cross-validation object, optional, default=None None : predictions are in-sample, equivalent to fit(X, y).predict(X) cv : predictions are equivalent to fit(X_train, y_train).predict(X_test) @@ -429,10 +504,11 @@ def fit_predict_proba(self, X, y, cv=None, change_state=True) -> np.ndarray: Returns ------- - y : 2D array of shape [n_instances, n_classes] - predicted class probabilities - 1st dimension indices correspond to instance indices in X - 2nd dimension indices correspond to possible labels (integers) - (i, j)-th entry is predictive probability that i-th instance is of class j + y_pred : 2D np.array of int, of shape [n_instances, n_classes] + predicted class label probabilities + 0-th indices correspond to instance indices in X + 1-st indices correspond to class index, in same order as in self.classes_ + entries are predictive class probabilities, summing to 1 """ return self._fit_predict_boilerplate( X=X, y=y, cv=cv, change_state=change_state, method="predict_proba" @@ -455,17 +531,22 @@ def score(self, X, y) -> float: Parameters ---------- - X : 3D np.array (any number of dimensions, equal length series) - of shape [n_instances, n_dimensions, series_length] - or 2D np.array (univariate, equal length series) - of shape [n_instances, series_length] - or pd.DataFrame with each column a dimension, each cell a pd.Series - (any number of dimensions, equal or unequal length series) + X : sktime compatible time series panel data container, e.g., + pd-multiindex: pd.DataFrame with columns = variables, + index = pd.MultiIndex with first level = instance indices, + second level = time indices + numpy3D: 3D np.array (any number of dimensions, equal length series) + of shape [n_instances, n_dimensions, series_length] or of any other supported Panel mtype - for list of mtypes, see datatypes.SCITYPE_REGISTER - for specifications, see examples/AA_datatypes_and_datasets.ipynb - y : 1D np.ndarray of int, of shape [n_instances] - class labels (ground truth) - indices correspond to instance indices in X + for list of mtypes, see datatypes.SCITYPE_REGISTER + for specifications, see examples/AA_datatypes_and_datasets.ipynb + y : sktime compatible tabular data container, Table scitype + 1D iterable, of shape [n_instances] + or 2D iterable, of shape [n_instances, n_dimensions] + class labels for fitting + 0-th indices correspond to instance indices in X + 1-st indices (if applicable) correspond to multioutput vector indices in X + supported sktime types: np.ndarray (1D, 2D), pd.Series, pd.DataFrame Returns ------- @@ -501,35 +582,37 @@ def get_test_params(cls, parameter_set="default"): """ return super().get_test_params(parameter_set=parameter_set) - @abstractmethod def _fit(self, X, y): """Fit time series classifier to training data. Abstract method, must be implemented. + Writes to self: + Sets fitted model attributes ending in "_". + Parameters ---------- X : guaranteed to be of a type in self.get_tag("X_inner_mtype") if self.get_tag("X_inner_mtype") = "numpy3D": - 3D np.ndarray of shape = [n_instances, n_dimensions, series_length] - if self.get_tag("X_inner_mtype") = "nested_univ": - pd.DataFrame with each column a dimension, each cell a pd.Series + 3D np.ndarray of shape = [n_instances, n_dimensions, series_length] + if self.get_tag("X_inner_mtype") = "pd-multiindex:": + pd.DataFrame with columns = variables, + index = pd.MultiIndex with first level = instance indices, + second level = time indices for list of other mtypes, see datatypes.SCITYPE_REGISTER for specifications, see examples/AA_datatypes_and_datasets.ipynb - y : 1D np.array of int, of shape [n_instances] - class labels for fitting - indices correspond to instance indices in X + y : guaranteed to be of a type in self.get_tag("y_inner_mtype") + 1D iterable, of shape [n_instances] + or 2D iterable, of shape [n_instances, n_dimensions] + class labels for fitting + if self.get_tag("capaility:multioutput") = False, guaranteed to be 1D + if self.get_tag("capaility:multioutput") = True, guaranteed to be 2D Returns ------- - self : - Reference to self. - - Notes - ----- - Changes state by creating a fitted model that updates attributes - ending in "_" and sets is_fitted flag to True. + self : Reference to self. """ - ... + raise NotImplementedError("abstract method") def _predict(self, X) -> np.ndarray: """Predicts labels for sequences in X. @@ -540,16 +623,23 @@ def _predict(self, X) -> np.ndarray: ---------- X : guaranteed to be of a type in self.get_tag("X_inner_mtype") if self.get_tag("X_inner_mtype") = "numpy3D": - 3D np.ndarray of shape = [n_instances, n_dimensions, series_length] - if self.get_tag("X_inner_mtype") = "nested_univ": - pd.DataFrame with each column a dimension, each cell a pd.Series + 3D np.ndarray of shape = [n_instances, n_dimensions, series_length] + if self.get_tag("X_inner_mtype") = "pd-multiindex:": + pd.DataFrame with columns = variables, + index = pd.MultiIndex with first level = instance indices, + second level = time indices for list of other mtypes, see datatypes.SCITYPE_REGISTER for specifications, see examples/AA_datatypes_and_datasets.ipynb Returns ------- - y : 1D np.array of int, of shape [n_instances] - predicted class labels + y : should be of mtype in self.get_tag("y_inner_mtype") + 1D iterable, of shape [n_instances] + or 2D iterable, of shape [n_instances, n_dimensions] + predicted class labels indices correspond to instance indices in X + if self.get_tag("capaility:multioutput") = False, should be 1D + if self.get_tag("capaility:multioutput") = True, should be 2D """ y_proba = self._predict_proba(X) y_pred = y_proba.argmax(axis=1) @@ -567,9 +657,11 @@ def _predict_proba(self, X) -> np.ndarray: ---------- X : guaranteed to be of a type in self.get_tag("X_inner_mtype") if self.get_tag("X_inner_mtype") = "numpy3D": - 3D np.ndarray of shape = [n_instances, n_dimensions, series_length] - if self.get_tag("X_inner_mtype") = "nested_univ": - pd.DataFrame with each column a dimension, each cell a pd.Series + 3D np.ndarray of shape = [n_instances, n_dimensions, series_length] + if self.get_tag("X_inner_mtype") = "pd-multiindex:": + pd.DataFrame with columns = variables, + index = pd.MultiIndex with first level = instance indices, + second level = time indices for list of other mtypes, see datatypes.SCITYPE_REGISTER for specifications, see examples/AA_datatypes_and_datasets.ipynb @@ -587,206 +679,3 @@ def _predict_proba(self, X) -> np.ndarray: dists[i, self._class_dictionary[preds[i]]] = 1 return dists - - def _check_convert_X_for_predict(self, X): - """Input checks, capability checks, repeated in all predict/score methods. - - Parameters - ---------- - X : any object (to check/convert) - should be of a supported Panel mtype or 2D numpy.ndarray - - Returns - ------- - X: an object of a supported Panel mtype, numpy3D if X was a 2D numpy.ndarray - - Raises - ------ - ValueError if X is of invalid input data type, or there is not enough data - ValueError if the capabilities in self._tags do not handle the data. - """ - X = self._internal_convert(X) - X_metadata = self._check_classifier_input( - X, return_metadata=self.METADATA_REQ_IN_CHECKS - ) - missing = X_metadata["has_nans"] - multivariate = not X_metadata["is_univariate"] - unequal = not X_metadata["is_equal_length"] - # Check this classifier can handle characteristics - self._check_capabilities(missing, multivariate, unequal) - # Convert data as dictated by the classifier tags - X = self._convert_X(X) - - return X - - def _check_capabilities(self, missing, multivariate, unequal): - """Check whether this classifier can handle the data characteristics. - - Parameters - ---------- - missing : boolean, does the data passed to fit contain missing values? - multivariate : boolean, does the data passed to fit contain missing values? - unequal : boolea, do the time series passed to fit have variable lengths? - - Raises - ------ - ValueError if the capabilities in self._tags do not handle the data. - """ - allow_multivariate = self.get_tag("capability:multivariate") - allow_missing = self.get_tag("capability:missing_values") - allow_unequal = self.get_tag("capability:unequal_length") - - self_name = type(self).__name__ - - # identify problems, mismatch of capability and inputs - problems = [] - if missing and not allow_missing: - problems += ["missing values"] - if multivariate and not allow_multivariate: - problems += ["multivariate series"] - if unequal and not allow_unequal: - problems += ["unequal length series"] - - # construct error message - problems_and = " and ".join(problems) - problems_or = " or ".join(problems) - msg = ( - f"Data seen by {self_name} instance has {problems_and}, " - f"but this {self_name} instance cannot handle {problems_or}. " - f"Calls with {problems_or} may result in error or unreliable results." - ) - - # raise exception or warning with message - # if self is composite, raise a warning, since passing could be fine - # see discussion in PR 2366 why - if len(problems) > 0: - if self.is_composite(): - warn(msg, obj=self) - else: - raise ValueError(msg) - - def _convert_X(self, X): - """Convert equal length series from DataFrame to numpy array or vice versa. - - Parameters - ---------- - self : this classifier - X : pd.DataFrame or np.ndarray. Input attribute data - - Returns - ------- - X : input X converted to type in "X_inner_mtype" tag - usually a pd.DataFrame (nested) or 3D np.ndarray - Checked and possibly converted input data - """ - inner_type = self.get_tag("X_inner_mtype") - # convert pd.DataFrame - X = convert_to( - X, - to_type=inner_type, - as_scitype="Panel", - ) - return X - - def _check_classifier_input( - self, X, y=None, enforce_min_instances=1, return_metadata=True - ): - """Check whether input X and y are valid formats with minimum data. - - Raises a ValueError if the input is not valid. - - Parameters - ---------- - X : check whether conformant with any sktime Panel mtype specification - y : check whether a pd.Series or np.array - enforce_min_instances : int, optional (default=1) - check there are a minimum number of instances. - return_metadata : bool, str, or list of str - metadata fields to return with X_metadata, input to check_is_scitype - - Returns - ------- - metadata : dict with metadata for X returned by datatypes.check_is_scitype - - Raises - ------ - ValueError - If y or X is invalid input data type, or there is not enough data - """ - # Check X is valid input type and recover the data characteristics - X_valid, _, X_metadata = check_is_scitype( - X, scitype="Panel", return_metadata=return_metadata - ) - if not X_valid: - raise TypeError( - f"X is not of a supported input data type." - f"X must be in a supported mtype format for Panel, found {type(X)}" - f"Use datatypes.check_is_mtype to check conformance " - "with specifications." - ) - n_cases = X_metadata["n_instances"] - if n_cases < enforce_min_instances: - raise ValueError( - f"Minimum number of cases required is {enforce_min_instances} but X " - f"has : {n_cases}" - ) - - # Check y if passed - if y is not None: - # Check y valid input - if not isinstance(y, (pd.Series, np.ndarray)): - raise ValueError( - f"y must be a np.array or a pd.Series, but found type: {type(y)}" - ) - # Check matching number of labels - n_labels = y.shape[0] - if n_cases != n_labels: - raise ValueError( - f"Mismatch in number of cases. Number in X = {n_cases} nos in y = " - f"{n_labels}" - ) - if isinstance(y, np.ndarray): - if y.ndim > 1: - raise ValueError( - f"np.ndarray y must be 1-dimensional, " - f"but found {y.ndim} dimensions" - ) - # warn if only a single class label is seen - # this should not raise exception since this can occur by train subsampling - if len(np.unique(y)) == 1: - warn( - "only single class label seen in y passed to " - f"fit of classifier {type(self).__name__}", - obj=self, - ) - - return X_metadata - - def _internal_convert(self, X, y=None): - """Convert X and y if necessary as a user convenience. - - Convert X to a 3D numpy array if already a 2D and convert y into an 1D numpy - array if passed as a Series. - - Parameters - ---------- - X : an object of a supported Panel mtype, or 2D numpy.ndarray - y : np.ndarray or pd.Series - - Returns - ------- - X: an object of a supported Panel mtype, numpy3D if X was a 2D numpy.ndarray - y: np.ndarray - """ - if isinstance(X, np.ndarray): - # Temporary fix to insist on 3D numpy. For univariate problems, - # most classifiers simply convert back to 2D. This squeezing should be - # done here, but touches a lot of files, so will get this to work first. - if X.ndim == 2: - X = X.reshape(X.shape[0], 1, X.shape[1]) - if y is not None and isinstance(y, pd.Series): - # y should be a numpy array, although we allow Series for user convenience - y = pd.Series.to_numpy(y) - if y is None: - return X - return X, y diff --git a/sktime/classification/compose/__init__.py b/sktime/classification/compose/__init__.py index a42414b175a..3d6b35065e0 100644 --- a/sktime/classification/compose/__init__.py +++ b/sktime/classification/compose/__init__.py @@ -6,11 +6,13 @@ "ClassifierPipeline", "ComposableTimeSeriesForestClassifier", "ColumnEnsembleClassifier", + "MultiplexClassifier", "SklearnClassifierPipeline", "WeightedEnsembleClassifier", ] from sktime.classification.compose._column_ensemble import ColumnEnsembleClassifier +from sktime.classification.compose._multiplexer import MultiplexClassifier from sktime.classification.compose._pipeline import ( ClassifierPipeline, SklearnClassifierPipeline, diff --git a/sktime/classification/compose/_column_ensemble.py b/sktime/classification/compose/_column_ensemble.py index bcebc4812bc..abe211eaf41 100644 --- a/sktime/classification/compose/_column_ensemble.py +++ b/sktime/classification/compose/_column_ensemble.py @@ -20,6 +20,7 @@ class BaseColumnEnsembleClassifier(_HeterogenousMetaEstimator, BaseClassifier): """Base Class for column ensemble.""" _tags = { + "authors": ["abostrom"], "capability:multivariate": True, "capability:predict_proba": True, "X_inner_mtype": ["nested_univ", "pd-multiindex"], diff --git a/sktime/classification/compose/_multiplexer.py b/sktime/classification/compose/_multiplexer.py new file mode 100644 index 00000000000..4d4b718cfdb --- /dev/null +++ b/sktime/classification/compose/_multiplexer.py @@ -0,0 +1,219 @@ +#!/usr/bin/env python3 -u +# copyright: sktime developers, BSD-3-Clause License (see LICENSE file) +"""Implements classifier for selecting among different model classes.""" +# based on MultiplexForecaster + +from sktime.base import _HeterogenousMetaEstimator +from sktime.classification._delegate import _DelegatedClassifier +from sktime.classification.base import BaseClassifier +from sktime.datatypes import MTYPE_LIST_PANEL, MTYPE_LIST_TABLE + +__author__ = ["fkiraly"] +__all__ = ["MultiplexClassifier"] + + +class MultiplexClassifier(_HeterogenousMetaEstimator, _DelegatedClassifier): + """MultiplexClassifier for selecting among different models. + + MultiplexClassifier facilitates a framework for performing + model selection process over different model classes. + It should be used in conjunction with GridSearchCV to get full utilization. + It can be used with univariate and multivariate classifiers, + single-output and multi-output classifiers. + + MultiplexClassifier is specified with a (named) list of classifiers + and a selected_classifier hyper-parameter, which is one of the classifier names. + The MultiplexClassifier then behaves precisely as the classifier with + name selected_classifier, ignoring functionality in the other classifiers. + + When used with GridSearchCV, MultiplexClassifier + provides an ability to tune across multiple estimators, i.e., to perform AutoML, + by tuning the selected_classifier hyper-parameter. This combination will then + select one of the passed classifiers via the tuning algorithm. + + Parameters + ---------- + classifiers : list of sktime classifiers, or + list of tuples (str, estimator) of sktime classifiers + MultiplexClassifier can switch ("multiplex") between these classifiers. + These are "blueprint" classifiers, states do not change when `fit` is called. + selected_classifier: str or None, optional, Default=None. + If str, must be one of the classifier names. + If no names are provided, must coincide with auto-generated name strings. + To inspect auto-generated name strings, call get_params. + If None, behaves as if the first classifier in the list is selected. + Selects the classifier as which MultiplexClassifier behaves. + + Attributes + ---------- + classifier_ : sktime classifier + clone of the selected classifier used for fitting and classification. + _classifiers : list of (str, classifier) tuples + str are identical to those passed, if passed strings are unique + otherwise unique strings are generated from class name; if not unique, + the string `_[i]` is appended where `[i]` is count of occurrence up until then + """ + + _tags = { + "authors": ["fkiraly"], + "capability:multioutput": True, + "capability:multivariate": True, + "capability:unequal_length": True, + "capability:missing_values": True, + "capability:predict_proba": True, + "X_inner_mtype": MTYPE_LIST_PANEL, + "y_inner_mtype": MTYPE_LIST_TABLE, + "fit_is_empty": False, + } + + # attribute for _DelegatedClassifier, which then delegates + # all non-overridden methods to those of same name in self.classifier_ + # see further details in _DelegatedClassifier docstring + _delegate_name = "classifier_" + + # for default get_params/set_params from _HeterogenousMetaEstimator + # _steps_attr points to the attribute of self + # which contains the heterogeneous set of estimators + # this must be an iterable of (name: str, estimator, ...) tuples for the default + _steps_attr = "_classifiers" + # if the estimator is fittable, _HeterogenousMetaEstimator also + # provides an override for get_fitted_params for params from the fitted estimators + # the fitted estimators should be in a different attribute, _steps_fitted_attr + # this must be an iterable of (name: str, estimator, ...) tuples for the default + _steps_fitted_attr = "classifiers_" + + def __init__( + self, + classifiers: list, + selected_classifier=None, + ): + super().__init__() + self.selected_classifier = selected_classifier + + self.classifiers = classifiers + self._check_estimators( + classifiers, + attr_name="classifiers", + cls_type=BaseClassifier, + clone_ests=False, + ) + self._set_classifier() + + self.clone_tags(self.classifier_) + self.set_tags(**{"fit_is_empty": False}) + # this ensures that we convert in the inner estimator, not in the multiplexer + self.set_tags(**{"X_inner_mtype": MTYPE_LIST_PANEL}) + self.set_tags(**{"y_inner_mtype": MTYPE_LIST_TABLE}) + + @property + def _classifiers(self): + """Classifiers turned into name/est tuples.""" + return self._get_estimator_tuples(self.classifiers, clone_ests=False) + + @_classifiers.setter + def _classifiers(self, value): + self.classifiers = value + + def _check_selected_classifier(self): + component_names = self._get_estimator_names(self._classifiers, make_unique=True) + selected = self.selected_classifier + if selected is not None and selected not in component_names: + raise Exception( + f"Invalid selected_classifier parameter value provided, " + f" found: {self.selected_classifier}. Must be one of these" + f" valid selected_classifier parameter values: {component_names}." + ) + + def __or__(self, other): + """Magic | (or) method, return (right) concatenated MultiplexClassifier. + + Implemented for `other` being a classifier, otherwise returns `NotImplemented`. + + Parameters + ---------- + other: `sktime` classifier, must inherit from BaseClassifier + otherwise, `NotImplemented` is returned + + Returns + ------- + MultiplexClassifier object, concatenation of `self` (first) with `other` (last). + not nested, contains only non-MultiplexClassifier `sktime` classifiers + + Raises + ------ + ValueError if other is not of type MultiplexClassifier or BaseClassifier. + """ + return self._dunder_concat( + other=other, + base_class=BaseClassifier, + composite_class=MultiplexClassifier, + attr_name="classifiers", + concat_order="left", + ) + + def __ror__(self, other): + """Magic | (or) method, return (left) concatenated MultiplexClassifier. + + Implemented for `other` being a classifier, otherwise returns `NotImplemented`. + + Parameters + ---------- + other: `sktime` classifier, must inherit from BaseClassifier + otherwise, `NotImplemented` is returned + + Returns + ------- + MultiplexClassifier object, concatenation of `self` (last) with `other` (first). + not nested, contains only non-MultiplexClassifier `sktime` classifiers + """ + return self._dunder_concat( + other=other, + base_class=BaseClassifier, + composite_class=MultiplexClassifier, + attr_name="classifiers", + concat_order="right", + ) + + def _set_classifier(self): + self._check_selected_classifier() + # clone the selected classifier to self.classifier_ + if self.selected_classifier is not None: + for name, classifier in self._get_estimator_tuples(self.classifiers): + if self.selected_classifier == name: + self.classifier_ = classifier.clone() + else: + # if None, simply clone the first classifier to self.classifier_ + self.classifier_ = self._get_estimator_list(self.classifiers)[0].clone() + + @classmethod + def get_test_params(cls, parameter_set="default"): + """Return testing parameter settings for the estimator. + + Parameters + ---------- + parameter_set : str, default="default" + Name of the set of test parameters to return, for use in tests. If no + special parameters are defined for a value, will return `"default"` set. + + Returns + ------- + params : dict or list of dict + """ + from sktime.classification.dummy import DummyClassifier + + params1 = { + "classifiers": [ + ("Naive_maj", DummyClassifier(strategy="most_frequent")), + ("Naive_pri", DummyClassifier(strategy="prior")), + ("Naive_uni", DummyClassifier(strategy="uniform")), + ], + "selected_classifier": "Naive_maj", + } + params2 = { + "classifiers": [ + DummyClassifier(strategy="most_frequent"), + DummyClassifier(strategy="prior"), + DummyClassifier(strategy="uniform"), + ], + } + return [params1, params2] diff --git a/sktime/classification/compose/_pipeline.py b/sktime/classification/compose/_pipeline.py index a7ae2e0f180..8c89d2609fa 100644 --- a/sktime/classification/compose/_pipeline.py +++ b/sktime/classification/compose/_pipeline.py @@ -92,6 +92,7 @@ class ClassifierPipeline(_HeterogenousMetaEstimator, BaseClassifier): """ _tags = { + "authors": ["fkiraly"], "X_inner_mtype": "pd-multiindex", # which type do _fit/_predict accept "capability:multivariate": False, "capability:unequal_length": False, diff --git a/sktime/classification/deep_learning/__init__.py b/sktime/classification/deep_learning/__init__.py index 092c206f51c..7215a2bcd06 100644 --- a/sktime/classification/deep_learning/__init__.py +++ b/sktime/classification/deep_learning/__init__.py @@ -1,16 +1,26 @@ """Deep learning based classifiers.""" __all__ = [ "CNNClassifier", + "CNTCClassifier", "FCNClassifier", "InceptionTimeClassifier", "LSTMFCNClassifier", + "MACNNClassifier", + "MCDCNNClassifier", "MLPClassifier", + "ResNetClassifier", + "SimpleRNNClassifier", "TapNetClassifier", ] from sktime.classification.deep_learning.cnn import CNNClassifier +from sktime.classification.deep_learning.cntc import CNTCClassifier from sktime.classification.deep_learning.fcn import FCNClassifier from sktime.classification.deep_learning.inceptiontime import InceptionTimeClassifier from sktime.classification.deep_learning.lstmfcn import LSTMFCNClassifier +from sktime.classification.deep_learning.macnn import MACNNClassifier +from sktime.classification.deep_learning.mcdcnn import MCDCNNClassifier from sktime.classification.deep_learning.mlp import MLPClassifier +from sktime.classification.deep_learning.resnet import ResNetClassifier +from sktime.classification.deep_learning.rnn import SimpleRNNClassifier from sktime.classification.deep_learning.tapnet import TapNetClassifier diff --git a/sktime/classification/deep_learning/base.py b/sktime/classification/deep_learning/base.py index 1dda1707b0c..76ca992efac 100644 --- a/sktime/classification/deep_learning/base.py +++ b/sktime/classification/deep_learning/base.py @@ -12,6 +12,7 @@ from sklearn.preprocessing import LabelEncoder, OneHotEncoder from sklearn.utils import check_random_state +from sktime.base._base import SERIALIZATION_FORMATS from sktime.classification.base import BaseClassifier from sktime.utils.validation._dependencies import _check_soft_dependencies @@ -115,7 +116,14 @@ def convert_y_to_keras(self, y): self.classes_ = self.label_encoder.classes_ self.n_classes_ = len(self.classes_) y = y.reshape(len(y), 1) - self.onehot_encoder = OneHotEncoder(sparse=False, categories="auto") + + # in sklearn 1.2, sparse was renamed to sparse_output + if _check_soft_dependencies("sklearn>=1.2", severity="none"): + sparse_kw = {"sparse_output": False} + else: + sparse_kw = {"sparse": False} + + self.onehot_encoder = OneHotEncoder(categories="auto", **sparse_kw) # categories='auto' to get rid of FutureWarning y = self.onehot_encoder.fit_transform(y) return y @@ -203,7 +211,7 @@ def __setstate__(self, state): if hasattr(self, "history"): self.__dict__["history"] = self.history - def save(self, path=None): + def save(self, path=None, serialization_format="pickle"): """Save serialized self to bytes-like object or to (.zip) file. Behaviour: @@ -225,18 +233,62 @@ def save(self, path=None): path="/home/stored/estimator" then a zip file `estimator.zip` will be stored in `/home/stored/`. + serialization_format: str, default = "pickle" + Module to use for serialization. + The available options are present under + `sktime.base._base.SERIALIZATION_FORMATS`. Note that non-default formats + might require installation of other soft dependencies. + Returns ------- if `path` is None - in-memory serialized self if `path` is file location - ZipFile with reference to the file """ import pickle - import shutil from pathlib import Path + + if serialization_format not in SERIALIZATION_FORMATS: + raise ValueError( + f"The provided `serialization_format`='{serialization_format}' " + "is not yet supported. The possible formats are: " + f"{SERIALIZATION_FORMATS}." + ) + + if path is not None and not isinstance(path, (str, Path)): + raise TypeError( + "`path` is expected to either be a string or a Path object " + f"but found of type:{type(path)}." + ) + + if path is not None: + path = Path(path) if isinstance(path, str) else path + path.mkdir() + + if serialization_format == "cloudpickle": + _check_soft_dependencies("cloudpickle", severity="error") + import cloudpickle + + return self._serialize_using_dump_func( + path=path, + dump=cloudpickle.dump, + dumps=cloudpickle.dumps, + ) + + elif serialization_format == "pickle": + return self._serialize_using_dump_func( + path=path, + dump=pickle.dump, + dumps=pickle.dumps, + ) + + def _serialize_using_dump_func(self, path, dump, dumps): + """Serialize & return DL Estimator using `dump` and `dumps` functions.""" + import shutil from zipfile import ZipFile + history = self.history.history if self.history is not None else None if path is None: - _check_soft_dependencies("h5py") + _check_soft_dependencies("h5py", severity="error") import h5py in_memory_model = None @@ -248,34 +300,25 @@ def save(self, path=None): h5file.flush() in_memory_model = h5file.id.get_file_image() - in_memory_history = pickle.dumps(self.history.history) - + in_memory_history = dumps(history) return ( type(self), ( - pickle.dumps(self), + dumps(self), in_memory_model, in_memory_history, ), ) - if not isinstance(path, (str, Path)): - raise TypeError( - "`path` is expected to either be a string or a Path object " - f"but found of type:{type(path)}." - ) - - path = Path(path) if isinstance(path, str) else path - path.mkdir() - if self.model_ is not None: self.model_.save(path / "keras/") with open(path / "history", "wb") as history_writer: - pickle.dump(self.history.history, history_writer) - - pickle.dump(type(self), open(path / "_metadata", "wb")) - pickle.dump(self, open(path / "_obj", "wb")) + dump(history, history_writer) + with open(path / "_metadata", "wb") as file: + dump(type(self), file) + with open(path / "_obj", "wb") as file: + dump(self, file) shutil.make_archive(base_name=path, format="zip", root_dir=path) shutil.rmtree(path) diff --git a/sktime/classification/deep_learning/cnn.py b/sktime/classification/deep_learning/cnn.py index 45d78cae6e3..899d8a3d756 100644 --- a/sktime/classification/deep_learning/cnn.py +++ b/sktime/classification/deep_learning/cnn.py @@ -17,7 +17,6 @@ class CNNClassifier(BaseDeepClassifier): Parameters ---------- - should inherited fields be listed here? n_epochs : int, default = 2000 the number of epochs to train the model batch_size : int, default = 16 @@ -35,7 +34,6 @@ class CNNClassifier(BaseDeepClassifier): whether to output extra information loss : string, default="mean_squared_error" fit parameter for the keras model - optimizer : keras.optimizer, default=keras.optimizers.Adam(), metrics : list of strings, default=["accuracy"], activation : string or a tf callable, default="sigmoid" Activation function used in the output linear layer. @@ -67,7 +65,14 @@ class CNNClassifier(BaseDeepClassifier): CNNClassifier(...) """ - _tags = {"python_dependencies": "tensorflow"} + _tags = { + # packaging info + # -------------- + "authors": ["James-Large", "TonyBagnall"], + "maintainers": ["James-Large"], + "python_dependencies": "tensorflow", + # estimator type handled by parent class + } def __init__( self, diff --git a/sktime/classification/deep_learning/cntc.py b/sktime/classification/deep_learning/cntc.py new file mode 100644 index 00000000000..900e9861318 --- /dev/null +++ b/sktime/classification/deep_learning/cntc.py @@ -0,0 +1,287 @@ +"""Contextual Time-series Neural Classifier for TSC.""" + +__author__ = ["James-Large", "TonyBagnall", "AurumnPegasus"] +__all__ = ["CNTCClassifier"] +from sklearn.utils import check_random_state + +from sktime.classification.deep_learning.base import BaseDeepClassifier +from sktime.networks.cntc import CNTCNetwork +from sktime.utils.validation._dependencies import _check_dl_dependencies + + +class CNTCClassifier(BaseDeepClassifier): + """Contextual Time-series Neural Classifier (CNTC), as described in [1]. + + Parameters + ---------- + n_epochs : int, default = 2000 + the number of epochs to train the model + batch_size : int, default = 16 + the number of samples per gradient update. + filter_sizes : tuple of shape (2), default = (16, 8) + filter sizes for CNNs in CCNN arm. + kernel_sizes : two-tuple, default = (1, 1) + the length of the 1D convolution window for + CNNs in CCNN arm. + rnn_size : int, default = 64 + number of rnn units in the CCNN arm. + lstm_size : int, default = 8 + number of lstm units in the CLSTM arm. + dense_size : int, default = 64 + dimension of dense layer in CNTC. + random_state : int or None, default=None + Seed for random number generation. + verbose : boolean, default = False + whether to output extra information + loss : string, default="mean_squared_error" + fit parameter for the keras model + optimizer : keras.optimizer, default=keras.optimizers.Adam(), + metrics : list of strings, default=["accuracy"], + + Notes + ----- + Adapted from the implementation from Fullah et. al + https://github.com/AmaduFullah/CNTC_MODEL/blob/master/cntc.ipynb + + References + ---------- + .. [1] Network originally defined in: + @article{FULLAHKAMARA202057, + title = {Combining contextual neural networks for time series classification}, + journal = {Neurocomputing}, + volume = {384}, + pages = {57-66}, + year = {2020}, + issn = {0925-2312}, + doi = {https://doi.org/10.1016/j.neucom.2019.10.113}, + url = {https://www.sciencedirect.com/science/article/pii/S0925231219316364}, + author = {Amadu {Fullah Kamara} and Enhong Chen and Qi Liu and Zhen Pan}, + keywords = {Time series classification, Contextual convolutional neural + networks, Contextual long short-term memory, Attention, Multilayer + perceptron}, + } + + Examples + -------- + >>> from sktime.classification.deep_learning.cntc import CNTCClassifier + >>> from sktime.datasets import load_unit_test + >>> X_train, y_train = load_unit_test(split="train", return_X_y=True) + >>> X_test, y_test = load_unit_test(split="test", return_X_y=True) + >>> cntc = CNTCClassifier() # doctest: +SKIP + >>> cntc.fit(X_train, y_train) # doctest: +SKIP + CNTCClassifier(...) # doctest: +SKIP + """ + + _tags = { + "authors": ["James-Large", "Withington", "TonyBagnall", "AurumnPegasus"], + "maintainers": ["James-Large", "Withington", "AurumnPegasus"], + "python_dependencies": ["tensorflow", "keras-self-attention"], + } + + def __init__( + self, + n_epochs=2000, + batch_size=16, + filter_sizes=(16, 8), + kernel_sizes=(1, 1), + rnn_size=64, + lstm_size=8, + dense_size=64, + callbacks=None, + verbose=False, + loss="categorical_crossentropy", + metrics=None, + random_state=0, + ): + _check_dl_dependencies(severity="error") + + self.kernel_sizes = kernel_sizes # used plural + self.filter_sizes = filter_sizes # used plural + self.rnn_size = rnn_size + self.lstm_size = lstm_size + self.dense_size = dense_size + self.callbacks = callbacks + self.n_epochs = n_epochs + self.batch_size = batch_size + self.verbose = verbose + self.loss = loss + self.metrics = metrics + self.random_state = random_state + self._network = CNTCNetwork() + + super().__init__(batch_size=batch_size, random_state=random_state) + + def build_model(self, input_shape, n_classes, **kwargs): + """Construct a compiled, un-trained, keras model that is ready for training. + + In sktime, time series are stored in numpy arrays of shape (d,m), where d + is the number of dimensions, m is the series length. Keras/tensorflow assume + data is in shape (m,d). This method also assumes (m,d). Transpose should + happen in fit. + + Parameters + ---------- + input_shape : tuple + The shape of the data fed into the input layer, should be (m,d) + n_classes: int + The number of classes, which becomes the size of the output layer + + Returns + ------- + output : a compiled Keras Model + """ + from tensorflow import keras + + metrics = ["accuracy"] if self.metrics is None else self.metrics + input_layer, output_layer = self._network.build_network(input_shape, **kwargs) + + output_layer = keras.layers.Dense(units=n_classes, activation="softmax")( + output_layer + ) + + model = keras.models.Model(inputs=input_layer, outputs=output_layer) + model.compile( + loss=self.loss, + optimizer=keras.optimizers.Adam(), + metrics=metrics, + ) + return model + + def prepare_input(self, X): + """ + Prepare input for the CLSTM arm of the model. + + According to the paper: + " + Time series data is fed into a CLSTM and CCNN networks simultaneously + and is perceived differently. In the CLSTM block, the input data is + viewed as a multivariate time series with a single time stamp. In + contrast, the CCNN block receives univariate data with numerous time + stamps + " + + Arguments + --------- + X: tuple of shape = (series_length (m), n_dimensions (d)) + The shape of the data fed into the model. + + Returns + ------- + trainX: tuple, + The input to be fed to the two arms of CNTC. + """ + import numpy as np + import pandas as pd + from tensorflow import keras + + if X.shape[2] == 1: + # Converting data to pandas + trainX1 = X.reshape([X.shape[0], X.shape[1]]) + pd_trainX = pd.DataFrame(trainX1) + + # Taking rolling window + window = pd_trainX.rolling(window=3).mean() + window = window.fillna(0) + + trainX2 = np.concatenate((trainX1, window), axis=1) + trainX2 = keras.backend.variable(trainX2) + trainX2 = keras.layers.Dense( + trainX1.shape[1], input_shape=(trainX2.shape[1:]) + )(trainX2) + trainX2 = keras.backend.eval(trainX2) + trainX = trainX2.reshape((trainX2.shape[0], trainX2.shape[1], 1)) + else: + trainXs = [] + for i in range(X.shape[2]): + trainX1 = X[:, :, i] + pd_trainX = pd.DataFrame(trainX1) + + window = pd_trainX.rolling(window=3).mean() + window = window.fillna(0) + + trainX2 = np.concatenate((trainX1, window), axis=1) + trainX2 = keras.backend.variable(trainX2) + trainX2 = keras.layers.Dense( + trainX1.shape[1], input_shape=(trainX2.shape[1:]) + )(trainX2) + trainX2 = keras.backend.eval(trainX2) + + trainX = trainX2.reshape((trainX2.shape[0], trainX2.shape[1], 1)) + trainXs.append(trainX) + + trainX = np.concatenate(trainXs, axis=2) + return trainX + + def _fit(self, X, y): + """Fit the classifier on the training set (X, y). + + Parameters + ---------- + X : np.ndarray of shape = (n_instances (n), n_dimensions (d), series_length (m)) + The training input samples. + y : np.ndarray of shape n + The training data class labels. + + Returns + ------- + self : object + """ + if self.callbacks is None: + self._callbacks = [] + y_onehot = self.convert_y_to_keras(y) + # Transpose to conform to Keras input style. + X = X.transpose(0, 2, 1) + + check_random_state(self.random_state) + self.input_shape = X.shape[1:] + self.model_ = self.build_model(self.input_shape, self.n_classes_) + X2 = self.prepare_input(X) + if self.verbose: + self.model_.summary() + self.history = self.model_.fit( + [X2, X, X], + y_onehot, + batch_size=self.batch_size, + epochs=self.n_epochs, + verbose=self.verbose, + callbacks=self._callbacks, + ) + return self + + def _predict(self, X, **kwargs): + import numpy as np + + probs = self._predict_proba(X, **kwargs) + rng = check_random_state(self.random_state) + return np.array( + [ + self.classes_[int(rng.choice(np.flatnonzero(prob == prob.max())))] + for prob in probs + ] + ) + + def _predict_proba(self, X, **kwargs): + """Find probability estimates for each class for all cases in X. + + Parameters + ---------- + X : an np.ndarray of shape = (n_instances, n_dimensions, series_length) + The training input samples. + + Returns + ------- + output : array of shape = [n_instances, n_classes] of probabilities + """ + import numpy as np + + # Transpose to work correctly with keras + X = X.transpose((0, 2, 1)) + X2 = self.prepare_input(X) + probs = self.model_.predict([X2, X, X], self.batch_size, **kwargs) + + # check if binary classification + if probs.shape[1] == 1: + # first column is probability of class 0 and second is of class 1 + probs = np.hstack([1 - probs, probs]) + probs = probs / probs.sum(axis=1, keepdims=1) + return probs diff --git a/sktime/classification/deep_learning/fcn.py b/sktime/classification/deep_learning/fcn.py index 3f86beef848..cece17d47e6 100644 --- a/sktime/classification/deep_learning/fcn.py +++ b/sktime/classification/deep_learning/fcn.py @@ -60,6 +60,14 @@ class FCNClassifier(BaseDeepClassifier): FCNClassifier(...) """ + _tags = { + # packaging info + # -------------- + "authors": ["James-Large", "AurumnPegasus"], + "maintainers": ["James-Large", "AurumnPegasus"], + # estimator type handled by parent class + } + def __init__( self, n_epochs=2000, diff --git a/sktime/classification/deep_learning/inceptiontime.py b/sktime/classification/deep_learning/inceptiontime.py index 041b196f142..e4b1d4636ed 100644 --- a/sktime/classification/deep_learning/inceptiontime.py +++ b/sktime/classification/deep_learning/inceptiontime.py @@ -43,6 +43,14 @@ class InceptionTimeClassifier(BaseDeepClassifier): https://github.com/hfawaz/InceptionTime/blob/master/classifiers/inception.py """ + _tags = { + # packaging info + # -------------- + "authors": ["james-large"], + "maintainers": ["james-large"], + # estimator type handled by parent class + } + def __init__( self, n_epochs=1500, diff --git a/sktime/classification/deep_learning/lstmfcn.py b/sktime/classification/deep_learning/lstmfcn.py index 4b73575cc25..ddabf816429 100644 --- a/sktime/classification/deep_learning/lstmfcn.py +++ b/sktime/classification/deep_learning/lstmfcn.py @@ -19,7 +19,6 @@ class LSTMFCNClassifier(BaseDeepClassifier): Combines an LSTM arm with a CNN arm. Optionally uses an attention mechanism in the LSTM which the author indicates provides improved performance. - Parameters ---------- n_epochs: int, default=2000 @@ -49,19 +48,20 @@ class LSTMFCNClassifier(BaseDeepClassifier): random_state : int or None, default=None Seed for random, integer. - - Notes - ----- - Ported from sktime-dl source code - https://github.com/sktime/sktime-dl/blob/master/sktime_dl/classification/_lstmfcn.py - References ---------- .. [1] Karim et al. Multivariate LSTM-FCNs for Time Series Classification, 2019 https://arxiv.org/pdf/1801.04503.pdf """ - _tags = {"python_dependencies": "tensorflow"} + _tags = { + # packaging info + # -------------- + "authors": ["jnrusson1", "solen0id"], + "maintainers": ["jnrusson1", "solen0id"], + "python_dependencies": "tensorflow", + # estimator type handled by parent class + } def __init__( self, diff --git a/sktime/classification/deep_learning/macnn.py b/sktime/classification/deep_learning/macnn.py index daf513c7358..ba504fd55ee 100644 --- a/sktime/classification/deep_learning/macnn.py +++ b/sktime/classification/deep_learning/macnn.py @@ -77,7 +77,14 @@ class MACNNClassifier(BaseDeepClassifier): MACNNClassifier(...) """ - _tags = {"python_dependencies": "tensorflow"} + _tags = { + # packaging info + # -------------- + "authors": ["jnrusson1"], + "maintainers": "jnrusson1", + "python_dependencies": "tensorflow", + # estimator type handled by parent class + } def __init__( self, diff --git a/sktime/classification/deep_learning/mcdcnn.py b/sktime/classification/deep_learning/mcdcnn.py index 1a9f024a20c..2c614814671 100644 --- a/sktime/classification/deep_learning/mcdcnn.py +++ b/sktime/classification/deep_learning/mcdcnn.py @@ -1,8 +1,6 @@ """Multi Channel Deep Convolutional Neural Classifier (MCDCNN).""" -__author__ = [ - "JamesLarge", -] +__author__ = ["James-Large"] from copy import deepcopy @@ -80,7 +78,14 @@ class MCDCNNClassifier(BaseDeepClassifier): MCDCNNClassifier(...) """ - _tags = {"python_dependencies": "tensorflow"} + _tags = { + # packaging info + # -------------- + "authors": ["james-large"], + "maintainers": ["james-large"], + "python_dependencies": "tensorflow", + # estimator type handled by parent class + } def __init__( self, diff --git a/sktime/classification/deep_learning/mlp.py b/sktime/classification/deep_learning/mlp.py index 0bf71a5bd1c..18adefe4dc0 100644 --- a/sktime/classification/deep_learning/mlp.py +++ b/sktime/classification/deep_learning/mlp.py @@ -60,6 +60,14 @@ class MLPClassifier(BaseDeepClassifier): MLPClassifier(...) """ + _tags = { + # packaging info + # -------------- + "authors": ["James-Large", "AurumnPegasus"], + "maintainers": ["James-Large", "AurumnPegasus"], + # estimator type handled by parent class + } + def __init__( self, n_epochs=2000, diff --git a/sktime/classification/deep_learning/resnet.py b/sktime/classification/deep_learning/resnet.py index 870da4eff9e..b5b292fa5fb 100644 --- a/sktime/classification/deep_learning/resnet.py +++ b/sktime/classification/deep_learning/resnet.py @@ -60,7 +60,14 @@ class ResNetClassifier(BaseDeepClassifier): ResNetClassifier(...) """ - _tags = {"python_dependencies": ["tensorflow"]} + _tags = { + # packaging info + # -------------- + "authors": ["James-Large", "AurumnPegasus", "nilesh05apr"], + "maintainers": ["James-Large", "AurumnPegasus", "nilesh05apr"], + "python_dependencies": ["tensorflow"], + # estimator type handled by parent class + } def __init__( self, diff --git a/sktime/classification/deep_learning/rnn.py b/sktime/classification/deep_learning/rnn.py index cbac0badb9d..41ec273aa41 100644 --- a/sktime/classification/deep_learning/rnn.py +++ b/sktime/classification/deep_learning/rnn.py @@ -49,9 +49,16 @@ class SimpleRNNClassifier(BaseDeepClassifier): https://github.com/Mcompetitions/M4-methods """ + _tags = { + # packaging info + # -------------- + "authors": ["mloning"], + # estimator type handled by parent class + } + def __init__( self, - num_epochs=100, + n_epochs=100, batch_size=1, units=6, callbacks=None, @@ -65,8 +72,9 @@ def __init__( optimizer=None, ): _check_dl_dependencies(severity="error") + super().__init__() - self.num_epochs = num_epochs + self.batch_size = batch_size self.verbose = verbose self.units = units @@ -80,6 +88,7 @@ def __init__( self.optimizer = optimizer self.history = None self._network = RNNNetwork(random_state=random_state, units=units) + self.n_epochs = n_epochs def build_model(self, input_shape, n_classes, **kwargs): """Construct a compiled, un-trained, keras model that is ready for training. @@ -189,7 +198,7 @@ def _fit(self, X, y): X, y_onehot, batch_size=self.batch_size, - epochs=self.num_epochs, + epochs=self.n_epochs, verbose=self.verbose, callbacks=self.callbacks_, ) @@ -218,7 +227,7 @@ def get_test_params(cls, parameter_set="default"): """ params1 = {} params2 = { - "num_epochs": 50, + "n_epochs": 50, "batch_size": 2, "units": 5, "use_bias": False, diff --git a/sktime/classification/deep_learning/tapnet.py b/sktime/classification/deep_learning/tapnet.py index 11f396d7412..267783999a2 100644 --- a/sktime/classification/deep_learning/tapnet.py +++ b/sktime/classification/deep_learning/tapnet.py @@ -1,7 +1,7 @@ """Time Convolutional Neural Network (CNN) for classification.""" __author__ = [ - "Jack Russon", + "jnrusson1", "TonyBagnall", "achieveordie", ] @@ -85,7 +85,14 @@ class TapNetClassifier(BaseDeepClassifier): TapNetClassifier(...) """ - _tags = {"python_dependencies": "tensorflow"} + _tags = { + # packaging info + # -------------- + "authors": ["jnrusson1", "TonyBagnall", "achieveordie"], + "maintainers": ["jnrusson1", "achieveordie"], + "python_dependencies": "tensorflow", + # estimator type handled by parent class + } def __init__( self, diff --git a/sktime/classification/dictionary_based/_boss.py b/sktime/classification/dictionary_based/_boss.py index cfcfb6ca9fc..747636923a0 100644 --- a/sktime/classification/dictionary_based/_boss.py +++ b/sktime/classification/dictionary_based/_boss.py @@ -7,6 +7,7 @@ __author__ = ["MatthewMiddlehurst", "patrickzib"] __all__ = ["BOSSEnsemble", "IndividualBOSS", "pairwise_distances"] +from copy import copy from itertools import compress import numpy as np @@ -125,11 +126,16 @@ class BOSSEnsemble(BaseClassifier): """ _tags = { + # packaging info + # -------------- + "authors": ["MatthewMiddlehurst", "patrickzib"], + "python_dependencies": "numba", + # estimator type + # -------------- "capability:train_estimate": True, "capability:multithreading": True, "classifier_type": "dictionary", "capability:predict_proba": True, - "python_dependencies": "numba", } def __init__( @@ -528,8 +534,13 @@ class IndividualBOSS(BaseClassifier): """ _tags = { - "capability:multithreading": True, + # packaging info + # -------------- + "authors": ["MatthewMiddlehurst", "patrickzib"], "python_dependencies": "numba", + # estimator type + # -------------- + "capability:multithreading": True, } def __init__( @@ -648,25 +659,22 @@ def _train_predict(self, train_num, distance_matrix): return self._class_vals[min_pos] def _shorten_bags(self, word_len, y): - new_boss = IndividualBOSS( - self.window_size, - word_len, - self.norm, - self.alphabet_size, - save_words=self.save_words, - use_boss_distance=self.use_boss_distance, - feature_selection=self.feature_selection, - n_jobs=self.n_jobs, - random_state=self.random_state, - ) + new_boss = copy(self) + + # change word length parameter + new_boss.word_length = word_len + + # reset internal variables + new_boss._accuracy = 0 + new_boss._subsample = [] + new_boss._train_predictions = [] + + # copy fitted transformer as reference new_boss._transformer = self._transformer + + # update shortened bags new_bag = new_boss._transformer._shorten_bags(word_len, y) new_boss._transformed_data = new_bag - new_boss._class_vals = self._class_vals - new_boss.n_classes_ = self.n_classes_ - new_boss.classes_ = self.classes_ - new_boss._class_dictionary = self._class_dictionary - new_boss._is_fitted = True return new_boss diff --git a/sktime/classification/dictionary_based/_cboss.py b/sktime/classification/dictionary_based/_cboss.py index e8d3df20d09..412c2d32372 100644 --- a/sktime/classification/dictionary_based/_cboss.py +++ b/sktime/classification/dictionary_based/_cboss.py @@ -130,12 +130,18 @@ class ContractableBOSS(BaseClassifier): """ _tags = { + # packaging info + # -------------- + "authors": ["MatthewMiddlehurst", "BINAYKUMAR943"], + "maintainers": "BINAYKUMAR943", + "python_dependencies": "numba", + # estimator type + # -------------- "capability:train_estimate": True, "capability:contractable": True, "capability:multithreading": True, "classifier_type": "dictionary", "capability:predict_proba": True, - "python_dependencies": "numba", } def __init__( diff --git a/sktime/classification/dictionary_based/_muse.py b/sktime/classification/dictionary_based/_muse.py index 46f37252d46..0789552f5c1 100644 --- a/sktime/classification/dictionary_based/_muse.py +++ b/sktime/classification/dictionary_based/_muse.py @@ -119,12 +119,18 @@ class MUSE(BaseClassifier): """ _tags = { + # packaging info + # -------------- + "authors": ["patrickzib", "BINAYKUMAR943"], + "maintainers": "BINAYKUMAR943", + "python_dependencies": "numba", + # estimator type + # -------------- "capability:multivariate": True, "capability:multithreading": True, "capability:predict_proba": True, "X_inner_mtype": "numpy3D", # which mtypes do _fit/_predict support for X? "classifier_type": "dictionary", - "python_dependencies": "numba", } def __init__( diff --git a/sktime/classification/dictionary_based/_tde.py b/sktime/classification/dictionary_based/_tde.py index 636649fded8..87266a6d0c8 100644 --- a/sktime/classification/dictionary_based/_tde.py +++ b/sktime/classification/dictionary_based/_tde.py @@ -146,13 +146,18 @@ class TemporalDictionaryEnsemble(BaseClassifier): """ _tags = { + # packaging info + # -------------- + "authors": ["MatthewMiddlehurst"], + "python_dependencies": "numba", + # estimator type + # -------------- "capability:multivariate": True, "capability:train_estimate": True, "capability:contractable": True, "capability:multithreading": True, "capability:predict_proba": True, "classifier_type": "dictionary", - "python_dependencies": "numba", } def __init__( @@ -671,9 +676,14 @@ class IndividualTDE(BaseClassifier): """ _tags = { + # packaging info + # -------------- + "authors": ["MatthewMiddlehurst"], + "python_dependencies": "numba", + # estimator type + # -------------- "capability:multivariate": True, "capability:multithreading": True, - "python_dependencies": "numba", } def __init__( diff --git a/sktime/classification/dictionary_based/_weasel.py b/sktime/classification/dictionary_based/_weasel.py index d6fd806ed04..047c6d09753 100644 --- a/sktime/classification/dictionary_based/_weasel.py +++ b/sktime/classification/dictionary_based/_weasel.py @@ -3,7 +3,7 @@ Dictionary based classifier based on SFA transform, BOSS and linear regression. """ -__author__ = ["patrickzib", "Arik Ermshaus"] +__author__ = ["patrickzib", "ermshaua"] __all__ = ["WEASEL"] import math @@ -120,10 +120,16 @@ class WEASEL(BaseClassifier): """ _tags = { + # packaging info + # -------------- + "authors": ["patrickzib", "ermshaua"], + "maintainers": ["ermshaua"], + "python_dependencies": "numba", + # estimator type + # -------------- "capability:multithreading": True, "capability:predict_proba": True, "classifier_type": "dictionary", - "python_dependencies": "numba", } def __init__( diff --git a/sktime/classification/distance_based/_elastic_ensemble.py b/sktime/classification/distance_based/_elastic_ensemble.py index 6c96ac28d80..0032f01d884 100644 --- a/sktime/classification/distance_based/_elastic_ensemble.py +++ b/sktime/classification/distance_based/_elastic_ensemble.py @@ -89,6 +89,11 @@ class ElasticEnsemble(BaseClassifier): """ _tags = { + # packaging info + # -------------- + "authors": ["jasonlines", "TonyBagnall"], + # estimator type + # -------------- "capability:multithreading": True, "capability:predict_proba": True, "classifier_type": "distance", diff --git a/sktime/classification/distance_based/_proximity_forest.py b/sktime/classification/distance_based/_proximity_forest.py index f235a3c30dc..ae135102eac 100644 --- a/sktime/classification/distance_based/_proximity_forest.py +++ b/sktime/classification/distance_based/_proximity_forest.py @@ -790,6 +790,12 @@ class ProximityStump(BaseClassifier): """ _tags = { + # packaging info + # -------------- + "authors": ["goastler", "moradabaz"], + "maintainers": ["goastler", "moradabaz"], + # estimator type + # -------------- "capability:multithreading": True, "X_inner_mtype": "nested_univ", # input in nested dataframe } @@ -1061,10 +1067,16 @@ class ProximityTree(BaseClassifier): """ _tags = { + # packaging info + # -------------- + "authors": ["goastler", "moradabaz"], + "maintainers": ["goastler", "moradabaz"], + "python_dependencies": "numba", + # estimator type + # -------------- "capability:multithreading": True, "capability:predict_proba": True, "X_inner_mtype": "nested_univ", - "python_dependencies": "numba", } def __init__( @@ -1315,11 +1327,17 @@ class ProximityForest(BaseClassifier): """ _tags = { + # packaging info + # -------------- + "authors": ["goastler", "moradabaz"], + "maintainers": ["goastler", "moradabaz"], + "python_dependencies": "numba", + # estimator type + # -------------- "X_inner_mtype": "nested_univ", "capability:multithreading": True, "capability:predict_proba": True, "classifier_type": "distance", - "python_dependencies": "numba", } def __init__( diff --git a/sktime/classification/distance_based/_shape_dtw.py b/sktime/classification/distance_based/_shape_dtw.py index ea4b9b3ad86..180b5ad9cac 100644 --- a/sktime/classification/distance_based/_shape_dtw.py +++ b/sktime/classification/distance_based/_shape_dtw.py @@ -15,7 +15,7 @@ KNeighborsTimeSeriesClassifier, ) from sktime.datatypes import convert -from sktime.transformations.panel.dictionary_based._paa import PAA +from sktime.transformations.panel.dictionary_based._paa import PAAlegacy as PAA from sktime.transformations.panel.dwt import DWTTransformer from sktime.transformations.panel.hog1d import HOG1DTransformer @@ -111,6 +111,12 @@ class ShapeDTW(BaseClassifier): """ _tags = { + # packaging info + # -------------- + "authors": ["vincent-nich12"], + "maintainers": ["vincent-nich12"], + # estimator type + # -------------- "capability:predict_proba": True, "classifier_type": "distance", } @@ -443,7 +449,7 @@ def _check_metric_params(self, parameters): names = list(parameters.keys()) for x in names: - if not (x in valid_metric_params): + if x not in valid_metric_params: raise ValueError( x + " is not a valid metric parameter." diff --git a/sktime/classification/distance_based/_time_series_neighbors.py b/sktime/classification/distance_based/_time_series_neighbors.py index 452de04b409..cfe56392384 100644 --- a/sktime/classification/distance_based/_time_series_neighbors.py +++ b/sktime/classification/distance_based/_time_series_neighbors.py @@ -116,6 +116,11 @@ class KNeighborsTimeSeriesClassifier(BaseClassifier): """ _tags = { + # packaging info + # -------------- + "authors": ["jasonlines", "TonyBagnall", "chrisholder", "fkiraly"], + # estimator type + # -------------- "capability:multivariate": True, "capability:unequal_length": True, "capability:missing_values": True, @@ -229,7 +234,9 @@ def _fit(self, X, y): # "X may be a sparse graph, in which case only "nonzero" elements # may be considered neighbors." X_inner_mtype = self.get_tag("X_inner_mtype") - _, _, X_meta = check_is_mtype(X, X_inner_mtype, return_metadata=True) + _, _, X_meta = check_is_mtype( + X, X_inner_mtype, return_metadata=True, msg_return_dict="list" + ) n = X_meta["n_instances"] dist_mat = np.zeros([n, n], dtype="float") diff --git a/sktime/classification/dummy/_dummy.py b/sktime/classification/dummy/_dummy.py index 215c8a0c0f1..66a10adf683 100644 --- a/sktime/classification/dummy/_dummy.py +++ b/sktime/classification/dummy/_dummy.py @@ -60,6 +60,12 @@ class prior probabilities. """ _tags = { + # packaging info + # -------------- + "authors": ["ZiyaoWei"], + "maintainers": ["ZiyaoWei"], + # estimator type + # -------------- "X_inner_mtype": "nested_univ", "capability:missing_values": True, "capability:unequal_length": True, diff --git a/sktime/classification/early_classification/_probability_threshold.py b/sktime/classification/early_classification/_probability_threshold.py index 0464bfd3e7b..18bc3a48522 100644 --- a/sktime/classification/early_classification/_probability_threshold.py +++ b/sktime/classification/early_classification/_probability_threshold.py @@ -20,7 +20,7 @@ from sktime.utils.validation.panel import check_X -# TODO: fix this in 0.25.0 +# TODO: fix this in 0.27.0 # base class should have been changed to BaseEarlyClassifier class ProbabilityThresholdEarlyClassifier(BaseClassifier): """Probability Threshold Early Classifier. diff --git a/sktime/classification/early_classification/base.py b/sktime/classification/early_classification/base.py index 335cb38b77b..d780e26570a 100644 --- a/sktime/classification/early_classification/base.py +++ b/sktime/classification/early_classification/base.py @@ -55,7 +55,9 @@ class BaseEarlyClassifier(BaseEstimator, ABC): _tags = { "object_type": "early_classifier", # type of object "X_inner_mtype": "numpy3D", # which type do _fit/_predict, support for X? + "y_inner_mtype": "numpy1D", # which type do _fit/_predict, support for y? # it should be either "numpy3D" or "nested_univ" (nested pd.DataFrame) + "capability:multioutput": False, # whether classifier supports multioutput "capability:multivariate": False, "capability:unequal_length": False, "capability:missing_values": False, @@ -71,6 +73,14 @@ class BaseEarlyClassifier(BaseEstimator, ABC): "is_equal_length", ] + # attribute name where vectorized estimators are stored + VECTORIZATION_ATTR = "classifiers_" # e.g., classifiers_, regressors_ + + # used in error messages + TASK = "early classification" # e.g., classification, regression + EST_TYPE = "early classifier" # e.g., classifier, regressor + EST_TYPE_PLURAL = "early classifiers" # e.g., classifiers, regressors + def __init__(self): self.classes_ = [] self.n_classes_ = 0 @@ -89,6 +99,8 @@ def __init__(self): """ self.state_info = None + self._converter_store_y = {} + super().__init__() def fit(self, X, y): @@ -587,42 +599,59 @@ def _check_convert_X_for_predict(self, X): _check_convert_X_for_predict = BaseClassifier._check_convert_X_for_predict return _check_convert_X_for_predict(self, X) - def _check_capabilities(self, missing, multivariate, unequal): + def _check_capabilities(self, X_metadata): """Check whether this classifier can handle the data characteristics. Parameters ---------- - missing : boolean, does the data passed to fit contain missing values? - multivariate : boolean, does the data passed to fit contain missing values? - unequal : boolea, do the time series passed to fit have variable lengths? + X_metadata : dict with metadata for X returned by datatypes.check_is_scitype Raises ------ ValueError if the capabilities in self._tags do not handle the data. """ _check_capabilities = BaseClassifier._check_capabilities - return _check_capabilities(self, missing, multivariate, unequal) + return _check_capabilities(self, X_metadata) - def _convert_X(self, X): + def _convert_X(self, X, X_mtype): """Convert equal length series from DataFrame to numpy array or vice versa. Parameters ---------- - self : this classifier - X : pd.DataFrame or np.ndarray. Input attribute data + X : input data for the classifier + X_mtype : str, a Panel mtype string, e.g., "pd_multiindex", "numpy3D" Returns ------- X : input X converted to type in "X_inner_mtype" tag - usually a pd.DataFrame (nested) or 3D np.ndarray + usually a pd.DataFrame (nested) or 3D np.ndarray Checked and possibly converted input data """ _convert_X = BaseClassifier._convert_X - return _convert_X(self, X) + return _convert_X(self, X, X_mtype) + + def _check_y(self, y=None, return_to_mtype=False): + """Check and coerce X/y for fit/transform functions. + + Parameters + ---------- + y : pd.DataFrame, pd.Series or np.ndarray + return_to_mtype : bool + whether to return the mtype of y output + + Returns + ------- + y_inner : object of sktime compatible time series type + can be Series, Panel, Hierarchical + y_metadata : dict + metadata of y, returned by check_is_scitype + y_mtype : str, only returned if return_to_mtype=True + mtype of y_inner, after convert + """ + _check_y = BaseClassifier._check_y + return _check_y(self, y, return_to_mtype=return_to_mtype) - def _check_classifier_input( - self, X, y=None, enforce_min_instances=1, return_metadata=True - ): + def _check_input(self, X, y=None, enforce_min_instances=1, return_metadata=True): """Check whether input X and y are valid formats with minimum data. Raises a ValueError if the input is not valid. @@ -645,10 +674,8 @@ def _check_classifier_input( ValueError If y or X is invalid input data type, or there is not enough data """ - _check_classifier_input = BaseClassifier._check_classifier_input - return _check_classifier_input( - self, X, y, enforce_min_instances, return_metadata - ) + _check_input = BaseClassifier._check_input + return _check_input(self, X, y, enforce_min_instances, return_metadata) def _internal_convert(self, X, y=None): """Convert X and y if necessary as a user convenience. diff --git a/sktime/classification/ensemble/_bagging.py b/sktime/classification/ensemble/_bagging.py index 621ccbe6492..04a39d7c0fe 100644 --- a/sktime/classification/ensemble/_bagging.py +++ b/sktime/classification/ensemble/_bagging.py @@ -20,14 +20,15 @@ class BaggingClassifier(BaseClassifier): On ``predict_proba``, the mean average of probabilistic predictions is returned. For a deterministic classifier, this results in majority vote for ``predict``. - The estimator allows to choose sample sizes fir instances, variables, + The estimator allows to choose sample sizes for instances, variables, and whether sampling is with or without replacement. Direct generalization of ``sklearn``'s ``BaggingClassifier`` to the time series classification task. - Note: if n_features=1, BaggingClassifier turns a univariate classifier into - a multivariate classifier (as slices seen by ``estimator`` are all univariate). + Note: if ``n_features=1``, ``BaggingClassifier`` turns a univariate classifier into + a multivariate classifier, because slices seen by ``estimator`` are all univariate. + This can be used to give a univariate classifier multivariate capabilities. Parameters ---------- @@ -77,6 +78,11 @@ class BaggingClassifier(BaseClassifier): """ _tags = { + # packaging info + # -------------- + "authors": ["fkiraly"], + # estimator type + # -------------- "capability:multivariate": True, "capability:missing_values": True, "capability:predict_proba": True, diff --git a/sktime/classification/ensemble/_ctsf.py b/sktime/classification/ensemble/_ctsf.py index 325f88a5bb0..773507f3b3f 100644 --- a/sktime/classification/ensemble/_ctsf.py +++ b/sktime/classification/ensemble/_ctsf.py @@ -17,8 +17,8 @@ from sklearn.utils import compute_sample_weight from sklearn.utils.multiclass import check_classification_targets +from sktime.base._panel.forest._composable import BaseTimeSeriesForest from sktime.classification.base import BaseClassifier -from sktime.series_as_features.base.estimators._ensemble import BaseTimeSeriesForest from sktime.transformations.panel.summarize import RandomIntervalFeatureExtractor from sktime.utils.slope_and_trend import _slope from sktime.utils.validation.panel import check_X, check_X_y @@ -174,6 +174,12 @@ class labels (multi-output problem). """ _tags = { + # packaging info + # -------------- + "authors": ["mloning", "AyushmaanSeth"], + "maintainers": ["AyushmaanSeth"], + # estimator type + # -------------- "X_inner_mtype": "nested_univ", # nested pd.DataFrame } diff --git a/sktime/classification/ensemble/_weighted.py b/sktime/classification/ensemble/_weighted.py index 89d5d321439..0a658449d86 100644 --- a/sktime/classification/ensemble/_weighted.py +++ b/sktime/classification/ensemble/_weighted.py @@ -85,6 +85,11 @@ class WeightedEnsembleClassifier(_HeterogenousMetaEstimator, BaseClassifier): """ _tags = { + # packaging info + # -------------- + "authors": "fkiraly", + # estimator type + # -------------- "capability:multivariate": True, "capability:missing_values": True, "capability:predict_proba": True, diff --git a/sktime/classification/feature_based/_catch22_classifier.py b/sktime/classification/feature_based/_catch22_classifier.py index 0aed52be99c..7814c0475db 100644 --- a/sktime/classification/feature_based/_catch22_classifier.py +++ b/sktime/classification/feature_based/_catch22_classifier.py @@ -81,11 +81,17 @@ class Catch22Classifier(_DelegatedClassifier): """ _tags = { + # packaging info + # -------------- + "authors": ["MatthewMiddlehurst", "RavenRudi", "fkiraly"], + "maintainers": ["RavenRudi"], + "python_dependencies": "numba", + # estimator type + # -------------- "capability:multivariate": True, "capability:multithreading": True, "capability:predict_proba": True, "classifier_type": "feature", - "python_dependencies": "numba", } def __init__( diff --git a/sktime/classification/feature_based/_fresh_prince.py b/sktime/classification/feature_based/_fresh_prince.py index 4f34c1aaf8c..719e37866ac 100644 --- a/sktime/classification/feature_based/_fresh_prince.py +++ b/sktime/classification/feature_based/_fresh_prince.py @@ -60,13 +60,18 @@ class FreshPRINCE(BaseClassifier): """ _tags = { + # packaging info + # -------------- + "authors": ["MatthewMiddlehurst"], + "python_version": "<3.10", + "python_dependencies": "tsfresh", + # estimator type + # -------------- "capability:multivariate": True, "capability:multithreading": True, "capability:train_estimate": True, "capability:predict_proba": True, "classifier_type": "feature", - "python_version": "<3.10", - "python_dependencies": "tsfresh", } def __init__( diff --git a/sktime/classification/feature_based/_matrix_profile_classifier.py b/sktime/classification/feature_based/_matrix_profile_classifier.py index 448720cd470..8180ddd05ee 100644 --- a/sktime/classification/feature_based/_matrix_profile_classifier.py +++ b/sktime/classification/feature_based/_matrix_profile_classifier.py @@ -65,12 +65,17 @@ class MatrixProfileClassifier(BaseClassifier): """ _tags = { - "capability:multithreading": True, - "capability:predict_proba": True, - "classifier_type": "distance", + # packaging info + # -------------- + "authors": ["MatthewMiddlehurst"], # sklearn 1.3.0 has a bug which causes predict_proba to fail # see scikit-learn#26768 and sktime#4778 "python_dependencies": "scikit-learn!=1.3.0", + # estimator type + # -------------- + "capability:multithreading": True, + "capability:predict_proba": True, + "classifier_type": "distance", } def __init__( diff --git a/sktime/classification/feature_based/_random_interval_classifier.py b/sktime/classification/feature_based/_random_interval_classifier.py index b10afc80703..278b0a68659 100644 --- a/sktime/classification/feature_based/_random_interval_classifier.py +++ b/sktime/classification/feature_based/_random_interval_classifier.py @@ -52,6 +52,11 @@ class RandomIntervalClassifier(BaseClassifier): """ _tags = { + # packaging info + # -------------- + "authors": ["MatthewMiddlehurst"], + # estimator type + # -------------- "capability:multivariate": True, "capability:multithreading": True, "capability:predict_proba": True, diff --git a/sktime/classification/feature_based/_signature_classifier.py b/sktime/classification/feature_based/_signature_classifier.py index 5ceac9a0a18..56b7b1320db 100644 --- a/sktime/classification/feature_based/_signature_classifier.py +++ b/sktime/classification/feature_based/_signature_classifier.py @@ -90,11 +90,17 @@ class SignatureClassifier(BaseClassifier): """ _tags = { + # packaging info + # -------------- + "authors": "jambo6", + "maintainers": "jambo6", + "python_dependencies": "esig", + "python_version": "<3.10", + # estimator type + # -------------- "capability:multivariate": True, "capability:predict_proba": True, "classifier_type": "feature", - "python_dependencies": "esig", - "python_version": "<3.10", } def __init__( diff --git a/sktime/classification/feature_based/_summary_classifier.py b/sktime/classification/feature_based/_summary_classifier.py index fe7e92a1d6e..992fd38eb63 100644 --- a/sktime/classification/feature_based/_summary_classifier.py +++ b/sktime/classification/feature_based/_summary_classifier.py @@ -64,6 +64,11 @@ class SummaryClassifier(BaseClassifier): """ _tags = { + # packaging info + # -------------- + "authors": ["MatthewMiddlehurst"], + # estimator type + # -------------- "capability:multivariate": True, "capability:multithreading": True, "capability:predict_proba": True, diff --git a/sktime/classification/feature_based/_tsfresh_classifier.py b/sktime/classification/feature_based/_tsfresh_classifier.py index 50216d7bd2c..d648d9a3826 100644 --- a/sktime/classification/feature_based/_tsfresh_classifier.py +++ b/sktime/classification/feature_based/_tsfresh_classifier.py @@ -65,12 +65,17 @@ class TSFreshClassifier(BaseClassifier): """ _tags = { + # packaging info + # -------------- + "authors": ["MatthewMiddlehurst"], + "python_version": "<3.10", + "python_dependencies": "tsfresh", + # estimator type + # -------------- "capability:multivariate": True, "capability:multithreading": True, "capability:predict_proba": True, "classifier_type": "feature", - "python_version": "<3.10", - "python_dependencies": "tsfresh", } def __init__( @@ -149,6 +154,7 @@ def _fit(self, X, y): self._estimator.n_jobs = self._threads_to_use X_t = self._transformer.fit_transform(X, y) + self._Xt_colnames = X_t.columns if X_t.shape[1] == 0: warn( @@ -182,7 +188,9 @@ def _predict(self, X) -> np.ndarray: if self._return_majority_class: return np.full(X.shape[0], self.classes_[self._majority_class]) - return self._estimator.predict(self._transformer.transform(X)) + X_t = self._transformer.transform(X) + X_t = X_t.reindex(self._Xt_colnames, axis=1, fill_value=0) + return self._estimator.predict(X_t) def _predict_proba(self, X) -> np.ndarray: """Predict class probabilities for n instances in X. @@ -207,7 +215,9 @@ def _predict_proba(self, X) -> np.ndarray: return self._estimator.predict_proba(self._transformer.transform(X)) else: dists = np.zeros((X.shape[0], self.n_classes_)) - preds = self._estimator.predict(self._transformer.transform(X)) + X_t = self._transformer.transform(X) + X_t = X_t.reindex(self._Xt_colnames, axis=1, fill_value=0) + preds = self._estimator.predict(X_t) for i in range(0, X.shape[0]): dists[i, self._class_dictionary[preds[i]]] = 1 return dists diff --git a/sktime/classification/hybrid/_hivecote_v1.py b/sktime/classification/hybrid/_hivecote_v1.py index 62ef0e02c4d..2e3d8d01967 100644 --- a/sktime/classification/hybrid/_hivecote_v1.py +++ b/sktime/classification/hybrid/_hivecote_v1.py @@ -94,10 +94,15 @@ class HIVECOTEV1(BaseClassifier): """ _tags = { + # packaging info + # -------------- + "authors": "MatthewMiddlehurst", + "python_dependencies": "numba", + # estimator type + # -------------- "capability:multithreading": True, "capability:predict_proba": True, "classifier_type": "hybrid", - "python_dependencies": "numba", } def __init__( diff --git a/sktime/classification/hybrid/_hivecote_v2.py b/sktime/classification/hybrid/_hivecote_v2.py index 9ac2e493575..4f96717fe72 100644 --- a/sktime/classification/hybrid/_hivecote_v2.py +++ b/sktime/classification/hybrid/_hivecote_v2.py @@ -90,12 +90,17 @@ class HIVECOTEV2(BaseClassifier): """ _tags = { + # packaging info + # -------------- + "authors": "MatthewMiddlehurst", + "python_dependencies": "numba", + # estimator type + # -------------- "capability:multivariate": True, "capability:contractable": True, "capability:multithreading": True, "capability:predict_proba": True, "classifier_type": "hybrid", - "python_dependencies": "numba", } def __init__( diff --git a/sktime/classification/interval_based/_cif.py b/sktime/classification/interval_based/_cif.py index 53f7c59b402..956e448832b 100644 --- a/sktime/classification/interval_based/_cif.py +++ b/sktime/classification/interval_based/_cif.py @@ -117,11 +117,16 @@ class CanonicalIntervalForest(BaseClassifier): """ _tags = { + # packaging info + # -------------- + "authors": "MatthewMiddlehurst", + "python_dependencies": "numba", + # estimator type + # -------------- "capability:multivariate": True, "capability:multithreading": True, "capability:predict_proba": True, "classifier_type": "interval", - "python_dependencies": "numba", } def __init__( diff --git a/sktime/classification/interval_based/_drcif.py b/sktime/classification/interval_based/_drcif.py index 1e39cd14720..b6afdd16184 100644 --- a/sktime/classification/interval_based/_drcif.py +++ b/sktime/classification/interval_based/_drcif.py @@ -141,13 +141,18 @@ class DrCIF(BaseClassifier): """ _tags = { + # packaging info + # -------------- + "authors": "MatthewMiddlehurst", + "python_dependencies": "numba", + # estimator type + # -------------- "capability:multivariate": True, "capability:train_estimate": True, "capability:contractable": True, "capability:multithreading": True, "capability:predict_proba": True, "classifier_type": "interval", - "python_dependencies": "numba", } def __init__( diff --git a/sktime/classification/interval_based/_rise.py b/sktime/classification/interval_based/_rise.py index 197dd986401..6eb3891eaed 100644 --- a/sktime/classification/interval_based/_rise.py +++ b/sktime/classification/interval_based/_rise.py @@ -177,10 +177,15 @@ class RandomIntervalSpectralEnsemble(BaseClassifier): """ _tags = { + # packaging info + # -------------- + "authors": "TonyBagnall", + "python_dependencies": "numba", + # estimator type + # -------------- "capability:multithreading": True, "capability:predict_proba": True, "classifier_type": "interval", - "python_dependencies": "numba", } def __init__( diff --git a/sktime/classification/interval_based/_stsf.py b/sktime/classification/interval_based/_stsf.py index f2e08709ce6..406511ea57c 100644 --- a/sktime/classification/interval_based/_stsf.py +++ b/sktime/classification/interval_based/_stsf.py @@ -86,6 +86,11 @@ class SupervisedTimeSeriesForest(BaseClassifier): """ _tags = { + # packaging info + # -------------- + "authors": "MatthewMiddlehurst", + # estimator type + # -------------- "capability:multithreading": True, "capability:predict_proba": True, "classifier_type": "interval", @@ -205,7 +210,7 @@ def _predict_proba(self, X) -> np.ndarray: Returns ------- - output : nd.array of shape = (n_instances, n_classes) + output : np.ndarray of shape = (n_instances, n_classes) Predicted probabilities """ X = X.squeeze(1) diff --git a/sktime/classification/interval_based/_tsf.py b/sktime/classification/interval_based/_tsf.py index b5a474a5005..d8706c30331 100644 --- a/sktime/classification/interval_based/_tsf.py +++ b/sktime/classification/interval_based/_tsf.py @@ -6,16 +6,16 @@ __author__ = ["kkoziara", "luiszugasti", "kanand77"] __all__ = ["TimeSeriesForestClassifier"] +from typing import Optional + import numpy as np +import pandas as pd from joblib import Parallel, delayed from sklearn.ensemble._forest import ForestClassifier from sklearn.tree import DecisionTreeClassifier +from sktime.base._panel.forest._tsf import BaseTimeSeriesForest, _transform from sktime.classification.base import BaseClassifier -from sktime.series_as_features.base.estimators.interval_based import ( - BaseTimeSeriesForest, -) -from sktime.series_as_features.base.estimators.interval_based._tsf import _transform class TimeSeriesForestClassifier( @@ -26,19 +26,30 @@ class TimeSeriesForestClassifier( A time series forest is an ensemble of decision trees built on random intervals. Overview: Input n series length m. For each tree - - sample sqrt(m) intervals, - - find mean, std and slope for each interval, concatenate to form new - data set, - - build decision tree on new data set. + + - sample sqrt(m) intervals, + - find mean, std and slope for each interval, concatenate to form new + data set, if inner series length is set, then intervals are sampled + within bins of length inner_series_length. + - build decision tree on new data set. + Ensemble the trees with averaged probability estimates. This implementation deviates from the original in minor ways. It samples intervals with replacement and does not use the splitting criteria tiny refinement described in [1]. - This is an intentionally stripped down, non - configurable version for use as a hive-cote component. For a configurable - tree based ensemble, see sktime.classifiers.ensemble.TimeSeriesForestClassifier + This classifier is intentionally written with low configurability, + for performace reasons. + + * for a more configurable tree based ensemble, + use ``sktime.classication.ensemble.ComposableTimeSeriesForestClassifier``, + which also allows switching the base estimator. + * to build a a time series forest with configurable ensembling, base estimator, + and/or feature extraction, fully from composable blocks, + combine ``sktime.classication.ensemble.BaggingClassifier`` with + any classifier pipeline, e.g., pipelining any ``sklearn`` classifier + with any time series feature extraction, e.g., ``Summarizer`` Parameters ---------- @@ -49,6 +60,10 @@ class TimeSeriesForestClassifier( n_jobs : int, default=1 The number of jobs to run in parallel for both `fit` and `predict`. ``-1`` means using all processors. + inner_series_length: int, default=None + The maximum length of unique segments within X from which we extract + intervals is determined. This helps prevent the extraction of + intervals that span across distinct inner series. random_state : int or None, default=None Seed for random number generation. @@ -58,6 +73,10 @@ class TimeSeriesForestClassifier( The number of classes. classes_ : list The classes labels. + feature_importances_ : pandas Dataframe of shape (series_length, 3) + The feature temporal importances for each feature type (mean, std, slope). + It shows how much each time point of your input dataset, through the + feature types extracted (mean, std, slope), contributed to the predictions. Notes ----- @@ -82,22 +101,36 @@ class TimeSeriesForestClassifier( >>> y_pred = clf.predict(X_test) """ + _feature_types = ["mean", "std", "slope"] _base_estimator = DecisionTreeClassifier(criterion="entropy") - _tags = {"capability:predict_proba": True} + _tags = { + # packaging info + # -------------- + "authors": ["kkoziara", "luiszugasti", "kanand77"], + "maintainers": ["kkoziara", "luiszugasti", "kanand77"], + # estimator type + # -------------- + "capability:predict_proba": True, + } def __init__( self, min_interval=3, n_estimators=200, + inner_series_length: Optional[int] = None, n_jobs=1, random_state=None, ): + self.criterion = "gini" # needed for BaseForest in sklearn > 1.4.0, + # because sklearn tag logic looks at this attribute + super().__init__( min_interval=min_interval, n_estimators=n_estimators, n_jobs=n_jobs, random_state=random_state, + inner_series_length=inner_series_length, ) BaseClassifier.__init__(self) @@ -155,7 +188,7 @@ def _predict_proba(self, X) -> np.ndarray: Returns ------- - output : nd.array of shape = (n_instances, n_classes) + output : np.ndarray of shape = (n_instances, n_classes) Predicted probabilities """ X = X.squeeze(1) @@ -203,6 +236,91 @@ def get_test_params(cls, parameter_set="default"): else: return {"n_estimators": 2} + def _extract_feature_importance_by_feature_type_per_tree( + self, tree_feature_importance: np.array, feature_type: str + ) -> np.array: + """Return feature importance. + + Extracting the feature importance corresponding from a feature type + (eg. "mean", "std", "slope") from tree feature importance + + Parameters + ---------- + tree_feature_importance : array-like of shape (n_features_in,) + The feature importance per feature in an estimator, n_intervals x number + of feature types + feature_type : str + feature type belonging to self.feature_types + + Returns + ------- + self : array-like of shape (n_intervals,) + Feature importance corresponding from a feature type. + """ + feature_index = np.argwhere( + [ + feature_type == feature_type_recorded + for feature_type_recorded in self._feature_types + ] + )[0, 0] + + feature_type_feature_importance = tree_feature_importance[ + [ + interval_index + feature_index + for interval_index in range( + 0, len(tree_feature_importance), len(self._feature_types) + ) + ] + ] + + return feature_type_feature_importance + + @property + def feature_importances_(self, **kwargs) -> pd.DataFrame: + """Return the temporal feature importances. + + There is an implementation of temporal feature importance in + BaseTimeSeriesForest in sktime.base._panel.forest._composable + but TimeseriesForestClassifier is inheriting from + sktime.base._panel.forest._tsf.py + which does not have feature_importance_. + + Other feature importance methods implementation: + >>> from sktime.base._panel.forest._composable import BaseTimeSeriesForest + + Returns + ------- + feature_importances_ : pandas Dataframe of shape (series_length, 3) + The feature importances for each feature type (mean, std, slope). + """ + all_importances_per_feature = { + _feature_type: np.zeros(self.series_length) + for _feature_type in self._feature_types + } + + for tree_index in range(self.n_estimators): + tree = self.estimators_[tree_index] + tree_importances = tree.feature_importances_ + tree_intervals = self.intervals_[tree_index] + for feature_type in self._feature_types: + feature_type_importances = ( + self._extract_feature_importance_by_feature_type_per_tree( + tree_importances, feature_type + ) + ) + for interval_index in range(self.n_intervals): + interval = tree_intervals[interval_index] + all_importances_per_feature[feature_type][ + interval[0] : interval[1] + ] += feature_type_importances[interval_index] + + temporal_feature_importance = ( + pd.DataFrame(all_importances_per_feature) + / self.n_estimators + / self.n_intervals + ) + return temporal_feature_importance + def _predict_single_classifier_proba(X, estimator, intervals): """Find probability estimates for each class for all cases in X.""" diff --git a/sktime/classification/interval_based/tests/test_tsf.py b/sktime/classification/interval_based/tests/test_tsf.py new file mode 100644 index 00000000000..8d390f394d9 --- /dev/null +++ b/sktime/classification/interval_based/tests/test_tsf.py @@ -0,0 +1,151 @@ +"""Tests for feature importance in time series forests.""" +from unittest.mock import MagicMock, patch + +import numpy as np +import pandas as pd +import pytest + +from sktime.classification.interval_based import TimeSeriesForestClassifier +from sktime.utils._testing.panel import make_classification_problem + +TESTED_MODULE = "sktime.classification.interval_based._tsf" + +X_train, y_train = make_classification_problem() + + +@patch( + f"{TESTED_MODULE}.TimeSeriesForestClassifier." + f"_extract_feature_importance_by_feature_type_per_tree" +) +@pytest.mark.parametrize("n_estimators", [2, 5]) +def test_time_series_forest_classifier_feature_importance( + extract_feature_importance_of_feature_mock: MagicMock, + n_estimators: int, +): + """Test TimeSeriesForestClassifier feature importance.""" + # Given + given_n_intervals = 2 + given_n_estimators = n_estimators + given_time_series_forest_classifier = TimeSeriesForestClassifier( + n_estimators=given_n_estimators + ) + + given_time_series_forest_classifier.series_length = 20 + given_time_series_forest_classifier.estimators_ = MagicMock() + given_time_series_forest_classifier.intervals_ = np.array( + [[[0, 9], [15, 20]]] * given_n_estimators + ) + given_time_series_forest_classifier.n_intervals = given_n_intervals + + extract_feature_importance_of_feature_mock.return_value = np.ones( + given_time_series_forest_classifier.n_intervals + ) + + # When + fi = given_time_series_forest_classifier.feature_importances_ + + # Then + expected_fi = pd.DataFrame( + { + "mean": [ + 0.5, + 0.5, + 0.5, + 0.5, + 0.5, + 0.5, + 0.5, + 0.5, + 0.5, + 0, + 0, + 0, + 0, + 0, + 0, + 0.5, + 0.5, + 0.5, + 0.5, + 0.5, + ], + "std": [ + 0.5, + 0.5, + 0.5, + 0.5, + 0.5, + 0.5, + 0.5, + 0.5, + 0.5, + 0, + 0, + 0, + 0, + 0, + 0, + 0.5, + 0.5, + 0.5, + 0.5, + 0.5, + ], + "slope": [ + 0.5, + 0.5, + 0.5, + 0.5, + 0.5, + 0.5, + 0.5, + 0.5, + 0.5, + 0, + 0, + 0, + 0, + 0, + 0, + 0.5, + 0.5, + 0.5, + 0.5, + 0.5, + ], + } + ) + pd.testing.assert_frame_equal(expected_fi, fi) + + +@pytest.mark.parametrize("number_of_intervals", [2, 5]) +@pytest.mark.parametrize("feature_type", ["mean", "std", "slope"]) +def test__extract_feature_importance_by_feature_type_per_tree( + number_of_intervals: int, feature_type: str +): + """Test TimeSeriesForestClassifier feature type feature importance.""" + # given + given_number_of_intervals = number_of_intervals + given_tree_feature_importance = np.array([1, 2, 3] * given_number_of_intervals) + given_feature_type = feature_type + given_tsf_classifier = TimeSeriesForestClassifier() + + # When + feature_importance_of_feature_type_from_tree_feature_importance = ( + given_tsf_classifier._extract_feature_importance_by_feature_type_per_tree( + given_tree_feature_importance, given_feature_type + ) + ) + + # Then + expected_feature_importance_of_feature_type_from_tree_feature_importance = { + "mean": np.ones(given_number_of_intervals) * 1, + "std": np.ones(given_number_of_intervals) * 2, + "slope": np.ones(given_number_of_intervals) * 3, + } + np.testing.assert_array_equal( + expected_feature_importance_of_feature_type_from_tree_feature_importance[ + given_feature_type + ], + feature_importance_of_feature_type_from_tree_feature_importance, + ) diff --git a/sktime/classification/kernel_based/_arsenal.py b/sktime/classification/kernel_based/_arsenal.py index 8cb144db01b..3d1d1e6c38f 100644 --- a/sktime/classification/kernel_based/_arsenal.py +++ b/sktime/classification/kernel_based/_arsenal.py @@ -111,13 +111,19 @@ class Arsenal(BaseClassifier): """ _tags = { + # packaging info + # -------------- + "authors": ["MatthewMiddlehurst", "kachayev"], + "maintainers": ["kachayev"], + "python_dependencies": "numba", + # estimator type + # -------------- "capability:multivariate": True, "capability:train_estimate": True, "capability:contractable": True, "capability:multithreading": True, "capability:predict_proba": True, "classifier_type": "kernel", - "python_dependencies": "numba", } def __init__( diff --git a/sktime/classification/kernel_based/_rocket_classifier.py b/sktime/classification/kernel_based/_rocket_classifier.py index fb5a265de82..bc53983317d 100644 --- a/sktime/classification/kernel_based/_rocket_classifier.py +++ b/sktime/classification/kernel_based/_rocket_classifier.py @@ -30,15 +30,17 @@ class RocketClassifier(_DelegatedClassifier): Shorthand for the pipeline `rocket * StandardScaler(with_mean=False) * RidgeClassifierCV(alphas)` where `alphas = np.logspace(-3, 3, 10)`, and - where `rocket` depends on params `rocket_transform`, `use_multivariate` as follows: - - | rocket_transform | `use_multivariate` | rocket (class) | - |------------------|--------------------|-------------------------| - | "rocket" | any | Rocket | - | "minirocket" | "yes | MiniRocketMultivariate | - | "minirocket" | "no" | MiniRocket | - | "multirocket" | "yes" | MultiRocketMultivariate | - | "multirocket" | "no" | MultiRocket | + where `rocket` depends on params `rocket_transform`, `use_multivariate` as follows + + ================ ================== ======================= + rocket_transform `use_multivariate` rocket (class) + ================ ================== ======================= + "rocket" any Rocket + "minirocket" "yes" MiniRocketMultivariate + "minirocket" "no" MiniRocket + "multirocket" "yes" MultiRocketMultivariate + "multirocket" "no" MultiRocket + ================ ================== ======================= classes are sktime classes, other parameters are passed on to the rocket class. @@ -105,10 +107,16 @@ class RocketClassifier(_DelegatedClassifier): """ _tags = { + # packaging info + # -------------- + "authors": ["MatthewMiddlehurst", "victordremov", "fkiraly"], + "maintainers": ["victordremov"], + "python_dependencies": "numba", + # estimator type + # -------------- "capability:multivariate": True, "capability:multithreading": True, "classifier_type": "kernel", - "python_dependencies": "numba", } # valid rocket strings for input validity checking diff --git a/sktime/classification/kernel_based/_svc.py b/sktime/classification/kernel_based/_svc.py index 13ca6268c7b..3b2ff31ed13 100644 --- a/sktime/classification/kernel_based/_svc.py +++ b/sktime/classification/kernel_based/_svc.py @@ -104,6 +104,11 @@ class TimeSeriesSVC(BaseClassifier): """ _tags = { + # packaging info + # -------------- + "authors": "fkiraly", + # estimator type + # -------------- "capability:multivariate": True, "capability:unequal_length": True, "capability:missing_values": True, diff --git a/sktime/classification/kernel_based/tests/test_arsenal.py b/sktime/classification/kernel_based/tests/test_arsenal.py index 2017cf556f1..0281a9a6341 100644 --- a/sktime/classification/kernel_based/tests/test_arsenal.py +++ b/sktime/classification/kernel_based/tests/test_arsenal.py @@ -24,4 +24,4 @@ def test_contracted_arsenal(): ) arsenal.fit(X_train, y_train) - assert len(arsenal.estimators_) > 1 + assert len(arsenal.estimators_) >= 1 diff --git a/sktime/classification/model_selection/__init__.py b/sktime/classification/model_selection/__init__.py new file mode 100644 index 00000000000..72343b457b4 --- /dev/null +++ b/sktime/classification/model_selection/__init__.py @@ -0,0 +1,6 @@ +"""Tuning of time series classifiers.""" +# copyright: sktime developers, BSD-3-Clause License (see LICENSE file) + +from sktime.classification.model_selection._tune import TSCGridSearchCV + +__all__ = ["TSCGridSearchCV"] diff --git a/sktime/classification/model_selection/_tune.py b/sktime/classification/model_selection/_tune.py new file mode 100644 index 00000000000..bd723bd064e --- /dev/null +++ b/sktime/classification/model_selection/_tune.py @@ -0,0 +1,447 @@ +"""Tuning for time series classifiers.""" + +__author__ = ["fkiraly", "achieveordie"] + +import numpy as np +from sklearn.model_selection import GridSearchCV + +from sktime.classification._delegate import _DelegatedClassifier + + +class TSCGridSearchCV(_DelegatedClassifier): + """Exhaustive search over specified parameter values for an estimator. + + Adapts sklearn GridSearchCV for sktime time series classifiers + + Optimizes hyper-parameters of `estimators` by exhaustive grid search. + + Parameters + ---------- + estimator : estimator object + This is assumed to implement the scikit-learn estimator interface. + Either estimator needs to provide a ``score`` function, + or ``scoring`` must be passed. + + param_grid : dict or list of dictionaries + Dictionary with parameters names (`str`) as keys and lists of + parameter settings to try as values, or a list of such + dictionaries, in which case the grids spanned by each dictionary + in the list are explored. This enables searching over any sequence + of parameter settings. + + scoring : str, callable, list, tuple or dict, default=None + Strategy to evaluate the performance of the cross-validated model on + the test set. + + If `scoring` represents a single score, one can use: + + - a single string (see :ref:`scoring_parameter`); + - a callable (see :ref:`scoring`) that returns a single value. + + If `scoring` represents multiple scores, one can use: + + - a list or tuple of unique strings; + - a callable returning a dictionary where the keys are the metric + names and the values are the metric scores; + - a dictionary with metric names as keys and callables a values. + + n_jobs : int, default=None + Number of jobs to run in parallel. + ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. + ``-1`` means using all processors. See :term:`Glossary <n_jobs>` + for more details. + + refit : bool, str, or callable, default=True + Refit an estimator using the best found parameters on the whole + dataset. If ``False``, the ``predict`` and ``predict_probab`` will not work. + + For multiple metric evaluation, this needs to be a `str` denoting the + scorer that would be used to find the best parameters for refitting + the estimator at the end. + + Where there are considerations other than maximum score in + choosing a best estimator, ``refit`` can be set to a function which + returns the selected ``best_index_`` given ``cv_results_``. In that + case, the ``best_estimator_`` and ``best_params_`` will be set + according to the returned ``best_index_`` while the ``best_score_`` + attribute will not be available. + + The refitted estimator is made available at the ``best_estimator_`` + attribute and permits using ``predict`` directly on this + ``GridSearchCV`` instance. + + Also for multiple metric evaluation, the attributes ``best_index_``, + ``best_score_`` and ``best_params_`` will only be available if + ``refit`` is set and all of them will be determined w.r.t this specific + scorer. + + See ``scoring`` parameter to know more about multiple metric + evaluation. + + cv : int, cross-validation generator or an iterable, default=None + Determines the cross-validation splitting strategy. + Possible inputs for cv are: + + - None, to use the default 5-fold cross validation, + - integer, to specify the number of folds in a `(Stratified)KFold`, + - :term:`CV splitter`, + - An iterable yielding (train, test) splits as arrays of indices. + + For integer/None inputs, if the estimator is a classifier and ``y`` is + either binary or multiclass, :class:`StratifiedKFold` is used. In all + other cases, :class:`KFold` is used. These splitters are instantiated + with `shuffle=False` so the splits will be the same across calls. + + Refer :ref:`User Guide <cross_validation>` for the various + cross-validation strategies that can be used here. + + verbose : int + Controls the verbosity: the higher, the more messages. + + - >1 : the computation time for each fold and parameter candidate is + displayed; + - >2 : the score is also displayed; + - >3 : the fold and candidate parameter indexes are also displayed + together with the starting time of the computation. + + pre_dispatch : int, or str, default='2*n_jobs' + Controls the number of jobs that get dispatched during parallel + execution. Reducing this number can be useful to avoid an + explosion of memory consumption when more jobs get dispatched + than CPUs can process. This parameter can be: + + - None, in which case all the jobs are immediately + created and spawned. Use this for lightweight and + fast-running jobs, to avoid delays due to on-demand + spawning of the jobs + + - An int, giving the exact number of total jobs that are + spawned + + - A str, giving an expression as a function of n_jobs, + as in '2*n_jobs' + + error_score : 'raise' or numeric, default=np.nan + Value to assign to the score if an error occurs in estimator fitting. + If set to 'raise', the error is raised. If a numeric value is given, + FitFailedWarning is raised. This parameter does not affect the refit + step, which will always raise the error. + + return_train_score : bool, default=False + If ``False``, the ``cv_results_`` attribute will not include training + scores. + Computing training scores is used to get insights on how different + parameter settings impact the overfitting/underfitting trade-off. + However computing the scores on the training set can be computationally + expensive and is not strictly required to select the parameters that + yield the best generalization performance. + + tune_by_variable : bool, optional (default=False) + Whether to tune parameter by each time series variable separately, + in case of multivariate data passed to the tuning estimator. + Only applies if time series passed are strictly multivariate. + If True, clones of the estimator will be fit to each variable separately, + and are available in fields of the classifiers_ attribute. + Has the same effect as applying ColumnEnsembleClassifier wrapper to self. + If False, the same best parameter is selected for all variables. + + Attributes + ---------- + cv_results_ : dict of numpy (masked) ndarrays + A dict with keys as column headers and values as columns, that can be + imported into a pandas ``DataFrame``. + + For multi-metric evaluation, the scores for all the scorers are + available in the ``cv_results_`` dict at the keys ending with that + scorer's name (``'_<scorer_name>'``) instead of ``'_score'`` shown + above. ('split0_test_precision', 'mean_train_precision' etc.) + + best_estimator_ : estimator + Estimator that was chosen by the search, i.e. estimator + which gave highest score (or smallest loss if specified) + on the left out data. Not available if ``refit=False``. + + See ``refit`` parameter for more information on allowed values. + + best_score_ : float + Mean cross-validated score of the best_estimator + + For multi-metric evaluation, this is present only if ``refit`` is + specified. + + This attribute is not available if ``refit`` is a function. + + best_params_ : dict + Parameter setting that gave the best results on the hold out data. + + For multi-metric evaluation, this is present only if ``refit`` is + specified. + + best_index_ : int + The index (of the ``cv_results_`` arrays) which corresponds to the best + candidate parameter setting. + + The dict at ``search.cv_results_['params'][search.best_index_]`` gives + the parameter setting for the best model, that gives the highest + mean score (``search.best_score_``). + + For multi-metric evaluation, this is present only if ``refit`` is + specified. + + scorer_ : function or a dict + Scorer function used on the held out data to choose the best + parameters for the model. + + For multi-metric evaluation, this attribute holds the validated + ``scoring`` dict which maps the scorer key to the scorer callable. + + n_splits_ : int + The number of cross-validation splits (folds/iterations). + + refit_time_ : float + Seconds used for refitting the best model on the whole dataset. + + This is present only if ``refit`` is not False. + + multimetric_ : bool + Whether or not the scorers compute several metrics. + + classes_ : ndarray of shape (n_classes,) + The classes labels. This is present only if ``refit`` is specified and + the underlying estimator is a classifier. + + n_features_in_ : int + Number of features seen during :term:`fit`. Only defined if + `best_estimator_` is defined (see the documentation for the `refit` + parameter for more details) and that `best_estimator_` exposes + `n_features_in_` when fit. + + feature_names_in_ : ndarray of shape (`n_features_in_`,) + Names of features seen during :term:`fit`. Only defined if + `best_estimator_` is defined (see the documentation for the `refit` + parameter for more details) and that `best_estimator_` exposes + `feature_names_in_` when fit. + + See Also + -------- + ParameterGrid : Generates all the combinations of a hyperparameter grid. + train_test_split : Utility function to split the data into a development + set usable for fitting a GridSearchCV instance and an evaluation set + for its final evaluation. + sklearn.metrics.make_scorer : Make a scorer from a performance metric or + loss function. + """ + + _tags = { + # packaging info + # -------------- + "authors": ["fkiraly", "achieveordie"], + # estimator type + # -------------- + "X_inner_mtype": ["nested_univ", "numpy3D"], + "y_inner_mtype": ["numpy2D"], + "capability:multivariate": True, + "capability:multioutput": True, + "capability:unequal_length": True, + "capability:missing_values": True, + "capability:multithreading": True, + "capability:predict_proba": True, + } + + def __init__( + self, + estimator, + param_grid, + scoring=None, + n_jobs=None, + refit=True, + cv=None, + verbose=0, + pre_dispatch="2*n_jobs", + error_score=np.nan, + return_train_score=False, + tune_by_variable=False, + ): + self.estimator = estimator + self.param_grid = param_grid + self.scoring = scoring + self.n_jobs = n_jobs + self.refit = refit + self.cv = cv + self.verbose = verbose + self.pre_dispatch = pre_dispatch + self.error_score = error_score + self.return_train_score = return_train_score + self.tune_by_variable = tune_by_variable + + super().__init__() + + DELEGATED_PARAMS = [ + "estimator", + "param_grid", + "scoring", + "n_jobs", + "refit", + "cv", + "verbose", + "pre_dispatch", + "error_score", + "return_train_score", + ] + + gcsvargs = {k: getattr(self, k) for k in DELEGATED_PARAMS} + + self.estimator_ = GridSearchCV(**gcsvargs) + + if self.tune_by_variable: + self.set_tags(**{"capability:multioutput": False}) + + def _fit(self, X, y): + """Fit time series classifier to training data. + + private _fit containing the core logic, called from fit + + Writes to self: + Sets fitted model attributes ending in "_". + + Parameters + ---------- + X : guaranteed to be of a type in self.get_tag("X_inner_mtype") + if self.get_tag("X_inner_mtype") = "numpy3D": + 3D np.ndarray of shape = [n_instances, n_dimensions, series_length] + if self.get_tag("X_inner_mtype") = "pd-multiindex:": + pd.DataFrame with columns = variables, + index = pd.MultiIndex with first level = instance indices, + second level = time indices + for list of other mtypes, see datatypes.SCITYPE_REGISTER + for specifications, see examples/AA_datatypes_and_datasets.ipynb + y : guaranteed to be of a type in self.get_tag("y_inner_mtype") + 1D iterable, of shape [n_instances] + or 2D iterable, of shape [n_instances, n_dimensions] + class labels for fitting + if self.get_tag("capaility:multioutput") = False, guaranteed to be 1D + if self.get_tag("capaility:multioutput") = True, guaranteed to be 2D + + Returns + ------- + self : Reference to self. + """ + if y.shape[1] == 1: + y = y.flatten() + + estimator = self._get_delegate() + estimator.fit(X=X, y=y) + + fitted_param_names = [ + "cv_results_", + "best_estimator_", + "best_score_", + "best_params_", + "best_index_", + "scorer_", + "n_splits_", + "refit_time_", + "multimetric_", + "classes_", + ] + + for p in fitted_param_names: + if hasattr(estimator, p): + val = getattr(estimator, p) + setattr(self, p, val) + + return self + + def _predict(self, X): + """Predict labels for sequences in X. + + private _predict containing the core logic, called from predict + + State required: + Requires state to be "fitted". + + Accesses in self: + Fitted model attributes ending in "_" + + Parameters + ---------- + X : guaranteed to be of a type in self.get_tag("X_inner_mtype") + if self.get_tag("X_inner_mtype") = "numpy3D": + 3D np.ndarray of shape = [n_instances, n_dimensions, series_length] + if self.get_tag("X_inner_mtype") = "nested_univ": + pd.DataFrame with each column a dimension, each cell a pd.Series + for list of other mtypes, see datatypes.SCITYPE_REGISTER + for specifications, see examples/AA_datatypes_and_datasets.ipynb + + Returns + ------- + y : 1D np.array of int, of shape [n_instances] - predicted class labels + indices correspond to instance indices in X + """ + estimator = self._get_delegate() + y_pred = estimator.predict(X=X) + if y_pred.ndim == 1: + y_pred = y_pred.reshape(-1, 1) + return y_pred + + # the delegate is an sklearn estimator and it does not have get_fitted_params + # therefore we have to override _get_fitted_params from the delegator, + # which would otherwise call it + def _get_fitted_params(self): + """Get fitted parameters. + + private _get_fitted_params, called from get_fitted_params + + State required: + Requires state to be "fitted". + + Returns + ------- + fitted_params : dict with str keys + fitted parameters, keyed by names of fitted parameter + """ + return {} + + @classmethod + def get_test_params(cls, parameter_set="default"): + """Return testing parameter settings for the estimator. + + Parameters + ---------- + parameter_set : str, default="default" + Name of the set of test parameters to return, for use in tests. If no + special parameters are defined for a value, will return `"default"` set. + For classifiers, a "default" set of parameters should be provided for + general testing, and a "results_comparison" set for comparing against + previously recorded results if the general set does not produce suitable + probabilities to compare against. + + Returns + ------- + params : dict or list of dict, default={} + Parameters to create testing instances of the class. + Each dict are parameters to construct an "interesting" test instance, i.e., + `MyClass(**params)` or `MyClass(**params[i])` creates a valid test instance. + `create_test_instance` uses the first (or only) dictionary in `params`. + """ + from sklearn.gaussian_process.kernels import RBF, DotProduct + from sklearn.metrics import accuracy_score + + from sktime.classification.kernel_based import TimeSeriesSVC + from sktime.dists_kernels import AggrDist + + mean_eucl_tskernel = AggrDist(DotProduct()) + mean_rbf_tskernel = AggrDist(RBF()) + + param1 = { + "estimator": TimeSeriesSVC(kernel=mean_rbf_tskernel, probability=True), + "param_grid": {"C": [0.1, 1]}, + } + + param2 = { + "estimator": TimeSeriesSVC(kernel=mean_eucl_tskernel, probability=True), + "param_grid": {"kernel__transformer": [DotProduct(), RBF()]}, + "scoring": accuracy_score, + } + + return [param1, param2] diff --git a/sktime/classification/shapelet_based/_mrseql.py b/sktime/classification/shapelet_based/_mrseql.py index 4c93c309d56..9fa11e70cdf 100644 --- a/sktime/classification/shapelet_based/_mrseql.py +++ b/sktime/classification/shapelet_based/_mrseql.py @@ -38,9 +38,15 @@ class MrSEQL(_DelegatedClassifier): """ _tags = { - "X_inner_mtype": "nested_univ", + # packaging info + # -------------- + "authors": ["lnthach", "heerme", "fkiraly"], + "maintainers": ["lnthach", "heerme", "fkiraly"], "python_dependencies": "mrseql", "requires_cython": True, + # estimator type + # -------------- + "X_inner_mtype": "nested_univ", } def __init__( diff --git a/sktime/classification/shapelet_based/_mrsqm.py b/sktime/classification/shapelet_based/_mrsqm.py index e9b8f17f722..038a4aaef2d 100644 --- a/sktime/classification/shapelet_based/_mrsqm.py +++ b/sktime/classification/shapelet_based/_mrsqm.py @@ -30,8 +30,6 @@ class MrSQM(_DelegatedClassifier): number of representations produced by sax transformation. nsfa : int, default=0 number of representations produced by sfa transformation. - WARNING: setting this to 1 or larger will break estimator persistence (save), - known bug, see https://github.com/mlgig/mrsqm/issues/7 custom_config : dict, default=None customized parameters for the symbolic transformation. random_state : int, default=None. @@ -50,9 +48,15 @@ class MrSQM(_DelegatedClassifier): """ _tags = { - "X_inner_mtype": "nested_univ", + # packaging info + # -------------- + "authors": ["lnthach", "heerme", "fkiraly"], + "maintainers": ["lnthach", "heerme", "fkiraly"], "python_dependencies": "mrsqm", "requires_cython": True, + # estimator type + # -------------- + "X_inner_mtype": "nested_univ", } def __init__( @@ -124,15 +128,12 @@ def get_test_params(cls, parameter_set="default"): """ params1 = {} - # known problem: nsfa > 0 causes estimator to be non-pickleable - # see https://github.com/mlgig/mrsqm/issues/7 - # fix this problem once the pickling issue is resolved params2 = { "strat": "SR", "features_per_rep": 200, "selection_per_rep": 1000, "nsax": 2, - "nsfa": 0, + "nsfa": 1, "sfa_norm": False, } diff --git a/sktime/classification/shapelet_based/_stc.py b/sktime/classification/shapelet_based/_stc.py index c1bfa8a9007..af6b3306a8a 100644 --- a/sktime/classification/shapelet_based/_stc.py +++ b/sktime/classification/shapelet_based/_stc.py @@ -127,13 +127,18 @@ class ShapeletTransformClassifier(BaseClassifier): """ _tags = { + # packaging info + # -------------- + "authors": ["TonyBagnall", "MatthewMiddlehurst"], + "python_dependencies": "numba", + # estimator type + # -------------- "capability:multivariate": True, "capability:train_estimate": True, "capability:contractable": True, "capability:multithreading": True, "capability:predict_proba": True, "classifier_type": "shapelet", - "python_dependencies": "numba", } def __init__( diff --git a/sktime/classification/tests/_classification_test_reproduction.py b/sktime/classification/tests/_classification_test_reproduction.py index a552b7df614..be3bb3e64b4 100644 --- a/sktime/classification/tests/_classification_test_reproduction.py +++ b/sktime/classification/tests/_classification_test_reproduction.py @@ -14,10 +14,7 @@ ProximityForest, ShapeDTW, ) -from sktime.classification.early_classification import ( - TEASER, - ProbabilityThresholdEarlyClassifier, -) +from sktime.classification.early_classification import TEASER from sktime.classification.feature_based import ( Catch22Classifier, MatrixProfileClassifier, diff --git a/sktime/classification/tests/test_all_classifiers.py b/sktime/classification/tests/test_all_classifiers.py index d84533eace4..173edc08ac2 100644 --- a/sktime/classification/tests/test_all_classifiers.py +++ b/sktime/classification/tests/test_all_classifiers.py @@ -4,6 +4,7 @@ import numpy as np +import pandas as pd import pytest from sktime.classification.tests._expected_outputs import ( @@ -207,7 +208,30 @@ def test_handles_single_class(self, estimator_instance): X, y = make_classification_problem() y[:] = 42 - error_msg = "single class label" + error_msg = "single label" with pytest.warns(UserWarning, match=error_msg): estimator_instance.fit(X, y) + + def test_multioutput(self, estimator_instance): + """Test multioutput classification for all classifiers. + + All classifiers should follow the same interface, + those that do not genuinely should vectorize/broadcast over y. + """ + n_instances = 20 + X, y = make_classification_problem(n_instances=n_instances) + y_mult = pd.DataFrame({"a": y, "b": y}) + + estimator_instance.fit(X, y_mult) + y_pred = estimator_instance.predict(X) + + assert isinstance(y_pred, pd.DataFrame) + assert y_pred.shape == y_mult.shape + + # the estimator vectorizes iff it does not have the multioutput capability + vectorized = not estimator_instance.get_tag("capability:multioutput") + if vectorized: + assert hasattr(estimator_instance, "classifiers_") + assert isinstance(estimator_instance.classifiers_, pd.DataFrame) + assert estimator_instance.classifiers_.shape == (1, 2) diff --git a/sktime/classification/tests/test_base.py b/sktime/classification/tests/test_base.py index caf5b1b5fbf..33d0409f1e6 100644 --- a/sktime/classification/tests/test_base.py +++ b/sktime/classification/tests/test_base.py @@ -120,8 +120,6 @@ def _predict_proba(self, X): multivariate_message = r"multivariate series" missing_message = r"missing values" unequal_message = r"unequal length series" -incorrect_X_data_structure = r"must be a np.array or a pd.Series" -incorrect_y_data_structure = r"must be 1-dimensional" def test_base_classifier_fit(): @@ -144,8 +142,11 @@ def test_base_classifier_fit(): test_X3 = _create_example_dataframe(cases=cases, dimensions=1, length=length) test_X4 = _create_example_dataframe(cases=cases, dimensions=3, length=length) test_y1 = np.random.randint(0, 2, size=(cases)) + test_y2 = pd.DataFrame({"0": [1] * cases, "1": [0] * cases}) result = dummy.fit(test_X1, test_y1) assert result is dummy + result = dummy.fit(test_X3, test_y2) + assert result is dummy with pytest.raises(ValueError, match=multivariate_message): result = dummy.fit(test_X2, test_y1) assert result is dummy @@ -154,15 +155,6 @@ def test_base_classifier_fit(): with pytest.raises(ValueError, match=multivariate_message): result = dummy.fit(test_X4, test_y1) assert result is dummy - # Raise a specific error if y is in a 2D matrix (1,cases) - test_y2 = np.array([test_y1]) - # What if y is in a 2D matrix (cases,1)? - test_y2 = np.array([test_y1]).transpose() - with pytest.raises(ValueError, match=incorrect_y_data_structure): - result = dummy.fit(test_X1, test_y2) - # Pass a data fram - with pytest.raises(ValueError, match=incorrect_X_data_structure): - result = dummy.fit(test_X1, test_X3) TF = [True, False] @@ -178,36 +170,41 @@ def test_check_capabilities(missing, multivariate, unequal): handle it and that cannot. Obvs could loop, but I think its clearer to just explicitly test; """ + X_metadata = { + "has_nans": missing, + "is_univariate": not multivariate, + "is_equal_length": not unequal, + } handles_none = _DummyClassifier() handles_none_composite = _DummyComposite(_DummyClassifier()) # checks that errors are raised if missing: with pytest.raises(ValueError, match=missing_message): - handles_none._check_capabilities(missing, multivariate, unequal) + handles_none._check_capabilities(X_metadata) if multivariate: with pytest.raises(ValueError, match=multivariate_message): - handles_none._check_capabilities(missing, multivariate, unequal) + handles_none._check_capabilities(X_metadata) if unequal: with pytest.raises(ValueError, match=unequal_message): - handles_none._check_capabilities(missing, multivariate, unequal) + handles_none._check_capabilities(X_metadata) if not missing and not multivariate and not unequal: - handles_none._check_capabilities(missing, multivariate, unequal) + handles_none._check_capabilities(X_metadata) if missing: with pytest.warns(UserWarning, match=missing_message): - handles_none_composite._check_capabilities(missing, multivariate, unequal) + handles_none_composite._check_capabilities(X_metadata) if multivariate: with pytest.warns(UserWarning, match=multivariate_message): - handles_none_composite._check_capabilities(missing, multivariate, unequal) + handles_none_composite._check_capabilities(X_metadata) if unequal: with pytest.warns(UserWarning, match=unequal_message): - handles_none_composite._check_capabilities(missing, multivariate, unequal) + handles_none_composite._check_capabilities(X_metadata) if not missing and not multivariate and not unequal: - handles_none_composite._check_capabilities(missing, multivariate, unequal) + handles_none_composite._check_capabilities(X_metadata) handles_all = _DummyHandlesAllInput() - handles_all._check_capabilities(missing, multivariate, unequal) + handles_all._check_capabilities(X_metadata) def test_convert_input(): @@ -228,26 +225,26 @@ def _internal_convert(X, y=None): test_X1 = np.random.uniform(-1, 1, size=(cases, length)) test_X2 = np.random.uniform(-1, 1, size=(cases, 2, length)) tester = _DummyClassifier() - tempX = tester._convert_X(test_X2) + tempX = tester._convert_X(test_X2, "numpy3D") assert tempX.shape[0] == cases and tempX.shape[1] == 2 and tempX.shape[2] == length instance_list = [] for _ in range(0, cases): instance_list.append(pd.Series(np.random.randn(10))) test_X3 = _create_example_dataframe(cases=cases, dimensions=1, length=length) test_X4 = _create_example_dataframe(cases=cases, dimensions=3, length=length) - tempX = tester._convert_X(test_X3) + tempX = tester._convert_X(test_X3, "nested_univ") assert tempX.shape[0] == cases and tempX.shape[1] == 1 and tempX.shape[2] == length - tempX = tester._convert_X(test_X4) + tempX = tester._convert_X(test_X4, "nested_univ") assert tempX.shape[0] == cases and tempX.shape[1] == 3 and tempX.shape[2] == length tester = _DummyConvertPandas() - tempX = tester._convert_X(test_X2) + tempX = tester._convert_X(test_X2, "numpy3D") assert isinstance(tempX, pd.DataFrame) assert tempX.shape[0] == cases assert tempX.shape[1] == 2 test_y1 = np.random.randint(0, 1, size=(cases)) test_y1 = pd.Series(test_y1) tempX, tempY = _internal_convert(test_X1, test_y1) - assert isinstance(tempY, np.ndarray) + assert isinstance(tempY, pd.Series) assert isinstance(tempX, np.ndarray) assert tempX.ndim == 3 @@ -261,9 +258,10 @@ def test__check_classifier_input(): 4. Test incorrect: y as a list 5. Test incorrect: too few cases or too short a series """ + clf = _DummyClassifier() def _check_classifier_input(X, y=None, enforce_min_instances=1): - return BaseClassifier._check_classifier_input(None, X, y, enforce_min_instances) + return clf._check_input(X, y, enforce_min_instances) # 1. Test correct: X: np.array of 2 and 3 dimensions vs y:np.array and np.Series test_X1 = np.random.uniform(-1, 1, size=(5, 10)) @@ -287,9 +285,7 @@ def _check_classifier_input(X, y=None, enforce_min_instances=1): _check_classifier_input(test_X5, test_y1) # 4. Test incorrect data type: y is a List test_y3 = [1, 2, 3, 4, 5] - with pytest.raises( - TypeError, match=r".*X is not of a supported input data " r"type.*" - ): + with pytest.raises(TypeError, match="must be in an sktime compatible format"): _check_classifier_input(test_X1, test_y3) # 5. Test incorrect: too few cases or too short a series with pytest.raises(ValueError, match=r".*Minimum number of cases required*."): @@ -508,3 +504,26 @@ def test_deep_estimator_full(optimizer): # check if components are same assert full_dummy.__dict__ == deserialized_full.__dict__ + + +DUMMY_EST_PARAMETERS_FOO = [None, 10.3, "string", {"key": "value"}, lambda x: x**2] + + +@pytest.mark.skipif( + not _check_soft_dependencies("cloudpickle", severity="none"), + reason="skip test if required soft dependency not available", +) +@pytest.mark.parametrize("foo", DUMMY_EST_PARAMETERS_FOO) +def test_save_estimator_using_cloudpickle(foo): + """Check if serialization works with cloudpickle.""" + from sktime.base._serialize import load + + est = _DummyComposite(foo) + + serialized = est.save(serialization_format="cloudpickle") + loaded_est = load(serialized) + + if callable(foo): + assert est.foo(2) == loaded_est.foo(2) + else: + assert est.foo == loaded_est.foo diff --git a/sktime/classification/tests/test_sklearn_compatibility.py b/sktime/classification/tests/test_sklearn_compatibility.py index 5c417ff6ca2..00f7f15410e 100644 --- a/sktime/classification/tests/test_sklearn_compatibility.py +++ b/sktime/classification/tests/test_sklearn_compatibility.py @@ -82,7 +82,7 @@ ] ), CalibratedClassifierCV( - base_estimator=CanonicalIntervalForest.create_test_instance(), + estimator=CanonicalIntervalForest.create_test_instance(), cv=3, ), ] diff --git a/sktime/clustering/base.py b/sktime/clustering/base.py index b56b228e49f..d2408ef740d 100644 --- a/sktime/clustering/base.py +++ b/sktime/clustering/base.py @@ -36,6 +36,8 @@ class BaseClusterer(BaseEstimator): "capability:unequal_length": False, "capability:missing_values": False, "capability:multithreading": False, + "authors": "sktime developers", # author(s) of the object + "maintainers": "sktime developers", # current maintainer(s) of the object } def __init__(self, n_clusters: int = None): diff --git a/sktime/clustering/compose/_pipeline.py b/sktime/clustering/compose/_pipeline.py index a4be371ad6a..5ed27a3aafa 100644 --- a/sktime/clustering/compose/_pipeline.py +++ b/sktime/clustering/compose/_pipeline.py @@ -92,6 +92,11 @@ class ClustererPipeline(_HeterogenousMetaEstimator, BaseClusterer): """ _tags = { + # packaging info + # -------------- + "authors": "fkiraly", + # estimator type + # -------------- "X_inner_mtype": "pd-multiindex", # which type do _fit/_predict accept "capability:multivariate": False, "capability:unequal_length": False, diff --git a/sktime/clustering/dbscan.py b/sktime/clustering/dbscan.py index 46e824b1961..bd531cc34c1 100644 --- a/sktime/clustering/dbscan.py +++ b/sktime/clustering/dbscan.py @@ -62,6 +62,11 @@ class TimeSeriesDBSCAN(BaseClusterer): """ _tags = { + # packaging info + # -------------- + "authors": "fkiraly", + # estimator type + # -------------- "capability:multivariate": True, "capability:unequal_length": True, "capability:missing_values": True, diff --git a/sktime/clustering/k_means/_k_means.py b/sktime/clustering/k_means/_k_means.py index 48b7ef0046c..ed3770cde0c 100644 --- a/sktime/clustering/k_means/_k_means.py +++ b/sktime/clustering/k_means/_k_means.py @@ -82,7 +82,12 @@ class TimeSeriesKMeans(TimeSeriesLloyds): >>> y_pred = clusterer.predict(X_test) # doctest: +SKIP """ - _tags = {"python_dependencies": "numba"} + _tags = { + # packaging info + # -------------- + "authors": ["chrisholder", "TonyBagnall"], + "python_dependencies": "numba", + } def __init__( self, diff --git a/sktime/clustering/k_means/_k_means_tslearn.py b/sktime/clustering/k_means/_k_means_tslearn.py index 943be254f6e..796d95933a3 100644 --- a/sktime/clustering/k_means/_k_means_tslearn.py +++ b/sktime/clustering/k_means/_k_means_tslearn.py @@ -100,9 +100,14 @@ class TimeSeriesKMeansTslearn(_TslearnAdapter, BaseClusterer): """ _tags = { + # packaging info + # -------------- + "authors": "fkiraly", + "python_dependencies": "tslearn", + # estimator type + # -------------- "capability:multivariate": True, "capability:unequal_length": True, - "python_dependencies": "tslearn", } # defines the name of the attribute containing the tslearn estimator diff --git a/sktime/clustering/k_medoids.py b/sktime/clustering/k_medoids.py index 12b0d0fc3db..777dfa14a37 100755 --- a/sktime/clustering/k_medoids.py +++ b/sktime/clustering/k_medoids.py @@ -76,7 +76,12 @@ class TimeSeriesKMedoids(TimeSeriesLloyds): >>> y_pred = clusterer.predict(X_test) # doctest: +SKIP """ - _tags = {"python_dependencies": "numba"} + _tags = { + # packaging info + # -------------- + "authors": ["chrisholder", "TonyBagnall"], + "python_dependencies": "numba", + } def __init__( self, diff --git a/sktime/clustering/k_shapes.py b/sktime/clustering/k_shapes.py index 8156915220f..6d7d809265c 100755 --- a/sktime/clustering/k_shapes.py +++ b/sktime/clustering/k_shapes.py @@ -50,9 +50,14 @@ class TimeSeriesKShapes(_TslearnAdapter, BaseClusterer): """ _tags = { + # packaging info + # -------------- + "authors": "fkiraly", + "python_dependencies": "tslearn", + # estimator type + # -------------- "capability:multivariate": True, "capability:unequal_length": True, - "python_dependencies": "tslearn", } # defines the name of the attribute containing the tslearn estimator diff --git a/sktime/clustering/kernel_k_means.py b/sktime/clustering/kernel_k_means.py index dca518e56f6..4f0fd58c0c5 100755 --- a/sktime/clustering/kernel_k_means.py +++ b/sktime/clustering/kernel_k_means.py @@ -65,8 +65,13 @@ class TimeSeriesKernelKMeans(_TslearnAdapter, BaseClusterer): """ _tags = { - "capability:multivariate": True, + # packaging info + # -------------- + "authors": "fkiraly", "python_dependencies": "tslearn", + # estimator type + # -------------- + "capability:multivariate": True, } # defines the name of the attribute containing the tslearn estimator diff --git a/sktime/datasets/_data_io.py b/sktime/datasets/_data_io.py index cf954961213..e5e11b8279e 100644 --- a/sktime/datasets/_data_io.py +++ b/sktime/datasets/_data_io.py @@ -241,6 +241,8 @@ def _load_dataset(name, split, return_X_y, return_type=None, extract_path=None): if extract_path is None: # default for first check is sktime/datasets/data check_path = os.path.join(MODULE, "data") + else: + check_path = extract_path def _get_data_from(path): return _load_provided_dataset(name, split, return_X_y, return_type, path) diff --git a/sktime/datasets/_readers_writers/arff.py b/sktime/datasets/_readers_writers/arff.py index c18a0a1c0de..505008ac67f 100644 --- a/sktime/datasets/_readers_writers/arff.py +++ b/sktime/datasets/_readers_writers/arff.py @@ -17,7 +17,7 @@ # ================================================================================================== -# TODO: original author didnt add test for this function +# TODO: original author didn't add test for this function # Refactor the nested loops def load_from_arff_to_dataframe( full_file_path_and_name, @@ -202,7 +202,7 @@ def write_tabular_transformation_to_arff( dirt = f"{str(path)}/{str(problem_name)}-{type(transformation).__name__}/" try: os.makedirs(dirt) - except os.error: + except OSError: pass # raises os.error if path already exists # create arff file in the path file = open( diff --git a/sktime/datasets/_readers_writers/long.py b/sktime/datasets/_readers_writers/long.py index 6f7ae217f8d..3072cbd6493 100644 --- a/sktime/datasets/_readers_writers/long.py +++ b/sktime/datasets/_readers_writers/long.py @@ -8,7 +8,7 @@ from sktime.datatypes._panel._convert import from_long_to_nested -# TODO: original author didnt add test for this function, for research purposes? +# TODO: original author didn't add test for this function, for research purposes? def load_from_long_to_dataframe(full_file_path_and_name, separator=","): """Load data from a long format file into a Pandas DataFrame. diff --git a/sktime/datasets/_readers_writers/tsf.py b/sktime/datasets/_readers_writers/tsf.py index 42fdb6fa817..99b5b448442 100644 --- a/sktime/datasets/_readers_writers/tsf.py +++ b/sktime/datasets/_readers_writers/tsf.py @@ -29,7 +29,7 @@ def _convert_tsf_to_hierarchical( tsf file metadata freq : str, optional pandas compatible time frequency, by default None - if not speciffied it's automatically mapped from the tsf frequency to a pandas + if not specified it's automatically mapped from the tsf frequency to a pandas frequency value_column_name: str, optional The name of the column that contains the values, by default "series_value" diff --git a/sktime/datasets/_readers_writers/tsv.py b/sktime/datasets/_readers_writers/tsv.py index 52d1e58098e..77ea75d86bb 100644 --- a/sktime/datasets/_readers_writers/tsv.py +++ b/sktime/datasets/_readers_writers/tsv.py @@ -6,7 +6,7 @@ import pandas as pd -# TODO: original author didnt add test for this function +# TODO: original author didn't add test for this function def load_from_ucr_tsv_to_dataframe( full_file_path_and_name, return_separate_X_and_y=True ): diff --git a/sktime/datasets/_readers_writers/utils.py b/sktime/datasets/_readers_writers/utils.py index 751985b9f1d..f96055ce6ef 100644 --- a/sktime/datasets/_readers_writers/utils.py +++ b/sktime/datasets/_readers_writers/utils.py @@ -124,7 +124,7 @@ def _write_header( dirt = f"{str(path)}/{str(problem_name)}/" try: os.makedirs(dirt) - except os.error: + except OSError: pass # raises os.error if path already exists # create ts file in the path file = open(f"{dirt}{str(problem_name)}{fold}.ts", "w") @@ -210,7 +210,7 @@ def write_results_to_uea_format( output_path = f"{output_path}/{estimator_name}/Predictions/{dataset_name}/" try: os.makedirs(output_path) - except os.error: + except OSError: pass # raises os.error if path already exists, so just ignore this if split == "TRAIN" or split == "train": diff --git a/sktime/datasets/_single_problem_loaders.py b/sktime/datasets/_single_problem_loaders.py index 8785d650fe2..b49c5844ae4 100644 --- a/sktime/datasets/_single_problem_loaders.py +++ b/sktime/datasets/_single_problem_loaders.py @@ -1213,6 +1213,11 @@ def load_solar( api_version : string or None, default="v4" API version to call. If None then a stored sample of the data is loaded. + Returns + ------- + y : pd.Series + The solar generation time-series, as requested by parameters, see above + References ---------- .. [1] https://www.solar.sheffield.ac.uk/pvlive/ diff --git a/sktime/datasets/tests/test_data_io.py b/sktime/datasets/tests/test_data_io.py index 1ae45305481..9e612626828 100644 --- a/sktime/datasets/tests/test_data_io.py +++ b/sktime/datasets/tests/test_data_io.py @@ -36,7 +36,9 @@ def test_load_provided_dataset(return_X_y, return_type): X = _load_provided_dataset("UnitTest", "TRAIN", return_X_y, return_type) # Check whether object is same mtype or not, via bool - valid, check_msg, _ = check_is_mtype(X, return_type, return_metadata=True) + valid, check_msg, _ = check_is_mtype( + X, return_type, return_metadata=True, msg_return_dict="list" + ) msg = ( "load_basic_motions return has unexpected type on " f"return_X_y = {return_X_y}, return_type = {return_type}. " @@ -60,7 +62,9 @@ def test_load_basic_motions(return_X_y, return_type): X = load_basic_motions("TRAIN", return_X_y, return_type) # Check whether object is same mtype or not, via bool - valid, check_msg, _ = check_is_mtype(X, return_type, return_metadata=True) + valid, check_msg, _ = check_is_mtype( + X, return_type, return_metadata=True, msg_return_dict="list" + ) msg = ( "load_basic_motions return has unexpected type on " f"return_X_y = {return_X_y}, return_type = {return_type}. " @@ -80,6 +84,16 @@ def test_load_UCR_UEA_dataset(): assert X.shape == (42, 1) and y.shape == (42,) +def test_load_UCR_UEA_local(): + """Tests load_UCR_UEA_dataset looks for local file if extract_path is set. + + A FileNotFoundError indicates the function looked for the file and everything up + to that point went fine. + """ + with pytest.raises(FileNotFoundError): + load_UCR_UEA_dataset(name="UnitTest", extract_path=" ") + + _CHECKS = { "uschange": { "columns": ["Income", "Production", "Savings", "Unemployment"], diff --git a/sktime/datasets/tests/test_datadownload.py b/sktime/datasets/tests/test_datadownload.py new file mode 100644 index 00000000000..352244f33c1 --- /dev/null +++ b/sktime/datasets/tests/test_datadownload.py @@ -0,0 +1,82 @@ +"""Test data loaders that download from external sources.""" +from urllib.request import Request, urlopen + +import numpy as np +import pandas as pd +import pytest + +from sktime.datasets import load_forecastingdata, load_solar, load_UCR_UEA_dataset +from sktime.datasets.tsf_dataset_names import tsf_all, tsf_all_datasets + +# test tsf download only on a random uniform subsample of datasets +N_TSF_SUBSAMPLE = 3 +TSF_SUBSAMPLE = np.random.choice(tsf_all_datasets, N_TSF_SUBSAMPLE) + + +@pytest.mark.datadownload +def test_load_solar(): + """Test whether solar dataset can be downloaded.""" + solar = load_solar() + + assert isinstance(solar, pd.Series) + assert len(solar) == 5905 + + +@pytest.mark.xfail(reason="known sporadic failure of unknown cause, see #5460") +@pytest.mark.datadownload +def test_load_UEA(): + """Test loading of a random subset of the UEA data, to check API.""" + from sktime.datasets.tsc_dataset_names import multivariate, univariate + + TOO_LARGE_DATA = ["InsectWingbeat"] + + univariate = list(set(univariate).difference(TOO_LARGE_DATA)) + multivariate = list(set(multivariate).difference(TOO_LARGE_DATA)) + + n_univariate = 3 + n_multivariate = 2 + + univ_names = np.random.choice(univariate, n_univariate) + mult_names = np.random.choice(multivariate, n_multivariate) + + for univ_name in univ_names: + load_UCR_UEA_dataset(univ_name) + + for mult_name in mult_names: + load_UCR_UEA_dataset(mult_name) + + +@pytest.mark.datadownload +def test_load_forecastingdata(): + """Test loading downloaded dataset from forecasting.org.""" + file = "UnitTest" + loaded_datasets, metadata = load_forecastingdata(name=file) + assert len(loaded_datasets) == 1 + assert metadata["frequency"] == "yearly" + assert metadata["forecast_horizon"] == 4 + assert metadata["contain_missing_values"] is False + assert metadata["contain_equal_length"] is False + + +@pytest.mark.xfail(reason="known sporadic failure of unknown cause, see #5462") +@pytest.mark.datadownload +@pytest.mark.parametrize("name", TSF_SUBSAMPLE) +def test_check_link_downloadable(name): + """Test dataset URL from forecasting.org is downloadable and exits.""" + url = f"https://zenodo.org/record/{tsf_all[name]}/files/{name}.zip" + + # Send a GET request to check if the link exists without downloading the file + req = Request(url, method="HEAD") + response = urlopen(req) + + # Check if the response status code is 200 (OK) + assert ( + response.status == 200 + ), f"URL is not valid or does not exist. Error code {response.status}." + + # Check if the response headers indicate that the content is downloadable + content_type = response.headers.get("Content-Type") + content_disposition = response.headers.get("Content-Disposition") + + assert "application/octet-stream" in content_type, "URL is not downloadable." + assert "attachment" in content_disposition, "URL is not downloadable." diff --git a/sktime/datasets/tests/test_readers_writers.py b/sktime/datasets/tests/test_readers_writers.py index 022bc39efb1..bedfe87a5e6 100644 --- a/sktime/datasets/tests/test_readers_writers.py +++ b/sktime/datasets/tests/test_readers_writers.py @@ -1186,7 +1186,7 @@ def test_load_tsf_to_dataframe(input_path, return_type, output_df): assert_frame_equal(df, output_df, check_dtype=False) assert metadata == expected_metadata if return_type != "default_tsf": - assert check_is_mtype(obj=df, mtype=return_type) + assert check_is_mtype(obj=df, mtype=return_type, msg_return_dict="list") @pytest.mark.parametrize("freq", [None, "YS"]) diff --git a/sktime/datasets/tests/test_single_problem_loaders.py b/sktime/datasets/tests/test_single_problem_loaders.py index cf142e3b031..999ee4ffd26 100644 --- a/sktime/datasets/tests/test_single_problem_loaders.py +++ b/sktime/datasets/tests/test_single_problem_loaders.py @@ -1,24 +1,19 @@ -"""Test single problem loaders with varying return types.""" -from urllib.request import Request, urlopen - +"""Test single problem loaders using data shipping with sktime.""" import numpy as np import pandas as pd import pytest -from sktime.datasets import ( # Univariate; Unequal length; Multivariate +from sktime.datasets import ( load_acsf1, load_arrow_head, load_basic_motions, - load_forecastingdata, load_italy_power_demand, load_japanese_vowels, load_osuleaf, load_plaid, load_tecator, - load_UCR_UEA_dataset, load_unit_test, ) -from sktime.datasets.tsf_dataset_names import tsf_all, tsf_all_datasets UNIVARIATE_PROBLEMS = [ load_acsf1, @@ -36,10 +31,6 @@ load_japanese_vowels, ] -# test tsf download only on a random uniform subsample of datasets -N_TSF_SUBSAMPLE = 3 -TSF_SUBSAMPLE = np.random.choice(tsf_all_datasets, N_TSF_SUBSAMPLE) - @pytest.mark.parametrize( "loader", UNIVARIATE_PROBLEMS + MULTIVARIATE_PROBLEMS + UNEQUAL_LENGTH_PROBLEMS @@ -81,60 +72,3 @@ def test_load_numpy2d_multivariate_raises(loader): """Test that multivariate and/or unequal length raise the correct error.""" with pytest.raises(ValueError, match="attempting to load into a numpy2d"): X, y = loader(return_type="numpy2d") - - -@pytest.mark.xfail(reason="known sporadic failure of unknown cause, see #5460") -def test_load_UEA(): - """Test loading of a random subset of the UEA data, to check API.""" - from sktime.datasets.tsc_dataset_names import multivariate, univariate - - TOO_LARGE_DATA = ["InsectWingbeat"] - - univariate = list(set(univariate).difference(TOO_LARGE_DATA)) - multivariate = list(set(multivariate).difference(TOO_LARGE_DATA)) - - n_univariate = 3 - n_multivariate = 2 - - univ_names = np.random.choice(univariate, n_univariate) - mult_names = np.random.choice(multivariate, n_multivariate) - - for univ_name in univ_names: - load_UCR_UEA_dataset(univ_name) - - for mult_name in mult_names: - load_UCR_UEA_dataset(mult_name) - - -def test_load_forecastingdata(): - """Test loading downloaded dataset from forecasting.org.""" - file = "UnitTest" - loaded_datasets, metadata = load_forecastingdata(name=file) - assert len(loaded_datasets) == 1 - assert metadata["frequency"] == "yearly" - assert metadata["forecast_horizon"] == 4 - assert metadata["contain_missing_values"] is False - assert metadata["contain_equal_length"] is False - - -@pytest.mark.xfail(reason="known sporadic failure of unknown cause, see #5462") -@pytest.mark.parametrize("name", TSF_SUBSAMPLE) -def test_check_link_downloadable(name): - """Test dataset URL from forecasting.org is downloadable and exits.""" - url = f"https://zenodo.org/record/{tsf_all[name]}/files/{name}.zip" - - # Send a GET request to check if the link exists without downloading the file - req = Request(url, method="HEAD") - response = urlopen(req) - - # Check if the response status code is 200 (OK) - assert ( - response.status == 200 - ), f"URL is not valid or does not exist. Error code {response.status}." - - # Check if the response headers indicate that the content is downloadable - content_type = response.headers.get("Content-Type") - content_disposition = response.headers.get("Content-Disposition") - - assert "application/octet-stream" in content_type, "URL is not downloadable." - assert "attachment" in content_disposition, "URL is not downloadable." diff --git a/sktime/datatypes/__init__.py b/sktime/datatypes/__init__.py index 1c90739d014..a866eb0ea0b 100644 --- a/sktime/datatypes/__init__.py +++ b/sktime/datatypes/__init__.py @@ -3,6 +3,7 @@ __author__ = ["fkiraly"] from sktime.datatypes._check import ( + check_is_error_msg, check_is_mtype, check_is_scitype, check_raise, @@ -31,6 +32,7 @@ "ALL_TIME_SERIES_MTYPES", "check_is_mtype", "check_is_scitype", + "check_is_error_msg", "check_raise", "convert", "convert_to", diff --git a/sktime/datatypes/_adapter/dask_to_pd.py b/sktime/datatypes/_adapter/dask_to_pd.py index 627428c0b84..7e07fa45873 100644 --- a/sktime/datatypes/_adapter/dask_to_pd.py +++ b/sktime/datatypes/_adapter/dask_to_pd.py @@ -175,6 +175,10 @@ def check_dask_frame( metadata["is_empty"] = len(obj.index) < 1 or len(obj.columns) < 1 if _req("is_univariate", return_metadata): metadata["is_univariate"] = len(obj.columns) == 1 + if _req("n_features", return_metadata): + metadata["n_features"] = len(obj.columns) + if _req("feature_names", return_metadata): + metadata["feature_names"] = obj.columns.to_list() # check that columns are unique if not obj.columns.is_unique: diff --git a/sktime/datatypes/_check.py b/sktime/datatypes/_check.py index 90d9ffb5c44..192b4b57a1c 100644 --- a/sktime/datatypes/_check.py +++ b/sktime/datatypes/_check.py @@ -92,6 +92,7 @@ def check_is_mtype( scitype: str = None, return_metadata=False, var_name="obj", + msg_return_dict="dict", ): """Check object for compliance with mtype specification, return metadata. @@ -107,14 +108,23 @@ def check_is_mtype( if False, returns only "valid" return if True, returns all three return objects if str, list of str, metadata return dict is subset to keys in return_metadata - var_name: str, optional, default="obj" - name of input in error messages + var_name: str, optional, default="obj" + name of input in error messages + msg_return_dict: str, "list" or "dict", optional, default="dict" + whether returned msg, if returned is a str, dict or list + if "list", msg is str if mtype is str, list of str if mtype is list + if "dict", msg is str if mtype is str, dict of str if mtype is list, + if dict, has with mtype as key and error message for mtype as value Returns ------- valid: bool - whether obj is a valid object of mtype/scitype - msg: str or list of str - error messages if object is not valid, otherwise None - str if mtype is str; list of len(mtype) with message per mtype if list - returned only if return_metadata is True or str, list of str + msg: str or list/dict of str - error messages if object is not valid, otherwise None + list or dict type is controlled via msg_return_dict + if str: error message for tested mtype + it list: list of len(mtype) with message per mtype if list, same order as mtype + if dict: dict with mtype as key and error message for mtype as value + returned only if return_metadata is True or str, list of str metadata: dict - metadata about obj if valid, otherwise None returned only if return_metadata is True or str, list of str Keys populated depend on (assumed, otherwise identified) scitype of obj. @@ -153,7 +163,16 @@ def check_is_mtype( # we loop through individual mtypes in mtype and see whether they pass the check # for each check we remember whether it passed and what it returned - msg = [] + + # initialize loop variables + if msg_return_dict is None: + msg_return_dict = "dict" + msg = dict() + elif msg_return_dict == "list": + msg = [] + elif msg_return_dict == "dict": + msg = dict() + found_mtype = [] found_scitype = [] @@ -179,7 +198,10 @@ def check_is_mtype( found_scitype.append(scitype_of_m) final_result = res elif _metadata_requested(return_metadata): - msg.append(res[1]) + if msg_return_dict == "list": + msg.append(res[1]) + else: + msg[m] = res[1] # there are three options on the result of check_is_mtype: # a. two or more mtypes are found - this is unexpected and an error with checks @@ -200,7 +222,10 @@ def check_is_mtype( # c. no mtype is found - then return False and all error messages if requested else: if len(msg) == 1: - msg = msg[0] + if msg_return_dict == "list": + msg = msg[0] + else: + msg = list(msg.values())[0] return _ret(False, msg, None, return_metadata) @@ -237,6 +262,7 @@ def check_raise(obj, mtype: str, scitype: str = None, var_name: str = "input"): scitype=scitype, return_metadata=[], var_name=var_name, + msg_return_dict="list", ) if valid: @@ -301,6 +327,7 @@ def mtype( mtype=m_plus_scitype[0], scitype=m_plus_scitype[1], return_metadata=[], + msg_return_dict="list", ) if valid: mtypes_positive += [m_plus_scitype[0]] @@ -437,6 +464,46 @@ def check_is_scitype( return _ret(False, msg, None, return_metadata) +def check_is_error_msg(msg, var_name="obj", allowed_msg=None, raise_exception=False): + """Format and possibly raise error message from check_is_mtype or check_is_scitype. + + Parameters + ---------- + msg: dict[str, str] + error message from check_is_scitype, or from check_is_mtype with dict return + var_name: str, optional, default="obj" + name of input in error messages + allowed_msg: str, optional, default=None + message component detailing allowed mtypes or scitype combinations + raise_exception: bool or Exception, optional, default=False + whether to raise exception or return error message + if False, returns formatted error message + if True, raises TypeError with formatted error message + if Exception, raises that Exception with formatted error message + + Returns + ------- + str - formatted error message + """ + msg_invalid_input = ( + f"{var_name} must be in an sktime compatible format. {allowed_msg}" + f" See the data format tutorial examples/AA_datatypes_and_datasets.ipynb. " + f"If you think the data is already in an sktime supported input format, " + f"run sktime.datatypes.check_raise(data, mtype) to diagnose the error, " + f"where mtype is the string of the type specification you want. " + f"Error message for checked mtypes, in format [mtype: message], as follows:" + ) + for mtype, err in msg.items(): + msg_invalid_input += f" [{mtype}: {err}] " + + if raise_exception is True: + raise TypeError(msg_invalid_input) + elif raise_exception is False: + return msg_invalid_input + else: + raise raise_exception(msg_invalid_input) + + def scitype(obj, candidate_scitypes=SCITYPE_LIST, exclude_mtypes=AMBIGUOUS_MTYPES): """Infer the scitype of an object. diff --git a/sktime/datatypes/_convert.py b/sktime/datatypes/_convert.py index fcc46c2f7f6..eac9d2fe840 100644 --- a/sktime/datatypes/_convert.py +++ b/sktime/datatypes/_convert.py @@ -301,8 +301,8 @@ def _get_first_mtype_of_same_scitype(from_mtype, to_mtypes, varname="to_mtypes") ] if len(same_scitype_mtypes) == 0: raise TypeError( - f"{varname} contains no mtype compatible with the scitype of obj," - f"which is {scitype}" + f"{varname} contains no mtype compatible with the scitype of obj, " + f"which is {scitype}. Value of {varname} is: {to_mtypes}" ) to_type = same_scitype_mtypes[0] return to_type diff --git a/sktime/datatypes/_examples.py b/sktime/datatypes/_examples.py index 1ea554ede3b..f1555c7e21b 100644 --- a/sktime/datatypes/_examples.py +++ b/sktime/datatypes/_examples.py @@ -97,9 +97,9 @@ def get_examples( fixtures: dict with integer keys, elements being fixture - example for mtype `mtype`, scitype `as_scitype` if return_lossy=True, elements are pairs with fixture and - lossy: bool - whether the example is a lossy representation + lossy: bool - whether the example is a lossy representation if return_metadata=True, elements are triples with fixture, lossy, and - metadata: dict - metadata dict that would be returned by check_is_mtype + metadata: dict - metadata dict that would be returned by check_is_mtype """ # if as_scitype is None, infer from mtype if as_scitype is None: diff --git a/sktime/datatypes/_hierarchical/_check.py b/sktime/datatypes/_hierarchical/_check.py index efe1adb048b..6511f385437 100644 --- a/sktime/datatypes/_hierarchical/_check.py +++ b/sktime/datatypes/_hierarchical/_check.py @@ -36,6 +36,8 @@ "has_nans": bool, True iff the panel contains NaN values "n_instances": int, number of instances in the hierarchical panel "n_panels": int, number of flat panels in the hierarchical panel + "n_features": int, number of variables in series + "feature_names": list of int or object, names of variables in series """ __author__ = ["fkiraly"] diff --git a/sktime/datatypes/_hierarchical/_examples.py b/sktime/datatypes/_hierarchical/_examples.py index 3833a662ce1..18b798b06f7 100644 --- a/sktime/datatypes/_hierarchical/_examples.py +++ b/sktime/datatypes/_hierarchical/_examples.py @@ -114,6 +114,8 @@ "has_nans": False, "n_instances": 6, "n_panels": 2, + "n_features": 2, + "feature_names": ["var_0", "var_1"], } @@ -156,4 +158,6 @@ "has_nans": False, "n_instances": 6, "n_panels": 2, + "n_features": 1, + "feature_names": ["var_0"], } diff --git a/sktime/datatypes/_panel/_check.py b/sktime/datatypes/_panel/_check.py index 8203aadcfc3..b00f811c7d7 100644 --- a/sktime/datatypes/_panel/_check.py +++ b/sktime/datatypes/_panel/_check.py @@ -34,6 +34,8 @@ "is_one_series": bool, True iff there is only one series in the panel "has_nans": bool, True iff the panel contains NaN values "n_instances": int, number of instances in the panel + "n_features": int, number of variables in series + "feature_names": list of int or object, names of variables in series """ __author__ = ["fkiraly", "TonyBagnall"] @@ -124,6 +126,10 @@ def check_dflist_panel(obj, return_metadata=False, var_name="obj"): metadata["is_one_panel"] = True if _req("n_instances", return_metadata): metadata["n_instances"] = n + if _req("n_features", return_metadata): + metadata["n_features"] = len(obj[0].columns) + if _req("feature_names", return_metadata): + metadata["feature_names"] = obj[0].columns.to_list() return _ret(True, None, metadata, return_metadata) @@ -160,6 +166,10 @@ def check_numpy3d_panel(obj, return_metadata=False, var_name="obj"): metadata["n_panels"] = 1 if _req("is_one_panel", return_metadata): metadata["is_one_panel"] = True + if _req("n_features", return_metadata): + metadata["n_features"] = obj.shape[1] + if _req("feature_names", return_metadata): + metadata["feature_names"] = list(range(obj.shape[1])) # check whether there any nans; only if requested if _req("has_nans", return_metadata): @@ -233,7 +243,8 @@ def check_pdmultiindex_panel(obj, return_metadata=False, var_name="obj", panel=T ): msg = ( f"The (time) index of {var_name} must be sorted monotonically " - f"increasing, but found: {index}" + f"increasing. Use {var_name}.sort_index() to sort the index, or " + f"{var_name}.duplicated() to find duplicates." ) return _ret(False, msg, None, return_metadata) @@ -245,6 +256,10 @@ def check_pdmultiindex_panel(obj, return_metadata=False, var_name="obj", panel=T metadata["is_empty"] = len(index) < 1 or len(obj.columns) < 1 if _req("has_nans", return_metadata): metadata["has_nans"] = obj.isna().values.any() + if _req("n_features", return_metadata): + metadata["n_features"] = len(obj.columns) + if _req("feature_names", return_metadata): + metadata["feature_names"] = obj.columns.to_list() # check whether index is equally spaced or if there are any nans # compute only if needed @@ -396,10 +411,10 @@ def is_nested_dataframe(obj, return_metadata=False, var_name="obj"): # Check instance index is unique if not obj.index.is_unique: - duplicates = obj.index[obj.index.duplicated()].unique().to_list() msg = ( f"The instance index of {var_name} must be unique, " - f"but found duplicates: {duplicates}" + f"but found duplicates. Use {var_name}.duplicated() " + f"to find the duplicates." ) return _ret(False, msg, None, return_metadata) @@ -418,6 +433,10 @@ def is_nested_dataframe(obj, return_metadata=False, var_name="obj"): metadata["has_nans"] = _nested_dataframe_has_nans(obj) if _req("is_equal_length", return_metadata): metadata["is_equal_length"] = not _nested_dataframe_has_unequal(obj) + if _req("n_features", return_metadata): + metadata["n_features"] = len(obj.columns) + if _req("feature_names", return_metadata): + metadata["feature_names"] = obj.columns.to_list() # todo: this is temporary override, proper is_empty logic needs to be added if _req("is_empty", return_metadata): @@ -447,6 +466,10 @@ def check_numpyflat_Panel(obj, return_metadata=False, var_name="obj"): metadata["is_empty"] = len(obj) < 1 or obj.shape[1] < 1 if _req("is_univariate", return_metadata): metadata["is_univariate"] = True + if _req("n_features", return_metadata): + metadata["n_features"] = 1 + if _req("feature_names", return_metadata): + metadata["feature_names"] = [0] # np.arrays are considered equally spaced, equal length, by assumption if _req("is_equally_spaced", return_metadata): metadata["is_equally_spaced"] = True diff --git a/sktime/datatypes/_panel/_convert.py b/sktime/datatypes/_panel/_convert.py index f7655c29874..0b3c2165984 100644 --- a/sktime/datatypes/_panel/_convert.py +++ b/sktime/datatypes/_panel/_convert.py @@ -36,6 +36,7 @@ from sktime.datatypes._convert_utils._coerce import _coerce_df_dtypes from sktime.datatypes._convert_utils._convert import _extend_conversions from sktime.datatypes._panel._registry import MTYPE_LIST_PANEL +from sktime.utils.pandas import df_map from sktime.utils.validation._dependencies import _check_soft_dependencies # dictionary indexed by triples of types @@ -71,7 +72,7 @@ def _cell_is_series_or_array(cell): def _nested_cell_mask(X): - return X.applymap(_cell_is_series_or_array) + return df_map(X)(_cell_is_series_or_array) def are_columns_nested(X): @@ -820,7 +821,7 @@ def from_nested_to_multi_index(X, instance_index=None, time_index=None): X_col = X_col.infer_objects() # create the right MultiIndex and assign to X_mi - idx_df = X[[c]].applymap(lambda x: x.index).explode(c) + idx_df = df_map(X[[c]])(lambda x: x.index).explode(c) index = pd.MultiIndex.from_arrays([idx_df.index, idx_df[c].values]) index = index.set_names([instance_index, time_index]) X_col.index = index @@ -886,7 +887,7 @@ def from_nested_to_3d_numpy(X): # If all the columns are nested in structure if nested_col_mask.count(True) == len(nested_col_mask): X_3d = np.stack( - X.applymap(_convert_series_cell_to_numpy) + df_map(X)(_convert_series_cell_to_numpy) .apply(lambda row: np.stack(row), axis=1) .to_numpy() ) diff --git a/sktime/datatypes/_panel/_examples.py b/sktime/datatypes/_panel/_examples.py index e7c1503cc7a..d0b0998e3de 100644 --- a/sktime/datatypes/_panel/_examples.py +++ b/sktime/datatypes/_panel/_examples.py @@ -39,10 +39,10 @@ ) example_dict[("numpy3D", "Panel", 0)] = X -example_dict_lossy[("numpy3D", "Panel", 0)] = False +example_dict_lossy[("numpy3D", "Panel", 0)] = True example_dict[("numpyflat", "Panel", 0)] = None -example_dict_lossy[("numpyflat", "Panel", 0)] = None +example_dict_lossy[("numpyflat", "Panel", 0)] = True cols = [f"var_{i}" for i in range(2)] Xlist = [ @@ -68,7 +68,7 @@ example_dict_lossy[("pd-multiindex", "Panel", 0)] = False cols = [f"var_{i}" for i in range(2)] -X = pd.DataFrame(columns=cols, index=[0, 1, 2]) +X = pd.DataFrame(columns=cols, index=pd.RangeIndex(3)) X["var_0"] = pd.Series( [pd.Series([1, 2, 3]), pd.Series([1, 2, 3]), pd.Series([1, 2, 3])] ) @@ -101,6 +101,8 @@ "is_empty": False, "has_nans": False, "n_instances": 3, + "n_features": 2, + "feature_names": ["var_0", "var_1"], } ### @@ -112,12 +114,12 @@ ) example_dict[("numpy3D", "Panel", 1)] = X -example_dict_lossy[("numpy3D", "Panel", 1)] = False +example_dict_lossy[("numpy3D", "Panel", 1)] = True X = np.array([[4, 5, 6], [4, 55, 6], [42, 5, 6]], dtype=np.int64) example_dict[("numpyflat", "Panel", 1)] = X -example_dict_lossy[("numpyflat", "Panel", 1)] = False +example_dict_lossy[("numpyflat", "Panel", 1)] = True cols = [f"var_{i}" for i in range(1)] Xlist = [ @@ -143,7 +145,7 @@ example_dict_lossy[("pd-multiindex", "Panel", 1)] = False cols = [f"var_{i}" for i in range(1)] -X = pd.DataFrame(columns=cols, index=[0, 1, 2]) +X = pd.DataFrame(columns=cols, index=pd.RangeIndex(3)) X["var_0"] = pd.Series( [pd.Series([4, 5, 6]), pd.Series([4, 55, 6]), pd.Series([42, 5, 6])] ) @@ -173,6 +175,8 @@ "is_empty": False, "has_nans": False, "n_instances": 3, + "n_features": 1, + "feature_names": ["var_0"], } ### @@ -184,12 +188,12 @@ ) example_dict[("numpy3D", "Panel", 2)] = X -example_dict_lossy[("numpy3D", "Panel", 2)] = False +example_dict_lossy[("numpy3D", "Panel", 2)] = True X = np.array([[4, 5, 6]], dtype=np.int64) example_dict[("numpyflat", "Panel", 2)] = X -example_dict_lossy[("numpyflat", "Panel", 2)] = False +example_dict_lossy[("numpyflat", "Panel", 2)] = True cols = [f"var_{i}" for i in range(1)] Xlist = [ @@ -211,7 +215,7 @@ example_dict_lossy[("pd-multiindex", "Panel", 2)] = False cols = [f"var_{i}" for i in range(1)] -X = pd.DataFrame(columns=cols, index=[0]) +X = pd.DataFrame(columns=cols, index=pd.RangeIndex(1)) X["var_0"] = pd.Series([pd.Series([4, 5, 6])]) example_dict[("nested_univ", "Panel", 2)] = X @@ -238,6 +242,8 @@ "is_empty": False, "has_nans": False, "n_instances": 1, + "n_features": 1, + "feature_names": ["var_0"], } ### @@ -266,4 +272,6 @@ "is_empty": False, "has_nans": False, "n_instances": 3, + "n_features": 1, + "feature_names": ["var_0"], } diff --git a/sktime/datatypes/_series/_check.py b/sktime/datatypes/_series/_check.py index 9110c7d5800..fe3a47b6e8e 100644 --- a/sktime/datatypes/_series/_check.py +++ b/sktime/datatypes/_series/_check.py @@ -31,6 +31,8 @@ "is_equally_spaced": bool, True iff series index is equally spaced "is_empty": bool, True iff series has no variables or no instances "has_nans": bool, True iff the series contains NaN values + "n_features": int, number of variables in series + "feature_names": list of int or object, names of variables in series """ __author__ = ["fkiraly"] @@ -67,6 +69,10 @@ def check_pddataframe_series(obj, return_metadata=False, var_name="obj"): metadata["is_empty"] = len(index) < 1 or len(obj.columns) < 1 if _req("is_univariate", return_metadata): metadata["is_univariate"] = len(obj.columns) < 2 + if _req("n_features", return_metadata): + metadata["n_features"] = len(obj.columns) + if _req("feature_names", return_metadata): + metadata["feature_names"] = obj.columns.to_list() # check that columns are unique if not obj.columns.is_unique: @@ -125,6 +131,13 @@ def check_pdseries_series(obj, return_metadata=False, var_name="obj"): metadata["is_empty"] = len(index) < 1 if _req("is_univariate", return_metadata): metadata["is_univariate"] = True + if _req("n_features", return_metadata): + metadata["n_features"] = 1 + if _req("feature_names", return_metadata): + if not hasattr(obj, "name") or obj.name is None: + metadata["feature_names"] = [0] + else: + metadata["feature_names"] = [obj.name] # check that dtype is not object if "object" == obj.dtypes: @@ -178,12 +191,20 @@ def check_numpy_series(obj, return_metadata=False, var_name="obj"): metadata["is_empty"] = len(obj) < 1 or obj.shape[1] < 1 if _req("is_univariate", return_metadata): metadata["is_univariate"] = obj.shape[1] < 2 + if _req("n_features", return_metadata): + metadata["n_features"] = obj.shape[1] + if _req("feature_names", return_metadata): + metadata["feature_names"] = list(range(obj.shape[1])) elif len(obj.shape) == 1: # we now know obj is a 1D np.ndarray if _req("is_empty", return_metadata): metadata["is_empty"] = len(obj) < 1 if _req("is_univariate", return_metadata): metadata["is_univariate"] = True + if _req("n_features", return_metadata): + metadata["n_features"] = 1 + if _req("feature_names", return_metadata): + metadata["feature_names"] = [0] else: msg = f"{var_name} must be 1D or 2D numpy.ndarray, but found {len(obj.shape)}D" return ret(False, msg, None, return_metadata) @@ -271,6 +292,16 @@ def check_xrdataarray_series(obj, return_metadata=False, var_name="obj"): # The second dimension is the set of columns if _req("is_univariate", return_metadata): metadata["is_univariate"] = len(obj.dims) == 1 or len(obj[obj.dims[1]]) < 2 + if len(obj.dims) == 1: + if _req("n_features", return_metadata): + metadata["n_features"] = 1 + if _req("feature_names", return_metadata): + metadata["feature_names"] = [0] + else: + if _req("n_features", return_metadata): + metadata["n_features"] = len(obj[obj.dims[1]]) + if _req("feature_names", return_metadata): + metadata["feature_names"] = obj.indexes[obj.dims[1]].to_list() # check that columns are unique if not len(obj.dims) == len(set(obj.dims)): diff --git a/sktime/datatypes/_series/_examples.py b/sktime/datatypes/_series/_examples.py index 9cb5cd8bfab..f0a042bf100 100644 --- a/sktime/datatypes/_series/_examples.py +++ b/sktime/datatypes/_series/_examples.py @@ -80,6 +80,8 @@ "is_equally_spaced": True, "is_empty": False, "has_nans": False, + "n_features": 1, + "feature_names": ["a"], } ### @@ -121,6 +123,8 @@ "is_equally_spaced": True, "is_empty": False, "has_nans": False, + "n_features": 2, + "feature_names": ["a", "b"], } @@ -165,6 +169,8 @@ "is_equally_spaced": True, "is_empty": False, "has_nans": False, + "n_features": 2, + "feature_names": ["a", "b"], } ### @@ -201,4 +207,6 @@ "is_equally_spaced": True, "is_empty": False, "has_nans": False, + "n_features": 1, + "feature_names": ["a"], } diff --git a/sktime/datatypes/_table/_check.py b/sktime/datatypes/_table/_check.py index 6b25cf3335b..4a1d96d880d 100644 --- a/sktime/datatypes/_table/_check.py +++ b/sktime/datatypes/_table/_check.py @@ -65,11 +65,6 @@ def check_pddataframe_table(obj, return_metadata=False, var_name="obj"): if _req("has_nans", return_metadata): metadata["has_nans"] = obj.isna().values.any() - # check that no dtype is object - if "object" in obj.dtypes.values: - msg = f"{var_name} should not have column of 'object' dtype" - return _ret(False, msg, None, return_metadata) - return _ret(True, None, metadata, return_metadata) @@ -92,11 +87,6 @@ def check_pdseries_table(obj, return_metadata=False, var_name="obj"): if _req("n_instances", return_metadata): metadata["n_instances"] = len(index) - # check that dtype is not object - if "object" == obj.dtypes: - msg = f"{var_name} should not be of 'object' dtype" - return _ret(False, msg, None, return_metadata) - # check whether index is equally spaced or if there are any nans # compute only if needed if _req("has_nans", return_metadata): diff --git a/sktime/datatypes/_utilities.py b/sktime/datatypes/_utilities.py index d0271ca9c87..f588d5b9ad1 100644 --- a/sktime/datatypes/_utilities.py +++ b/sktime/datatypes/_utilities.py @@ -266,9 +266,11 @@ def get_cutoff( if reverse_order: ix = 0 agg = min + agg_str = "min" else: ix = -1 agg = max + agg_str = "max" def sub_idx(idx, ix, return_index=True): """Like sub-setting pd.index, but preserves freq attribute.""" @@ -309,7 +311,7 @@ def sub_idx(idx, ix, return_index=True): .groupby(level=inst_levels, sort=False) .nth(ix) .iloc[:, -1] - .agg(agg) + .agg(agg_str) ) if return_index: cuttoff_idx = ensure_index([cutoff]) diff --git a/sktime/datatypes/_vectorize.py b/sktime/datatypes/_vectorize.py index 5d6d73e3bef..b10b186d16f 100644 --- a/sktime/datatypes/_vectorize.py +++ b/sktime/datatypes/_vectorize.py @@ -61,9 +61,16 @@ class VectorizedDF: SERIES_SCITYPES = ["Series", "Panel", "Hierarchical"] def __init__( - self, X, y=None, iterate_as="Series", is_scitype="Panel", iterate_cols=False + self, + X, + y=None, + iterate_as="Series", + is_scitype="Panel", + iterate_cols=False, + remember_data=True, ): - self.X = X + if remember_data: + self.X = X if is_scitype is None: _, _, metadata = check_is_scitype( @@ -89,9 +96,15 @@ def __init__( self._check_iterate_cols(iterate_cols) self.iterate_cols = iterate_cols + self.remember_data = remember_data + self.converter_store = dict() - self.X_multiindex = self._init_conversion(X) + X_multiindex = self._init_conversion(X) + self.X_mi_columns = X_multiindex.columns + self.X_mi_index = X_multiindex.index + if remember_data: + self.X_multiindex = X_multiindex self.iter_indices = self._init_iter_indices() self.shape = self._iter_shape() @@ -148,14 +161,14 @@ def _init_iter_indices(self): iterate_as = self.iterate_as is_scitype = self.is_scitype iterate_cols = self.iterate_cols - X = self.X_multiindex + X_ix = self.X_mi_index if iterate_as == is_scitype: row_ix = None elif iterate_as == "Series": - row_ix = X.index.droplevel(-1).unique() + row_ix = X_ix.droplevel(-1).unique() elif iterate_as == "Panel": - row_ix = X.index.droplevel([-1, -2]).unique() + row_ix = X_ix.droplevel([-1, -2]).unique() else: raise RuntimeError( f"unexpected value found for attribute self.iterate_as: {iterate_as}" @@ -163,7 +176,7 @@ def _init_iter_indices(self): ) if iterate_cols: - col_ix = X.columns + col_ix = self.X_mi_columns else: col_ix = None @@ -172,7 +185,7 @@ def _init_iter_indices(self): @property def index(self): """Defaults to pandas index of X converted to pandas type.""" - return self.X_multiindex.index + return self.X_mi_index def get_iter_indices(self): """Get indices that are iterated over in vectorization. @@ -257,11 +270,14 @@ def _iter_cols(inst, group_name=None): yield group_name, None, _enforce_index_freq(inst) iter_levels = self._iter_levels(iterate_as) - is_self_iter = len(iter_levels) == self.X_multiindex.index.nlevels + is_self_iter = len(iter_levels) == self.X_mi_index.nlevels if is_self_iter: yield from _iter_cols(self.X_multiindex) else: + if isinstance(iter_levels, (list, tuple)) and len(iter_levels) == 1: + # single level, groupby expects scalar + iter_levels = iter_levels[0] for name, group in self.X_multiindex.groupby(level=iter_levels, sort=False): yield from _iter_cols(group.droplevel(iter_levels), group_name=name) @@ -285,7 +301,7 @@ def _iter_levels(self, iterate_as): iter_levels = 2 elif iterate_as == "Series": iter_levels = 1 - return list(range(self.X_multiindex.index.nlevels - iter_levels)) + return list(range(self.X_mi_index.nlevels - iter_levels)) def _iter_shape(self, iterate_as=None, iterate_cols=None): """Get the number of groups and columns to iterate over. @@ -306,11 +322,11 @@ def _iter_shape(self, iterate_as=None, iterate_cols=None): iterate_cols = self.iterate_cols iter_levels = self._iter_levels(iterate_as) - is_self_iter = len(iter_levels) == self.X_multiindex.index.nlevels + is_self_iter = len(iter_levels) == self.X_mi_index.nlevels return ( 1 if is_self_iter else self.X_multiindex.groupby(level=iter_levels).ngroups, - len(self.X_multiindex.columns) if iterate_cols else 1, + len(self.X_mi_columns) if iterate_cols else 1, ) def as_list(self): @@ -383,7 +399,7 @@ def _force_flat(df_list): row_ix, col_ix = self.get_iter_indices() force_flat = False if row_ix is None and col_ix is None: - X_mi_reconstructed = self.X_multiindex + X_mi_reconstructed = pd.DataFrame(df_list[0]) elif col_ix is None: X_mi_reconstructed = pd.concat(df_list, keys=row_ix, axis=0) elif row_ix is None: @@ -409,7 +425,7 @@ def _force_flat(df_list): X_mi_reconstructed = pd.concat(col_concats, keys=row_ix, axis=0) X_mi_index = X_mi_reconstructed.index - X_orig_row_index = self.X_multiindex.index + X_orig_row_index = self.X_mi_index flatten = col_multiindex == "flat" or (col_multiindex == "none" and force_flat) if flatten and isinstance(X_mi_reconstructed.columns, pd.MultiIndex): @@ -511,6 +527,7 @@ def vectorize_est( - "None": executes loop sequentally, simple list comprehension - "loky", "multiprocessing" and "threading": uses ``joblib.Parallel`` loops + - "joblib": custom and 3rd party ``joblib`` backends, e.g., ``spark`` - "dask": uses ``dask``, requires ``dask`` package in environment - "dask_lazy": same as "dask", but returns delayed object instead diff --git a/sktime/datatypes/tests/test_check.py b/sktime/datatypes/tests/test_check.py index 975fb8e3cc1..85d93be4e2e 100644 --- a/sktime/datatypes/tests/test_check.py +++ b/sktime/datatypes/tests/test_check.py @@ -127,7 +127,9 @@ def test_check_positive(scitype, mtype, fixture_index): # check fixtures that exist against checks that exist, when full metadata is queried if fixture is not None and check_is_defined: - check_result = check_is_mtype(fixture, mtype, scitype, return_metadata=True) + check_result = check_is_mtype( + fixture, mtype, scitype, return_metadata=True, msg_return_dict="list" + ) if not check_result[0]: msg = ( f"check_is_mtype returns False on scitype {scitype}, mtype {mtype} " @@ -138,7 +140,9 @@ def test_check_positive(scitype, mtype, fixture_index): # check fixtures that exist against checks that exist, when no metadata is queried if fixture is not None and check_is_defined: - check_result = check_is_mtype(fixture, mtype, scitype, return_metadata=[]) + check_result = check_is_mtype( + fixture, mtype, scitype, return_metadata=[], msg_return_dict="list" + ) if not check_result[0]: msg = ( f"check_is_mtype returns False on scitype {scitype}, mtype {mtype} " @@ -213,7 +217,7 @@ def test_check_metadata_inference(scitype, mtype, fixture_index): error if check itself raises an error """ # retrieve fixture for checking - fixture, _, expected_metadata = get_examples( + fixture, lossy, expected_metadata = get_examples( mtype=mtype, as_scitype=scitype, return_metadata=True ).get(fixture_index) @@ -226,6 +230,15 @@ def test_check_metadata_inference(scitype, mtype, fixture_index): # is_equal_index is not fully supported yet in inference EXCLUDE_KEYS = ["is_equal_index"] + # metadata keys to ignore if mtype is lossy + EXCLUDE_IF_LOSSY = [ + "feature_names", # lossy mtypes do not have feature names + ] + + # if mtype is in the list, add mtype specific keys to exclude + if lossy: + EXCLUDE_KEYS += EXCLUDE_IF_LOSSY + if metadata_provided: expected_metadata = expected_metadata.copy() subset_keys = set(expected_metadata.keys()).difference(EXCLUDE_KEYS) @@ -233,7 +246,9 @@ def test_check_metadata_inference(scitype, mtype, fixture_index): # check fixtures that exist against checks that exist, full metadata query if fixture is not None and check_is_defined and metadata_provided: - check_result = check_is_mtype(fixture, mtype, scitype, return_metadata=True) + check_result = check_is_mtype( + fixture, mtype, scitype, return_metadata=True, msg_return_dict="list" + ) metadata = check_result[2] # remove mtype & scitype key if exists, since comparison is on scitype level @@ -242,6 +257,11 @@ def test_check_metadata_inference(scitype, mtype, fixture_index): if "scitype" in metadata: del metadata["scitype"] + # remove keys that are not checked + for key in EXCLUDE_KEYS: + if key in metadata: + del metadata[key] + # currently we do not check this field in metadata inference msg = ( @@ -256,7 +276,11 @@ def test_check_metadata_inference(scitype, mtype, fixture_index): if fixture is not None and check_is_defined and metadata_provided: for metadata_key in subset_keys: check_result = check_is_mtype( - fixture, mtype, scitype, return_metadata=[metadata_key] + fixture, + mtype, + scitype, + return_metadata=[metadata_key], + msg_return_dict="list", ) metadata = check_result[2] @@ -317,7 +341,9 @@ def test_check_negative(scitype, mtype): # check fixtures that exist against checks that exist if fixture_wrong_type is not None and check_is_defined: - assert not check_is_mtype(fixture_wrong_type, mtype, scitype), ( + assert not check_is_mtype( + fixture_wrong_type, mtype, scitype, msg_return_dict="list" + ), ( f"check_is_mtype {mtype} returns True " f"on {wrong_mtype} fixture {i}" ) @@ -325,7 +351,11 @@ def test_check_negative(scitype, mtype): # check fixtures that exist against checks that exist if fixture_wrong_type is not None and check_is_defined: result = check_is_mtype( - fixture_wrong_type, mtype, scitype, return_metadata=[] + fixture_wrong_type, + mtype, + scitype, + return_metadata=[], + msg_return_dict="list", )[0] assert not result, ( f"check_is_mtype {mtype} returns True " diff --git a/sktime/datatypes/tests/test_convert.py b/sktime/datatypes/tests/test_convert.py index 5bd25f98dc4..dded33480a1 100644 --- a/sktime/datatypes/tests/test_convert.py +++ b/sktime/datatypes/tests/test_convert.py @@ -5,7 +5,7 @@ from sktime.datatypes import SCITYPE_REGISTER, scitype_to_mtype from sktime.datatypes._convert import _conversions_defined, convert from sktime.datatypes._examples import get_examples -from sktime.utils._testing.deep_equals import deep_equals +from sktime.utils.deep_equals import deep_equals SCITYPES = [sci[0] for sci in SCITYPE_REGISTER] diff --git a/sktime/datatypes/tests/test_convert_to.py b/sktime/datatypes/tests/test_convert_to.py index c6facdddc4b..246277b31de 100644 --- a/sktime/datatypes/tests/test_convert_to.py +++ b/sktime/datatypes/tests/test_convert_to.py @@ -4,7 +4,7 @@ from sktime.datatypes._convert import convert_to from sktime.datatypes._examples import get_examples -from sktime.utils._testing.deep_equals import deep_equals +from sktime.utils.deep_equals import deep_equals # hard-coded scitypes/mtypes to use in test_convert_to # easy to change in case the strings change diff --git a/sktime/datatypes/tests/test_panel_converters.py b/sktime/datatypes/tests/test_panel_converters.py index 2e3e4d66314..f7971c08988 100644 --- a/sktime/datatypes/tests/test_panel_converters.py +++ b/sktime/datatypes/tests/test_panel_converters.py @@ -167,7 +167,7 @@ def test_from_nested_to_multi_index(n_instances, n_columns, n_timepoints): nested, instance_index="case_id", time_index="reading_id" ) - # n_timepoints_max = nested.applymap(_nested_cell_timepoints).sum().max() + # n_timepoints_max = nested.map(_nested_cell_timepoints).sum().max() assert isinstance(mi_df, pd.DataFrame) assert mi_df.shape == (n_instances * n_timepoints, n_columns) diff --git a/sktime/datatypes/tests/test_utils.py b/sktime/datatypes/tests/test_utils.py index 0d7ef954a42..cbccfb5f70a 100644 --- a/sktime/datatypes/tests/test_utils.py +++ b/sktime/datatypes/tests/test_utils.py @@ -280,7 +280,9 @@ def test_get_window_output_type(scitype, mtype, window_length, lag): # retrieve example fixture fixture = get_examples(mtype=mtype, as_scitype=scitype, return_lossy=False)[0] X = get_window(fixture, window_length=window_length, lag=lag) - valid, err, _ = check_is_mtype(X, mtype=mtype, return_metadata=True) + valid, err, _ = check_is_mtype( + X, mtype=mtype, return_metadata=True, msg_return_dict="list" + ) msg = ( f"get_window should return an output of mtype {mtype} for that type of input, " @@ -352,7 +354,9 @@ def test_get_slice_output_type(scitype, mtype): # retrieve example fixture fixture = get_examples(mtype=mtype, as_scitype=scitype, return_lossy=False)[0] X = get_slice(fixture) - valid, err, _ = check_is_mtype(X, mtype=mtype, return_metadata=True) + valid, err, _ = check_is_mtype( + X, mtype=mtype, return_metadata=True, msg_return_dict="list" + ) msg = ( f"get_slice should return an output of mtype {mtype} for that type of input, " diff --git a/sktime/datatypes/tests/test_vectorize.py b/sktime/datatypes/tests/test_vectorize.py index 3f5dfe0c3be..c822e9ac805 100644 --- a/sktime/datatypes/tests/test_vectorize.py +++ b/sktime/datatypes/tests/test_vectorize.py @@ -10,11 +10,15 @@ from sktime.datatypes._check import AMBIGUOUS_MTYPES, check_is_mtype from sktime.datatypes._examples import get_examples from sktime.datatypes._vectorize import VectorizedDF, _enforce_index_freq -from sktime.utils._testing.deep_equals import deep_equals -from sktime.utils.validation._dependencies import _check_soft_dependencies +from sktime.utils.deep_equals import deep_equals +from sktime.utils.pandas import df_map +from sktime.utils.parallel import _get_parallel_test_fixtures SCITYPES = ["Panel", "Hierarchical"] +# list of parallelization backends to test +BACKENDS = _get_parallel_test_fixtures("estimator") + def _get_all_mtypes_for_scitype(scitype): """Return list of all mtypes for scitype. @@ -228,12 +232,20 @@ def test_item_len(scitype, mtype, fixture_index, iterate_as, iterate_cols): true_length = 1 elif iterate_as == "Series": _, _, metadata = check_is_mtype( - fixture, mtype=mtype, scitype=scitype, return_metadata=True + fixture, + mtype=mtype, + scitype=scitype, + return_metadata=True, + msg_return_dict="list", ) true_length = metadata["n_instances"] elif iterate_as == "Panel": _, _, metadata = check_is_mtype( - fixture, mtype=mtype, scitype=scitype, return_metadata=True + fixture, + mtype=mtype, + scitype=scitype, + return_metadata=True, + msg_return_dict="list", ) true_length = metadata["n_panels"] @@ -330,7 +342,10 @@ def test_series_item_mtype(scitype, mtype, fixture_index, iterate_as, iterate_co raise RuntimeError(f"found unexpected iterate_as value: {iterate_as}") X_list_valid = [ - check_is_mtype(X, mtype=correct_mtype, scitype=iterate_as) for X in X_list + check_is_mtype( + X, mtype=correct_mtype, scitype=iterate_as, msg_return_dict="list" + ) + for X in X_list ] assert np.all( @@ -421,7 +436,7 @@ def test_enforce_index_freq(item, freq): assert item.index.freq == freq -@pytest.mark.parametrize("backend", [None, "loky", "threading", "dask"]) +@pytest.mark.parametrize("backend", BACKENDS) @pytest.mark.parametrize("varname_used", [True, False]) def test_vectorize_est( scitype, mtype, fixture_index, iterate_as, iterate_cols, varname_used, backend @@ -448,10 +463,6 @@ def test_vectorize_est( if not _is_valid_iterate_as(scitype, iterate_as): return None - # escape test for dask backend if dask is not installed - if backend == "dask" and not _check_soft_dependencies("dask", severity="none"): - return None - # retrieve fixture for checking fixture = get_examples(mtype=mtype, as_scitype=scitype).get(fixture_index) X_vect = VectorizedDF( @@ -465,8 +476,10 @@ def test_vectorize_est( else: kwargs["y"] = X_vect + kwargs.update(backend) + est_clones = X_vect.vectorize_est(NaiveForecaster(), method="clone") - result = X_vect.vectorize_est(est_clones, method="fit", backend=backend, **kwargs) + result = X_vect.vectorize_est(est_clones, method="fit", **kwargs) def _len(x): if x is None: @@ -481,5 +494,5 @@ def _len(x): n_cols = _len(cols) assert isinstance(result, pd.DataFrame) assert result.shape == (n_rows, n_cols) - is_fcst_frame = result.applymap(lambda x: isinstance(x, NaiveForecaster)) + is_fcst_frame = df_map(result)(lambda x: isinstance(x, NaiveForecaster)) assert is_fcst_frame.all().all() diff --git a/sktime/dists_kernels/algebra.py b/sktime/dists_kernels/algebra.py index de1784734cf..6fc6f92d03b 100644 --- a/sktime/dists_kernels/algebra.py +++ b/sktime/dists_kernels/algebra.py @@ -53,6 +53,11 @@ class CombinedDistance(_HeterogenousMetaEstimator, BasePairwiseTransformerPanel) """ _tags = { + # packaging info + # -------------- + "authors": ["fkiraly"], + # estimator type + # -------------- "X_inner_mtype": SUPPORTED_MTYPES, "capability:missing_values": True, # can estimator handle missing data? "capability:multivariate": True, # can estimator handle multivariate data? diff --git a/sktime/dists_kernels/base/_base.py b/sktime/dists_kernels/base/_base.py index ce582a43f55..47af48e74d5 100644 --- a/sktime/dists_kernels/base/_base.py +++ b/sktime/dists_kernels/base/_base.py @@ -55,6 +55,8 @@ class BasePairwiseTransformer(BaseEstimator): "capability:missing_values": True, # can estimator handle missing data? "capability:multivariate": True, # can estimator handle multivariate data? "pwtrafo_type": "distance", # type of pw. transformer, "kernel" or "distance" + "authors": "sktime developers", # author(s) of the object + "maintainers": "sktime developers", # current maintainer(s) of the object } def __init__(self): @@ -191,6 +193,8 @@ class BasePairwiseTransformerPanel(BaseEstimator): "capability:multivariate": True, # can estimator handle multivariate data? "capability:unequal_length": True, # can dist handle unequal length panels? "pwtrafo_type": "distance", # type of pw. transformer, "kernel" or "distance" + "authors": "sktime developers", # author(s) of the object + "maintainers": "sktime developers", # current maintainer(s) of the object } def __init__(self): @@ -467,7 +471,7 @@ def transform_diag(self, X): diag = np.zeros(len(X_spl)) for i, X_instance in enumerate(X_spl): - diag[i] = self.transform(X=X_instance) + diag[i] = self.transform(X=X_instance)[0, 0] return diag diff --git a/sktime/dists_kernels/base/adapters/_tslearn.py b/sktime/dists_kernels/base/adapters/_tslearn.py index 9cbfa0c6608..e373ee65dfc 100644 --- a/sktime/dists_kernels/base/adapters/_tslearn.py +++ b/sktime/dists_kernels/base/adapters/_tslearn.py @@ -28,6 +28,11 @@ class _TslearnPwTrafoAdapter: """Base adapter mixin for tslearn distances and kernels.""" _tags = { + # packaging info + # -------------- + "python_dependencies": ["tslearn"], + # estimator type + # -------------- "symmetric": False, # is the transformer symmetric, i.e., t(x,y)=t(y,x) always? "X_inner_mtype": "df-list", # which mtype is used internally in _transform? @@ -35,7 +40,6 @@ class _TslearnPwTrafoAdapter: "capability:missing_values": True, # can estimator handle missing data? "capability:multivariate": True, # can estimator handle multivariate data? "pwtrafo_type": "distance", # type of pw. transformer, "kernel" or "distance" - "python_dependencies": ["tslearn"], } # parameters to pass to the inner tslearn estimator, list of str diff --git a/sktime/dists_kernels/compose.py b/sktime/dists_kernels/compose.py index a23a5ba35a6..ca64f62c998 100644 --- a/sktime/dists_kernels/compose.py +++ b/sktime/dists_kernels/compose.py @@ -59,6 +59,7 @@ class PwTrafoPanelPipeline(_HeterogenousMetaEstimator, BasePairwiseTransformerPa """ _tags = { + "authors": "fkiraly", "X_inner_mtype": SUPPORTED_MTYPES, "capability:missing_values": True, # can estimator handle missing data? "capability:multivariate": True, # can estimator handle multivariate data? diff --git a/sktime/dists_kernels/compose_from_align.py b/sktime/dists_kernels/compose_from_align.py index d9a7c2edabd..07790ad97d0 100644 --- a/sktime/dists_kernels/compose_from_align.py +++ b/sktime/dists_kernels/compose_from_align.py @@ -19,6 +19,7 @@ class DistFromAligner(BasePairwiseTransformerPanel): """ _tags = { + "authors": ["fkiraly"], "symmetric": True, # all the distances are symmetric "capability:unequal_length": True, # aligners can usually handle unequal length } diff --git a/sktime/dists_kernels/compose_tab_to_panel.py b/sktime/dists_kernels/compose_tab_to_panel.py index 805f030ce2a..4f2f29c2c1f 100644 --- a/sktime/dists_kernels/compose_tab_to_panel.py +++ b/sktime/dists_kernels/compose_tab_to_panel.py @@ -14,7 +14,7 @@ BasePairwiseTransformer, BasePairwiseTransformerPanel, ) -from sktime.utils._testing.deep_equals import deep_equals +from sktime.utils.deep_equals import deep_equals class AggrDist(BasePairwiseTransformerPanel): @@ -64,6 +64,8 @@ class AggrDist(BasePairwiseTransformerPanel): >>> mean_gaussian_tskernel = AggrDist(RBF()) """ + _tags = {"authors": "fkiraly"} + def __init__( self, transformer, @@ -195,6 +197,7 @@ class FlatDist(BasePairwiseTransformerPanel): """ _tags = { + "authors": "fkiraly", "X_inner_mtype": "numpy3D", # which mtype is used internally in _transform? "capability:unequal_length": False, } diff --git a/sktime/dists_kernels/ctw.py b/sktime/dists_kernels/ctw.py index 532810201d7..1b3e62b7316 100644 --- a/sktime/dists_kernels/ctw.py +++ b/sktime/dists_kernels/ctw.py @@ -50,7 +50,16 @@ class CtwDistTslearn(_TslearnPwTrafoAdapter, BasePairwiseTransformerPanel): human behavior". NIPS 2009. """ - _tags = {"symmetric": True, "pwtrafo_type": "distance"} + _tags = { + # packaging info + # -------------- + "authors": ["fkiraly"], + "python_dependencies": ["tslearn"], + # estimator type + # -------------- + "symmetric": True, + "pwtrafo_type": "distance", + } def __init__( self, diff --git a/sktime/dists_kernels/dist_to_kern.py b/sktime/dists_kernels/dist_to_kern.py index a6a0422f40b..ccdc03c735b 100644 --- a/sktime/dists_kernels/dist_to_kern.py +++ b/sktime/dists_kernels/dist_to_kern.py @@ -44,6 +44,7 @@ class KernelFromDist(BasePairwiseTransformerPanel): """ _tags = { + "authors": "fkiraly", "X_inner_mtype": SUPPORTED_MTYPES, "capability:missing_values": True, # can estimator handle missing data? "capability:multivariate": True, # can estimator handle multivariate data? @@ -175,6 +176,7 @@ class DistFromKernel(BasePairwiseTransformerPanel): """ _tags = { + "authors": "fkiraly", "X_inner_mtype": SUPPORTED_MTYPES, "capability:missing_values": True, # can estimator handle missing data? "capability:multivariate": True, # can estimator handle multivariate data? diff --git a/sktime/dists_kernels/dtw/_dtw_python.py b/sktime/dists_kernels/dtw/_dtw_python.py index adc448fe74b..ed67db4c482 100644 --- a/sktime/dists_kernels/dtw/_dtw_python.py +++ b/sktime/dists_kernels/dtw/_dtw_python.py @@ -38,13 +38,18 @@ class DtwPythonDist(_DelegatedPairwiseTransformerPanel): """ _tags = { + # packaging info + # -------------- + "authors": ["fkiraly"], + "python_dependencies": "dtw-python", + "python_dependencies_alias": {"dtw-python": "dtw"}, + # estimator type + # -------------- "pwtrafo_type": "distance", # type of pw. transformer, "kernel" or "distance" "symmetric": True, # all the distances are symmetric "capability:multivariate": True, # can estimator handle multivariate data? "capability:unequal_length": True, # can dist handle unequal length panels? "X_inner_mtype": "df-list", - "python_dependencies": "dtw-python", - "python_dependencies_alias": {"dtw-python": "dtw"}, } def __init__( diff --git a/sktime/dists_kernels/dtw/_dtw_sktime.py b/sktime/dists_kernels/dtw/_dtw_sktime.py index 67d03860286..4370a43e795 100644 --- a/sktime/dists_kernels/dtw/_dtw_sktime.py +++ b/sktime/dists_kernels/dtw/_dtw_sktime.py @@ -123,9 +123,14 @@ class DtwDist(BasePairwiseTransformerPanel): """ _tags = { + # packaging info + # -------------- + "authors": ["chrisholder", "TonyBagnall", "fkiraly"], + "python_dependencies": "numba", + # estimator type + # -------------- "symmetric": True, # all the distances are symmetric "X_inner_mtype": "numpy3D", - "python_dependencies": "numba", } def __init__( diff --git a/sktime/dists_kernels/dtw/_dtw_tslearn.py b/sktime/dists_kernels/dtw/_dtw_tslearn.py index 229228af11d..b004582d163 100644 --- a/sktime/dists_kernels/dtw/_dtw_tslearn.py +++ b/sktime/dists_kernels/dtw/_dtw_tslearn.py @@ -50,7 +50,16 @@ class DtwDistTslearn(_TslearnPwTrafoAdapter, BasePairwiseTransformerPanel): Signal Processing, vol. 26(1), pp. 43--49, 1978. """ - _tags = {"symmetric": True, "pwtrafo_type": "distance"} + _tags = { + # packaging info + # -------------- + "authors": ["fkiraly"], + "python_dependencies": ["tslearn"], + # estimator type + # -------------- + "symmetric": True, + "pwtrafo_type": "distance", + } def __init__( self, diff --git a/sktime/dists_kernels/dummy.py b/sktime/dists_kernels/dummy.py index 9508987ad7f..8044cf32a50 100644 --- a/sktime/dists_kernels/dummy.py +++ b/sktime/dists_kernels/dummy.py @@ -19,6 +19,7 @@ class ConstantPwTrafoPanel(BasePairwiseTransformerPanel): """ _tags = { + "authors": "fkiraly", "X_inner_mtype": SUPPORTED_MTYPES, "capability:missing_values": True, # can estimator handle missing data? "capability:multivariate": True, # can estimator handle multivariate data? diff --git a/sktime/dists_kernels/edit_dist.py b/sktime/dists_kernels/edit_dist.py index 55d9f5d0bd3..96c489a3154 100644 --- a/sktime/dists_kernels/edit_dist.py +++ b/sktime/dists_kernels/edit_dist.py @@ -115,9 +115,14 @@ class EditDist(BasePairwiseTransformerPanel): """ _tags = { + # packaging info + # -------------- + "authors": ["chrisholder", "TonyBagnall", "fkiraly"], + "python_dependencies": "numba", + # estimator type + # -------------- "symmetric": True, # all the distances are symmetric "X_inner_mtype": "numpy3D", - "python_dependencies": "numba", } ALLOWED_DISTANCE_STR = ["lcss", "edr", "erp", "twe"] diff --git a/sktime/dists_kernels/gak.py b/sktime/dists_kernels/gak.py index 84fcbef753a..75f0f1ee6c6 100644 --- a/sktime/dists_kernels/gak.py +++ b/sktime/dists_kernels/gak.py @@ -34,7 +34,16 @@ class GAKernel(_TslearnPwTrafoAdapter, BasePairwiseTransformerPanel): .. [1] M. Cuturi, "Fast global alignment kernels," ICML 2011. """ - _tags = {"symmetric": True, "pwtrafo_type": "kernel"} + _tags = { + # packaging info + # -------------- + "authors": ["fkiraly"], + "python_dependencies": ["tslearn"], + # estimator type + # -------------- + "symmetric": True, + "pwtrafo_type": "kernel", + } def __init__( self, diff --git a/sktime/dists_kernels/indep.py b/sktime/dists_kernels/indep.py index 5440c4a306f..b1db0648d03 100644 --- a/sktime/dists_kernels/indep.py +++ b/sktime/dists_kernels/indep.py @@ -59,6 +59,11 @@ class IndepDist(BasePairwiseTransformerPanel): """ # noqa: E501 _tags = { + # packaging info + # -------------- + "authors": "fkiraly", + # estimator type + # -------------- "X_inner_mtype": SUPPORTED_MTYPES, "capability:missing_values": True, # can estimator handle missing data? "capability:multivariate": True, # can estimator handle multivariate data? diff --git a/sktime/dists_kernels/lcss.py b/sktime/dists_kernels/lcss.py index a505cbd04a6..cf02f71de8b 100644 --- a/sktime/dists_kernels/lcss.py +++ b/sktime/dists_kernels/lcss.py @@ -44,7 +44,16 @@ class LcssTslearn(_TslearnPwTrafoAdapter, BasePairwiseTransformerPanel): IEEE Computer Society, USA, 673. """ - _tags = {"symmetric": True, "pwtrafo_type": "distance"} + _tags = { + # packaging info + # -------------- + "authors": ["fkiraly"], + "python_dependencies": ["tslearn"], + # estimator type + # -------------- + "symmetric": True, + "pwtrafo_type": "distance", + } _is_cdist = False diff --git a/sktime/dists_kernels/lucky.py b/sktime/dists_kernels/lucky.py index 00ed51605f9..ce6bafa6cab 100644 --- a/sktime/dists_kernels/lucky.py +++ b/sktime/dists_kernels/lucky.py @@ -26,6 +26,11 @@ class LuckyDtwDist(_DelegatedPairwiseTransformerPanel): """ _tags = { + # packaging info + # -------------- + "authors": ["fkiraly", "Kristian A Buza"], + # estimator type + # -------------- "symmetric": True, # is the transformer symmetric, i.e., t(x,y)=t(y,x) always? "capability:missing_values": False, # can estimator handle missing data? "capability:multivariate": True, # can estimator handle multivariate data? diff --git a/sktime/dists_kernels/scipy_dist.py b/sktime/dists_kernels/scipy_dist.py index bece6f361c1..bdda0a6da39 100644 --- a/sktime/dists_kernels/scipy_dist.py +++ b/sktime/dists_kernels/scipy_dist.py @@ -52,6 +52,7 @@ class ScipyDist(BasePairwiseTransformer): """ _tags = { + "authors": "fkiraly", "symmetric": True, # all the distances are symmetric } diff --git a/sktime/dists_kernels/signature_kernel.py b/sktime/dists_kernels/signature_kernel.py index 267f4a3f154..1d0d9962222 100644 --- a/sktime/dists_kernels/signature_kernel.py +++ b/sktime/dists_kernels/signature_kernel.py @@ -903,7 +903,11 @@ class SignatureKernel(BasePairwiseTransformerPanel): Journal of Machine Learning Research. """ - _tags = {"X_inner_mtype": "numpy3D", "pwtrafo_type": "kernel"} + _tags = { + "authors": "fkiraly", + "X_inner_mtype": "numpy3D", + "pwtrafo_type": "kernel", + } def __init__( self, diff --git a/sktime/forecasting/adapters/_hcrystalball.py b/sktime/forecasting/adapters/_hcrystalball.py index 248c28aca16..3bd290b792c 100644 --- a/sktime/forecasting/adapters/_hcrystalball.py +++ b/sktime/forecasting/adapters/_hcrystalball.py @@ -106,10 +106,16 @@ class HCrystalBallAdapter(BaseForecaster): """ _tags = { + # packaging info + # -------------- + "authors": "MichalChromcak", + "maintainers": "MichalChromcak", + "python_dependencies": "hcrystalball", + # estimator type + # -------------- "ignores-exogeneous-X": True, "requires-fh-in-fit": False, "handles-missing-data": False, - "python_dependencies": "hcrystalball", } def __init__(self, model): diff --git a/sktime/forecasting/all/__init__.py b/sktime/forecasting/all/__init__.py index 2d72d78b827..7ac4e1a8e9f 100644 --- a/sktime/forecasting/all/__init__.py +++ b/sktime/forecasting/all/__init__.py @@ -11,13 +11,6 @@ from sktime.datasets import load_airline, load_longley, load_lynx, load_shampoo_sales from sktime.forecasting.base import ForecastingHorizon from sktime.forecasting.model_evaluation import evaluate -from sktime.forecasting.model_selection import ( - CutoffSplitter, - ExpandingWindowSplitter, - SingleWindowSplitter, - SlidingWindowSplitter, - temporal_train_test_split, -) from sktime.performance_metrics.forecasting import ( GeometricMeanRelativeAbsoluteError, GeometricMeanRelativeSquaredError, @@ -58,6 +51,13 @@ relative_loss, ) from sktime.registry import all_estimators +from sktime.split import ( + CutoffSplitter, + ExpandingWindowSplitter, + SingleWindowSplitter, + SlidingWindowSplitter, + temporal_train_test_split, +) from sktime.transformations.series.detrend import Deseasonalizer, Detrender from sktime.utils.plotting import plot_series diff --git a/sktime/forecasting/arch/_statsforecast_arch.py b/sktime/forecasting/arch/_statsforecast_arch.py index 9a19f5bb421..e27ca9de3e2 100644 --- a/sktime/forecasting/arch/_statsforecast_arch.py +++ b/sktime/forecasting/arch/_statsforecast_arch.py @@ -33,6 +33,12 @@ class StatsForecastGARCH(_GeneralisedStatsForecastAdapter): """ _tags = { + # packaging info + # -------------- + "authors": ["eyjo"], + "maintainers": ["eyjo"], + # estimator type + # -------------- "ignores-exogeneous-X": False, "capability:pred_int": True, "capability:pred_int:insample": True, diff --git a/sktime/forecasting/arch/_uarch.py b/sktime/forecasting/arch/_uarch.py index c637902150e..f23700a61d4 100644 --- a/sktime/forecasting/arch/_uarch.py +++ b/sktime/forecasting/arch/_uarch.py @@ -152,12 +152,18 @@ class ARCH(BaseForecaster): """ _tags = { + # packaging info + # -------------- + "authors": "Vasudeva-bit", + "maintainers": "Vasudeva-bit", + "python_dependencies": "arch", + # estimator type + # -------------- "scitype:y": "univariate", "y_inner_mtype": "pd.Series", "X_inner_mtype": "pd.DataFrame", "requires-fh-in-fit": False, "handles-missing-data": False, - "python_dependencies": "arch", "capability:pred_int": True, "ignores-exogeneous-X": True, } @@ -251,6 +257,7 @@ def _fit(self, y, X=None, fh=None): if fh: self._horizon = fh + y_name = y.name self._forecaster = _ARCH( y=y, x=X, @@ -277,6 +284,7 @@ def _fit(self, y, X=None, fh=None): options=self.options, backcast=self.backcast, ) + y.name = y_name return self def _get_arch_result_object(self, fh=None, X=None): @@ -344,7 +352,7 @@ def _predict(self, fh, X=None): y_pred = pd.Series( ArchResultObject.mean.values[-1], index=full_range, - name=str(self._y.name), + name=self._y.name, ) y_pred = y_pred.loc[abs_idx.to_pandas()] y_pred.index = self._horizon.to_absolute_index(self.cutoff) @@ -398,11 +406,15 @@ def _predict_interval(self, fh, X, coverage): upper_int = mean_forecast + (z_critical * std_err) lower_df = pd.DataFrame( lower_int, - columns=[y_col_name + " " + str(alpha) + " " + "lower"], + columns=[ + y_col_name if y_col_name else "0" + " " + str(alpha) + " " + "lower" + ], ) upper_df = pd.DataFrame( upper_int, - columns=[y_col_name + " " + str(alpha) + " " + "upper"], + columns=[ + y_col_name if y_col_name else "0" + " " + str(alpha) + " " + "upper" + ], ) df_list.append(pd.concat((lower_df, upper_df), axis=1)) concat_df = pd.concat(df_list, axis=1) @@ -410,7 +422,7 @@ def _predict_interval(self, fh, X, coverage): OrderedDict.fromkeys( [ col_df - for col in y_col_name + for col in (y_col_name if y_col_name else "0") for col_df in concat_df.columns if col in col_df ] @@ -425,7 +437,7 @@ def _predict_interval(self, fh, X, coverage): final_columns = list( itertools.product( *[ - [y_col_name], + [y_col_name if y_col_name else 0], coverage, df.columns.get_level_values(2).unique(), ] diff --git a/sktime/forecasting/ardl.py b/sktime/forecasting/ardl.py index 3f678d706f8..6950f290cdb 100644 --- a/sktime/forecasting/ardl.py +++ b/sktime/forecasting/ardl.py @@ -193,6 +193,13 @@ class ARDL(_StatsModelsAdapter): """ _tags = { + # packaging info + # -------------- + "authors": "kcc-lion", + "maintainers": "kcc-lion", + "python_dependencies": "statsmodels>=0.13.0", + # estimator type + # -------------- "scitype:y": "univariate", # which y are fine? univariate/multivariate/both "ignores-exogeneous-X": False, # does estimator ignore the exogeneous X? "handles-missing-data": False, # can estimator handle missing data? @@ -202,8 +209,6 @@ class ARDL(_StatsModelsAdapter): "X-y-must-have-same-index": True, # can estimator handle different X/y index? "enforce_index_type": None, # index type that needs to be enforced in X/y "capability:pred_int": False, # does forecaster implement proba forecasts? - "python_version": None, # PEP 440 python version specifier to limit versions - "python_dependencies": "statsmodels>=0.13.0", } def __init__( diff --git a/sktime/forecasting/arima/__init__.py b/sktime/forecasting/arima/__init__.py new file mode 100644 index 00000000000..e248ffb30b4 --- /dev/null +++ b/sktime/forecasting/arima/__init__.py @@ -0,0 +1,10 @@ +"""Time series forecasting with ARIMA models.""" + +__all__ = [ + "AutoARIMA", + "ARIMA", + "StatsModelsARIMA", +] + +from sktime.forecasting.arima._pmdarima import ARIMA, AutoARIMA +from sktime.forecasting.arima._statsmodels import StatsModelsARIMA diff --git a/sktime/forecasting/arima.py b/sktime/forecasting/arima/_pmdarima.py similarity index 97% rename from sktime/forecasting/arima.py rename to sktime/forecasting/arima/_pmdarima.py index bb45f9816ca..9b88479a6c7 100644 --- a/sktime/forecasting/arima.py +++ b/sktime/forecasting/arima/_pmdarima.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 -u # copyright: sktime developers, BSD-3-Clause License (see LICENSE file) -"""Implements autoregressive integrated moving average (ARIMA) models.""" +"""Interface to ARIMA and AutoARIMA models from pmdarima package.""" __author__ = ["mloning", "hyang1996", "fkiraly", "ilkersigirci"] __all__ = ["AutoARIMA", "ARIMA"] @@ -9,7 +9,7 @@ class AutoARIMA(_PmdArimaAdapter): - """Wrapper of the pmdarima implementation of fitting Auto-(S)ARIMA(X) models. + """Auto-(S)ARIMA(X) forecaster, from pmdarima package. Includes automated fitting of (S)ARIMA(X) hyper-parameters (p, d, q, P, D, Q). @@ -248,6 +248,7 @@ class AutoARIMA(_PmdArimaAdapter): See Also -------- ARIMA + StatsForecastAutoARIMA References ---------- @@ -264,7 +265,16 @@ class AutoARIMA(_PmdArimaAdapter): >>> y_pred = forecaster.predict(fh=[1,2,3]) # doctest: +SKIP """ # noqa: E501 - _tags = {"handles-missing-data": True} + _tags = { + # packaging info + # -------------- + "authors": ["mloning", "hyang1996", "fkiraly", "ilkersigirci"], + "maintainers": ["hyang1996"], + # python_dependencies: "pmdarima" - inherited from _PmdArimaAdapter + # estimator type + # -------------- + "handles-missing-data": True, + } SARIMAX_KWARGS_KEYS = [ "time_varying_regression", @@ -478,7 +488,7 @@ def get_test_params(cls, parameter_set="default"): class ARIMA(_PmdArimaAdapter): - """Wrapper of the pmdarima implementation of fitting (S)ARIMA(X) models. + """(S)ARIMA(X) forecaster, from pmdarima package. Exposes `pmdarima.arima.ARIMA` [1]_ under the `sktime` interface. Seasonal ARIMA models and exogeneous input is supported, hence this estimator is @@ -661,7 +671,11 @@ def foo_loss(y_true, y_pred) >>> y_pred = forecaster.predict(fh=[1,2,3]) # doctest: +SKIP """ # noqa: E501 - _tags = {"handles-missing-data": True} + _tags = { + "authors": ["mloning", "hyang1996", "fkiraly", "ilkersigirci"], + "maintainers": ["hyang1996"], + "handles-missing-data": True, + } SARIMAX_KWARGS_KEYS = [ "time_varying_regression", diff --git a/sktime/forecasting/arima/_statsmodels.py b/sktime/forecasting/arima/_statsmodels.py new file mode 100644 index 00000000000..4325604f8d6 --- /dev/null +++ b/sktime/forecasting/arima/_statsmodels.py @@ -0,0 +1,327 @@ +# !/usr/bin/env python3 -u +# copyright: sktime developers, BSD-3-Clause License (see LICENSE file) +"""Interface to ARIMA from statsmodels package.""" + +__all__ = ["StatsModelsARIMA"] +__author__ = ["arnaujc91"] + +from typing import Iterable, Optional, Tuple, Union + +import numpy as np +import pandas as pd + +from sktime.forecasting.base.adapters import _StatsModelsAdapter + + +class StatsModelsARIMA(_StatsModelsAdapter): + """ARIMA forecaster, from statsmodels package. + + Direct interface for `statsmodels.tsa.arima.model.ARIMA`. + + Parameters + ---------- + order : tuple, optional + The (p,d,q) order of the model for the autoregressive, differences, and + moving average components. d is always an integer, while p and q may + either be integers or lists of integers. + seasonal_order : tuple, optional + The (P,D,Q,s) order of the seasonal component of the model for the + AR parameters, differences, MA parameters, and periodicity. Default + is (0, 0, 0, 0). D and s are always integers, while P and Q + may either be integers or lists of positive integers. + trend : str{'n','c','t','ct'} or iterable, optional + Parameter controlling the deterministic trend. Can be specified as a + string where 'c' indicates a constant term, 't' indicates a + linear trend in time, and 'ct' includes both. Can also be specified as + an iterable defining a polynomial, as in `numpy.poly1d`, where + `[1,1,0,1]` would denote :math:`a + bt + ct^3`. Default is 'c' for + models without integration, and no trend for models with integration. + Note that all trend terms are included in the model as exogenous + regressors, which differs from how trends are included in ``SARIMAX`` + models. See the Notes section for a precise definition of the + treatment of trend terms. + enforce_stationarity : bool, optional + Whether or not to require the autoregressive parameters to correspond + to a stationarity process. + enforce_invertibility : bool, optional + Whether or not to require the moving average parameters to correspond + to an invertible process. + concentrate_scale : bool, optional + Whether or not to concentrate the scale (variance of the error term) + out of the likelihood. This reduces the number of parameters by one. + This is only applicable when considering estimation by numerical + maximum likelihood. + trend_offset : int, optional + The offset at which to start time trend values. Default is 1, so that + if `trend='t'` the trend is equal to 1, 2, ..., nobs. Typically is only + set when the model created by extending a previous dataset. + dates : array_like of datetime, optional + If no index is given by `endog` or `exog`, an array-like object of + datetime objects can be provided. + freq : str, optional + If no index is given by `endog` or `exog`, the frequency of the + time-series may be specified here as a Pandas offset or offset string. + missing : str + Available options are 'none', 'drop', and 'raise'. If 'none', no nan + checking is done. If 'drop', any observations with nans are dropped. + If 'raise', an error is raised. Default is 'none'. + start_params : array_like, optional + Initial guess of the solution for the loglikelihood maximization. + If None, the default is given by Model.start_params. + transformed : bool, optional + Whether or not `start_params` is already transformed. Default is + True. + includes_fixed : bool, optional + If parameters were previously fixed with the `fix_params` method, + this argument describes whether or not `start_params` also includes + the fixed parameters, in addition to the free parameters. Default + is False. + method : str, optional + The method used for estimating the parameters of the model. Valid + options include 'statespace', 'innovations_mle', 'hannan_rissanen', + 'burg', 'innovations', and 'yule_walker'. Not all options are + available for every specification (for example 'yule_walker' can + only be used with AR(p) models). + method_kwargs : dict, optional + Arguments to pass to the fit function for the parameter estimator + described by the `method` argument. + gls : bool, optional + Whether or not to use generalized least squares (GLS) to estimate + regression effects. The default is False if `method='statespace'` + and is True otherwise. + gls_kwargs : dict, optional + Arguments to pass to the GLS estimation fit method. Only applicable + if GLS estimation is used (see `gls` argument for details). + cov_type : str, optional + The `cov_type` keyword governs the method for calculating the + covariance matrix of parameter estimates. Can be one of: + + - 'opg' for the outer product of gradient estimator + - 'oim' for the observed information matrix estimator, calculated + using the method of Harvey (1989) + - 'approx' for the observed information matrix estimator, + calculated using a numerical approximation of the Hessian matrix. + - 'robust' for an approximate (quasi-maximum likelihood) covariance + matrix that may be valid even in the presence of some + misspecifications. Intermediate calculations use the 'oim' + method. + - 'robust_approx' is the same as 'robust' except that the + intermediate calculations use the 'approx' method. + - 'none' for no covariance matrix calculation. + + Default is 'opg' unless memory conservation is used to avoid + computing the loglikelihood values for each observation, in which + case the default is 'oim'. + cov_kwds : dict or None, optional + A dictionary of arguments affecting covariance matrix computation. + + **opg, oim, approx, robust, robust_approx** + + - 'approx_complex_step' : bool, optional - If True, numerical + approximations are computed using complex-step methods. If False, + numerical approximations are computed using finite difference + methods. Default is True. + - 'approx_centered' : bool, optional - If True, numerical + approximations computed using finite difference methods use a + centered approximation. Default is False. + return_params : bool, optional + Whether or not to return only the array of maximizing parameters. + Default is False. + low_memory : bool, optional + If set to True, techniques are applied to substantially reduce + memory usage. If used, some features of the results object will + not be available (including smoothed results and in-sample + prediction), although out-of-sample forecasting is possible. + Default is False. + + See Also + -------- + ARIMA + SARIMAX + AutoARIMA + StatsForecastAutoARIMA + + Examples + -------- + >>> from sktime.datasets import load_airline + >>> from sktime.forecasting.arima import StatsModelsARIMA + >>> y = load_airline() + >>> forecaster = StatsModelsARIMA(order=(0, 0, 12)) # doctest: +SKIP + >>> forecaster.fit(y) # doctest: +SKIP + >>> y_pred = forecaster.predict(fh=[1,2,3]) # doctest: +SKIP + """ + + _tags = { + # packaging info + # -------------- + "authors": ["arnaujc91"], + "maintainers": ["arnaujc91"], + "ignores-exogeneous-X": False, + "capability:pred_int": True, + "capability:pred_int:insample": True, + "python_dependencies": ["statsmodels"], + } + + def __init__( + self, + order: Tuple[int, int, int] = (0, 0, 0), + seasonal_order: Tuple[int, int, int, int] = (0, 0, 0, 0), + trend: Optional[Union[str, Iterable]] = None, + enforce_stationarity: bool = True, + enforce_invertibility: bool = True, + concentrate_scale: bool = False, + trend_offset: int = 1, + dates: Optional[np.ndarray] = None, + freq: Optional[str] = None, + missing: Optional[str] = None, + validate_specification: bool = True, + start_params: Optional[np.ndarray] = None, + transformed: bool = True, + includes_fixed: bool = False, + method: Optional[str] = None, + method_kwargs: Optional[dict] = None, + gls: bool = False, + gls_kwargs: Optional[dict] = None, + cov_type: str = "opg", + cov_kwds: Optional[dict] = None, + return_params: bool = False, + low_memory: bool = False, + ): + self.order = order + self.seasonal_order = seasonal_order + self.trend = trend + self.enforce_stationarity = enforce_stationarity + self.enforce_invertibility = enforce_invertibility + self.concentrate_scale = concentrate_scale + self.trend_offset = trend_offset + self.dates = dates + self.freq = freq + self.missing = missing + self.validate_specification = validate_specification + + # Fit params + self.start_params = start_params + self.transformed = transformed + self.includes_fixed = includes_fixed + self.method = method + self.method_kwargs = method_kwargs + self.gls = gls + self.gls_kwargs = gls_kwargs + self.cov_type = cov_type + self.cov_kwds = cov_kwds + self.return_params = return_params + self.low_memory = low_memory + + super().__init__() + + def _fit_forecaster(self, y, X=None): + from statsmodels.tsa.arima.model import ARIMA as _ARIMA + + self._forecaster = _ARIMA( + endog=y, + exog=X, + order=self.order, + seasonal_order=self.seasonal_order, + trend=self.trend, + enforce_stationarity=self.enforce_stationarity, + enforce_invertibility=self.enforce_invertibility, + concentrate_scale=self.concentrate_scale, + trend_offset=self.trend_offset, + dates=self.dates, + freq=self.freq, + missing=self.missing, + validate_specification=self.validate_specification, + ) + self._fitted_forecaster = self._forecaster.fit( + start_params=self.start_params, + transformed=self.transformed, + includes_fixed=self.includes_fixed, + method=self.method, + method_kwargs=self.method_kwargs, + gls=self.gls, + gls_kwargs=self.gls_kwargs, + cov_type=self.cov_type, + cov_kwds=self.cov_kwds, + return_params=self.return_params, + low_memory=self.low_memory, + ) + + def summary(self): + """Get a summary of the fitted forecaster. + + This is the same as the implementation in statsmodels: + https://www.statsmodels.org/dev/examples/notebooks/generated/statespace_structural_harvey_jaeger.html + """ + return self._fitted_forecaster.summary() + + @staticmethod + def _extract_conf_int(prediction_results, alpha) -> pd.DataFrame: + """Construct confidence interval at specified `alpha` for each timestep. + + Parameters + ---------- + prediction_results : PredictionResults + results class, as returned by ``self._fitted_forecaster.get_prediction`` + alpha : float + one minus nominal coverage + + Returns + ------- + pd.DataFrame + confidence intervals at each timestep + + The dataframe must have at least two columns ``lower`` and ``upper``, and + the row indices must be integers relative to ``self.cutoff``. Order of + columns do not matter, and row indices must be a superset of relative + integer horizon of ``fh``. + """ + conf_int = prediction_results.conf_int(alpha=alpha) + conf_int.columns = ["lower", "upper"] + + return conf_int + + @classmethod + def get_test_params(cls, parameter_set="default"): + """Return testing parameter settings for the estimator. + + Parameters + ---------- + parameter_set : str, default="default" + Name of the set of test parameters to return, for use in tests. If no + special parameters are defined for a value, will return `"default"` set. + There are currently no reserved values for forecasters. + + Returns + ------- + params : list of dict, default = [] + Parameters to create testing instances of the class + Each dict are parameters to construct an "interesting" test instance, i.e., + `MyClass(**params)` or `MyClass(**params[i])` creates a valid test instance. + `create_test_instance` uses the first (or only) dictionary in `params` + """ + return [ + { + "order": (0, 1, 2), + "trend": "n", + "enforce_stationarity": False, + "enforce_invertibility": False, + "concentrate_scale": True, + "method": "statespace", + }, + { + "order": (1, 1, 2), + "trend": "t", + "enforce_stationarity": False, + "enforce_invertibility": False, + "method": "statespace", + }, + { + "order": (0, 0, 1), + "trend": "ct", + "seasonal_order": (1, 0, 1, 2), + "cov_type": "opg", + "gls": True, + "method": "statespace", + }, + {"cov_type": "robust", "gls": True, "method": "burg"}, + ] diff --git a/sktime/forecasting/auto_reg.py b/sktime/forecasting/auto_reg.py index c27ab585c6c..dd56bfeae64 100644 --- a/sktime/forecasting/auto_reg.py +++ b/sktime/forecasting/auto_reg.py @@ -93,13 +93,18 @@ class AutoREG(_StatsModelsAdapter): """ _tags = { + # packaging info + # -------------- + "authors": ["jonathanbechtel", "mgazian000", "CTFallon"], + "maintainers": ["jonathanbechtel", "mgazian000", "CTFallon"], + "python_dependencies": "statsmodels>=0.13.0", + # estimator type + # -------------- "y_inner_mtype": "pd.Series", "X_inner_mtype": "pd.DataFrame", "scitype:y": "univariate", "ignores-exogeneous-X": False, "requires-fh-in-fit": False, - "python_version": None, - "python_dependencies": "statsmodels>=0.13.0", } def __init__( diff --git a/sktime/forecasting/base/__init__.py b/sktime/forecasting/base/__init__.py index a101e3a62cc..9ab500c8094 100644 --- a/sktime/forecasting/base/__init__.py +++ b/sktime/forecasting/base/__init__.py @@ -1,4 +1,3 @@ -# !/usr/bin/env python3 -u # copyright: sktime developers, BSD-3-Clause License (see LICENSE file) """Implements base classes for forecasting in sktime.""" diff --git a/sktime/forecasting/base/_base.py b/sktime/forecasting/base/_base.py index 46d22661732..acc6e58227d 100644 --- a/sktime/forecasting/base/_base.py +++ b/sktime/forecasting/base/_base.py @@ -45,6 +45,7 @@ class name: BaseForecaster from sktime.base import BaseEstimator from sktime.datatypes import ( VectorizedDF, + check_is_error_msg, check_is_scitype, convert_to, get_cutoff, @@ -82,6 +83,14 @@ class BaseForecaster(BaseEstimator): # default tag values - these typically make the "safest" assumption # for more extensive documentation, see extension_templates/forecasting.py _tags = { + # packaging info + # -------------- + "authors": "sktime developers", # author(s) of the object + "maintainers": "sktime developers", # current maintainer(s) of the object + "python_version": None, # PEP 440 python version specifier to limit versions + "python_dependencies": None, # str or list of str, package soft dependencies + # estimator type + # -------------- "object_type": "forecaster", # type of object "scitype:y": "univariate", # which y are fine? univariate/multivariate/both "ignores-exogeneous-X": False, # does estimator ignore the exogeneous X? @@ -95,41 +104,30 @@ class BaseForecaster(BaseEstimator): "X-y-must-have-same-index": True, # can estimator handle different X/y index? "enforce_index_type": None, # index type that needs to be enforced in X/y "fit_is_empty": False, # is fit empty and can be skipped? - "python_version": None, # PEP 440 python version specifier to limit versions - "python_dependencies": None, # str or list of str, package soft dependencies } # configs and default config values + # see set_config documentation for details _config = { "backend:parallel": None, # parallelization backend for broadcasting # {None, "dask", "loky", "multiprocessing", "threading"} # None: no parallelization # "loky", "multiprocessing" and "threading": uses `joblib` Parallel loops + # "joblib": uses custom joblib backend, set via `joblib_backend` tag # "dask": uses `dask`, requires `dask` package in environment "backend:parallel:params": None, # params for parallelization backend + "remember_data": True, # whether to remember data in fit - self._X, self._y } _config_doc = { - "backend:parallel": """ - backend:parallel : str, optional, default="None" - backend to use for parallelization when broadcasting/vectorizing, one of - - - "None": executes loop sequentally, simple list comprehension - - "loky", "multiprocessing" and "threading": uses ``joblib`` ``Parallel`` - - "dask": uses ``dask``, requires ``dask`` package in environment - """, - "backend:parallel:params": """ - backend:parallel:params : dict, optional, default={} (no parameters passed) - additional parameters passed to the parallelization backend as config. - Valid keys depend on the value of ``backend:parallel``: - - - "None": no additional parameters, ``backend_params`` is ignored - - "loky", "multiprocessing" and "threading": - any valid keys for ``joblib.Parallel`` can be passed here, - e.g., ``n_jobs``, with the exception of ``backend`` which is directly - controlled by ``backend:parallel`` - - "dask": any valid keys for ``dask.compute`` - can be passed, e.g., ``scheduler`` + "remember_data": """ + remember_data : bool, default=True + whether self._X and self._y are stored in fit, and updated + in update. If True, self._X and self._y are stored and updated. + If False, self._X and self._y are not stored and updated. + This reduces serialization size when using save, + but the update will default to "do nothing" rather than + "refit to all data seen". """, } @@ -390,11 +388,7 @@ def fit(self, y, X=None, fh=None): return self - def predict( - self, - fh=None, - X=None, - ): + def predict(self, fh=None, X=None): """Forecast time series at future horizon. State required: @@ -443,16 +437,20 @@ def predict( # convert to output mtype, identical with last y mtype seen y_out = convert_to( y_pred, - self._y_mtype_last_seen, + self._y_metadata["mtype"], store=self._converter_store_y, store_behaviour="freeze", ) return y_out - def fit_predict(self, y, X=None, fh=None): + def fit_predict(self, y, X=None, fh=None, X_pred=None): """Fit and forecast time series at future horizon. + Same as ``fit(y, X, fh).predict(X_pred)``. + If ``X_pred`` is not passed, same as + ``fit(y, fh, X).predict(X)``. + State change: Changes state to "fitted". @@ -466,32 +464,38 @@ def fit_predict(self, y, X=None, fh=None): Parameters ---------- y : time series in sktime compatible data container format - Time series to which to fit the forecaster. + Time series to which to fit the forecaster. y can be in one of the following formats: Series scitype: pd.Series, pd.DataFrame, or np.ndarray (1D or 2D) - for vanilla forecasting, one time series + for vanilla forecasting, one time series Panel scitype: pd.DataFrame with 2-level row MultiIndex, - 3D np.ndarray, list of Series pd.DataFrame, or nested pd.DataFrame - for global or panel forecasting + 3D np.ndarray, list of Series pd.DataFrame, or nested pd.DataFrame + for global or panel forecasting Hierarchical scitype: pd.DataFrame with 3 or more level row MultiIndex - for hierarchical forecasting + for hierarchical forecasting Number of columns admissible depend on the "scitype:y" tag: - if self.get_tag("scitype:y")=="univariate": - y must have a single column/variable - if self.get_tag("scitype:y")=="multivariate": - y must have 2 or more columns - if self.get_tag("scitype:y")=="both": no restrictions on columns apply + if self.get_tag("scitype:y")=="univariate": + y must have a single column/variable + if self.get_tag("scitype:y")=="multivariate": + y must have 2 or more columns + if self.get_tag("scitype:y")=="both": no restrictions on columns apply For further details: - on usage, see forecasting tutorial examples/01_forecasting.ipynb - on specification of formats, examples/AA_datatypes_and_datasets.ipynb + on usage, see forecasting tutorial examples/01_forecasting.ipynb + on specification of formats, examples/AA_datatypes_and_datasets.ipynb fh : int, list, np.array or ForecastingHorizon (not optional) The forecasting horizon encoding the time stamps to forecast at. if has not been passed in fit, must be passed, not optional X : time series in sktime compatible format, optional (default=None) - Exogeneous time series to fit to + Exogeneous time series to fit to Should be of same scitype (Series, Panel, or Hierarchical) as y in fit - if self.get_tag("X-y-must-have-same-index"), - X.index must contain fh.index and y.index both + If ``self.get_tag("X-y-must-have-same-index")`` is True, + X.index must contain y.index. + If, in addition, X_pred is not passed, X must also contain fh.index. + X_pred : time series in sktime compatible format, optional (default=None) + Exogeneous time series to use in predict + If passed, will be used in predict instead of X. + If ``self.get_tag("X-y-must-have-same-index")`` is True, + X_pred.index must contain fh.index. Returns ------- @@ -500,6 +504,12 @@ def fit_predict(self, y, X=None, fh=None): y_pred has same type as the y that has been passed most recently: Series, Panel, Hierarchical scitype, same format (see above) """ + # if X_pred is passed, run fit/predict with different X + if X_pred is not None: + return self.fit(y=y, X=X, fh=fh).predict(X=X_pred) + # otherwise, we use the same X for fit and predict + # below code carries out conversion and checks for X only once + # if fit is called, fitted state is re-set self._is_fitted = False @@ -1091,7 +1101,7 @@ def update_predict_single( # convert to output mtype, identical with last y mtype seen y_pred = convert_to( y_pred, - self._y_mtype_last_seen, + self._y_metadata["mtype"], store=self._converter_store_y, store_behaviour="freeze", ) @@ -1148,7 +1158,7 @@ def predict_residuals(self, y=None, X=None): fh_orig = None # if no y is passed, the so far observed y is used - if y is None: + if y is None and self.get_config()["remember_data"]: y = self._y # we want residuals, so fh must be the index of y @@ -1171,8 +1181,8 @@ def predict_residuals(self, y=None, X=None): y_pred = self.predict(fh=fh, X=X) - if not type(y_pred) is type(y): - y = convert_to(y, self._y_mtype_last_seen) + if type(y_pred) is not type(y): + y = convert_to(y, self._y_metadata["mtype"]) y_res = y - y_pred @@ -1319,7 +1329,7 @@ def _check_X_y(self, X=None, y=None): Writes to self -------------- - _y_mtype_last_seen : str, mtype of y + _y_metadata : dict with str keys, metadata from checking y _converter_store_y : dict, metadata from conversion for back-conversion """ if X is None and y is None: @@ -1334,7 +1344,11 @@ def _most_complex_scitype(scitypes, smaller_equal_than=None): elif "Series" in scitypes: return "Series" else: - raise ValueError("no series scitypes supported, bug in estimator") + raise ValueError( + f"Error in {type(self).__name__}, no series scitypes supported, " + "likely a bug in estimator: scitypes arg passed to " + f"_most_complex_scitype are {scitypes}" + ) def _check_missing(metadata, obj_name): """Check input metadata against self's missing capability tag.""" @@ -1370,34 +1384,40 @@ def _check_missing(metadata, obj_name): # checking y if y is not None: # request only required metadata from checks - y_metadata_required = [] + y_metadata_required = ["n_features", "feature_names"] if self.get_tag("scitype:y") != "both": y_metadata_required += ["is_univariate"] if not self.get_tag("handles-missing-data"): y_metadata_required += ["has_nans"] - y_valid, _, y_metadata = check_is_scitype( + y_valid, y_msg, y_metadata = check_is_scitype( y, scitype=ALLOWED_SCITYPES, return_metadata=y_metadata_required, var_name="y", ) - msg = ( - "y must be in an sktime compatible format, " - f"of scitype {', '.join(ALLOWED_SCITYPES)}, " + + msg_start = ( + f"Unsupported input data type in {self.__class__.__name__}, input y" + ) + allowed_msg = ( + "Allowed scitypes for y in forecasting are " + f"{', '.join(ALLOWED_SCITYPES)}, " "for instance a pandas.DataFrame with sktime compatible time indices, " "or with MultiIndex and last(-1) level an sktime compatible time index." " See the forecasting tutorial examples/01_forecasting.ipynb, or" - " the data format tutorial examples/AA_datatypes_and_datasets.ipynb," - "If you think y is already in an sktime supported input format, " - "run sktime.datatypes.check_raise(y, mtype) to diagnose the error, " - "where mtype is the string of the type specification you want for y. " - "Possible mtype specification strings are as follows: " + " the data format tutorial examples/AA_datatypes_and_datasets.ipynb" ) if not y_valid: - raise TypeError(msg + ", ".join(mtypes_messages)) + check_is_error_msg( + y_msg, + var_name=msg_start, + allowed_msg=allowed_msg, + raise_exception=True, + ) y_scitype = y_metadata["scitype"] + self._y_metadata = y_metadata self._y_mtype_last_seen = y_metadata["mtype"] req_vec_because_rows = y_scitype not in y_inner_scitype @@ -1412,7 +1432,9 @@ def _check_missing(metadata, obj_name): and y_metadata["is_univariate"] ): raise ValueError( - "y must have two or more variables, but found only one" + f"Unsupported input data type in {type(self).__name__}, " + "this forecaster accepts only strictly multivariate data. " + "y must have two or more variables, but found only one." ) _check_missing(y_metadata, "y") @@ -1430,27 +1452,31 @@ def _check_missing(metadata, obj_name): if not self.get_tag("handles-missing-data"): X_metadata_required += ["has_nans"] - X_valid, _, X_metadata = check_is_scitype( + X_valid, X_msg, X_metadata = check_is_scitype( X, scitype=ALLOWED_SCITYPES, return_metadata=X_metadata_required, var_name="X", ) - msg = ( - "X must be either None, or in an sktime compatible format, " - "of scitype Series, Panel or Hierarchical, " + msg_start = ( + f"Unsupported input data type in {self.__class__.__name__}, input X" + ) + allowed_msg = ( + "Allowed scitypes for X in forecasting are None, " + f"{', '.join(ALLOWED_SCITYPES)}, " "for instance a pandas.DataFrame with sktime compatible time indices, " "or with MultiIndex and last(-1) level an sktime compatible time index." " See the forecasting tutorial examples/01_forecasting.ipynb, or" " the data format tutorial examples/AA_datatypes_and_datasets.ipynb" - "If you think X is already in an sktime supported input format, " - "run sktime.datatypes.check_raise(X, mtype) to diagnose the error, " - "where mtype is the string of the type specification you want for X. " - "Possible mtype specification strings are as follows. " ) if not X_valid: - raise TypeError(msg + ", ".join(mtypes_messages)) + check_is_error_msg( + X_msg, + var_name=msg_start, + allowed_msg=allowed_msg, + raise_exception=True, + ) X_scitype = X_metadata["scitype"] X_requires_vectorization = X_scitype not in X_inner_scitype @@ -1533,10 +1559,9 @@ def _check_X(self, X=None): return self._check_X_y(X=X)[0] def _update_X(self, X, enforce_index_type=None): - if X is not None: + if X is not None and self.get_config()["remember_data"]: X = check_X(X, enforce_index_type=enforce_index_type) - if X is len(X) > 0: - self._X = X.combine_first(self._X) + self._X = update_data(self._X, X) def _update_y_X(self, y, X=None, enforce_index_type=None): """Update internal memory of seen training data. @@ -1565,7 +1590,7 @@ def _update_y_X(self, y, X=None, enforce_index_type=None): X : pd.DataFrame or 2D np.ndarray, optional (default=None) Exogeneous time series """ - if y is not None: + if y is not None and self.get_config()["remember_data"]: # unwrap y if VectorizedDF if isinstance(y, VectorizedDF): y = y.X_multiindex @@ -1578,7 +1603,7 @@ def _update_y_X(self, y, X=None, enforce_index_type=None): # set cutoff to the end of the observation horizon self._set_cutoff_from_y(y) - if X is not None: + if X is not None and self.get_config()["remember_data"]: # unwrap X if VectorizedDF if isinstance(X, VectorizedDF): X = X.X_multiindex @@ -1914,7 +1939,7 @@ def _update(self, y, X=None, update_params=True): ------- self : reference to self """ - if update_params: + if update_params and self.get_config()["remember_data"]: # default to re-fitting if update is not implemented warn( f"NotImplementedWarning: {self.__class__.__name__} " @@ -1928,12 +1953,14 @@ def _update(self, y, X=None, update_params=True): # we need to overwrite the mtype last seen and converter store, since the _y # may have been converted mtype_last_seen = self._y_mtype_last_seen + y_metadata = self._y_metadata _converter_store_y = self._converter_store_y # refit with updated data, not only passed data self.fit(y=self._y, X=self._X, fh=self._fh) # todo: should probably be self._fit, not self.fit # but looping to self.fit for now to avoid interface break self._y_mtype_last_seen = mtype_last_seen + self._y_metadata = y_metadata self._converter_store_y = _converter_store_y # if update_params=False, and there are no components, do nothing @@ -2026,14 +2053,9 @@ def _predict_interval(self, fh, X, coverage): # change the column labels (multiindex) to the format for intervals # idx returned by _predict_quantiles is # 2-level MultiIndex with variable names, alpha - idx = pred_int.columns - # variable names (unique, in same order) - var_names = idx.get_level_values(0).unique() - # idx returned by _predict_interval should be # 3-level MultiIndex with variable names, coverage, lower/upper - int_idx = pd.MultiIndex.from_product([var_names, coverage, ["lower", "upper"]]) - + int_idx = self._get_columns(method="predict_interval", coverage=coverage) pred_int.columns = int_idx return pred_int @@ -2099,14 +2121,9 @@ def _predict_quantiles(self, fh, X, alpha): # change the column labels (multiindex) to the format for intervals # idx returned by _predict_interval is # 3-level MultiIndex with variable names, coverage, lower/upper - idx = pred_int.columns - # variable names (unique, in same order) - var_names = idx.get_level_values(0).unique() - # idx returned by _predict_quantiles should be # is 2-level MultiIndex with variable names, alpha - int_idx = pd.MultiIndex.from_product([var_names, alpha]) - + int_idx = self._get_columns(method="predict_quantiles", alpha=alpha) pred_int.columns = int_idx elif implements_proba: @@ -2176,8 +2193,9 @@ def _predict_var(self, fh=None, X=None, cov=False): if fh.is_relative: fh = fh.to_absolute(self.cutoff) pred_var.index = fh.to_pandas() - if isinstance(self._y, pd.DataFrame): - pred_var.columns = self._y.columns + + if isinstance(pred_var, pd.DataFrame): + pred_var.columns = self._get_columns(method="predict_var") return pred_var @@ -2345,29 +2363,78 @@ def _predict_moving_cutoff( for i in range(len(y_preds)): y_preds[i] = convert_to( y_preds[i], - self._y_mtype_last_seen, + self._y_metadata["mtype"], store=self._converter_store_y, store_behaviour="freeze", ) return _format_moving_cutoff_predictions(y_preds, cutoffs) - def _get_varnames(self): + def _get_varnames(self, y=None): """Return variable column for DataFrame-like returns. - Developer note: currently a helper for predict_interval, predict_quantiles, - valid only in the univariate case. Can be extended later. + Primarily used as helper for probabilistic predict-like methods. + Assumes that _check_X_y has been called, and self._y_metadata set. + + Parameter + --------- + y : ignored, present for downwards compatibility + + Returns + ------- + varnames : iterable of integer or str variable names + can be list or pd.Index + variable names for DataFrame-like returns + identical to self._y_varnames if this attribute exists + """ + featnames = self._y_metadata["feature_names"] + return featnames + + def _get_columns(self, method="predict", **kwargs): + """Return column names for DataFrame-like returns. + + Primarily used as helper for probabilistic predict-like methods. + Assumes that _check_X_y has been called, and self._y_metadata set. + + Parameter + --------- + method : str, optional (default="predict") + method for which to return column names + one of "predict", "predict_interval", "predict_quantiles", "predict_var" + kwargs : dict + additional keyword arguments passed to private method + important: args to private method, e.g., _predict, _predict_interval + + Returns + ------- + columns : pd.Index + column names """ - y = self._y - if isinstance(y, pd.Series): - var_name = self._y.name - elif isinstance(y, pd.DataFrame): - return y.columns + featnames = self._get_varnames() + + if method in ["predict", "predict_var"]: + return featnames else: - var_name = 0 - if var_name is None: - var_name = 0 + assert method in ["predict_interval", "predict_quantiles"] + + if method == "predict_interval": + coverage = kwargs.get("coverage", None) + if coverage is None: + raise ValueError( + "coverage must be passed to _get_columns for predict_interval" + ) + return pd.MultiIndex.from_product([featnames, coverage, ["lower", "upper"]]) + + if method == "predict_quantiles": + alpha = kwargs.get("alpha", None) + if alpha is None: + raise ValueError( + "alpha must be passed to _get_columns for predict_quantiles" + ) + return pd.MultiIndex.from_product([featnames, alpha]) + - return [var_name] +# initialize dynamic docstrings +BaseForecaster._init_dynamic_doc() def _format_moving_cutoff_predictions(y_preds, cutoffs): diff --git a/sktime/forecasting/base/_delegate.py b/sktime/forecasting/base/_delegate.py index 1a9a9af3fb0..05d001aed4b 100644 --- a/sktime/forecasting/base/_delegate.py +++ b/sktime/forecasting/base/_delegate.py @@ -38,6 +38,57 @@ class _DelegatedForecaster(BaseForecaster): def _get_delegate(self): return getattr(self, self._delegate_name) + def _set_delegated_tags(self, delegate=None): + """Set delegated tags, only tags for boilerplate control. + + Writes tags to self. + Can be used by descendant classes to set dependent tags. + Makes safe baseline assumptions about tags, which can be overwritten. + + * data mtype tags are set to the most general value. + This is to ensure that conversion is left to the inner estimator. + * packaging tags such as "author" or "python_depedencies" are not cloned. + * other boilerplate tags are cloned. + + Parameters + ---------- + delegate : object, optional (default=None) + object to get tags from, if None, uses self._get_delegate() + + Returns + ------- + self : reference to self + """ + from sktime.datatypes import ALL_TIME_SERIES_MTYPES + + if delegate is None: + delegate = self._get_delegate() + + TAGS_TO_DELEGATE = [ + "requires-fh-in-fit", + "handles-missing-data", + "ignores-exogeneous-X", + "capability:insample", + "capability:pred_int", + "capability:pred_int:insample", + "handles-missing-data", + "requires-fh-in-fit", + "X-y-must-have-same-index", + "enforce_index_type", + "fit_is_empty", + ] + + TAGS_TO_SET = { + "scitype:y": "both", + "y_inner_mtype": ALL_TIME_SERIES_MTYPES, + "X_inner_mtype": ALL_TIME_SERIES_MTYPES, + } + + self.clone_tags(delegate, tag_names=TAGS_TO_DELEGATE) + self.set_tags(**TAGS_TO_SET) + + return self + def _fit(self, y, X, fh): """Fit forecaster to training data. diff --git a/sktime/forecasting/base/_fh.py b/sktime/forecasting/base/_fh.py index 4754cf6d548..d06286d0286 100644 --- a/sktime/forecasting/base/_fh.py +++ b/sktime/forecasting/base/_fh.py @@ -674,6 +674,87 @@ def to_indexer(self, cutoff=None, from_cutoff=True): relative = self.to_relative(cutoff) return relative - relative.to_pandas()[0] + def get_expected_pred_idx(self, y=None, cutoff=None, sort_by_time=False): + """Construct DataFrame Index expected in y_pred, return of _predict. + + Parameters + ---------- + y : pd.DataFrame, pd.Series, pd.Index, or None + data to compute fh relative to, + assumed in sktime pandas based mtype or index thereof + if None, assumes no MultiIndex + cutoff : pd.Period, pd.Timestamp, int, or pd.Index, optional (default=None) + Cutoff value to use in computing resulting index. + If cutoff is not provided, is computed from ``y`` via ``get_cutoff``. + sort_by_time : bool, optional (default=False) + for MultiIndex returns, whether to sort by time index (level -1) + - If True, result Index is sorted by time index (level -1) + - If False, result Index is sorted overall + + Returns + ------- + fh_idx : pd.Index, expected index of y_pred returned by predict + assumes pandas based return mtype + """ + from sktime.datatypes import get_cutoff + + def _make_y_pred(y_single): + """Make y_pred from single instance y, used in list comprehension.""" + cutoff = get_cutoff(y_single) + return pd.Index(self.to_absolute_index(cutoff)) + + if hasattr(y, "index"): + y_index = y.index + elif isinstance(y, pd.Index): + y_index = y + y = pd.DataFrame(index=y_index) + elif cutoff is None: + y_index = pd.Index(y) + y = pd.DataFrame(index=y_index) + else: + y_index = None + + if cutoff is None and not isinstance(y_index, pd.MultiIndex): + _cutoff = get_cutoff(y) + else: + _cutoff = cutoff + + if cutoff is not None or not isinstance(y_index, pd.MultiIndex): + fh_idx = pd.Index(self.to_absolute_index(_cutoff)) + + if cutoff is not None and isinstance(y_index, pd.MultiIndex): + y_inst_idx = y_index.droplevel(-1).unique().sort_values() + if isinstance(y_inst_idx, pd.MultiIndex) and sort_by_time: + fh_list = [x + (y,) for y in fh_idx for x in y_inst_idx] + elif isinstance(y_inst_idx, pd.MultiIndex) and not sort_by_time: + fh_list = [x + (y,) for x in y_inst_idx for y in fh_idx] + elif sort_by_time: # and not isinstance(y_inst_idx, pd.MultiIndex): + fh_list = [(x, y) for y in fh_idx for x in y_inst_idx] + else: # not sort_by_time and not isinstance(y_inst_idx, pd.MultiIndex): + fh_list = [(x, y) for x in y_inst_idx for y in fh_idx] + + fh_idx = pd.Index(fh_list) + + elif isinstance(y_index, pd.MultiIndex): + y_inst_idx = y_index.droplevel(-1).unique() + + if isinstance(y_inst_idx, pd.MultiIndex): + fh_list = [x + (z,) for x in y_inst_idx for z in _make_y_pred(y.loc[x])] + else: + fh_list = [(x, z) for x in y_inst_idx for z in _make_y_pred(y.loc[x])] + + fh_idx = pd.Index(fh_list) + + if sort_by_time: + fh_df = pd.DataFrame(index=fh_idx) + fh_idx = fh_df.sort_index(level=-1).index + + # replicating index names + if hasattr(y_index, "names") and y_index.names is not None: + fh_idx.names = y_index.names + + return fh_idx + def __repr__(self): """Generate repr based on wrapped index repr.""" class_name = self.__class__.__name__ @@ -728,7 +809,7 @@ def _to_relative(fh: ForecastingHorizon, cutoff=None) -> ForecastingHorizon: absolute = _coerce_to_period(absolute, freq=fh.freq) cutoff = _coerce_to_period(cutoff, freq=fh.freq) - # TODO: 0.25.0: + # TODO: 0.27.0: # Check at every minor release whether lower pandas bound >=0.15.0 # if yes, can remove the workaround in the "else" condition and the check # @@ -792,6 +873,12 @@ def _to_absolute(fh: ForecastingHorizon, cutoff) -> ForecastingHorizon: _check_cutoff(cutoff, relative) is_timestamp = isinstance(cutoff, pd.DatetimeIndex) + # remember timezone to restore it later + if hasattr(cutoff, "tz"): + old_tz = cutoff.tz + else: + old_tz = None + if is_timestamp: # coerce to pd.Period for reliable arithmetic operations and # computations of time deltas @@ -806,6 +893,9 @@ def _to_absolute(fh: ForecastingHorizon, cutoff) -> ForecastingHorizon: # coerce back to DatetimeIndex after operation absolute = absolute.to_timestamp(fh.freq) + if old_tz is not None: + absolute = absolute.tz_localize(old_tz) + return fh._new(absolute, is_relative=False, freq=fh.freq) diff --git a/sktime/forecasting/base/_sktime.py b/sktime/forecasting/base/_sktime.py index 681f7b80808..47755bd3d28 100644 --- a/sktime/forecasting/base/_sktime.py +++ b/sktime/forecasting/base/_sktime.py @@ -8,7 +8,7 @@ import numpy as np import pandas as pd -from sktime.forecasting.base._base import DEFAULT_ALPHA, BaseForecaster +from sktime.forecasting.base._base import BaseForecaster from sktime.split import CutoffSplitter from sktime.utils.datetime import _shift @@ -21,41 +21,50 @@ def __init__(self, window_length=None): self.window_length = window_length self.window_length_ = None - def _predict(self, fh, X): - """Predict core logic.""" - kwargs = {"X": X} + def _predict_boilerplate(self, fh, **kwargs): + """Dispatcher to in-sample and out-of-sample logic. + + In-sample logic is implemented in _predict_in_sample. + Out-of-sample logic is implemented in _predict_fixed_cutoff. + """ + cutoff = self._cutoff # all values are out-of-sample - if fh.is_all_out_of_sample(self.cutoff): - y_pred = self._predict_fixed_cutoff( - fh.to_out_of_sample(self.cutoff), **kwargs - ) + if fh.is_all_out_of_sample(cutoff): + y_pred = self._predict_fixed_cutoff(fh.to_out_of_sample(cutoff), **kwargs) # all values are in-sample elif fh.is_all_in_sample(self.cutoff): - y_pred = self._predict_in_sample(fh.to_in_sample(self.cutoff), **kwargs) + y_pred = self._predict_in_sample(fh.to_in_sample(cutoff), **kwargs) # both in-sample and out-of-sample values else: - y_ins = self._predict_in_sample(fh.to_in_sample(self.cutoff), **kwargs) - y_oos = self._predict_fixed_cutoff( - fh.to_out_of_sample(self.cutoff), **kwargs - ) + y_ins = self._predict_in_sample(fh.to_in_sample(cutoff), **kwargs) + y_oos = self._predict_fixed_cutoff(fh.to_out_of_sample(cutoff), **kwargs) if isinstance(y_ins, pd.DataFrame) and isinstance(y_oos, pd.Series): y_oos = y_oos.to_frame(y_ins.columns[0]) y_pred = pd.concat([y_ins, y_oos]) + return y_pred + + def _predict(self, fh, X): + """Predict core logic.""" + kwargs = {"X": X} + + y_pred = self._predict_boilerplate(fh, **kwargs) + # ensure pd.Series name attribute is preserved if isinstance(y_pred, pd.Series) and isinstance(self._y, pd.Series): y_pred.name = self._y.name + if isinstance(y_pred, pd.DataFrame) and isinstance(self._y, pd.Series): + y_pred = y_pred.iloc[:, 0] + y_pred.name = self._y.name return y_pred - def _predict_fixed_cutoff( - self, fh, X=None, return_pred_int=False, alpha=DEFAULT_ALPHA - ): + def _predict_fixed_cutoff(self, fh, X=None, **kwargs): """Make single-step or multi-step fixed cutoff predictions. Parameters @@ -63,26 +72,20 @@ def _predict_fixed_cutoff( fh : np.array all positive (> 0) X : pd.DataFrame - return_pred_int : bool - alpha : float or array-like Returns ------- y_pred = pd.Series or pd.DataFrame """ # assert all(fh > 0) - y_pred = self._predict_last_window( - fh, X, return_pred_int=return_pred_int, alpha=alpha - ) + y_pred = self._predict_last_window(fh, X=X, **kwargs) if isinstance(y_pred, pd.Series) or isinstance(y_pred, pd.DataFrame): return y_pred else: index = fh.to_absolute_index(self.cutoff) return pd.Series(y_pred, index=index) - def _predict_in_sample( - self, fh, X=None, return_pred_int=False, alpha=DEFAULT_ALPHA - ): + def _predict_in_sample(self, fh, X=None, **kwargs): """Make in-sample prediction using single-step moving-cutoff predictions. Parameters @@ -90,16 +93,11 @@ def _predict_in_sample( fh : np.array all non-positive (<= 0) X : pd.DataFrame - return_pred_int : bool - alpha : float or array-like Returns ------- y_pred : pd.DataFrame or pd.Series """ - if return_pred_int: - raise NotImplementedError() - y_train = self._y # generate cutoffs from forecasting horizon, note that cutoffs are @@ -108,17 +106,13 @@ def _predict_in_sample( cv = CutoffSplitter(cutoffs, fh=1, window_length=self.window_length_) return self._predict_moving_cutoff(y_train, cv, X, update_params=False) - def _predict_last_window( - self, fh, X=None, return_pred_int=False, alpha=DEFAULT_ALPHA - ): + def _predict_last_window(self, fh, X=None, **kwargs): """Predict core logic. Parameters ---------- fh : np.array X : pd.DataFrame - return_pred_int : bool - alpha : float or list of floats Returns ------- @@ -141,7 +135,32 @@ def _get_last_window(self): return y, X - @staticmethod - def _predict_nan(fh): - """Predict nan if predictions are not possible.""" - return np.full(len(fh), np.nan) + def _predict_nan(self, fh=None, method="predict", **kwargs): + """Create a return DataFrame for predict-like method, with all np.nan entries. + + Parameters + ---------- + fh : ForecastingHorizon of self, optional (default=None) + retrieved from self.fh if None + method : str, optional (default="predict") + method name to generate return DataFrame for + name of one of the BaseForecaster predict-like methods + **kwargs : optional + further kwargs to predict-like methods, e.g., coverage for predict_interval + passed to self._get_columns + + Returns + ------- + y_pred : pd.DataFrame + return DataFrame + index, columns are as expected + all entries are np.nan + """ + if fh is None: + fh = self.fh + + index = fh.get_expected_pred_idx(y=self._y, cutoff=self.cutoff) + columns = self._get_columns(method=method, **kwargs) + + y_pred = pd.DataFrame(np.nan, index=index, columns=columns) + return y_pred diff --git a/sktime/forecasting/base/adapters/_fbprophet.py b/sktime/forecasting/base/adapters/_fbprophet.py index acfd91f4707..a27bc2cd3f3 100644 --- a/sktime/forecasting/base/adapters/_fbprophet.py +++ b/sktime/forecasting/base/adapters/_fbprophet.py @@ -16,6 +16,7 @@ class _ProphetAdapter(BaseForecaster): """Base class for interfacing prophet and neuralprophet.""" _tags = { + "authors": ["mloning", "aiwalter", "fkiraly"], "ignores-exogeneous-X": False, "capability:pred_int": True, "capability:pred_int:insample": True, @@ -116,11 +117,15 @@ def _fit(self, y, X, fh): df["cap"] = self.growth_cap df["floor"] = self.growth_floor + if hasattr(self, "fit_kwargs") and isinstance(self.fit_kwargs, dict): + fit_kwargs = self.fit_kwargs + else: + fit_kwargs = {} if self.verbose: - self._forecaster.fit(df=df) + self._forecaster.fit(df=df, **fit_kwargs) else: with _suppress_stdout_stderr(): - self._forecaster.fit(df=df) + self._forecaster.fit(df=df, **fit_kwargs) return self diff --git a/sktime/forecasting/base/adapters/_generalised_statsforecast.py b/sktime/forecasting/base/adapters/_generalised_statsforecast.py index 3055b7a0449..21477e3cbea 100644 --- a/sktime/forecasting/base/adapters/_generalised_statsforecast.py +++ b/sktime/forecasting/base/adapters/_generalised_statsforecast.py @@ -16,6 +16,14 @@ class _GeneralisedStatsForecastAdapter(BaseForecaster): """Base adapter class for StatsForecast models.""" _tags = { + # packaging info + # -------------- + "authors": ["yarnabrina"], + "maintainers": ["yarnabrina"], + "python_version": ">=3.8", + "python_dependencies": ["statsforecast"], + # estimator type + # -------------- "y_inner_mtype": "pd.Series", "X_inner_mtype": "pd.DataFrame", "scitype:y": "univariate", @@ -23,8 +31,6 @@ class _GeneralisedStatsForecastAdapter(BaseForecaster): # "X-y-must-have-same-index": True, # TODO: need to check (how?) # "enforce_index_type": None, # TODO: need to check (how?) "handles-missing-data": False, - "python_version": ">=3.8", - "python_dependencies": ["statsforecast"], } def __init__(self): @@ -504,7 +510,7 @@ def predict(self, h, X=None, level=None): self.prediction_intervals = ConformalIntervals(h=h) level = sorted(level) - coverage = [round(1 - (_l / 100), 2) for _l in level] + coverage = [round(_l / 100, 2) for _l in level] pred_int = self.estimator.predict_interval( fh=range(1, h + 1), X=X, coverage=coverage @@ -532,7 +538,7 @@ def predict_in_sample(self, level=None): return {"fitted": fitted} level = sorted(level) - coverage = [round(1 - (_l / 100), 2) for _l in level] + coverage = [round(_l / 100, 2) for _l in level] pred_int = self.estimator.predict_interval( fh=self.estimator._y.index, X=self.estimator._X, coverage=coverage ) diff --git a/sktime/forecasting/base/adapters/_pmdarima.py b/sktime/forecasting/base/adapters/_pmdarima.py index 4ff3d162b2b..0733b1fbfd4 100644 --- a/sktime/forecasting/base/adapters/_pmdarima.py +++ b/sktime/forecasting/base/adapters/_pmdarima.py @@ -16,6 +16,8 @@ class _PmdArimaAdapter(BaseForecaster): """Base class for interfacing pmdarima.""" _tags = { + "authors": ["mloning", "hyang1996", "kejsitake", "fkiraly"], + "maintainers": "hyang1996", "ignores-exogeneous-X": False, "capability:pred_int": True, "capability:pred_int:insample": True, diff --git a/sktime/forecasting/base/adapters/_pytorch.py b/sktime/forecasting/base/adapters/_pytorch.py new file mode 100644 index 00000000000..76b1f7799f7 --- /dev/null +++ b/sktime/forecasting/base/adapters/_pytorch.py @@ -0,0 +1,245 @@ +from abc import ABC + +import numpy as np +import pandas as pd + +from sktime.forecasting.base import BaseForecaster +from sktime.utils.validation._dependencies import _check_soft_dependencies + +if _check_soft_dependencies("torch", severity="none"): + import torch + + +class BaseDeepNetworkPyTorch(BaseForecaster, ABC): + """Abstract base class for deep learning networks using torch.nn.""" + + _tags = { + "python_dependencies": "torch", + "y_inner_mtype": "pd.DataFrame", + "capability:insample": False, + "capability:pred_int:insample": False, + "scitype:y": "both", + "ignores-exogeneous-X": True, + } + + def __init__( + self, + num_epochs=16, + batch_size=8, + in_channels=1, + individual=False, + criterion_kwargs=None, + optimizer=None, + optimizer_kwargs=None, + lr=0.001, + ): + self.num_epochs = num_epochs + self.batch_size = batch_size + self.in_channels = in_channels + self.individual = individual + self.criterion_kwargs = criterion_kwargs + self.optimizer = optimizer + self.optimizer_kwargs = optimizer_kwargs + self.lr = lr + + super().__init__() + + def _fit(self, y, fh, X=None): + """Fit the network. + + Changes to state: + writes to self._network.state_dict + + Parameters + ---------- + X : iterable-style or map-style dataset + see (https://pytorch.org/docs/stable/data.html) for more information + """ + from sktime.forecasting.base import ForecastingHorizon + + fh = fh.to_relative(self.cutoff) + + if type(fh) is ForecastingHorizon: + self.network = self._build_network(fh._values[-1]) + else: + self.network = self._build_network(fh) + + if self.criterion: + if self.criterion in self.criterions.keys(): + if self.criterion_kwargs: + self._criterion = self.criterions[self.criterion]( + **self.criterion_kwargs + ) + else: + self._criterion = self.criterions[self.criterion]() + else: + raise TypeError( + f"Please pass one of {self.criterions.keys()} for `criterion`." + ) + else: + # default criterion + self._criterion = torch.nn.MSELoss() + + if self.optimizer: + if self.optimizer in self.optimizers.keys(): + if self.optimizer_kwargs: + self._optimizer = self.optimizers[self.optimizer]( + self.network.parameters(), lr=self.lr, **self.optimizer_kwargs + ) + else: + self._optimizer = self.optimizers[self.optimizer]( + self.network.parameters(), lr=self.lr + ) + else: + raise TypeError( + f"Please pass one of {self.optimizers.keys()} for `optimizer`." + ) + else: + # default optimizer + self._optimizer = torch.optim.Adam(self.network.parameters(), lr=self.lr) + + dataloader = self.build_pytorch_train_dataloader(y) + self.network.train() + + for _ in range(self.num_epochs): + for x, y in dataloader: + y_pred = self.network(x) + loss = self._criterion(y_pred, y) + self._optimizer.zero_grad() + loss.backward() + self._optimizer.step() + + def _predict(self, X=None, fh=None): + """Predict with fitted model.""" + from torch import cat + + if fh is None: + fh = self._fh + fh = fh.to_relative(self.cutoff) + + if max(fh._values) > self.network.pred_len or min(fh._values) < 0: + raise ValueError( + f"fh of {fh} passed to {self.__class__.__name__} is not " + "within `pred_len`. Please use a fh that aligns with the `pred_len` of " + "the forecaster." + ) + + if X is None: + dataloader = self.build_pytorch_pred_dataloader(self._y, fh) + else: + dataloader = self.build_pytorch_pred_dataloader(X, fh) + + y_pred = [] + for x, _ in dataloader: + y_pred.append(self.network(x).detach()) + y_pred = cat(y_pred, dim=0).view(-1, y_pred[0].shape[-1]).numpy() + y_pred = y_pred[fh._values.values - 1] + y_pred = pd.DataFrame( + y_pred, columns=self._y.columns, index=fh.to_absolute_index(self.cutoff) + ) + + return y_pred + + def build_pytorch_train_dataloader(self, y): + """Build PyTorch DataLoader for training.""" + from torch.utils.data import DataLoader + + if self.custom_dataset_train: + if hasattr(self.custom_dataset_train, "build_dataset") and callable( + self.custom_dataset_train.build_dataset + ): + self.custom_dataset_train.build_dataset(y) + dataset = self.custom_dataset_train + else: + raise NotImplementedError( + "Custom Dataset `build_dataset` method is not available. Please " + f"refer to the {self.__class__.__name__}.build_dataset " + "documentation." + ) + else: + dataset = PyTorchTrainDataset( + y=y, + seq_len=self.network.seq_len, + fh=self._fh.to_relative(self.cutoff)._values[-1], + ) + + return DataLoader( + dataset, + self.batch_size, + ) + + def build_pytorch_pred_dataloader(self, y, fh): + """Build PyTorch DataLoader for prediction.""" + from torch.utils.data import DataLoader + + if self.custom_dataset_pred: + if hasattr(self.custom_dataset_pred, "build_dataset") and callable( + self.custom_dataset_pred.build_dataset + ): + self.custom_dataset_train.build_dataset(y) + dataset = self.custom_dataset_train + else: + raise NotImplementedError( + "Custom Dataset `build_dataset` method is not available. Please" + f"refer to the {self.__class__.__name__}.build_dataset" + "documentation." + ) + else: + dataset = PyTorchPredDataset( + y=y[-self.network.seq_len :], + seq_len=self.network.seq_len, + ) + + return DataLoader( + dataset, + self.batch_size, + ) + + def get_y_true(self, y): + """Get y_true values for validation.""" + dataloader = self.build_pytorch_pred_dataloader(y) + y_true = [y.flatten().numpy() for _, y in dataloader] + return np.concatenate(y_true, axis=0) + + +class PyTorchTrainDataset: + """Dataset for use in sktime deep learning forecasters.""" + + def __init__(self, y, seq_len, fh): + self.y = y.values + self.seq_len = seq_len + self.fh = fh + + def __len__(self): + """Return length of dataset.""" + return len(self.y) - self.seq_len - self.fh + 1 + + def __getitem__(self, i): + """Return data point.""" + from torch import from_numpy, tensor + + return ( + tensor(self.y[i : i + self.seq_len]).float(), + from_numpy(self.y[i + self.seq_len : i + self.seq_len + self.fh]).float(), + ) + + +class PyTorchPredDataset: + """Dataset for use in sktime deep learning forecasters.""" + + def __init__(self, y, seq_len): + self.y = y.values + self.seq_len = seq_len + + def __len__(self): + """Return length of dataset.""" + return 1 + + def __getitem__(self, i): + """Return data point.""" + from torch import from_numpy, tensor + + return ( + tensor(self.y[i : i + self.seq_len]).float(), + from_numpy(self.y[i + self.seq_len : i + self.seq_len]).float(), + ) diff --git a/sktime/forecasting/base/adapters/_statsforecast.py b/sktime/forecasting/base/adapters/_statsforecast.py index 3da1f4e33ac..1c1a87f5e8d 100644 --- a/sktime/forecasting/base/adapters/_statsforecast.py +++ b/sktime/forecasting/base/adapters/_statsforecast.py @@ -15,6 +15,8 @@ class _StatsForecastAdapter(BaseForecaster): """Base class for interfacing StatsForecast.""" _tags = { + "authors": ["FedericoGarza"], + "maintainers": ["FedericoGarza"], "scitype:y": "univariate", # which y are fine? univariate/multivariate/both "ignores-exogeneous-X": False, # does estimator ignore the exogeneous X? "handles-missing-data": False, # can estimator handle missing data? diff --git a/sktime/forecasting/base/adapters/_statsmodels.py b/sktime/forecasting/base/adapters/_statsmodels.py index 0e3d91ecbc6..1f00fe1066f 100644 --- a/sktime/forecasting/base/adapters/_statsmodels.py +++ b/sktime/forecasting/base/adapters/_statsmodels.py @@ -2,7 +2,7 @@ # copyright: sktime developers, BSD-3-Clause License (see LICENSE file) """Implements adapter for statsmodels forecasters to be used in sktime framework.""" -__author__ = ["mloning"] +__author__ = ["mloning", "ciaran-g"] __all__ = ["_StatsModelsAdapter"] import inspect @@ -19,10 +19,16 @@ class _StatsModelsAdapter(BaseForecaster): _fitted_param_names = () _tags = { + # packaging info + # -------------- + "authors": ["mloning", "ciaran-g"], + "maintainers": ["ciaran-g"], + "python_dependencies": "statsmodels", + # estimator type + # -------------- "ignores-exogeneous-X": True, "requires-fh-in-fit": False, "handles-missing-data": False, - "python_dependencies": "statsmodels", } def __init__(self, random_state=None): @@ -99,7 +105,7 @@ def _predict(self, fh, X): # statsmodels requires zero-based indexing starting at the # beginning of the training series when passing integers start, end = fh.to_absolute_int(self._y.index[0], self.cutoff)[[0, -1]] - fh_abs = fh.to_absolute_index(self.cutoff) + fh_int = fh.to_absolute_int(self._y.index[0], self.cutoff) - len(self._y) # bug fix for evaluate function as test_plus_train indices are passed # statsmodels exog must contain test indices only. @@ -108,7 +114,7 @@ def _predict(self, fh, X): ind_drop = self._X.index X = X.loc[~X.index.isin(ind_drop)] # Entire range of the forecast horizon is required - X = X[: fh_abs[-1]] + X = X.iloc[: (fh_int[-1] + 1)] # include end point if "exog" in inspect.signature(self._forecaster.__init__).parameters.keys(): y_pred = self._fitted_forecaster.predict(start=start, end=end, exog=X) @@ -117,7 +123,9 @@ def _predict(self, fh, X): # statsmodels forecasts all periods from start to end of forecasting # horizon, but only return given time points in forecasting horizon - y_pred = y_pred.loc[fh_abs] + # if fh[0] > 1 steps ahead of cutoff then make relative to `start` + fh_int = fh_int - fh_int[0] + y_pred = y_pred.iloc[fh_int] # ensure that name is not added nor removed # otherwise this may upset conversion to pd.DataFrame y_pred.name = self._y.name @@ -186,7 +194,9 @@ def _predict_interval(self, fh, X, coverage): return BaseForecaster._predict_interval(self, fh, X=X, coverage=coverage) start, end = fh.to_absolute_int(self._y.index[0], self.cutoff)[[0, -1]] - valid_indices = fh.to_absolute(self.cutoff).to_pandas() + fh_int = fh.to_absolute_int(self._y.index[0], self.cutoff) - len(self._y) + # if fh > 1 steps ahead of cutoff + fh_int = fh_int - fh_int[0] get_prediction_arguments = {"start": start, "end": end} @@ -205,17 +215,15 @@ def _predict_interval(self, fh, X, coverage): var_names = self._get_varnames() var_name = var_names[0] columns = pd.MultiIndex.from_product([var_names, coverage, ["lower", "upper"]]) - pred_int = pd.DataFrame(index=valid_indices, columns=columns) + preds_index = self._extract_conf_int(prediction_results, (1 - coverage[0])) + preds_index = preds_index.iloc[fh_int].index + pred_int = pd.DataFrame(index=preds_index, columns=columns) for c in coverage: pred_statsmodels = self._extract_conf_int(prediction_results, (1 - c)) - pred_int[(var_name, c, "lower")] = pred_statsmodels.loc[ - valid_indices, "lower" - ] - pred_int[(var_name, c, "upper")] = pred_statsmodels.loc[ - valid_indices, "upper" - ] + pred_int[(var_name, c, "lower")] = pred_statsmodels.iloc[fh_int]["lower"] + pred_int[(var_name, c, "upper")] = pred_statsmodels.iloc[fh_int]["upper"] return pred_int diff --git a/sktime/forecasting/base/adapters/_tbats.py b/sktime/forecasting/base/adapters/_tbats.py index d0043faa3ea..b3dc70ffa7c 100644 --- a/sktime/forecasting/base/adapters/_tbats.py +++ b/sktime/forecasting/base/adapters/_tbats.py @@ -17,6 +17,7 @@ class _TbatsAdapter(BaseForecaster): """Base class for interfacing tbats forecasting algorithms.""" _tags = { + "authors": ["mloning", "aiwalter", "k1m190r", "fkiraly"], "ignores-exogeneous-X": True, "capability:pred_int": True, "capability:pred_int:insample": True, diff --git a/sktime/forecasting/base/tests/test_base.py b/sktime/forecasting/base/tests/test_base.py index b759bcf8a45..17a22bf027d 100644 --- a/sktime/forecasting/base/tests/test_base.py +++ b/sktime/forecasting/base/tests/test_base.py @@ -14,23 +14,28 @@ from sktime.datatypes import check_is_mtype, convert from sktime.datatypes._utilities import get_cutoff, get_window from sktime.forecasting.arima import ARIMA +from sktime.forecasting.compose import YfromX from sktime.forecasting.naive import NaiveForecaster from sktime.forecasting.theta import ThetaForecaster from sktime.forecasting.var import VAR from sktime.utils._testing.hierarchical import _make_hierarchical from sktime.utils._testing.panel import _make_panel from sktime.utils._testing.series import _make_series +from sktime.utils.parallel import _get_parallel_test_fixtures from sktime.utils.validation._dependencies import _check_estimator_deps PANEL_MTYPES = ["pd-multiindex", "nested_univ", "numpy3D"] HIER_MTYPES = ["pd_multiindex_hier"] +# list of parallelization backends to test +BACKENDS = _get_parallel_test_fixtures("config") + @pytest.mark.skipif( not _check_estimator_deps(ARIMA, severity="none"), reason="skip test if required soft dependency for ARIMA not available", ) -@pytest.mark.parametrize("backend", [None, "joblib", "loky", "threading"]) +@pytest.mark.parametrize("backend", BACKENDS) @pytest.mark.parametrize("mtype", PANEL_MTYPES) def test_vectorization_series_to_panel(mtype, backend): """Test that forecaster vectorization works for Panel data. @@ -43,9 +48,11 @@ def test_vectorization_series_to_panel(mtype, backend): y = _make_panel(n_instances=n_instances, random_state=42, return_mtype=mtype) f = ARIMA() - f.set_config(**{"backend:parallel": backend}) + f.set_config(**backend.copy()) y_pred = f.fit(y).predict([1, 2, 3]) - valid, _, metadata = check_is_mtype(y_pred, mtype, return_metadata=True) + valid, _, metadata = check_is_mtype( + y_pred, mtype, return_metadata=True, msg_return_dict="list" + ) msg = ( f"vectorization of forecasters does not work for test example " @@ -81,7 +88,7 @@ def test_vectorization_series_to_panel(mtype, backend): not _check_estimator_deps(ARIMA, severity="none"), reason="skip test if required soft dependency for ARIMA not available", ) -@pytest.mark.parametrize("backend", [None, "joblib", "loky", "threading"]) +@pytest.mark.parametrize("backend", BACKENDS) @pytest.mark.parametrize("mtype", HIER_MTYPES) def test_vectorization_series_to_hier(mtype, backend): """Test that forecaster vectorization works for Hierarchical data. @@ -96,9 +103,11 @@ def test_vectorization_series_to_hier(mtype, backend): y = convert(y, from_type="pd_multiindex_hier", to_type=mtype) f = ARIMA() - f.set_config(**{"backend:parallel": backend}) + f.set_config(**backend.copy()) y_pred = f.fit(y).predict([1, 2, 3]) - valid, _, metadata = check_is_mtype(y_pred, mtype, return_metadata=True) + valid, _, metadata = check_is_mtype( + y_pred, mtype, return_metadata=True, msg_return_dict="list" + ) msg = ( f"vectorization of forecasters does not work for test example " @@ -158,7 +167,9 @@ def test_vectorization_series_to_panel_proba(method, mtype): else: RuntimeError(f"bug in test, unreachable state, method {method} queried") - valid, _, _ = check_is_mtype(y_pred, expected_mtype, return_metadata=True) + valid, _, _ = check_is_mtype( + y_pred, expected_mtype, return_metadata=True, msg_return_dict="list" + ) msg = ( f"vectorization of forecaster method {method} does not work for test example " @@ -194,7 +205,9 @@ def test_vectorization_series_to_hier_proba(method, mtype): else: RuntimeError(f"bug in test, unreachable state, method {method} queried") - valid, _, _ = check_is_mtype(y_pred, expected_mtype, return_metadata=True) + valid, _, _ = check_is_mtype( + y_pred, expected_mtype, return_metadata=True, msg_return_dict="list" + ) msg = ( f"vectorization of forecaster method {method} does not work for test example " @@ -251,7 +264,9 @@ def test_vectorization_multivariate(mtype, exogeneous): est = ARIMA().fit(y=y_fit, X=X_fit, fh=[1, 2, 3]) y_pred = est.predict(X=X_pred) - valid, _, metadata = check_is_mtype(y_pred, mtype, return_metadata=True) + valid, _, metadata = check_is_mtype( + y_pred, mtype, return_metadata=True, msg_return_dict="list" + ) msg = ( f"vectorization of forecasters does not work for test example " @@ -441,3 +456,25 @@ def test_range_fh_in_predict(): assert isinstance(var_predictions, pd.DataFrame) assert var_predictions.shape == (10 * 2, 5) + + +def test_remember_data(): + """Test that the ``remember_data`` flag works as expected.""" + from sktime.datasets import load_airline + + y = load_airline() + X = load_airline() + f = YfromX.create_test_instance() + + # turn off remembering _X, _y by config + f.set_config(**{"remember_data": False}) + f.fit(y, X, fh=[1, 2, 3]) + + assert f._X is None + assert f._y is None + + f.set_config(**{"remember_data": True}) + f.fit(y, X, fh=[1, 2, 3]) + + assert f._X is not None + assert f._y is not None diff --git a/sktime/forecasting/base/tests/test_fh.py b/sktime/forecasting/base/tests/test_fh.py index 873313edaa9..5241634391f 100644 --- a/sktime/forecasting/base/tests/test_fh.py +++ b/sktime/forecasting/base/tests/test_fh.py @@ -319,11 +319,20 @@ def test_get_duration(n_timepoints, index_type): _make_index(n_timepoints, index_type) -FIXED_FREQUENCY_STRINGS = ["10T", "H", "D", "2D"] +FIXED_FREQUENCY_STRINGS = ["10min", "H", "D", "2D"] NON_FIXED_FREQUENCY_STRINGS = ["W-WED", "W-SUN", "W-SAT", "M"] FREQUENCY_STRINGS = [*FIXED_FREQUENCY_STRINGS, *NON_FIXED_FREQUENCY_STRINGS] +def _get_expected_freqstr(freqstr): + # special case for 10min, T is being deprecated and replaced by min + if freqstr == "10min": + fh_freqstr_expected = "10T" + else: + fh_freqstr_expected = freqstr + return fh_freqstr_expected + + @pytest.mark.parametrize("freqstr", FREQUENCY_STRINGS) def test_to_absolute_freq(freqstr): """Test conversion when anchorings included in frequency.""" @@ -332,7 +341,7 @@ def test_to_absolute_freq(freqstr): fh = ForecastingHorizon([1, 2, 3]) abs_fh = fh.to_absolute(cutoff) - assert abs_fh._values.freqstr == freqstr + assert abs_fh._values.freqstr == _get_expected_freqstr(freqstr) @pytest.mark.parametrize("freqstr", FREQUENCY_STRINGS) @@ -346,7 +355,8 @@ def test_absolute_to_absolute_with_integer_horizon(freqstr): converted_abs_fh = abs_fh.to_relative(cutoff).to_absolute(cutoff) assert_array_equal(abs_fh, converted_abs_fh) - assert converted_abs_fh._values.freqstr == freqstr + fh_freqstr = converted_abs_fh._values.freqstr + assert fh_freqstr == _get_expected_freqstr(freqstr) @pytest.mark.parametrize("freqstr", FIXED_FREQUENCY_STRINGS) @@ -363,7 +373,8 @@ def test_absolute_to_absolute_with_timedelta_horizon(freqstr): converted_abs_fh = abs_fh.to_relative(cutoff).to_absolute(cutoff) assert_array_equal(abs_fh, converted_abs_fh) - assert converted_abs_fh._values.freqstr == freqstr + + assert converted_abs_fh._values.freqstr == _get_expected_freqstr(freqstr) @pytest.mark.parametrize("freqstr", FREQUENCY_STRINGS) @@ -728,3 +739,60 @@ def test_empty_range_in_fh(): """Test when ``range`` has zero length.""" empty_range = ForecastingHorizon(values=range(-5)) assert (empty_range == ForecastingHorizon(values=[])).all() + + +def test_fh_expected_pred(): + """Test for expected prediction index method.""" + fh = ForecastingHorizon([1, 2, 3]) + y_pred_idx = fh.get_expected_pred_idx(pd.Index([2, 3, 4])) + + assert y_pred_idx.equals(pd.Index([5, 6, 7])) + + y_df = pd.DataFrame([1, 2, 3], index=[2, 3, 4]) + y_pred_idx = fh.get_expected_pred_idx(y_df) + + assert y_pred_idx.equals(pd.Index([5, 6, 7])) + + # pd.MultiIndex case, 2 levels + idx = pd.MultiIndex.from_tuples([("a", 3), ("a", 5), ("b", 4), ("b", 5), ("b", 6)]) + y_pred_idx = fh.get_expected_pred_idx(idx) + + y_pred_idx_expected = pd.MultiIndex.from_tuples( + [("a", 6), ("a", 7), ("a", 8), ("b", 7), ("b", 8), ("b", 9)] + ) + assert y_pred_idx.equals(y_pred_idx_expected) + + y_pred_idx = fh.get_expected_pred_idx(idx, sort_by_time=True) + y_pred_idx_expected = pd.MultiIndex.from_tuples( + [("a", 6), ("a", 7), ("b", 7), ("a", 8), ("b", 8), ("b", 9)] + ) + assert y_pred_idx.equals(y_pred_idx_expected) + + # pd.MultiIndex case, 3 levels + idx = pd.MultiIndex.from_tuples( + [("a", 3, 4), ("a", 3, 5), ("b", 5, 4), ("b", 5, 5), ("b", 5, 6)] + ) + y_pred_idx = fh.get_expected_pred_idx(idx) + + y_pred_idx_expected = pd.MultiIndex.from_tuples( + [("a", 3, 6), ("a", 3, 7), ("a", 3, 8), ("b", 5, 7), ("b", 5, 8), ("b", 5, 9)] + ) + assert y_pred_idx.equals(y_pred_idx_expected) + + y_pred_idx = fh.get_expected_pred_idx(idx, sort_by_time=True) + + y_pred_idx_expected = pd.MultiIndex.from_tuples( + [("a", 3, 6), ("a", 3, 7), ("b", 5, 7), ("a", 3, 8), ("b", 5, 8), ("b", 5, 9)] + ) + assert y_pred_idx.equals(y_pred_idx_expected) + + +def test_tz_preserved(): + """Test that time zone information is preserved in to_absolute. + + Failure case in issue #5584. + """ + cutoff = pd.Timestamp("2020-01-01", tz="utc") + fh_absolute = ForecastingHorizon(range(100), freq="h").to_absolute(cutoff) + + assert fh_absolute[0].tz == cutoff.tz diff --git a/sktime/forecasting/bats.py b/sktime/forecasting/bats.py index 0f1413fdd9a..5ea5d68b35a 100644 --- a/sktime/forecasting/bats.py +++ b/sktime/forecasting/bats.py @@ -77,6 +77,7 @@ class BATS(_TbatsAdapter): See Also -------- TBATS + StatsForecastAutoTBATS References ---------- diff --git a/sktime/forecasting/compose/__init__.py b/sktime/forecasting/compose/__init__.py index 537afc45512..30b0c1971c3 100644 --- a/sktime/forecasting/compose/__init__.py +++ b/sktime/forecasting/compose/__init__.py @@ -6,6 +6,7 @@ "HierarchyEnsembleForecaster", "ColumnEnsembleForecaster", "EnsembleForecaster", + "FallbackForecaster", "AutoEnsembleForecaster", "TransformedTargetForecaster", "ForecastingPipeline", @@ -18,6 +19,7 @@ "RecursiveTimeSeriesRegressionForecaster", "DirRecTabularRegressionForecaster", "DirRecTimeSeriesRegressionForecaster", + "DirectReductionForecaster", "StackingForecaster", "MultiplexForecaster", "make_reduction", @@ -26,6 +28,7 @@ "ForecastByLevel", "Permute", "YfromX", + "IgnoreX", ] from sktime.forecasting.compose._bagging import BaggingForecaster @@ -34,9 +37,11 @@ AutoEnsembleForecaster, EnsembleForecaster, ) +from sktime.forecasting.compose._fallback import FallbackForecaster from sktime.forecasting.compose._fhplex import FhPlexForecaster from sktime.forecasting.compose._grouped import ForecastByLevel from sktime.forecasting.compose._hierarchy_ensemble import HierarchyEnsembleForecaster +from sktime.forecasting.compose._ignore_x import IgnoreX from sktime.forecasting.compose._multiplexer import MultiplexForecaster from sktime.forecasting.compose._pipeline import ( ForecastingPipeline, @@ -45,6 +50,7 @@ TransformedTargetForecaster, ) from sktime.forecasting.compose._reduce import ( + DirectReductionForecaster, DirectTabularRegressionForecaster, DirectTimeSeriesRegressionForecaster, DirRecTabularRegressionForecaster, diff --git a/sktime/forecasting/compose/_bagging.py b/sktime/forecasting/compose/_bagging.py index 3a257f1564a..72b9d036c92 100644 --- a/sktime/forecasting/compose/_bagging.py +++ b/sktime/forecasting/compose/_bagging.py @@ -8,19 +8,13 @@ import numpy as np import pandas as pd -from sklearn import clone from sklearn.utils import check_random_state from sktime.datatypes._utilities import update_data from sktime.forecasting.base import BaseForecaster from sktime.forecasting.ets import AutoETS from sktime.transformations.base import BaseTransformer -from sktime.transformations.bootstrap import ( - MovingBlockBootstrapTransformer, - STLBootstrapTransformer, -) -from sktime.utils.estimators import MockForecaster -from sktime.utils.random_state import set_random_state +from sktime.transformations.bootstrap import STLBootstrapTransformer class BaggingForecaster(BaseForecaster): @@ -38,15 +32,15 @@ class BaggingForecaster(BaseForecaster): Parameters ---------- - bootstrap_transformer : BaseTransformer - (sktime.transformations.bootstrap.STLBootstrapTransformer) + bootstrap_transformer : sktime transformer BaseTransformer descendant instance + (default = sktime.transformations.bootstrap.STLBootstrapTransformer) Bootstrapping Transformer that takes a series (with tag scitype:transform-input=Series) as input and returns a panel (with tag scitype:transform-input=Panel) of bootstrapped time series if not specified sktime.transformations.bootstrap.STLBootstrapTransformer is used. - forecaster : BaseForecaster (sktime.forecating.ets.AutoETS) - A valid sktime Forecaster. If not specified sktime.forecating.ets.AutoETS is - used. + forecaster : sktime forecaster, BaseForecaster descendant instance, optional + (default = sktime.forecating.ets.AutoETS) + If not specified, sktime.forecating.ets.AutoETS is used. sp: int (default=2) Seasonal period for default Forecaster and Transformer. Must be 2 or greater. Ignored for the bootstrap_transformer and forecaster if they are specified. @@ -88,6 +82,7 @@ class BaggingForecaster(BaseForecaster): """ _tags = { + "authors": ["ltsaprounis"], "scitype:y": "univariate", # which y are fine? univariate/multivariate/both "ignores-exogeneous-X": True, # does estimator ignore the exogeneous X? "handles-missing-data": False, # can estimator handle missing data? @@ -161,13 +156,17 @@ def _fit(self, y, X, fh): """ if self.bootstrap_transformer is None: self.bootstrap_transformer_ = STLBootstrapTransformer(sp=self.sp) + elif hasattr(self.bootstrap_transformer, "clone"): + self.bootstrap_transformer_ = self.bootstrap_transformer.clone() else: + from sklearn import clone + self.bootstrap_transformer_ = clone(self.bootstrap_transformer) if self.forecaster is None: self.forecaster_ = AutoETS(sp=self.sp) else: - self.forecaster_ = clone(self.forecaster) + self.forecaster_ = self.forecaster.clone() if ( self.bootstrap_transformer_.get_tag( @@ -192,8 +191,6 @@ def _fit(self, y, X, fh): # random state handling passed into input estimators self.random_state_ = check_random_state(self.random_state) - set_random_state(self.bootstrap_transformer_, random_state=self.random_state_) - set_random_state(self.forecaster_, random_state=self.random_state_) self.bootstrap_transformer_.fit(X=y) y_bootstraps = self.bootstrap_transformer_.transform(X=y) self.forecaster_.fit(y=y_bootstraps, fh=fh, X=None) @@ -303,6 +300,8 @@ def get_test_params(cls): `MyClass(**params)` or `MyClass(**params[i])` creates a valid test instance. `create_test_instance` uses the first (or only) dictionary in `params` """ + from sktime.transformations.bootstrap import MovingBlockBootstrapTransformer + from sktime.utils.estimators import MockForecaster from sktime.utils.validation._dependencies import _check_soft_dependencies params = [ diff --git a/sktime/forecasting/compose/_column_ensemble.py b/sktime/forecasting/compose/_column_ensemble.py index a700c64cec7..7a362ee7fe1 100644 --- a/sktime/forecasting/compose/_column_ensemble.py +++ b/sktime/forecasting/compose/_column_ensemble.py @@ -88,6 +88,7 @@ class ColumnEnsembleForecaster(_HeterogenousEnsembleForecaster, _ColumnEstimator """ _tags = { + "authors": ["GuzalBulatova", "mloning", "fkiraly"], "scitype:y": "both", "ignores-exogeneous-X": False, "y_inner_mtype": PANDAS_MTYPES, diff --git a/sktime/forecasting/compose/_ensemble.py b/sktime/forecasting/compose/_ensemble.py index f523479d63f..5f3c46e7d75 100644 --- a/sktime/forecasting/compose/_ensemble.py +++ b/sktime/forecasting/compose/_ensemble.py @@ -104,6 +104,7 @@ class AutoEnsembleForecaster(_HeterogenousEnsembleForecaster): """ _tags = { + "authors": ["mloning", "GuzalBulatova", "aiwalter", "RNKuhns", "AnH0ang"], "ignores-exogeneous-X": False, "requires-fh-in-fit": False, "handles-missing-data": False, @@ -310,6 +311,7 @@ class EnsembleForecaster(_HeterogenousEnsembleForecaster): """ _tags = { + "authors": ["mloning", "GuzalBulatova", "aiwalter", "RNKuhns", "AnH0ang"], "ignores-exogeneous-X": False, "requires-fh-in-fit": False, "handles-missing-data": False, @@ -364,8 +366,14 @@ def _predict(self, fh, X): """ names, _ = self._check_forecasters() y_pred = pd.concat(self._predict_forecasters(fh, X), axis=1, keys=names) - y_pred = y_pred.groupby(level=1, axis=1).agg( - _aggregate, self.aggfunc, self.weights + y_pred = ( + y_pred.T.groupby(level=1) + .agg( + lambda y, aggfunc, weights: _aggregate(y.T, aggfunc, weights), + self.aggfunc, + self.weights, + ) + .T ) return y_pred diff --git a/sktime/forecasting/compose/_fallback.py b/sktime/forecasting/compose/_fallback.py new file mode 100644 index 00000000000..e23f5f5beac --- /dev/null +++ b/sktime/forecasting/compose/_fallback.py @@ -0,0 +1,257 @@ +#!/usr/bin/env python3 -u +# copyright: sktime developers, BSD-3-Clause License (see LICENSE file). +""" +Implements the FallbackForecaster. + +The FallbackForecaster works with a list of forecasters and tries to fit them in order. +If the active forecaster fails during prediction, it proceeds to the next. This ensures +a robust forecasting mechanism by providing fallback options. +""" + +__author__ = ["ninedigits"] +__all__ = ["FallbackForecaster"] + +from sktime.base import _HeterogenousMetaEstimator +from sktime.datatypes import ALL_TIME_SERIES_MTYPES +from sktime.forecasting.base._delegate import _DelegatedForecaster +from sktime.utils.warnings import warn + + +class FallbackForecaster(_HeterogenousMetaEstimator, _DelegatedForecaster): + """Forecaster that sequentially tries a list of forecasting models. + + Attempts to fit the provided forecasters in the order they are given. If a + forecaster fails during fitting or prediction, it proceeds to the next one. This + class is useful in scenarios where the reliability of individual forecasting models + may be in question, and a fallback mechanism is desired. + + Parameters + ---------- + forecasters : list of forecasters, or + list of tuples (str, estimator) of sktime forecasters + Forecasters to be tried sequentially. + These are "blueprint" transformers resp forecasters, + forecaster states do not change when `fit` is called + + verbose : bool, default=False + If True, raises warnings when a forecaster fails to fit or predict. + + Attributes + ---------- + forecasters_ : list of (str, estimator) tuples + The forecasters to be tried sequentially. + Forecasters that have been fitted successfully are stored in this list. + first_nonfailing_forecaster_index_ : int + Index of the first non-failing forecaster in the list of forecasters. + current_forecaster_ : sktime forecaster + pointer to the first forecaster that was successfully fitted + same as ``forecasters_[first_nonfailing_forecaster_index_][1]`` + current_name_ : str + name of the current forecaster + same as ``forecasters_[first_nonfailing_forecaster_index_][0]`` + exceptions_raised_ : dict + dictionary of exceptions raised by forecasters during fitting or prediction + keys are int indices of forecasters in the list of forecasters + values are dicts with keys "failed_at_step", "exception", "forecaster_name" + failed_at_step is either "fit" or "predict" + exception is the exception raised by the forecaster + forecaster_name is the name of the forecaster + + Examples + -------- + >>> from sktime.forecasting.naive import NaiveForecaster + >>> from sktime.forecasting.compose import FallbackForecaster + >>> from sktime.forecasting.compose import EnsembleForecaster + >>> from sktime.forecasting.trend import PolynomialTrendForecaster + >>> from sktime.datasets import load_airline + >>> y = load_airline() + >>> # first fit polyomial trend, if fails make naive forecast + >>> forecasters = [ + ... ("poly", PolynomialTrendForecaster()), + ... ("naive", NaiveForecaster()) + ... ] + >>> forecaster = FallbackForecaster(forecasters=forecasters) + >>> forecaster.fit(y=y, fh=[1, 2, 3]) + FallbackForecaster(...) + >>> y_pred = forecaster.predict() + """ + + _tags = { + "authors": ["ninedigits"], + "maintainers": ["ninedigits"], + "handles-missing-data": True, + "scitype:y": "both", + "y_inner_mtype": ALL_TIME_SERIES_MTYPES, + "X_inner_mtype": ALL_TIME_SERIES_MTYPES, + "fit_is_empty": False, + } + # for default get_params/set_params from _HeterogenousMetaEstimator + # _steps_attr points to the attribute of self + # which contains the heterogeneous set of estimators + # this must be an iterable of (name: str, estimator, ...) tuples for the default + _steps_attr = "_forecasters" + # if the estimator is fittable, _HeterogenousMetaEstimator also + # provides an override for get_fitted_params for params from the fitted estimators + # the fitted estimators should be in a different attribute, _steps_fitted_attr + # this must be an iterable of (name: str, estimator, ...) tuples for the default + _steps_fitted_attr = "forecasters_" + + def __init__(self, forecasters, verbose=False): + super().__init__() + + self.forecasters = forecasters + self.current_forecaster_ = None + self.current_name_ = None + self.verbose = verbose + + self._forecasters = self._check_estimators( + forecasters, "forecasters", clone_ests=False + ) + self.forecasters_ = self._check_estimators(forecasters, "forecasters") + + self._anytagis_then_set("requires-fh-in-fit", True, False, self._forecasters) + self._anytagis_then_set("capability:pred_int", False, True, self._forecasters) + + def _get_delegate(self): + return self.current_forecaster_ + + def _fit(self, y, X=None, fh=None): + """Fit the forecasters in the given order until one succeeds. + + Parameters + ---------- + y : array-like + Target time series to which to fit the forecasters. + X : array-like, optional (default=None) + Exogenous variables. + fh : array-like, optional (default=None) + The forecasting horizon. + + Returns + ------- + self : an instance of self + + Raises + ------ + RuntimeError + If all forecasters fail to fit. + """ + self.first_nonfailing_forecaster_index_ = 0 + self.exceptions_raised_ = dict() + return self._try_fit_forecasters(y=y, X=X, fh=fh) + + def _try_fit_forecasters(self, y, X, fh): + """ + Attempt to fit the forecasters in sequence until one succeeds. + + This method iterates over the forecasters starting from the index + `first_nonfailing_forecaster_index_`. For each forecaster, it tries to fit it + with the current data. If the fit method of a forecaster raises an exception, + it records the exception and proceeds to the next forecaster. If a forecaster + fits successfully, it updates the current forecaster and its name. + + Returns + ------- + self : an instance of self + + Raises + ------ + RuntimeError + If all forecasters fail to fit. + """ + while True: + ix = self.first_nonfailing_forecaster_index_ + if self.first_nonfailing_forecaster_index_ >= len(self.forecasters_): + raise RuntimeError("No remaining forecasters to attempt prediction.") + name, forecaster = self.forecasters_[ix] + try: + self.current_name_ = name + self.current_forecaster_ = forecaster.clone() + self.current_forecaster_.fit(y=y, X=X, fh=fh) + return self + except Exception as e: + self.exceptions_raised_[self.first_nonfailing_forecaster_index_] = { + "failed_at_step": "fit", + "exception": e, + "forecaster_name": name, + } + self.first_nonfailing_forecaster_index_ += 1 + if self.verbose: + warn( + f"Forecaster {name} failed to fit with error: {e}", + stacklevel=2, + obj=self, + ) + + def _predict(self, fh, X=None): + """Predict using the current forecaster. + + If predict fails, fit and predict with the next forecaster. + + Parameters + ---------- + fh : array-like + The forecasting horizon. + X : array-like, optional (default=None) + Exogenous variables. + + Returns + ------- + y_pred : array-like + The predicted values. + + Raises + ------ + RuntimeError + If no forecaster is fitted or all forecasters fail to predict. + """ + if self.current_forecaster_ is None: + raise RuntimeError("No forecaster has been successfully fitted yet.") + + try: + return self.current_forecaster_.predict(fh, X) + except Exception as e: + self.exceptions_raised_[self.first_nonfailing_forecaster_index_] = { + "failed_at_step": "predict", + "exception": e, + "forecaster_name": self.current_name_, + } + if self.verbose: + warn( + f"Current forecaster failed at prediction with error: {e}", + stacklevel=2, + obj=self, + ) + self.first_nonfailing_forecaster_index_ += 1 + + # Fit the next forecaster and retry prediction + self.current_forecaster_ = None + self._try_fit_forecasters(self._y, self._X, self._fh) + return self.predict(fh, X) + + @classmethod + def get_test_params(cls, parameter_set="default"): + """Return testing parameter settings for the estimator. + + Parameters + ---------- + parameter_set : str, default="default" + Name of the set of test parameters to return, for use in tests. If no + special parameters are defined for a value, will return `"default"` set. + + Returns + ------- + params : dict or list of dict + """ + from sktime.forecasting.compose._reduce import YfromX + from sktime.forecasting.naive import NaiveForecaster + + # univariate case + FORECASTER = NaiveForecaster() + params = [{"forecasters": [("f1", FORECASTER), ("f2", FORECASTER)]}] + + # test multivariate case, i.e., ensembling multiple variables at same time + FORECASTER2 = YfromX.create_test_instance() + params = params + [{"forecasters": [FORECASTER2, FORECASTER]}] + + return params diff --git a/sktime/forecasting/compose/_fhplex.py b/sktime/forecasting/compose/_fhplex.py index db9b266a2d1..d48acd97d42 100644 --- a/sktime/forecasting/compose/_fhplex.py +++ b/sktime/forecasting/compose/_fhplex.py @@ -81,6 +81,7 @@ class FhPlexForecaster(BaseForecaster): """ _tags = { + "authors": "fkiraly", "requires-fh-in-fit": True, "handles-missing-data": True, "scitype:y": "both", diff --git a/sktime/forecasting/compose/_grouped.py b/sktime/forecasting/compose/_grouped.py index f88a8955570..c2c4e44a4d2 100644 --- a/sktime/forecasting/compose/_grouped.py +++ b/sktime/forecasting/compose/_grouped.py @@ -53,6 +53,7 @@ class ForecastByLevel(_DelegatedForecaster): """ _tags = { + "authors": ["fkiraly"], "requires-fh-in-fit": False, "handles-missing-data": True, "scitype:y": "both", @@ -74,7 +75,7 @@ def __init__(self, forecaster, groupby="local"): super().__init__() - self.clone_tags(self.forecaster_) + self._set_delegated_tags(self.forecaster_) self.set_tags(**{"fit_is_empty": False}) if groupby == "local": diff --git a/sktime/forecasting/compose/_hierarchy_ensemble.py b/sktime/forecasting/compose/_hierarchy_ensemble.py index 1eca3af0351..bcc47d5d262 100644 --- a/sktime/forecasting/compose/_hierarchy_ensemble.py +++ b/sktime/forecasting/compose/_hierarchy_ensemble.py @@ -93,6 +93,8 @@ class HierarchyEnsembleForecaster(_HeterogenousEnsembleForecaster): """ _tags = { + "authors": ["VyomkeshVyas"], + "maintainers": ["VyomkeshVyas"], "scitype:y": "both", "ignores-exogeneous-X": False, "y_inner_mtype": ["pd.DataFrame", "pd-multiindex", "pd_multiindex_hier"], @@ -285,7 +287,8 @@ def _get_node_dict(self, z): if counter == 0: nodes = mi else: - nodes.append(mi) + # For nlevels = 2, 'nodes' is pd.Index object (L286) + nodes = nodes.append(mi) else: node_l = [] for i in range(len(node)): @@ -495,6 +498,7 @@ def _check_forecasters(self, y, z): raise ValueError("Nodes cannot be empty.") if z.index.nlevels == 2: nodes_ix = pd.Index(nodes) + nodes_t += nodes else: nodes_l = [] for i in range(len(nodes)): diff --git a/sktime/forecasting/compose/_ignore_x.py b/sktime/forecasting/compose/_ignore_x.py new file mode 100644 index 00000000000..6ff8e8bdb5f --- /dev/null +++ b/sktime/forecasting/compose/_ignore_x.py @@ -0,0 +1,76 @@ +# copyright: sktime developers, BSD-3-Clause License (see LICENSE file) +"""Implements compositor for ignoring exogenous data.""" + +__author__ = ["fkiraly"] + +from sktime.forecasting.base._delegate import _DelegatedForecaster + + +class IgnoreX(_DelegatedForecaster): + """Compositor for ignoring exogenous variables. + + Composing with IgnoreX instructs the wrapped forecaster to ignore exogenous + data. This is useful for testing the impact of exogenous data on forecasts, + or for use in tuning hyperparameters of the forecaster. + + Parameters + ---------- + forecaster : sktime forecaster, BaseForecaster descendant instance + The forecaster to wrap. + ignore_x : bool, optional (default=True) + Whether to ignore exogenous data or not, this parameter is useful for tuning. + True: ignore exogenous data, X is not passed on to ``forecaster`` + False: use exogenous data, X is passed on to ``forecaster`` + + Attributes + ---------- + forecaster_ : clone of forecaster + The fitted forecaster. + """ + + # attribute for _DelegatedForecaster, which then delegates + # all non-overridden methods are same as of getattr(self, _delegate_name) + # see further details in _DelegatedForecaster docstring + _delegate_name = "forecaster_" + + _tags = { + "ignores-exogeneous-X": True, + } + + def __init__(self, forecaster, ignore_x=True): + self.forecaster = forecaster + self.ignore_x = ignore_x + + super().__init__() + + self.forecaster_ = forecaster.clone() + + self._set_delegated_tags(self.forecaster_) + self.set_tags(**{"ignores-exogeneous-X": True}) + + if not ignore_x: + self.set_tags(**{"ignores-exogeneous-X": ignore_x}) + + @classmethod + def get_test_params(cls, parameter_set="default"): + """Return testing parameter settings for the estimator. + + Parameters + ---------- + parameter_set : str, default="default" + Name of the set of test parameters to return, for use in tests. If no + special parameters are defined for a value, will return `"default"` set. + + + Returns + ------- + params : dict or list of dict + """ + from sktime.forecasting.naive import NaiveForecaster + + f = NaiveForecaster() + + params1 = {"forecaster": f} + params2 = {"forecaster": f, "ignore_x": False} + + return [params1, params2] diff --git a/sktime/forecasting/compose/_multiplexer.py b/sktime/forecasting/compose/_multiplexer.py index 3bd81ee8ba0..4b48bcc2db7 100644 --- a/sktime/forecasting/compose/_multiplexer.py +++ b/sktime/forecasting/compose/_multiplexer.py @@ -77,6 +77,7 @@ class MultiplexForecaster(_HeterogenousMetaEstimator, _DelegatedForecaster): """ _tags = { + "authors": ["kkoralturk", "aiwalter", "fkiraly", "miraep8"], "requires-fh-in-fit": False, "handles-missing-data": False, "scitype:y": "both", @@ -118,7 +119,7 @@ def __init__( ) self._set_forecaster() - self.clone_tags(self.forecaster_) + self._set_delegated_tags() self.set_tags(**{"fit_is_empty": False}) # this ensures that we convert in the inner estimator, not in the multiplexer self.set_tags(**{"y_inner_mtype": ALL_TIME_SERIES_MTYPES}) diff --git a/sktime/forecasting/compose/_pipeline.py b/sktime/forecasting/compose/_pipeline.py index a86242c75f9..7c73f252094 100644 --- a/sktime/forecasting/compose/_pipeline.py +++ b/sktime/forecasting/compose/_pipeline.py @@ -170,7 +170,7 @@ def _get_inverse_transform(self, transformers, y, X=None, mode=None): if len(levels) == 1: levels = levels[0] yt[ix] = y.xs(ix, level=levels, axis=1) - # todo 0.25.0 - check why this cannot be easily removed + # todo 0.27.0 - check why this cannot be easily removed # in theory, we should get rid of the "Coverage" case treatment # (the legacy naming convention was removed in 0.23.0) # deal with the "Coverage" case, we need to get rid of this @@ -391,6 +391,7 @@ class ForecastingPipeline(_Pipeline): """ _tags = { + "authors": ["mloning", "fkiraly", "aiwalter"], "scitype:y": "both", "y_inner_mtype": SUPPORTED_MTYPES, "X_inner_mtype": SUPPORTED_MTYPES, @@ -814,6 +815,7 @@ class TransformedTargetForecaster(_Pipeline): """ _tags = { + "authors": ["mloning", "fkiraly", "aiwalter"], "scitype:y": "both", "y_inner_mtype": SUPPORTED_MTYPES, "X_inner_mtype": SUPPORTED_MTYPES, @@ -843,6 +845,21 @@ def __init__(self, steps): self.clone_tags(self.forecaster_, tags_to_clone) self._anytagis_then_set("fit_is_empty", False, True, self.steps_) + # above, we cloned the ignores-exogeneous-X tag, + # but we also need to check whether X is used as y in some transformer + # in this case X is not ignored by the pipe, even if the forecaster ignores it + # logic below checks whether there is at least one such transformer + # if there is, we override the ignores-exogeneous-X tag to False + # also see discussion in bug issue #5518 + pre_ts = self.transformers_pre_ + post_ts = self.transformers_post_ + pre_use_y = [est.get_tag("y_inner_mtype") != "None" for _, est in pre_ts] + post_use_y = [est.get_tag("y_inner_mtype") != "None" for _, est in post_ts] + any_t_use_y = any(pre_use_y) or any(post_use_y) + + if any_t_use_y: + self.set_tags(**{"ignores-exogeneous-X": False}) + @property def forecaster_(self): """Return reference to the forecaster in the pipeline. @@ -1259,6 +1276,7 @@ class ForecastX(BaseForecaster): """ _tags = { + "authors": ["fkiraly", "benheid", "yarnabrina"], "X_inner_mtype": SUPPORTED_MTYPES, "y_inner_mtype": SUPPORTED_MTYPES, "scitype:y": "both", @@ -1692,6 +1710,7 @@ class Permute(_DelegatedForecaster, BaseForecaster, _HeterogenousMetaEstimator): """ _tags = { + "authors": "aiwalter", "scitype:y": "both", "y_inner_mtype": ALL_TIME_SERIES_MTYPES, "X_inner_mtype": ALL_TIME_SERIES_MTYPES, @@ -1711,17 +1730,8 @@ def __init__(self, estimator, permutation=None, steps_arg="steps"): self.steps_arg = steps_arg super().__init__() - tags_to_clone = [ - "ignores-exogeneous-X", # does estimator ignore the exogeneous X? - "capability:insample", - "capability:pred_int", # can the estimator produce prediction intervals? - "capability:pred_int:insample", - "requires-fh-in-fit", # is forecasting horizon already required in fit? - "enforce_index_type", # index type that needs to be enforced in X/y - "fit_is_empty", - ] - self.clone_tags(self.estimator, tags_to_clone) + self._set_delegated_tags(estimator) self._set_permuted_estimator() diff --git a/sktime/forecasting/compose/_reduce.py b/sktime/forecasting/compose/_reduce.py index 4f92895012c..b383f38fc3b 100644 --- a/sktime/forecasting/compose/_reduce.py +++ b/sktime/forecasting/compose/_reduce.py @@ -5,10 +5,12 @@ __author__ = [ "mloning", "AyushmaanSeth", + "danbartl", "kAnand77", "LuisZugasti", "Lovkush-A", "fkiraly", + "benheid", ] __all__ = [ @@ -32,7 +34,6 @@ from sktime.datatypes._utilities import get_time_index from sktime.forecasting.base import BaseForecaster, ForecastingHorizon -from sktime.forecasting.base._base import DEFAULT_ALPHA from sktime.forecasting.base._fh import _index_range from sktime.forecasting.base._sktime import _BaseWindowForecaster from sktime.regression.base import BaseRegressor @@ -40,7 +41,7 @@ from sktime.transformations.series.summarize import WindowSummarizer from sktime.utils.datetime import _shift from sktime.utils.estimators.dispatch import construct_dispatch -from sktime.utils.sklearn import is_sklearn_regressor +from sktime.utils.sklearn import is_sklearn_regressor, prep_skl_df from sktime.utils.validation import check_window_length from sktime.utils.warnings import warn @@ -184,22 +185,33 @@ def _sliding_window_transform( Xt = Zt[:, :, :window_length] # Pre-allocate array for sliding windows. # If the scitype is tabular regression, we have to convert X into a 2d array. - if scitype == "tabular-regressor": - if transformers is not None: - return yt, Xt - else: - return yt, Xt.reshape(Xt.shape[0], -1) - else: - return yt, Xt + if scitype == "tabular-regressor" and transformers is None: + Xt = Xt.reshape(Xt.shape[0], -1) + + assert Xt.ndim == 2 or Xt.ndim == 3 + assert yt.ndim == 2 + + return yt, Xt class _Reducer(_BaseWindowForecaster): """Base class for reducing forecasting to regression.""" _tags = { + "authors": [ + "mloning", + "AyushmaanSeth", + "danbartl", + "kAnand77", + "LuisZugasti", + "Lovkush-A", + "fkiraly", + "benheid", + ], "ignores-exogeneous-X": False, # reduction uses X in non-trivial way "handles-missing-data": True, "capability:insample": False, + "capability:pred_int": True, } def __init__( @@ -221,6 +233,25 @@ def __init__( # therefore this is commented out until sktime and sklearn are better aligned # self.set_tags(**{"handles-missing-data": estimator._get_tags()["allow_nan"]}) + # for dealing with probabilistic regressors: + # self._est_type encodes information what type of estimator is passed + if hasattr(estimator, "get_tags"): + _est_type = estimator.get_tag("object_type", "regressor", False) + else: + _est_type = "regressor" + + if _est_type not in ["regressor", "regressor_proba"]: + raise TypeError( + f"error in {type(self).__name}, " + "estimator must be either an sklearn compatible " + "regressor, or an skpro probabilistic regressor." + ) + + # has probabilistic mode iff the estimator is of type regressor_proba + self.set_tags(**{"capability:pred_int": _est_type == "regressor_proba"}) + + self._est_type = _est_type + def _is_predictable(self, last_window): """Check if we can make predictions from last window.""" return ( @@ -229,7 +260,26 @@ def _is_predictable(self, last_window): and np.sum(np.isinf(last_window)) == 0 ) - def _predict_in_sample(self, fh, X=None, return_pred_int=False, alpha=None): + def _predict_quantiles(self, fh, X, alpha): + """Compute/return prediction quantiles for a forecast. + + Parameters + ---------- + fh : guaranteed to be ForecastingHorizon + The forecasting horizon with the steps ahead to to predict. + X : sktime time series object, optional (default=None) + guaranteed to be of an mtype in self.get_tag("X_inner_mtype") + Exogeneous time series for the forecast + alpha : list of float (guaranteed not None and floats in [0,1] interval) + A list of probabilities at which quantile forecasts are computed. + """ + kwargs = {"X": X, "alpha": alpha, "method": "predict_quantiles"} + + y_pred = self._predict_boilerplate(fh, **kwargs) + + return y_pred + + def _predict_in_sample(self, fh, X=None, **kwargs): # Note that we currently only support out-of-sample predictions. For the # direct and multioutput strategy, we need to check this already during fit, # as the fh is required for fitting. @@ -260,6 +310,7 @@ def get_test_params(cls, parameter_set="default"): from sklearn.pipeline import make_pipeline from sktime.transformations.panel.reduce import Tabularizer + from sktime.utils.validation._dependencies import _check_soft_dependencies # naming convention is as follows: # reducers with Tabular take an sklearn estimator, e.g., LinearRegressor @@ -270,7 +321,26 @@ def get_test_params(cls, parameter_set="default"): if "TimeSeries" in cls.__name__: est = make_pipeline(Tabularizer(), est) - params = {"estimator": est, "window_length": 3} + params = [{"estimator": est, "window_length": 3}] + + PROBA_IMPLEMENTED = ["DirectTabularRegressionForecaster"] + self_supports_proba = cls.__name__ in PROBA_IMPLEMENTED + + if _check_soft_dependencies("skpro", severity="none") and self_supports_proba: + from skpro.regression.residual import ResidualDouble + + params_proba_local = { + "estimator": ResidualDouble.create_test_instance(), + "pooling": "local", + "window_length": 3, + } + params_proba_global = { + "estimator": ResidualDouble.create_test_instance(), + "pooling": "global", + "window_length": 4, + } + params = params + [params_proba_local, params_proba_global] + return params def _get_shifted_window(self, shift=0, y_update=None, X_update=None): @@ -343,6 +413,9 @@ def _get_shifted_window(self, shift=0, y_update=None, X_update=None): # first observation after the window (this is what the window is summarized to). index_range = _index_range(relative_int, cutoff) + if isinstance(cutoff, pd.DatetimeIndex): + if cutoff.tzinfo is not None: + index_range = index_range.tz_localize(cutoff.tzinfo) # index_range will convert the indices to the date format of cutoff y_raw = _create_fcst_df(index_range, self._y) @@ -508,6 +581,8 @@ def _fit(self, y, X, fh): raise NotImplementedError("In-sample predictions are not implemented.") yt, Xt = self._transform(y, X) + if hasattr(Xt, "columns"): + Xt.columns = Xt.columns.astype(str) # Iterate over forecasting horizon, fitting a separate estimator for each step. self.estimators_ = [] @@ -517,23 +592,26 @@ def _fit(self, y, X, fh): if self.transformers_ is not None: fh_rel = fh.to_relative(self.cutoff) - yt = _cut_df(yt, n_timepoints - fh_rel[i] + 1) - Xt = _cut_df(Xt, n_timepoints - fh_rel[i] + 1, type="head") - estimator.fit(Xt, yt) + Xt_cut = _cut_df(Xt, n_timepoints - fh_rel[i] + 1, type="head") + yt_cut = _cut_df(yt, n_timepoints - fh_rel[i] + 1) + elif self.windows_identical is True or (fh_rel[i] - 1) == 0: + Xt_cut = Xt + yt_cut = yt[:, i] else: - if self.windows_identical is True: - estimator.fit(Xt, yt[:, i]) - else: - if (fh_rel[i] - 1) == 0: - estimator.fit(Xt, yt[:, i]) - else: - estimator.fit(Xt[: -(fh_rel[i] - 1)], yt[: -(fh_rel[i] - 1), i]) + Xt_cut = Xt[: -(fh_rel[i] - 1)] + yt_cut = yt[: -(fh_rel[i] - 1), i] + + # coercion to pandas for skpro proba regressors + if self._est_type != "regressor" and not isinstance(Xt, pd.DataFrame): + Xt_cut = pd.DataFrame(Xt_cut) + if self._est_type != "regressor" and not isinstance(yt, pd.DataFrame): + yt_cut = pd.DataFrame(yt_cut) + + estimator.fit(Xt_cut, yt_cut) self.estimators_.append(estimator) return self - def _predict_last_window( - self, fh, X=None, return_pred_int=False, alpha=DEFAULT_ALPHA - ): + def _predict_last_window(self, fh, X=None, **kwargs): """. In recursive reduction, iteration must be done over the @@ -548,13 +626,20 @@ def _predict_last_window( Forecasting horizon X : pd.DataFrame, optional (default=None) Exogenous time series - return_pred_int : bool - alpha : float or array-like Returns ------- y_return = pd.Series or pd.DataFrame """ + if "method" in kwargs: + method = kwargs.pop("method") + else: + method = "predict" + + # estimator type for case branches + est_type = self._est_type + # "regressor" for sklearn, "regressor_proba" for skpro + if self._X is not None and X is None: raise ValueError( "`X` must be passed to `predict` if `X` is given in `fit`." @@ -564,22 +649,51 @@ def _predict_last_window( y_last, X_last = self._get_shifted_window(X_update=X) ys = np.array(y_last) if not np.sum(np.isnan(ys)) == 0 and np.sum(np.isinf(ys)) == 0: - return self._predict_nan(fh) + return self._predict_nan(fh, method=method, **kwargs) else: y_last, X_last = self._get_last_window() if not self._is_predictable(y_last): - return self._predict_nan(fh) + return self._predict_nan(fh, method=method, **kwargs) # Get last window of available data. # If we cannot generate a prediction from the available data, return nan. + if isinstance(X_last, pd.DataFrame): + X_last = prep_skl_df(X_last) + + def pool_preds(y_preds): + """Pool predictions from different estimators. + + Parameters + ---------- + y_preds : list of pd.DataFrame + List of predictions from different estimators. + """ + y_pred = y_preds.pop(0) + for y_pred_i in y_preds: + y_pred = y_pred.combine_first(y_pred_i) + return y_pred + + def _coerce_to_numpy(y_pred): + """Coerce predictions to numpy array, assumes pd.DataFram or numpy.""" + if isinstance(y_pred, pd.DataFrame): + return y_pred.values + else: + return y_pred + if self.pooling == "global": fh_abs = fh.to_absolute_index(self.cutoff) - y_pred = _create_fcst_df(fh_abs, self._y) + y_preds = [] for i, estimator in enumerate(self.estimators_): - y_pred_short = estimator.predict(X_last) - y_pred_curr = _create_fcst_df([fh_abs[i]], self._y, fill=y_pred_short) - y_pred.update(y_pred_curr) + y_pred_est = getattr(estimator, method)(X_last, **kwargs) + if est_type == "regressor": + y_pred_i = _create_fcst_df([fh_abs[i]], self._y, fill=y_pred_est) + else: # est_type == "regressor_proba" + y_pred_v = _coerce_to_numpy(y_pred_est) + y_pred_i = _create_fcst_df([fh_abs[i]], y_pred_est, fill=y_pred_v) + y_preds.append(y_pred_i) + y_pred = pool_preds(y_preds) + else: # Pre-allocate arrays. if self._X is None: @@ -603,11 +717,33 @@ def _predict_last_window( X_pred = X_pred.reshape(1, -1) # Allocate array for predictions. - y_pred = np.zeros(len(fh)) + if est_type == "regressor": + y_pred = np.zeros(len(fh)) + else: # est_type == "regressor_proba" + y_preds = [] # Iterate over estimators/forecast horizon for i, estimator in enumerate(self.estimators_): - y_pred[i] = estimator.predict(X_pred) + y_pred_est = getattr(estimator, method)(X_pred, **kwargs) + if est_type == "regressor": + y_pred[i] = y_pred_est + else: # est_type == "regressor_proba" + y_pred_v = _coerce_to_numpy(y_pred_est) + y_pred_i = _create_fcst_df([fh[i]], y_pred_est, fill=y_pred_v) + y_preds.append(y_pred_i) + + if est_type != "regressor": + y_pred = pool_preds(y_preds) + + # coerce index and columns to expected + index = fh.get_expected_pred_idx(y=self._y, cutoff=self.cutoff) + columns = self._get_columns(method=method, **kwargs) + if isinstance(y_pred, pd.DataFrame): + y_pred.index = index + y_pred.columns = columns + else: + y_pred = pd.DataFrame(y_pred, index=index, columns=columns) + return y_pred @@ -660,9 +796,7 @@ def _fit(self, y, X, fh): self.estimator_.fit(Xt, yt) return self - def _predict_last_window( - self, fh, X=None, return_pred_int=False, alpha=DEFAULT_ALPHA - ): + def _predict_last_window(self, fh, X=None, **kwargs): """Predict to training data. Parameters @@ -671,8 +805,6 @@ def _predict_last_window( Forecasting horizon X : pd.DataFrame, optional (default=None) Exogenous time series - return_pred_int : bool - alpha : float or array-like Returns ------- @@ -823,9 +955,7 @@ def _fit(self, y, X, fh): self.estimator_.fit(Xt, yt) return self - def _predict_last_window( - self, fh, X=None, return_pred_int=False, alpha=DEFAULT_ALPHA - ): + def _predict_last_window(self, fh, X=None, **kwargs): """. In recursive reduction, iteration must be done over the @@ -869,6 +999,9 @@ def _predict_last_window( fh_max = fh.to_relative(self.cutoff)[-1] relative = pd.Index(list(map(int, range(1, fh_max + 1)))) index_range = _index_range(relative, self.cutoff) + if isinstance(self.cutoff, pd.DatetimeIndex): + if self.cutoff.tzinfo is not None: + index_range = index_range.tz_localize(self.cutoff.tzinfo) y_pred = _create_fcst_df(index_range, self._y) @@ -895,15 +1028,27 @@ def _predict_last_window( fh_max = fh.to_relative(self.cutoff)[-1] y_pred = np.zeros(fh_max) + + # Array with input data for prediction. last = np.zeros((1, n_columns, window_length + fh_max)) # Fill pre-allocated arrays with available data. last[:, 0, :window_length] = y_last if X is not None: - last[:, 1:, :window_length] = X_last.T - last[:, 1:, window_length:] = X.iloc[ - -(last.shape[2] - window_length) :, : - ].T + X_to_use = np.concatenate( + [X_last.T, X.iloc[-(last.shape[2] - window_length) :, :].T], axis=1 + ) + if X_to_use.shape[1] < window_length + fh_max: + X_to_use = np.pad( + X_to_use, + ((0, 0), (0, window_length + fh_max - X_to_use.shape[1])), + "edge", + ) + elif X_to_use.shape[1] > window_length + fh_max: + X_to_use = X_to_use[:, : window_length + fh_max] + # else X_to_use.shape[1] == window_length + fh_max + # and there are no additional steps to take + last[:, 1:] = X_to_use # Recursively generate predictions by iterating over forecasting horizon. for i in range(fh_max): @@ -1016,9 +1161,7 @@ def _fit(self, y, X, fh): self.estimators_.append(estimator) return self - def _predict_last_window( - self, fh, X=None, return_pred_int=False, alpha=DEFAULT_ALPHA - ): + def _predict_last_window(self, fh, X=None, **kwargs): """Fit to training data. Parameters @@ -1027,8 +1170,6 @@ def _predict_last_window( Forecasting horizon X : pd.DataFrame, optional (default=None) Exogenous time series - return_pred_int : bool - alpha : float or array-like Returns ------- @@ -1388,20 +1529,27 @@ def make_reduction( Parameters ---------- - estimator : an estimator instance - Either a tabular regressor from scikit-learn or a time series regressor from - sktime. + estimator : an estimator instance, can be: + + * scikit-learn regressor or interface compatible + * sktime time series regressor + * skpro tabular probabilistic supervised regressor, only for direct reduction + this will result in a probabilistic forecaster + strategy : str, optional (default="recursive") The strategy to generate forecasts. Must be one of "direct", "recursive" or "multioutput". + window_length : int, optional (default=10) Window length used in sliding window transformation. + scitype : str, optional (default="infer") Legacy argument for downwards compatibility, should not be used. `make_reduction` will automatically infer the correct type of `estimator`. This internal inference can be force-overridden by the `scitype` argument. Must be one of "infer", "tabular-regressor" or "time-series-regressor". If the scitype cannot be inferred, this is a bug and should be reported. + transformers: list of transformers (default = None) A suitable list of transformers that allows for using an en-bloc approach with make_reduction. This means that instead of using the raw past observations of @@ -1409,10 +1557,12 @@ def make_reduction( the past raw observations. Currently only supports WindowSummarizer (or a list of WindowSummarizers) to generate features e.g. the mean of the past 7 observations. Currently only works for RecursiveTimeSeriesRegressionForecaster. + pooling: str {"local", "global"}, optional Specifies whether separate models will be fit at the level of each instance (local) of if you wish to fit a single model to all instances ("global"). Currently only works for RecursiveTimeSeriesRegressionForecaster. + windows_identical: bool, (default = True) Direct forecasting only. Specifies whether all direct models use the same X windows from y (True: Number @@ -1423,8 +1573,9 @@ def make_reduction( Returns ------- - estimator : an Estimator instance - A reduction forecaster + forecaster : an sktime forecaster object + the reduction forecaster, wrapping ``estimator`` + class is determined by the ``strategy`` argument and type of ``estimator``. Examples -------- @@ -1527,11 +1678,11 @@ def _cut_df(X, n_obs=1, type="tail"): if n_obs == 0: return X.copy() if isinstance(X.index, pd.MultiIndex): - Xi_grp = X.index.names[0:-1] + levels = list(range(X.index.nlevels - 1)) if type == "tail": - X = X.groupby(Xi_grp, as_index=False).tail(n_obs) + X = X.groupby(level=levels, as_index=False).tail(n_obs) elif type == "head": - X = X.groupby(Xi_grp, as_index=False).head(n_obs) + X = X.groupby(level=levels, as_index=False).head(n_obs) else: if type == "tail": X = X.tail(n_obs) @@ -1566,75 +1717,31 @@ def _create_fcst_df(target_date, origin_df, fill=None): ------- A pandas dataframe or series """ - oi = origin_df.index - if not isinstance(oi, pd.MultiIndex): - if isinstance(origin_df, pd.Series): - if fill is None: - template = pd.Series(np.zeros(len(target_date)), index=target_date) - else: - template = pd.Series(fill, index=target_date) - template.name = origin_df.name - return template - else: - if fill is None: - template = pd.DataFrame( - np.zeros((len(target_date), len(origin_df.columns))), - index=target_date, - columns=origin_df.columns.to_list(), - ) - else: - template = pd.DataFrame( - fill, index=target_date, columns=origin_df.columns.to_list() - ) - return template + if not isinstance(target_date, ForecastingHorizon): + ix = pd.Index(target_date) + fh = ForecastingHorizon(ix, is_relative=False) else: - idx = origin_df.index.to_frame(index=False) - instance_names = idx.columns[0:-1].to_list() - time_names = idx.columns[-1] - idx = idx[instance_names].drop_duplicates() - - timeframe = pd.DataFrame(target_date, columns=[time_names]) - target_frame = idx.merge(timeframe, how="cross") - if hasattr(target_date, "freq"): - freq_inferred = target_date.freq - mi = ( - target_frame.groupby(instance_names, as_index=True) - .apply( - lambda df: df.drop(instance_names, axis=1) - .set_index(time_names) - .asfreq(freq_inferred) - ) - .index - ) - else: - mi = ( - target_frame.groupby(instance_names, as_index=True) - .apply(lambda df: df.drop(instance_names, axis=1).set_index(time_names)) - .index - ) + fh = target_date.to_absolute() - if fill is None: - template = pd.DataFrame( - np.zeros((len(target_date) * idx.shape[0], len(origin_df.columns))), - index=mi, - columns=origin_df.columns.to_list(), - ) - else: - template = pd.DataFrame( - fill, - index=mi, - columns=origin_df.columns.to_list(), - ) + index = fh.get_expected_pred_idx(origin_df) + + if isinstance(origin_df, pd.Series): + columns = [origin_df.name] + else: + columns = origin_df.columns.to_list() - template = template.astype(origin_df.dtypes.to_dict()) - return template + if fill is None: + values = 0 + else: + values = fill + res = pd.DataFrame(values, index=index, columns=columns) -def _coerce_col_str(X): - """Coerce columns to string, to satisfy sklearn convention.""" - X = X.copy() - X.columns = [str(x) for x in X.columns] - return X + if isinstance(origin_df, pd.Series) and not isinstance(index, pd.MultiIndex): + res = res.iloc[:, 0] + res.name = origin_df.name + + return res def slice_at_ix(df, ix): @@ -1744,7 +1851,7 @@ class DirectReductionForecaster(BaseForecaster, _ReducerMixin): to obtain a prediction for `y(c+h)`, for each `h` in the forecasting horizon if `X_treatment = "shifted": applies fitted estimator's predict to - features = `y(c)`, `y(c-1)`, ..., `y(c-window_size)`, if provided: `X(t)` + features = `y(c)`, `y(c-1)`, ..., `y(c-window_size)`, if provided: `X(c)` to obtain prediction for `y(c+h_1)`, ..., `y(c+h_k)` for `h_j` in forec. horizon Parameters @@ -1773,6 +1880,7 @@ class DirectReductionForecaster(BaseForecaster, _ReducerMixin): """ _tags = { + "authors": "fkiraly", "requires-fh-in-fit": True, # is the forecasting horizon required in fit? "ignores-exogeneous-X": False, "X_inner_mtype": ["pd.DataFrame", "pd-multiindex", "pd_multiindex_hier"], @@ -1873,8 +1981,8 @@ def _fit_shifted(self, y, X=None, fh=None): Xt = lagger_y_to_X.fit_transform(X=y, y=X) Xt = Xt.loc[y_notna_idx] - Xt = _coerce_col_str(Xt) - yt = _coerce_col_str(yt) + Xt = prep_skl_df(Xt) + yt = prep_skl_df(yt) estimator = clone(self.estimator) if not estimator._get_tags()["multioutput"]: @@ -1899,7 +2007,7 @@ def _predict_shifted(self, fh=None, X=None): Xt = lagger_y_to_X.transform(X=self._y, y=self._X) Xt_lastrow = slice_at_ix(Xt, self.cutoff) - Xt_lastrow = _coerce_col_str(Xt_lastrow) + Xt_lastrow = prep_skl_df(Xt_lastrow) estimator = self.estimator_ # 2D numpy array with col index = (fh, var) and 1 row @@ -1963,8 +2071,8 @@ def _fit_concurrent(self, y, X=None, fh=None): yt = yt.loc[notna_idx] Xtt = Xtt.loc[notna_idx] - Xtt = _coerce_col_str(Xtt) - yt = _coerce_col_str(yt) + Xtt = prep_skl_df(Xtt) + yt = prep_skl_df(yt) # we now check whether the set of full lags is empty # if yes, we set a flag, since we cannot fit the reducer @@ -2009,7 +2117,7 @@ def _predict_concurrent(self, X=None, fh=None): Xt = lagger_y_to_X[-lag].transform(X=self._y, y=X_pool) Xtt = lag_plus.fit_transform(Xt) Xtt_predrow = slice_at_ix(Xtt, predict_idx) - Xtt_predrow = _coerce_col_str(Xtt_predrow) + Xtt_predrow = prep_skl_df(Xtt_predrow) estimator = self.estimators_[i] @@ -2112,6 +2220,7 @@ class RecursiveReductionForecaster(BaseForecaster, _ReducerMixin): """ _tags = { + "authors": "fkiraly", "requires-fh-in-fit": False, # is the forecasting horizon required in fit? "ignores-exogeneous-X": False, "X_inner_mtype": ["pd.DataFrame", "pd-multiindex", "pd_multiindex_hier"], @@ -2208,8 +2317,8 @@ def _fit(self, y, X, fh): if X is not None: Xtt = pd.concat([X.loc[notna_idx], Xtt], axis=1) - Xtt = _coerce_col_str(Xtt) - yt = _coerce_col_str(yt) + Xtt = prep_skl_df(Xtt) + yt = prep_skl_df(yt) estimator = clone(self.estimator) estimator.fit(Xtt, yt) @@ -2304,7 +2413,7 @@ def _predict_out_of_sample(self, X_pool, fh): [slice_at_ix(X_pool, predict_idx), Xtt_predrow], axis=1 ) - Xtt_predrow = _coerce_col_str(Xtt_predrow) + Xtt_predrow = prep_skl_df(Xtt_predrow) estimator = self.estimator_ @@ -2355,7 +2464,7 @@ def _predict_in_sample(self, X_pool, fh): [slice_at_ix(X_pool, fh_abs), Xtt_predrows], axis=1 ) - Xtt_predrows = _coerce_col_str(Xtt_predrows) + Xtt_predrows = prep_skl_df(Xtt_predrows) estimator = self.estimator_ @@ -2404,32 +2513,38 @@ def get_test_params(cls, parameter_set="default"): class YfromX(BaseForecaster, _ReducerMixin): """Simple reduction predicting endogeneous from concurrent exogeneous variables. - Tabulates all seen `X` and `y` by time index and applies + Tabulates all seen ``X`` and ``y`` by time index and applies tabular supervised regression. - In `fit`, given endogeneous time series `y` and exogeneous `X`: - fits `estimator` to feature-label pairs as defined as follows. + In ``fit``, given endogeneous time series ``y`` and exogeneous ``X``: + fits ``estimator`` to feature-label pairs as defined as follows. + + features = :math:`y(t)`, labels: :math:`X(t)` + ranging over all :math:`t` where the above have been observed (are in the index) - features = :math:`y(t)`, labels: :math:`X(t)` - ranging over all :math:`t` where the above have been observed (are in the index) + In ``predict``, at a time :math:`t` in the forecasting horizon, uses ``estimator`` + to predict :math:`y(t)`, from labels: :math:`X(t)` - In `predict`, at a time :math:`t` in the forecasting horizon, uses `estimator` - to predict :math:`y(t)`, from labels: :math:`X(t)` + If regressor is ``skpro`` probabilistic regressor, and has ``predict_interval`` etc, + uses ``estimator`` to predict :math:`y(t)`, from labels: :math:`X(t)`, + passing on the ``predict_interval`` etc arguments. - If no exogeneous data is provided, will predict the mean of `y` seen in `fit`. + If no exogeneous data is provided, will predict the mean of ``y`` seen in ``fit``. In order to use a fit not on the entire historical data - and update periodically, combine this with `UpdateRefitsEvery`. + and update periodically, combine this with ``UpdateRefitsEvery``. - In order to deal with missing data, combine this with `Imputer`. + In order to deal with missing data, combine this with ``Imputer``. To construct an custom direct reducer, - combine with `YtoX`, `Lag`, or `ReducerTransform`. + combine with ``YtoX``, ``Lag``, or ``ReducerTransform``. Parameters ---------- - estimator : sklearn regressor, must be compatible with sklearn interface + estimator : sklearn regressor or skpro probabilistic regressor, + must be compatible with sklearn or skpro interface tabular regression algorithm used in reduction algorithm + if skpro regressor, resulting forecaster will have probabilistic capability pooling : str, one of ["local", "global", "panel"], optional, default="local" level on which data are pooled to fit the supervised regression model "local" = unit/instance level, one reduced model per lowest hierarchy level @@ -2545,13 +2660,13 @@ def _fit(self, y, X, fh): dummy = DummyRegressor() estimator = ResidualDouble(dummy) - X = _coerce_col_str(y) + X = prep_skl_df(y, copy_df=True) else: - X = _coerce_col_str(X) + X = prep_skl_df(X, copy_df=True) estimator = clone(self.estimator) if _est_type == "regressor": - y = _coerce_col_str(y) + y = prep_skl_df(y, copy_df=True) y = y.values.flatten() estimator.fit(X, y) @@ -2756,7 +2871,7 @@ def _get_pred_X(self, X, fh_idx): else: X_pool = pd.DataFrame(0, index=fh_idx, columns=y_cols) - X_pool = _coerce_col_str(X_pool) + X_pool = prep_skl_df(X_pool, copy_df=True) X_idx = X_pool.loc[fh_idx] return X_idx diff --git a/sktime/forecasting/compose/_stack.py b/sktime/forecasting/compose/_stack.py index 8bcc8e163af..3171bd33a12 100644 --- a/sktime/forecasting/compose/_stack.py +++ b/sktime/forecasting/compose/_stack.py @@ -60,6 +60,7 @@ class StackingForecaster(_HeterogenousEnsembleForecaster): """ _tags = { + "authors": ["mloning", "fkiraly", "indinewton"], "ignores-exogeneous-X": False, "requires-fh-in-fit": True, "handles-missing-data": True, diff --git a/sktime/forecasting/compose/tests/test_fallback.py b/sktime/forecasting/compose/tests/test_fallback.py new file mode 100644 index 00000000000..61c8c7bce61 --- /dev/null +++ b/sktime/forecasting/compose/tests/test_fallback.py @@ -0,0 +1,531 @@ +#!/usr/bin/env python3 -u +# copyright: sktime developers, BSD-3-Clause License (see LICENSE file). +"""Unit tests for the FallbackForecaster functionality. + +Tests cover the basic operations of the FallbackForecaster, ensuring proper +functionality of fitting, predicting, updating, and handling of errors in the +forecasting process. +""" + +__author__ = ["ninedigits"] + +import pandas as pd +import pytest + +from sktime.base import _HeterogenousMetaEstimator +from sktime.forecasting.base._base import BaseForecaster +from sktime.forecasting.compose import EnsembleForecaster, FallbackForecaster +from sktime.forecasting.naive import NaiveForecaster +from sktime.forecasting.trend import PolynomialTrendForecaster +from sktime.utils._testing.forecasting import make_forecasting_problem + + +class ForecastingError(Exception): + """Exception raised for errors in the forecasting process. + + Attributes + ---------- + message : str + Explanation of the error. + + Methods + ------- + __init__(message) + Constructs the ForecastingError with the provided message.""" + + def __init__(self, message): + self.message = message + super().__init__(message) + + +class DummyForecaster(_HeterogenousMetaEstimator, BaseForecaster): + """Dummy forecaster used for testing the FallbackForecaster. + + This forecaster is intentionally designed to fail at specified stages of the + forecasting process (fit, predict, or update) to test the robustness and fallback + mechanisms of the FallbackForecaster. + + Parameters + ---------- + raise_at : str + Stage at which the forecaster should fail. Options are "fit", "predict", + "update". + + Raises + ------ + AttributeError + If `raise_at` is not one of the valid options. + """ + + def __init__(self, raise_at="fit"): + super().__init__() + __valid__ = ["fit", "predict", "update"] + if raise_at not in __valid__: + raise AttributeError(f"`raise_at` must choose from {__valid__}") + self.forecaster = NaiveForecaster() + self.raise_at = raise_at + self._is_fitted = False + + def _fit(self, y, X=None, fh=None): + """Fit to training data. Optionally fail here.""" + if self.raise_at == "fit": + raise ForecastingError("Intentional failure in fit.") + self.forecaster.fit(y, X, fh) + self._is_fitted = True + return self + + def _predict(self, fh, X=None): + """Make predictions. Optionally fail here.""" + if not self._is_fitted: + raise ForecastingError("The forecaster is not fitted yet.") + if self.raise_at == "predict": + raise ForecastingError("Intentional failure in predict.") + return self.forecaster.predict(fh, X) + + def _update(self, y, X=None, update_params=True): + """Update the forecaster. Optionally fail here.""" + if self.raise_at == "update": + raise ForecastingError("Intentional failure in update.") + self.forecaster.update(y, X, update_params) + return self + + +def test_raises_at_fit(): + """Test dummy forecaster raises at fit""" + # Start with negative time series, Theta model will fail here + y = make_forecasting_problem(random_state=42) + forecaster = DummyForecaster(raise_at="fit") + with pytest.raises(ForecastingError): + forecaster.fit(y=y, fh=[1, 2, 3]) + + +def test_raises_at_predict(): + """Test dummy forecaster raises at predict""" + # Start with negative time series, Theta model will fail here + y = make_forecasting_problem(random_state=42) + forecaster = DummyForecaster(raise_at="predict") + forecaster.fit(y=y, fh=[1, 2, 3]) + with pytest.raises(ForecastingError): + forecaster.predict() + + +def test_raises_at_update(): + """Test dummy forecaster raises at update""" + # Start with negative time series, Theta model will fail here + y = make_forecasting_problem(random_state=42) + forecaster = DummyForecaster(raise_at="update") + forecaster.fit(y=y, fh=[1, 2, 3]) + forecaster.predict() + with pytest.raises(ForecastingError): + forecaster.update(y) + + +def test_fallbackforecaster_fails_at_fit(): + """Test FallbackForecaster fails at first fit, second forecaster succeeds""" + y = make_forecasting_problem(random_state=42) + forecaster1 = EnsembleForecaster( + [ + ("trend", PolynomialTrendForecaster()), + ("fail_fit", DummyForecaster(raise_at="fit")), + ] + ) + forecaster2 = EnsembleForecaster( + [ + ("trend", PolynomialTrendForecaster()), + ("naive", NaiveForecaster()), + ] + ) + forecaster = FallbackForecaster( + [ + ( + "ensemble1_fails_at_fit", + forecaster1, + ), + ( + "ensemble2", + forecaster2, + ), + ] + ) + forecaster.fit(y=y, fh=[1, 2, 3]) + y_pred_actual = forecaster.predict() + + forecaster2.fit(y=y, fh=[1, 2, 3]) + y_pred_expected = forecaster2.predict() + + # Assert that the first valid forecaster is trained + name = forecaster.current_name_ + assert name == "ensemble2" + + # Assert that the first valid forecaster produces the same results as it would + # on its own + pd.testing.assert_series_equal(y_pred_expected, y_pred_actual) + + # Count the number of exceptions raised + exceptions_raised = forecaster.exceptions_raised_ + assert len(exceptions_raised) == 1 + + +def test_fallbackforecaster_fails_at_predict(): + """Test FallbackForecaster fails at predict, second forecaster succeeds""" + y = make_forecasting_problem(random_state=42) + forecaster1 = EnsembleForecaster( + [ + ("trend", PolynomialTrendForecaster()), + ("raise_at_predict", DummyForecaster(raise_at="predict")), + ] + ) + forecaster2 = EnsembleForecaster( + [ + ("trend", PolynomialTrendForecaster()), + ("naive", NaiveForecaster()), + ] + ) + + forecaster = FallbackForecaster( + [ + ("forecaster1_fails_predict", forecaster1), + ("forecaster2_succeeded", forecaster2), + ] + ) + forecaster.fit(y=y, fh=[1, 2, 3]) + + # Assert predictions line up with the correct forecaster + y_pred_actual = forecaster.predict() + + forecaster2.fit(y=y, fh=[1, 2, 3]) + y_pred_expected = forecaster2.predict() + + # Assert correct forecaster name + name = forecaster.current_name_ + assert name == "forecaster2_succeeded" + + # Assert correct y_pred + pd.testing.assert_series_equal(y_pred_expected, y_pred_actual) + + +def test_fallbackforecaster_fails_twice(): + """First two FallbackForecasters fail, third succeeds""" + y = make_forecasting_problem(random_state=42) + forecaster1 = EnsembleForecaster( + [ + ("trend", PolynomialTrendForecaster()), + ("fails", DummyForecaster(raise_at="fit")), + ] + ) + forecaster2 = EnsembleForecaster( + [ + ("trend", PolynomialTrendForecaster()), + ("fails", DummyForecaster(raise_at="predict")), + ] + ) + forecaster3 = EnsembleForecaster( + [ + ("trend", PolynomialTrendForecaster()), + ("naive", NaiveForecaster()), + ] + ) + forecaster4 = PolynomialTrendForecaster() + + forecaster = FallbackForecaster( + [ + ("forecaster1_fails_fit", forecaster1), + ("forecaster2_fails_prd", forecaster2), + ("forecaster3_succeeded", forecaster3), + ("forecaster4_notcalled", forecaster4), + ] + ) + forecaster.fit(y=y, fh=[1, 2, 3]) + y_pred_actual = forecaster.predict() + + forecaster3.fit(y=y, fh=[1, 2, 3]) + y_pred_expected = forecaster3.predict() + + # Assert correct forecaster name + name = forecaster.current_name_ + assert name == "forecaster3_succeeded" + + # Assert correct y_pred + pd.testing.assert_series_equal(y_pred_expected, y_pred_actual) + + # Assert correct number of expected exceptions + exceptions_raised = forecaster.exceptions_raised_ + assert len(exceptions_raised) == 2 + + # Assert the correct forecasters failed + names_raised_actual = [ + vals["forecaster_name"] for vals in exceptions_raised.values() + ] + names_raised_expected = ["forecaster1_fails_fit", "forecaster2_fails_prd"] + assert names_raised_actual == names_raised_expected + + +def test_fallbackforecaster_fails_fit_twice(): + """First two FallbackForecasters fail at fit step, third forecaster succeeds""" + y = make_forecasting_problem(random_state=42) + forecaster1 = EnsembleForecaster( + [ + ("trend", PolynomialTrendForecaster()), + ("fails", DummyForecaster(raise_at="fit")), + ] + ) + forecaster2 = EnsembleForecaster( + [ + ("trend", PolynomialTrendForecaster()), + ("fails", DummyForecaster(raise_at="fit")), + ] + ) + forecaster3 = EnsembleForecaster( + [ + ("trend", PolynomialTrendForecaster()), + ("naive", NaiveForecaster()), + ] + ) + forecaster4 = PolynomialTrendForecaster() + + forecaster = FallbackForecaster( + [ + ("forecaster1_fails_fit", forecaster1), + ("forecaster2_fails_fit", forecaster2), + ("forecaster3_succeeded", forecaster3), + ("forecaster4_notcalled", forecaster4), + ] + ) + forecaster.fit(y=y, fh=[1, 2, 3]) + + # Assert predictions line up with the correct forecaster + y_pred_actual = forecaster.predict() + + forecaster3.fit(y=y, fh=[1, 2, 3]) + y_pred_expected = forecaster3.predict() + + # Assert correct forecaster name + name = forecaster.current_name_ + assert name == "forecaster3_succeeded" + + # Assert correct y_pred + pd.testing.assert_series_equal(y_pred_expected, y_pred_actual) + + # Assert correct number of expected exceptions + exceptions_raised = forecaster.exceptions_raised_ + assert len(exceptions_raised) == 2 + + # Assert the correct forecasters failed + names_raised_actual = [ + vals["forecaster_name"] for vals in exceptions_raised.values() + ] + names_raised_expected = ["forecaster1_fails_fit", "forecaster2_fails_fit"] + assert names_raised_actual == names_raised_expected + + +def test_all_forecasters_fail1(): + """All forecasters fail; predict and fit""" + # Start with negative time series, Theta model will fail here + y = make_forecasting_problem(random_state=42) + forecaster1 = ("raise_predict1", DummyForecaster(raise_at="predict")) + forecaster2 = ("raise_fit1", DummyForecaster(raise_at="fit")) + forecaster3 = ("raise_fit2", DummyForecaster(raise_at="fit")) + forecaster = FallbackForecaster([forecaster1, forecaster2, forecaster3]) + with pytest.raises(RuntimeError): + forecaster.fit(y=y, fh=[1, 2, 3]) + forecaster.predict() + + +def test_all_forecasters_fail2(): + """All forecasters fail at fit step""" + # Start with negative time series, Theta model will fail here + y = make_forecasting_problem(random_state=42) + forecaster1 = ("raise_fit1", DummyForecaster(raise_at="fit")) + forecaster2 = ("raise_fit2", DummyForecaster(raise_at="fit")) + forecaster3 = ("raise_fit3", DummyForecaster(raise_at="fit")) + forecaster = FallbackForecaster([forecaster1, forecaster2, forecaster3]) + with pytest.raises(RuntimeError): + forecaster.fit(y=y, fh=[1, 2, 3]) + + +def test_all_forecasters_fail3(): + """All forecasters fail at predict""" + # Start with negative time series, Theta model will fail here + y = make_forecasting_problem(random_state=42) + forecaster1 = ("raise_predict1", DummyForecaster(raise_at="predict")) + forecaster2 = ("raise_predict2", DummyForecaster(raise_at="predict")) + forecaster3 = ("raise_predict3", DummyForecaster(raise_at="predict")) + forecaster = FallbackForecaster([forecaster1, forecaster2, forecaster3]) + with pytest.raises(RuntimeError): + forecaster.fit(y=y, fh=[1, 2, 3]) + forecaster.predict() + + +def test_many_forecasters_fail1(): + """All forecasters fail at predict""" + # Start with negative time series, Theta model will fail here + y = make_forecasting_problem(random_state=42) + forecaster1 = ("raise_predict1", DummyForecaster(raise_at="predict")) + forecaster2 = ("raise_fit2", DummyForecaster(raise_at="fit")) + forecaster3 = ("raise_fit3", DummyForecaster(raise_at="fit")) + forecaster4 = ("raise_predict4", DummyForecaster(raise_at="predict")) + forecaster5 = ("forecaster5", PolynomialTrendForecaster()) + forecaster = FallbackForecaster( + [forecaster1, forecaster2, forecaster3, forecaster4, forecaster5] + ) + forecaster.fit(y, fh=[1, 2, 3]) + y_pred_actual = forecaster.predict() + y_name_actual = forecaster.current_name_ + expected_forecaster = forecaster5[1] + expected_forecaster.fit(y, fh=[1, 2, 3]) + y_pred_expected = expected_forecaster.predict() + y_name_expected = forecaster5[0] + pd.testing.assert_series_equal(y_pred_actual, y_pred_expected) + assert y_name_actual == y_name_expected + + # Assert correct forecaster name + name = forecaster.current_name_ + assert name == "forecaster5" + + # Assert correct y_pred + pd.testing.assert_series_equal(y_pred_expected, y_pred_actual) + + # Assert correct number of expected exceptions + exceptions_raised = forecaster.exceptions_raised_ + assert len(exceptions_raised) == 4 + + # Assert the correct forecasters failed + names_raised_actual = [ + vals["forecaster_name"] for vals in exceptions_raised.values() + ] + names_raised_expected = [ + "raise_predict1", + "raise_fit2", + "raise_fit3", + "raise_predict4", + ] + assert names_raised_actual == names_raised_expected + + +def test_fallbackforecaster_fails_twice_simple(): + """First two FallbackForecasters fail, third succeeds""" + y = make_forecasting_problem(random_state=42) + forecaster1 = DummyForecaster(raise_at="fit") + forecaster2 = DummyForecaster(raise_at="predict") + forecaster3 = PolynomialTrendForecaster() + forecaster4 = NaiveForecaster() + + forecaster = FallbackForecaster( + [ + ("forecaster1_fails_fit", forecaster1), + ("forecaster2_fails_prd", forecaster2), + ("forecaster3_succeeded", forecaster3), + ("forecaster4_notcalled", forecaster4), + ] + ) + forecaster.fit(y=y, fh=[1, 2, 3]) + y_pred_actual = forecaster.predict() + + forecaster3.fit(y=y, fh=[1, 2, 3]) + y_pred_expected = forecaster3.predict() + + # Assert correct forecaster name + name = forecaster.current_name_ + assert name == "forecaster3_succeeded" + + # Assert correct y_pred + pd.testing.assert_series_equal(y_pred_expected, y_pred_actual) + + # Assert correct number of expected exceptions + exceptions_raised = forecaster.exceptions_raised_ + assert len(exceptions_raised) == 2 + + # Assert the correct forecasters failed + names_raised_actual = [ + vals["forecaster_name"] for vals in exceptions_raised.values() + ] + names_raised_expected = ["forecaster1_fails_fit", "forecaster2_fails_prd"] + assert names_raised_actual == names_raised_expected + + +def test_fallbackforecaster_fails_many_simple(): + """First two FallbackForecasters fail, third succeeds""" + y = make_forecasting_problem(random_state=42) + forecaster1 = DummyForecaster(raise_at="predict") + forecaster2 = DummyForecaster(raise_at="predict") + forecaster3 = DummyForecaster(raise_at="fit") + forecaster4 = DummyForecaster(raise_at="fit") + forecaster5 = DummyForecaster(raise_at="fit") + forecaster6 = DummyForecaster(raise_at="predict") + forecaster7 = DummyForecaster(raise_at="fit") + forecaster8 = DummyForecaster(raise_at="predict") + forecaster9 = PolynomialTrendForecaster() + forecaster10 = NaiveForecaster() + + forecaster = FallbackForecaster( + [ + ("f1", forecaster1), + ("f2", forecaster2), + ("f3", forecaster3), + ("f4", forecaster4), + ("f5", forecaster5), + ("f6", forecaster6), + ("f7", forecaster7), + ("f8", forecaster8), + ("target", forecaster9), + ("notcalled", forecaster10), + ] + ) + forecaster.fit(y=y, fh=[1, 2, 3]) + y_pred_actual = forecaster.predict() + + forecaster9.fit(y=y, fh=[1, 2, 3]) + y_pred_expected = forecaster9.predict() + + # Assert correct forecaster name + name = forecaster.current_name_ + assert name == "target" + + # Assert correct y_pred + pd.testing.assert_series_equal(y_pred_expected, y_pred_actual) + + # Assert correct number of expected exceptions + exceptions_raised = forecaster.exceptions_raised_ + assert len(exceptions_raised) == 8 + + # Assert the correct forecasters failed + names_raised_actual = [ + vals["forecaster_name"] for vals in exceptions_raised.values() + ] + names_raised_expected = ["f1", "f2", "f3", "f4", "f5", "f6", "f7", "f8"] + assert names_raised_actual == names_raised_expected + + +def test_fallbackforecaster_pred_int(): + """Predict interval works bc all forecasters have them enabled, first forecaster + expected + """ + y = make_forecasting_problem(random_state=42) + forecaster1 = NaiveForecaster("mean") + forecaster2 = NaiveForecaster("last") + forecaster = FallbackForecaster( + [("naive_mean", forecaster1), ("naive_last", forecaster2)] + ) + fh = [1, 2, 3] + forecaster.fit(y, fh=fh) + pred_int_actual = forecaster.predict_interval() + + forecaster1.fit(y, fh=fh) + pred_int_expected = forecaster1.predict_interval() + pd.testing.assert_frame_equal(pred_int_expected, pred_int_actual) + + +def test_fallbackforecaster_pred_int_raises(): + """Predict int raises because EnsembleForecaster does not have this capability""" + y = make_forecasting_problem(random_state=42) + forecaster1 = NaiveForecaster("mean") + forecaster2 = EnsembleForecaster( + [("naive_last", NaiveForecaster("last")), ("poly", PolynomialTrendForecaster())] + ) + forecaster = FallbackForecaster( + [("naive_mean", forecaster1), ("ensemble", forecaster2)] + ) + fh = [1, 2, 3] + forecaster.fit(y, fh=fh) + with pytest.raises(NotImplementedError): + forecaster.predict_interval() diff --git a/sktime/forecasting/compose/tests/test_hierarchy_ensemble.py b/sktime/forecasting/compose/tests/test_hierarchy_ensemble.py index b3a655fb439..e56194314e1 100644 --- a/sktime/forecasting/compose/tests/test_hierarchy_ensemble.py +++ b/sktime/forecasting/compose/tests/test_hierarchy_ensemble.py @@ -7,10 +7,11 @@ import numpy as np import pytest +from sktime.base._meta import flatten from sktime.datatypes._utilities import get_window from sktime.forecasting.compose import HierarchyEnsembleForecaster from sktime.forecasting.naive import NaiveForecaster -from sktime.forecasting.trend import PolynomialTrendForecaster +from sktime.forecasting.trend import PolynomialTrendForecaster, TrendForecaster from sktime.transformations.hierarchical.aggregate import Aggregator from sktime.utils._testing.hierarchical import _bottom_hier_datagen, _make_hierarchical from sktime.utils.validation._dependencies import _check_soft_dependencies @@ -145,3 +146,57 @@ def test_hierarchy_ensemble_exog(forecasters): estimator_instance.fit(y=y_train, X=X_train, fh=[1, 2, 3]) estimator_instance.predict(X=X_test) estimator_instance.update(y=y_test, X=X_test) + + +@pytest.mark.parametrize( + "forecasters", + [ + [ + ("trend", TrendForecaster(), ["l1_node01"]), + ("polytrend", PolynomialTrendForecaster(), ["l1_node02", "l1_node03"]), + ("naive", NaiveForecaster(), ["__total"]), + ], + [ + ( + "trend", + TrendForecaster(), + [("__total"), ("l1_node01"), ("l1_node02"), ("l1_node03")], + ), + ], + ], +) +@pytest.mark.parametrize("default", [NaiveForecaster(), None]) +def test_level_one_data(forecasters, default): + "Check for data with one level of hierarchy (excluding timepoints level)." + agg = Aggregator() + + y = _bottom_hier_datagen( + no_bottom_nodes=3, + no_levels=1, + random_seed=123, + ) + + forecaster = HierarchyEnsembleForecaster(forecasters, by="node", default=default) + + forecaster.fit(y, fh=[1, 2, 3]) + actual_pred = forecaster.predict() + + y = agg.fit_transform(y) + + for i in range(len(forecasters)): + test_frcstr = forecasters[i][1].clone() + df = y[y.index.droplevel(-1).isin(forecaster.fitted_list[i][1])] + test_frcstr.fit(df, fh=[1, 2, 3]) + test_pred = test_frcstr.predict() + msg = "Node predictions do not match" + assert np.all(actual_pred.loc[test_pred.index] == test_pred), msg + + _, _, nodes = zip(*forecasters) + nodes = set(flatten(nodes)) + if default is not None and len(nodes) != len(y.index.droplevel(-1).unique()): + def_frcstr = default + df = y[y.index.droplevel(-1).isin(forecaster.fitted_list[-1][1])] + def_frcstr.fit(df, fh=[1, 2, 3]) + def_pred = def_frcstr.predict() + msg = "Node default predictions do not match" + assert np.all(actual_pred.loc[def_pred.index] == def_pred), msg diff --git a/sktime/forecasting/compose/tests/test_ignorex.py b/sktime/forecasting/compose/tests/test_ignorex.py new file mode 100644 index 00000000000..814a4e9fa5a --- /dev/null +++ b/sktime/forecasting/compose/tests/test_ignorex.py @@ -0,0 +1,39 @@ +# copyright: sktime developers, BSD-3-Clause License (see LICENSE file) +"""Tests for IgnoreX.""" + +from unittest.mock import MagicMock + +import pytest + +from sktime.datasets import load_longley +from sktime.forecasting.compose import IgnoreX +from sktime.tests.test_switch import run_test_for_class + + +@pytest.mark.skipif( + not run_test_for_class(IgnoreX), + reason="run test only if softdeps are present and incrementally (if requested)", +) +@pytest.mark.parametrize("ignore_x", [True, False, None]) +def test_ignoreX(ignore_x): + """Test that indeed X=None is passed iff the args of IgnoreX claim to do so.""" + y, X = load_longley() + + fcst = MagicMock() + + if ignore_x is None: + igx = IgnoreX(forecaster=fcst) + else: + igx = IgnoreX(forecaster=fcst, ignore_x=ignore_x) + + igx.fit(y, fh=[1, 2, 3], X=X) + + mock_fitted = igx.forecaster_.fit + call_args_list = mock_fitted.call_args_list + all_calls_X_none = all(call_args[1]["X"] is None for call_args in call_args_list) + any_calls_X_none = any(call_args[1]["X"] is None for call_args in call_args_list) + + if ignore_x in [True, None]: + assert all_calls_X_none + else: + assert not any_calls_X_none diff --git a/sktime/forecasting/compose/tests/test_pipeline.py b/sktime/forecasting/compose/tests/test_pipeline.py index 334c445d7a3..850943f61f2 100644 --- a/sktime/forecasting/compose/tests/test_pipeline.py +++ b/sktime/forecasting/compose/tests/test_pipeline.py @@ -373,7 +373,7 @@ def test_tag_handles_missing_data(): forecaster.set_tags(**{"handles-missing-data": False}) y = _make_series() - y[10] = np.nan + y.iloc[10] = np.nan # test only TransformedTargetForecaster y_pipe = TransformedTargetForecaster( @@ -485,7 +485,7 @@ def test_forecastx_logic(): ) def test_forecastx_fit_behavior(): from sktime.forecasting.compose import ForecastX - from sktime.forecasting.model_selection import temporal_train_test_split + from sktime.split import temporal_train_test_split y, X = load_longley() y_train, y_test, X_train, X_test = temporal_train_test_split(y, X) @@ -705,3 +705,70 @@ def test_featurizer_forecastingpipeline_logic(): forecaster.fit(y_train, X=X_train, fh=[1]) # try to forecast next year forecaster.predict(X=X_test) # dummy X to predict next year + + +def test_exogenousx_ignore_tag_set(): + """Tests that TransformedTargetForecaster sets X tag for feature selection. + + If the forecaster ignores X, but the feature selector does not, then the + ignores-exogeneous-X tag should be correctly set to False, not True. + + This is the failure case in bug report #5518. + + More generally, the tag should be set to True iff all steps in the pipeline + ignore X. + """ + from sktime.forecasting.compose import YfromX + from sktime.transformations.series.feature_selection import FeatureSelection + + fcst_does_not_ignore_x = YfromX.create_test_instance() + fcst_ignores_x = NaiveForecaster() + + trafo_ignores_x = ExponentTransformer() + trafo_does_not_ignore_x = FeatureSelection() + + # check that ignores-exogeneous-X tag is set correctly + pipe1 = trafo_ignores_x * fcst_does_not_ignore_x + pipe2 = trafo_ignores_x * fcst_ignores_x + pipe3 = trafo_does_not_ignore_x * fcst_does_not_ignore_x + pipe4 = trafo_does_not_ignore_x * fcst_ignores_x + pipe5 = trafo_ignores_x * trafo_does_not_ignore_x * fcst_does_not_ignore_x + pipe6 = trafo_ignores_x * trafo_does_not_ignore_x * fcst_ignores_x + pipe7 = trafo_ignores_x * trafo_ignores_x * fcst_does_not_ignore_x + pipe8 = trafo_ignores_x * fcst_ignores_x * trafo_does_not_ignore_x + pipe9 = trafo_does_not_ignore_x * fcst_ignores_x * trafo_ignores_x + pipe10 = trafo_ignores_x * fcst_ignores_x * trafo_ignores_x + + assert not pipe1.get_tag("ignores-exogeneous-X") + assert pipe2.get_tag("ignores-exogeneous-X") + assert not pipe3.get_tag("ignores-exogeneous-X") + assert not pipe4.get_tag("ignores-exogeneous-X") + assert not pipe5.get_tag("ignores-exogeneous-X") + assert not pipe6.get_tag("ignores-exogeneous-X") + assert not pipe7.get_tag("ignores-exogeneous-X") + assert not pipe8.get_tag("ignores-exogeneous-X") + assert not pipe9.get_tag("ignores-exogeneous-X") + assert pipe10.get_tag("ignores-exogeneous-X") + + +@pytest.mark.skipif( + not _check_soft_dependencies("pmdarima", severity="none"), + reason="skip test if required soft dependency is not available", +) +def test_pipeline_exogenous_none(): + """Test ForecastingPipeline works with a transformer returning None.""" + from sktime.transformations.series.feature_selection import FeatureSelection + + y, X = load_longley() + y_train, y_test, X_train, X_test = temporal_train_test_split(y, X, test_size=3) + + pipe = ForecastingPipeline( + [ + ("select_X", FeatureSelection(method="none")), + ("arima", ARIMA()), + ] + ) + + pipe.fit(y_train, X_train, fh=[1, 2, 3]) + y_pred = pipe.predict(X=X_test) + assert np.all(y_pred.index == y_test.index) diff --git a/sktime/forecasting/compose/tests/test_reduce.py b/sktime/forecasting/compose/tests/test_reduce.py index 13758a2e787..26b980e4ff0 100644 --- a/sktime/forecasting/compose/tests/test_reduce.py +++ b/sktime/forecasting/compose/tests/test_reduce.py @@ -588,3 +588,28 @@ def test_direct_vs_recursive(): assert pred_dir_max.head(1).equals(pred_rec_max.head(1)) assert pred_dir_max.head(1).equals(pred_rec_spec.head(1)) assert not pred_dir_max.head(1).equals(pred_dir_spec.head(1)) + + +def test_recursive_reducer_X_not_fit_to_fh(): + """Test recursive reducer with X that do not fit the fh. + + I.e., either X is longer or smaller than max_fh + """ + y = load_airline() + y_train, y_test = temporal_train_test_split(y) + X_train = y_train + X_test = y_test + + forecaster = make_reduction( + LinearRegression(), window_length=2, strategy="recursive" + ) + forecaster.fit(y_train, X_train) + + pred1 = forecaster.predict(X=X_test[:1], fh=[1, 2, 3]) + assert pred1.shape == (3,) + pred2 = forecaster.predict(X=X_test[:2], fh=[1, 2, 3]) + assert pred2.shape == (3,) + pred3 = forecaster.predict(X=X_test[:3], fh=[1, 2, 3]) + assert pred3.shape == (3,) + pred4 = forecaster.predict(X=X_test, fh=[1]) + assert pred4.shape == (1,) diff --git a/sktime/forecasting/compose/tests/test_reduce_global.py b/sktime/forecasting/compose/tests/test_reduce_global.py index 28951c12504..e2a78d278e7 100644 --- a/sktime/forecasting/compose/tests/test_reduce_global.py +++ b/sktime/forecasting/compose/tests/test_reduce_global.py @@ -21,7 +21,7 @@ from sklearn.linear_model import LinearRegression from sklearn.pipeline import make_pipeline -from sktime.datasets import load_airline +from sktime.datasets import load_airline, load_solar from sktime.datatypes import get_examples from sktime.forecasting.base import ForecastingHorizon from sktime.forecasting.compose import make_reduction @@ -106,6 +106,7 @@ def y_dict(): # Create integer index data y_numeric = y_train.copy() y_numeric.index = pd.to_numeric(y_numeric.index) + y_numeric.index.names = [None] # setting None to cover "no index name" case y_dict["y_numeric"] = y_numeric return y_dict @@ -139,7 +140,7 @@ def check_eval(test_input, expected): ), ( "y_train", - [None], + ["Period"], ), ( "y_numeric", @@ -186,7 +187,7 @@ def test_recursive_reduction(y, index_names, y_dict): ), ( "y_train", - [None], + ["Period"], ), ( "y_numeric", @@ -232,7 +233,7 @@ def test_direct_reduction(y, index_names, y_dict): ), ( "y_train", - [None], + ["Period"], ), ( "y_numeric", @@ -358,3 +359,40 @@ def test_nofreq_pass(): np.testing.assert_almost_equal( y_pred_global["c0"].values, y_pred_nofreq["c0"].values ) + + +def test_timezoneaware_index(): + y = load_solar(api_version=None) + y_notz = y.copy().tz_localize(None) + + assert y.index.tz is not None + assert y_notz.index.tz is None + + window_trafo = WindowSummarizer(n_jobs=1, **{"lag_feature": {"lag": [1, 2, 48]}}) + regressor = LinearRegression() + forecaster = make_reduction( + estimator=regressor, + strategy="recursive", + transformers=[window_trafo], + window_length=None, + pooling="global", + ) + + # check coefficients + tzaware = forecaster.clone().fit(y) + tznaive = forecaster.clone().fit(y_notz) + tzaware_coef = tzaware.get_fitted_params()["estimator__coef"] + tznaive_coef = tznaive.get_fitted_params()["estimator__coef"] + + np.testing.assert_almost_equal(tzaware_coef, tznaive_coef) + + fh = np.arange(1, 97) + pred_tzaware = tzaware.predict(fh=fh) + pred_tznaive = tznaive.predict(fh=fh) + + msg = "Time-zone of predictions not consistent with training data." + assert pred_tzaware.index.tz == y.index.tz, msg + assert pred_tznaive.index.tz == y_notz.index.tz, msg + + # These should give us identical predictions + np.testing.assert_almost_equal(pred_tzaware.values, pred_tznaive.values) diff --git a/sktime/forecasting/conformal.py b/sktime/forecasting/conformal.py index 175e7543711..dd45961d47a 100644 --- a/sktime/forecasting/conformal.py +++ b/sktime/forecasting/conformal.py @@ -119,6 +119,11 @@ class ConformalIntervals(BaseForecaster): """ _tags = { + # packaging info + # -------------- + "authors": ["fkiraly", "bethrice44"], + # estimator type + # -------------- "scitype:y": "univariate", "requires-fh-in-fit": False, "handles-missing-data": False, diff --git a/sktime/forecasting/croston.py b/sktime/forecasting/croston.py index 9c2186c782f..59fc284d0cc 100644 --- a/sktime/forecasting/croston.py +++ b/sktime/forecasting/croston.py @@ -70,6 +70,12 @@ class Croston(BaseForecaster): """ _tags = { + # packaging info + # -------------- + "authors": "Riyabelle25", + "maintainers": "Riyabelle25", + # estimator type + # -------------- "requires-fh-in-fit": False, # is forecasting horizon already required in fit? } @@ -171,6 +177,7 @@ def get_test_params(cls, parameter_set="default"): {}, {"smoothing": 0}, {"smoothing": 0.42}, + {"smoothing": 2}, ] return params diff --git a/sktime/forecasting/dummy.py b/sktime/forecasting/dummy.py index ddf9433ed4a..7f72a1cd233 100644 --- a/sktime/forecasting/dummy.py +++ b/sktime/forecasting/dummy.py @@ -56,6 +56,11 @@ class ForecastKnownValues(BaseForecaster): """ _tags = { + # packaging info + # -------------- + "authors": ["fkiraly"], + # estimator type + # -------------- "y_inner_mtype": "pd.DataFrame", "X_inner_mtype": "pd.DataFrame", "scitype:y": "both", diff --git a/sktime/forecasting/dynamic_factor.py b/sktime/forecasting/dynamic_factor.py index 37a00bc1741..4c5e476d160 100644 --- a/sktime/forecasting/dynamic_factor.py +++ b/sktime/forecasting/dynamic_factor.py @@ -121,6 +121,13 @@ class DynamicFactor(_StatsModelsAdapter): """ _tags = { + # packaging info + # -------------- + "authors": ["Ris-Bali", "lbventura"], + "maintainers": ["Ris-Bali", "lbventura"], + # python_dependencies: "statsmodels" - inherited from _StatsModelsAdapter + # estimator type + # -------------- "scitype:y": "multivariate", "ignores-exogeneous-X": False, "handles-missing-data": True, diff --git a/sktime/forecasting/ets.py b/sktime/forecasting/ets.py index 0156dd5f25f..25b13d09e27 100644 --- a/sktime/forecasting/ets.py +++ b/sktime/forecasting/ets.py @@ -2,7 +2,6 @@ # copyright: sktime developers, BSD-3-Clause License (see LICENSE file) """Implements automatic and manually exponential time series smoothing models.""" - __author__ = ["hyang1996"] __all__ = ["AutoETS"] @@ -175,6 +174,13 @@ class AutoETS(_StatsModelsAdapter): _fitted_param_names = ("aic", "aicc", "bic", "hqic") _tags = { + # packaging info + # -------------- + "authors": ["hyang1996"], + "maintainers": ["hyang1996"], + # "python_dependencies": "statmodels" - inherited from _StatsModelsAdapter + # estimator type + # -------------- "ignores-exogeneous-X": True, "capability:pred_int": True, "capability:pred_int:insample": True, diff --git a/sktime/forecasting/exp_smoothing.py b/sktime/forecasting/exp_smoothing.py index 6535dac524e..2ee8e5a65a9 100644 --- a/sktime/forecasting/exp_smoothing.py +++ b/sktime/forecasting/exp_smoothing.py @@ -110,6 +110,14 @@ class ExponentialSmoothing(_StatsModelsAdapter): >>> y_pred = forecaster.predict(fh=[1,2,3]) # doctest: +SKIP """ + _tags = { + # packaging info + # -------------- + "authors": ["mloning", "big-o"], + # "python_dependencies": "statsmodels" - inherited from _StatsModelsAdapter + # estimator type tags inherited from _StatsModelsAdapter + } + _fitted_param_names = ( "initial_level", "initial_slope", @@ -234,7 +242,7 @@ def get_test_params(cls, parameter_set="default"): "damped_trend": False, "seasonal": "add", "sp": 2, - "use_boxcox": True, + "use_boxcox": False, "initialization_method": "estimated", "smoothing_level": 0.3, "smoothing_trend": 0.5, diff --git a/sktime/forecasting/fbprophet.py b/sktime/forecasting/fbprophet.py index e814a8b164f..06cc496b384 100644 --- a/sktime/forecasting/fbprophet.py +++ b/sktime/forecasting/fbprophet.py @@ -2,7 +2,7 @@ # copyright: sktime developers, BSD-3-Clause License (see LICENSE file) """Implements Prophet forecaster by wrapping fbprophet.""" -__author__ = ["aiwalter"] +__author__ = ["mloning", "aiwalter", "fkiraly"] __all__ = ["Prophet"] @@ -46,7 +46,7 @@ class Prophet(_ProphetAdapter): trend. If 'logistic' specified float for 'growth_cap' must be provided. growth_floor: float, default=0 Growth saturation minimum value. - Used only if `growth="logistic"`, has no effect otherwise + Used only if `growth="logistic"`, has no effect otherwise (if `growth` is not `"logistic"`). growth_cap: float, default=None Growth saturation maximum aka carrying capacity. @@ -111,6 +111,10 @@ class Prophet(_ProphetAdapter): stan_backend: str or None, default=None str as defined in StanBackendEnum. If None, will try to iterate over all available backends and find the working one. + fit_kwargs: dict or None, default=None + Dict with args for Prophet.fit(). + These are additional arguments passed to the optimizing or sampling + functions in Stan. References ---------- @@ -158,6 +162,7 @@ def __init__( uncertainty_samples=1000, stan_backend=None, verbose=0, + fit_kwargs=None, ): self.freq = freq self.add_seasonality = add_seasonality @@ -182,6 +187,7 @@ def __init__( self.uncertainty_samples = uncertainty_samples self.stan_backend = stan_backend self.verbose = verbose + self.fit_kwargs = fit_kwargs super().__init__() @@ -233,5 +239,6 @@ def get_test_params(cls, parameter_set="default"): "daily_seasonality": False, "uncertainty_samples": 10, "verbose": False, + "fit_kwargs": {"seed": 12345}, } return params diff --git a/sktime/forecasting/ltsf.py b/sktime/forecasting/ltsf.py new file mode 100644 index 00000000000..6a8248496fb --- /dev/null +++ b/sktime/forecasting/ltsf.py @@ -0,0 +1,501 @@ +"""Deep Learning Forecasters using LTSF-Linear Models.""" + +from sktime.forecasting.base.adapters._pytorch import BaseDeepNetworkPyTorch + + +class LTSFLinearForecaster(BaseDeepNetworkPyTorch): + """LTSF-Linear Forecaster. + + Implementation of the Long-Term Short-Term Feature (LTSF) linear forecaster, + aka LTSF-Linear, by Zeng et al [1]_. + + Core logic is directly copied from the cure-lab LTSF-Linear implementation [2]_, + which is unfortunately not available as a package. + + Parameters + ---------- + seq_len : int + length of input sequence + pred_len : int + length of prediction (forecast horizon) + num_epochs : int, default=16 + number of epochs to train + batch_size : int, default=8 + number of training examples per batch + in_channels : int, default=1 + number of input channels passed to network + individual : bool, default=False + boolean flag that controls whether the network treats each channel individually" + "or applies a single linear layer across all channels. If individual=True, the" + "a separate linear layer is created for each input channel. If" + "individual=False, a single shared linear layer is used for all channels." + criterion : torch.nn Loss Function, default=torch.nn.MSELoss + loss function to be used for training + criterion_kwargs : dict, default=None + keyword arguments to pass to criterion + optimizer : torch.optim.Optimizer, default=torch.optim.Adam + optimizer to be used for training + optimizer_kwargs : dict, default=None + keyword arguments to pass to optimizer + lr : float, default=0.003 + learning rate to train model with + + References + ---------- + .. [1] Zeng A, Chen M, Zhang L, Xu Q. 2023. + Are transformers effective for time series forecasting? + Proceedings of the AAAI conference on artificial intelligence 2023 + (Vol. 37, No. 9, pp. 11121-11128). + .. [2] https://github.com/cure-lab/LTSF-Linear + + Examples + -------- + >>> from sktime.forecasting.ltsf import LTSFLinearForecaster # doctest: +SKIP + >>> from sktime.datasets import load_airline + >>> model = LTSFLinearForecaster(10, 3) # doctest: +SKIP + >>> y = load_airline() + >>> model.fit(y, fh=[1,2,3]) # doctest: +SKIP + LTSFLinearForecaster(pred_len=3, seq_len=10) + >>> y_pred = model.predict() # doctest: +SKIP + >>> y_pred # doctest: +SKIP + 1961-01 515.456726 + 1961-02 576.704712 + 1961-03 559.859680 + Freq: M, Name: Number of airline passengers, dtype: float32 + """ + + _tags = { + # packaging info + # -------------- + "authors": ["luca-miniati"], + "maintainers": ["luca-miniati"], + # "python_dependencis": "pytorch" - inherited from BaseDeepNetworkPyTorch + # estimator type vars inherited from BaseDeepNetworkPyTorch + } + + def __init__( + self, + seq_len, + pred_len, + *, + num_epochs=16, + batch_size=8, + in_channels=1, + individual=False, + criterion=None, + criterion_kwargs=None, + optimizer=None, + optimizer_kwargs=None, + lr=0.001, + custom_dataset_train=None, + custom_dataset_pred=None, + ): + self.seq_len = seq_len + self.pred_len = pred_len + self.individual = individual + self.in_channels = in_channels + self.criterion = criterion + self.optimizer = optimizer + self.criterion_kwargs = criterion_kwargs + self.optimizer_kwargs = optimizer_kwargs + self.lr = lr + self.num_epochs = num_epochs + self.custom_dataset_train = custom_dataset_train + self.custom_dataset_pred = custom_dataset_pred + self.batch_size = batch_size + + super().__init__( + num_epochs=num_epochs, + batch_size=batch_size, + in_channels=in_channels, + individual=individual, + criterion_kwargs=criterion_kwargs, + optimizer=optimizer, + optimizer_kwargs=optimizer_kwargs, + lr=lr, + ) + + from sktime.utils.validation._dependencies import _check_soft_dependencies + + if _check_soft_dependencies("torch"): + import torch + + self.criterions = { + "MSE": torch.nn.MSELoss, + "L1": torch.nn.L1Loss, + "SmoothL1": torch.nn.SmoothL1Loss, + "Huber": torch.nn.HuberLoss, + } + + self.optimizers = { + "Adadelta": torch.optim.Adadelta, + "Adagrad": torch.optim.Adagrad, + "Adam": torch.optim.Adam, + "AdamW": torch.optim.AdamW, + "SGD": torch.optim.SGD, + } + + def _build_network(self, fh): + from sktime.networks.ltsf._ltsf import LTSFLinearNetwork + + return LTSFLinearNetwork( + self.seq_len, + fh, + self.in_channels, + self.individual, + )._build() + + @classmethod + def get_test_params(cls, parameter_set="default"): + """Return testing parameter settings for the estimator. + + Parameters + ---------- + parameter_set : str, default="default" + Name of the set of test parameters to return, for use in tests. If no + special parameters are defined for a value, will return `"default"` set. + + + Returns + ------- + params : dict or list of dict + """ + params = [ + { + "seq_len": 2, + "pred_len": 1, + "lr": 0.005, + "optimizer": "Adam", + "batch_size": 1, + "num_epochs": 1, + "individual": True, + } + ] + + return params + + +class LTSFDLinearForecaster(BaseDeepNetworkPyTorch): + """LTSF-DLinear Forecaster. + + Implementation of the Long-Term Short-Term Feature (LTSF) decomposition linear + forecaster, aka LTSF-DLinear, by Zeng et al [1]_. + + Core logic is directly copied from the cure-lab LTSF-Linear implementation [2]_, + which is unfortunately not available as a package. + + Parameters + ---------- + seq_len : int + length of input sequence + pred_len : int + length of prediction (forecast horizon) + num_epochs : int, default=16 + number of epochs to train + batch_size : int, default=8 + number of training examples per batch + in_channels : int, default=1 + number of input channels passed to network + individual : bool, default=False + boolean flag that controls whether the network treats each channel individually" + "or applies a single linear layer across all channels. If individual=True, the" + "a separate linear layer is created for each input channel. If" + "individual=False, a single shared linear layer is used for all channels." + criterion : torch.nn Loss Function, default=torch.nn.MSELoss + loss function to be used for training + criterion_kwargs : dict, default=None + keyword arguments to pass to criterion + optimizer : torch.optim.Optimizer, default=torch.optim.Adam + optimizer to be used for training + optimizer_kwargs : dict, default=None + keyword arguments to pass to optimizer + lr : float, default=0.003 + learning rate to train model with + + References + ---------- + .. [1] Zeng A, Chen M, Zhang L, Xu Q. 2023. + Are transformers effective for time series forecasting? + Proceedings of the AAAI conference on artificial intelligence 2023 + (Vol. 37, No. 9, pp. 11121-11128). + .. [2] https://github.com/cure-lab/LTSF-Linear + + Examples + -------- + >>> from sktime.forecasting.ltsf import LTSFDLinearForecaster # doctest: +SKIP + >>> from sktime.datasets import load_airline + >>> model = LTSFDLinearForecaster(10, 3) # doctest: +SKIP + >>> y = load_airline() + >>> model.fit(y, fh=[1,2,3]) # doctest: +SKIP + LTSFDLinearForecaster(pred_len=3, seq_len=10) + >>> y_pred = model.predict() # doctest: +SKIP + >>> y_pred # doctest: +SKIP + 1961-01 436.494476 + 1961-02 433.659851 + 1961-03 479.309631 + Freq: M, Name: Number of airline passengers, dtype: float32 + """ + + def __init__( + self, + seq_len, + pred_len, + *, + num_epochs=16, + batch_size=8, + in_channels=1, + individual=False, + criterion=None, + criterion_kwargs=None, + optimizer=None, + optimizer_kwargs=None, + lr=0.001, + custom_dataset_train=None, + custom_dataset_pred=None, + ): + self.seq_len = seq_len + self.pred_len = pred_len + self.individual = individual + self.in_channels = in_channels + self.criterion = criterion + self.optimizer = optimizer + self.criterion_kwargs = criterion_kwargs + self.optimizer_kwargs = optimizer_kwargs + self.lr = lr + self.num_epochs = num_epochs + self.custom_dataset_train = custom_dataset_train + self.custom_dataset_pred = custom_dataset_pred + self.batch_size = batch_size + + super().__init__( + num_epochs=num_epochs, + batch_size=batch_size, + in_channels=in_channels, + individual=individual, + criterion_kwargs=criterion_kwargs, + optimizer=optimizer, + optimizer_kwargs=optimizer_kwargs, + lr=lr, + ) + + from sktime.utils.validation._dependencies import _check_soft_dependencies + + if _check_soft_dependencies("torch"): + import torch + + self.criterions = { + "MSE": torch.nn.MSELoss, + "L1": torch.nn.L1Loss, + "SmoothL1": torch.nn.SmoothL1Loss, + "Huber": torch.nn.HuberLoss, + } + + self.optimizers = { + "Adadelta": torch.optim.Adadelta, + "Adagrad": torch.optim.Adagrad, + "Adam": torch.optim.Adam, + "AdamW": torch.optim.AdamW, + "SGD": torch.optim.SGD, + } + + def _build_network(self, fh): + from sktime.networks.ltsf._ltsf import LTSFDLinearNetwork + + return LTSFDLinearNetwork( + self.seq_len, + fh, + self.in_channels, + self.individual, + )._build() + + @classmethod + def get_test_params(cls, parameter_set="default"): + """Return testing parameter settings for the estimator. + + Parameters + ---------- + parameter_set : str, default="default" + Name of the set of test parameters to return, for use in tests. If no + special parameters are defined for a value, will return `"default"` set. + + + Returns + ------- + params : dict or list of dict + """ + params = [ + { + "seq_len": 2, + "pred_len": 1, + "lr": 0.005, + "optimizer": "Adam", + "batch_size": 1, + "num_epochs": 1, + "individual": True, + } + ] + + return params + + +class LTSFNLinearForecaster(BaseDeepNetworkPyTorch): + """LTSF-NLinear Forecaster. + + Implementation of the Long-Term Short-Term Feature (LTSF) normalization linear + forecaster, aka LTSF-NLinear, by Zeng et al [1]_. + + Core logic is directly copied from the cure-lab LTSF-Linear implementation [2]_, + which is unfortunately not available as a package. + + Parameters + ---------- + seq_len : int + length of input sequence + pred_len : int + length of prediction (forecast horizon) + num_epochs : int, default=16 + number of epochs to train + batch_size : int, default=8 + number of training examples per batch + in_channels : int, default=1 + number of input channels passed to network + individual : bool, default=False + boolean flag that controls whether the network treats each channel individually" + "or applies a single linear layer across all channels. If individual=True, the" + "a separate linear layer is created for each input channel. If" + "individual=False, a single shared linear layer is used for all channels." + criterion : torch.nn Loss Function, default=torch.nn.MSELoss + loss function to be used for training + criterion_kwargs : dict, default=None + keyword arguments to pass to criterion + optimizer : torch.optim.Optimizer, default=torch.optim.Adam + optimizer to be used for training + optimizer_kwargs : dict, default=None + keyword arguments to pass to optimizer + lr : float, default=0.003 + learning rate to train model with + + References + ---------- + .. [1] Zeng A, Chen M, Zhang L, Xu Q. 2023. + Are transformers effective for time series forecasting? + Proceedings of the AAAI conference on artificial intelligence 2023 + (Vol. 37, No. 9, pp. 11121-11128). + .. [2] https://github.com/cure-lab/LTSF-Linear + + Examples + -------- + >>> from sktime.forecasting.ltsf import LTSFNLinearForecaster # doctest: +SKIP + >>> from sktime.datasets import load_airline + >>> model = LTSFNLinearForecaster(10, 3) # doctest: +SKIP + >>> y = load_airline() + >>> model.fit(y, fh=[1,2,3]) # doctest: +SKIP + LTSFNLinearForecaster(pred_len=3, seq_len=10) + >>> y_pred = model.predict() # doctest: +SKIP + >>> y_pred # doctest: +SKIP + 1961-01 455.628082 + 1961-02 433.349640 + 1961-03 437.045502 + Freq: M, Name: Number of airline passengers, dtype: float32 + """ + + def __init__( + self, + seq_len, + pred_len, + *, + num_epochs=16, + batch_size=8, + in_channels=1, + individual=False, + criterion=None, + criterion_kwargs=None, + optimizer=None, + optimizer_kwargs=None, + lr=0.001, + custom_dataset_train=None, + custom_dataset_pred=None, + ): + self.seq_len = seq_len + self.pred_len = pred_len + self.individual = individual + self.in_channels = in_channels + self.criterion = criterion + self.optimizer = optimizer + self.criterion_kwargs = criterion_kwargs + self.optimizer_kwargs = optimizer_kwargs + self.lr = lr + self.num_epochs = num_epochs + self.custom_dataset_train = custom_dataset_train + self.custom_dataset_pred = custom_dataset_pred + self.batch_size = batch_size + + super().__init__( + num_epochs=num_epochs, + batch_size=batch_size, + in_channels=in_channels, + individual=individual, + criterion_kwargs=criterion_kwargs, + optimizer=optimizer, + optimizer_kwargs=optimizer_kwargs, + lr=lr, + ) + + from sktime.utils.validation._dependencies import _check_soft_dependencies + + if _check_soft_dependencies("torch"): + import torch + + self.criterions = { + "MSE": torch.nn.MSELoss, + "L1": torch.nn.L1Loss, + "SmoothL1": torch.nn.SmoothL1Loss, + "Huber": torch.nn.HuberLoss, + } + + self.optimizers = { + "Adadelta": torch.optim.Adadelta, + "Adagrad": torch.optim.Adagrad, + "Adam": torch.optim.Adam, + "AdamW": torch.optim.AdamW, + "SGD": torch.optim.SGD, + } + + def _build_network(self, fh): + from sktime.networks.ltsf._ltsf import LTSFNLinearNetwork + + return LTSFNLinearNetwork( + self.seq_len, + fh, + self.in_channels, + self.individual, + )._build() + + @classmethod + def get_test_params(cls, parameter_set="default"): + """Return testing parameter settings for the estimator. + + Parameters + ---------- + parameter_set : str, default="default" + Name of the set of test parameters to return, for use in tests. If no + special parameters are defined for a value, will return `"default"` set. + + + Returns + ------- + params : dict or list of dict + """ + params = [ + { + "seq_len": 2, + "pred_len": 1, + "lr": 0.005, + "optimizer": "Adam", + "batch_size": 1, + "num_epochs": 1, + "individual": True, + } + ] + + return params diff --git a/sktime/forecasting/model_evaluation/_functions.py b/sktime/forecasting/model_evaluation/_functions.py index 155caabd8b2..9937a9030eb 100644 --- a/sktime/forecasting/model_evaluation/_functions.py +++ b/sktime/forecasting/model_evaluation/_functions.py @@ -318,8 +318,6 @@ def _evaluate_window(x, meta): return result -# todo 0.25.0: remove compute argument and docstring -# todo 0.25.0: remove kwargs and docstring def evaluate( forecaster, cv, @@ -330,10 +328,8 @@ def evaluate( return_data: bool = False, error_score: Union[str, int, float] = np.nan, backend: Optional[str] = None, - compute: bool = None, cv_X=None, backend_params: Optional[dict] = None, - **kwargs, ): r"""Evaluate forecaster using timeseries cross-validation. @@ -348,13 +344,13 @@ def evaluate( the train/test folds produced by the generator ``cv_X.split_series(X)`` (if ``X`` is ``None``, consider these to be ``None`` as well). - 1. Set ``i = 1`` + 1. Initialize the counter to ``i = 1`` 2. Fit the ``forecaster`` to :math:`y_{train, 1}`, :math:`X_{train, 1}`, - with a ``fh`` to forecast :math:`y_{test, 1}` - 3. The ``forecaster`` predict with exogeneous data :math:`X_{test, i}` - ``y_pred = forecaster.predict`` (or ``predict_proba`` or ``predict_quantiles``, - depending on ``scoring``) - 4. Compute ``scoring`` on ``y_pred`` versus :math:`y_{test, 1}` + with ``fh`` set to the absolute indices of :math:`y_{test, 1}`. + 3. Use the ``forecaster`` to make a prediction ``y_pred`` with the exogeneous + data :math:`X_{test, i}`. Predictions are made using either ``predict``, + ``predict_proba`` or ``predict_quantiles``, depending on ``scoring``. + 4. Compute the ``scoring`` function on ``y_pred`` versus :math:`y_{test, i}` 5. If ``i == K``, terminate, otherwise 6. Set ``i = i + 1`` 7. Ingest more data :math:`y_{train, i}`, :math:`X_{train, i}`, @@ -413,6 +409,7 @@ def evaluate( - "None": executes loop sequentally, simple list comprehension - "loky", "multiprocessing" and "threading": uses ``joblib.Parallel`` loops + - "joblib": custom and 3rd party ``joblib`` backends, e.g., ``spark`` - "dask": uses ``dask``, requires ``dask`` package in environment - "dask_lazy": same as "dask", but changes the return to (lazy) ``dask.dataframe.DataFrame``. @@ -421,9 +418,6 @@ def evaluate( "threading" is unlikely to see speed ups due to the GIL and the serialization backend (``cloudpickle``) for "dask" and "loky" is generally more robust than the standard ``pickle`` library used in "multiprocessing". - compute : bool, default=True, deprecated and will be removed in 0.25.0. - If backend="dask", whether returned DataFrame is computed. - If set to True, returns `pd.DataFrame`, otherwise `dask.dataframe.DataFrame`. cv_X : sktime BaseSplitter descendant, optional determines split of ``X`` into test and train folds default is ``X`` being split to identical ``loc`` indices as ``y`` @@ -435,12 +429,18 @@ def evaluate( Valid keys depend on the value of ``backend``: - "None": no additional parameters, ``backend_params`` is ignored - - "loky", "multiprocessing" and "threading": - any valid keys for ``joblib.Parallel`` can be passed here, - e.g., ``n_jobs``, with the exception of ``backend`` - which is directly controlled by ``backend`` + - "loky", "multiprocessing" and "threading": default ``joblib`` backends + any valid keys for ``joblib.Parallel`` can be passed here, e.g., ``n_jobs``, + with the exception of ``backend`` which is directly controlled by ``backend``. + If ``n_jobs`` is not passed, it will default to ``-1``, other parameters + will default to ``joblib`` defaults. + - "joblib": custom and 3rd party ``joblib`` backends, e.g., ``spark``. + any valid keys for ``joblib.Parallel`` can be passed here, e.g., ``n_jobs``, + ``backend`` must be passed as a key of ``backend_params`` in this case. + If ``n_jobs`` is not passed, it will default to ``-1``, other parameters + will default to ``joblib`` defaults. - "dask": any valid keys for ``dask.compute`` can be passed, - e.g., ``scheduler`` + e.g., ``scheduler`` Returns ------- @@ -518,40 +518,8 @@ def evaluate( "installed, but dask is not present in the python environment" ) - # todo 0.25.0: remove kwargs and this warning - if kwargs != {}: - warnings.warn( - "in evaluate, kwargs will no longer be supported from sktime 0.25.0. " - "to pass configuration arguments to the parallelization backend, " - "use backend_params instead. " - f"The following kwargs were found: {kwargs.keys()}, pass these as " - "dict elements to backend_params instead.", - DeprecationWarning, - stacklevel=2, - ) - - # todo 0.25.0: remove compute argument and logic, and remove this warning - if compute is not None: - warnings.warn( - "the compute argument of evaluate is deprecated and will be removed " - "in sktime 0.25.0. For the same behaviour in the future, " - 'use backend="dask_lazy"', - DeprecationWarning, - stacklevel=2, - ) - if compute is None: - compute = True - if backend == "dask" and not compute: - backend = "dask_lazy" - _check_strategy(strategy) cv = check_cv(cv, enforce_start_with_window=True) - # TODO: remove lines(four lines below) and 599-612 in v0.25.0 - if isinstance(scoring, list): - raise_warn, num = True, len(scoring) - else: - raise_warn, num = False, 1 - # removal until here scoring = _check_scores(scoring) ALLOWED_SCITYPES = ["Series", "Panel", "Hierarchical"] @@ -662,21 +630,4 @@ def gen_y_X_train_test(y, X, cv, cv_X): # final formatting of results DataFrame results = results.reset_index(drop=True) - # TODO: remove 16 lines below and 451-455 in v0.25.0 - if raise_warn: - warnings.warn( - "Starting v0.25.0 model_evaluation.evaluate module will rearrange " - "all metric columns to the left of its output result DataFrame. " - "Please use loc references when addressing the columns. You can " - "safely ignore this warning if you don't use evaluate function directly.", - DeprecationWarning, - stacklevel=2, - ) - columns = results.columns.to_list() - non_first_metrics = [] - for _ in range(1, num): - metric = columns.pop(1) - non_first_metrics.append(metric) - results = results.reindex(columns=columns + non_first_metrics) - # removal until here return results diff --git a/sktime/forecasting/model_evaluation/tests/test_evaluate.py b/sktime/forecasting/model_evaluation/tests/test_evaluate.py index 6c312e4dc77..7d3696e841a 100644 --- a/sktime/forecasting/model_evaluation/tests/test_evaluate.py +++ b/sktime/forecasting/model_evaluation/tests/test_evaluate.py @@ -47,11 +47,15 @@ from sktime.utils._testing.forecasting import make_forecasting_problem from sktime.utils._testing.hierarchical import _make_hierarchical from sktime.utils._testing.series import _make_series +from sktime.utils.parallel import _get_parallel_test_fixtures from sktime.utils.validation._dependencies import _check_soft_dependencies METRICS = [MeanAbsolutePercentageError(symmetric=True), MeanAbsoluteScaledError()] PROBA_METRICS = [CRPS(), EmpiricalCoverage(), LogLoss(), PinballLoss()] +# list of parallelization backends to test +BACKENDS = _get_parallel_test_fixtures("estimator") + def _check_evaluate_output(out, cv, y, scoring, return_data): assert isinstance(out, pd.DataFrame) @@ -103,7 +107,7 @@ def _check_evaluate_output(out, cv, y, scoring, return_data): @pytest.mark.parametrize("step_length", TEST_STEP_LENGTHS_INT) @pytest.mark.parametrize("strategy", ["refit", "update", "no-update_params"]) @pytest.mark.parametrize("scoring", METRICS) -@pytest.mark.parametrize("backend", [None, "dask", "loky", "threading"]) +@pytest.mark.parametrize("backend", BACKENDS) def test_evaluate_common_configs( CV, fh, window_length, step_length, strategy, scoring, backend ): @@ -122,7 +126,7 @@ def test_evaluate_common_configs( cv=cv, strategy=strategy, scoring=scoring, - backend=backend, + **backend, ) _check_evaluate_output(out, cv, y, scoring, False) @@ -216,14 +220,10 @@ def test_evaluate_no_exog_against_with_exog(): @pytest.mark.parametrize("error_score", [np.nan, "raise", 1000]) @pytest.mark.parametrize("return_data", [True, False]) @pytest.mark.parametrize("strategy", ["refit", "update", "no-update_params"]) -@pytest.mark.parametrize("backend", [None, "dask", "loky", "threading"]) +@pytest.mark.parametrize("backend", BACKENDS) @pytest.mark.parametrize("scores", [[MeanAbsolutePercentageError()], METRICS]) def test_evaluate_error_score(error_score, return_data, strategy, backend, scores): """Test evaluate to raise warnings and exceptions according to error_score value.""" - # skip test for dask backend if dask is not installed - if backend == "dask" and not _check_soft_dependencies("dask", severity="none"): - return None - forecaster = ExponentialSmoothing(sp=12) y = load_airline() # add NaN to make ExponentialSmoothing fail @@ -240,12 +240,12 @@ def test_evaluate_error_score(error_score, return_data, strategy, backend, score "return_data": return_data, "error_score": error_score, "strategy": strategy, - "backend": backend, } + args.update(backend) if error_score in [np.nan, 1000]: # known bug - loky backend does not pass on warnings, #5307 - if backend != "loky": + if backend["backend"] not in ["loky", "multiprocessing"]: with pytest.warns(FitFailedWarning): results = evaluate(**args) else: @@ -264,7 +264,7 @@ def test_evaluate_error_score(error_score, return_data, strategy, backend, score not run_test_for_class(evaluate), reason="run test only if softdeps are present and incrementally (if requested)", ) -@pytest.mark.parametrize("backend", [None, "dask", "loky", "threading"]) +@pytest.mark.parametrize("backend", BACKENDS) def test_evaluate_hierarchical(backend): """Check that evaluate works with hierarchical data.""" # skip test for dask backend if dask is not installed @@ -284,10 +284,10 @@ def test_evaluate_hierarchical(backend): cv = SlidingWindowSplitter() scoring = MeanAbsolutePercentageError(symmetric=True) out_exog = evaluate( - forecaster, cv, y, X=X, scoring=scoring, error_score="raise", backend=backend + forecaster, cv, y, X=X, scoring=scoring, error_score="raise", **backend ) out_no_exog = evaluate( - forecaster, cv, y, X=None, scoring=scoring, error_score="raise", backend=backend + forecaster, cv, y, X=None, scoring=scoring, error_score="raise", **backend ) scoring_name = f"test_{scoring.name}" diff --git a/sktime/forecasting/model_selection/__init__.py b/sktime/forecasting/model_selection/__init__.py index c55f7396095..8c9ca7712ad 100644 --- a/sktime/forecasting/model_selection/__init__.py +++ b/sktime/forecasting/model_selection/__init__.py @@ -2,33 +2,98 @@ # copyright: sktime developers, BSD-3-Clause License (see LICENSE file) """Implements functionality for selecting forecasting models.""" -__author__ = ["mloning", "kkoralturk"] __all__ = [ - "CutoffSplitter", - "SameLocSplitter", - "SingleWindowSplitter", - "SlidingWindowSplitter", - "temporal_train_test_split", - "ExpandingGreedySplitter", - "ExpandingWindowSplitter", - "TestPlusTrainSplitter", "ForecastingGridSearchCV", "ForecastingRandomizedSearchCV", "ForecastingSkoptSearchCV", + "ExpandingWindowSplitter", + "SlidingWindowSplitter", + "temporal_train_test_split", ] -from sktime.forecasting.model_selection._split import ( - CutoffSplitter, - ExpandingGreedySplitter, - ExpandingWindowSplitter, - SameLocSplitter, - SingleWindowSplitter, - SlidingWindowSplitter, - TestPlusTrainSplitter, - temporal_train_test_split, -) from sktime.forecasting.model_selection._tune import ( ForecastingGridSearchCV, ForecastingRandomizedSearchCV, ForecastingSkoptSearchCV, ) + + +# todo 0.27.0 - check whether we should remove, otherwise bump +# still used in blog posts and old tutorials +def temporal_train_test_split( + y, X=None, test_size=None, train_size=None, fh=None, anchor="start" +): + """Split time series data into temporal train and test sets. + + DEPRECATED - use sktime.split.temporal_train_test_split instead. + """ + from warnings import warn + + from sktime.split import temporal_train_test_split as _tts + + warn( + "WARNING - the old location of temporal_train_test_split in " + "sktime.forecasting.model_selection is deprecated and is scheduled for " + "imminent removal in a MINOR version. " + "Please update any import statements to " + "from sktime.split import temporal_train_test_split.", + DeprecationWarning, + ) + + return _tts( + y=y, X=X, test_size=test_size, train_size=train_size, fh=fh, anchor=anchor + ) + + +# todo 0.27.0 - check whether we should remove, otherwise bump +# still used in blog posts and old tutorials +def ExpandingWindowSplitter(fh=1, initial_window=10, step_length=1): + """Legacy export of Expanding window splitter. + + DEPRECATED - use sktime.split.ExpandingWindowSplitter instead. + """ + from warnings import warn + + from sktime.split import ExpandingWindowSplitter as _EWSplitter + + warn( + "WARNING - the old location of ExpandingWindowSplitter in " + "sktime.forecasting.model_selection is deprecated and is scheduled for " + "imminent removal in a MINOR version. " + "Please update any import statements to " + "from sktime.split import ExpandingWindowSplitter.", + DeprecationWarning, + ) + + return _EWSplitter(fh=fh, initial_window=initial_window, step_length=step_length) + + +# todo 0.27.0 - check whether we should remove, otherwise bump +# still used in blog posts and old tutorials +def SlidingWindowSplitter( + fh=1, window_length=10, step_length=1, initial_window=None, start_with_window=True +): + """Legacy export of Sliding window splitter. + + DEPRECATED - use sktime.split.ExpandingWindowSplitter instead. + """ + from warnings import warn + + from sktime.split import SlidingWindowSplitter as _SWSplitter + + warn( + "WARNING - the old location of SlidingWindowSplitter in " + "sktime.forecasting.model_selection is deprecated and is scheduled for " + "imminent removal in a MINOR version. " + "Please update any import statements to " + "from sktime.split import SlidingWindowSplitter.", + DeprecationWarning, + ) + + return _SWSplitter( + fh=fh, + window_length=window_length, + step_length=step_length, + initial_window=initial_window, + start_with_window=start_with_window, + ) diff --git a/sktime/forecasting/model_selection/_split.py b/sktime/forecasting/model_selection/_split.py deleted file mode 100644 index 3d77d9d77fb..00000000000 --- a/sktime/forecasting/model_selection/_split.py +++ /dev/null @@ -1,42 +0,0 @@ -#!/usr/bin/env python3 -u -# copyright: sktime developers, BSD-3-Clause License (see LICENSE file) -"""Implement dataset splitting for model evaluation and selection.""" - -# This module is deprecated. Below the deprecation cycle : -# - Until the 0.25.0 release, imports are allowed from this location but raise a -# DeprecationWarning. -# - In the 0.25.0 release, this file (sktime/forecasting/model_selection/_split.py) -# will be removed. -# todo 0.25.0 : Please ensure this file is deleted. - -__all__ = [ - "ExpandingGreedySplitter", - "ExpandingWindowSplitter", - "SlidingWindowSplitter", - "CutoffSplitter", - "SingleWindowSplitter", - "SameLocSplitter", - "temporal_train_test_split", - "TestPlusTrainSplitter", -] - -import warnings - -from sktime.split import ( - CutoffSplitter, - ExpandingGreedySplitter, - ExpandingWindowSplitter, - SameLocSplitter, - SingleWindowSplitter, - SlidingWindowSplitter, - TestPlusTrainSplitter, - temporal_train_test_split, -) - -warnings.warn( - "Please import sktime splitters from `sktime.split`. Importing splitters from " - "`sktime.forecasting.model_selection` is deprecated and will be removed in release " - "0.25.0", - DeprecationWarning, - stacklevel=2, -) diff --git a/sktime/forecasting/model_selection/_tune.py b/sktime/forecasting/model_selection/_tune.py index 7b70b6d1eb0..8c32e28ca08 100644 --- a/sktime/forecasting/model_selection/_tune.py +++ b/sktime/forecasting/model_selection/_tune.py @@ -30,6 +30,7 @@ class BaseGridSearch(_DelegatedForecaster): _tags = { + "authors": ["mloning", "fkiraly", "aiwalter"], "scitype:y": "both", "requires-fh-in-fit": False, "handles-missing-data": False, @@ -38,7 +39,7 @@ class BaseGridSearch(_DelegatedForecaster): "capability:pred_int:insample": True, } - # todo 0.26.0: remove n_jobs, pre_dispatch parameters and all related logic + # todo 0.27.0: remove n_jobs, pre_dispatch parameters and all related logic def __init__( self, forecaster, @@ -75,18 +76,9 @@ def __init__( super().__init__() - tags_to_clone = [ - "requires-fh-in-fit", - "capability:pred_int", - "capability:pred_int:insample", - "capability:insample", - "ignores-exogeneous-X", - "handles-missing-data", - "y_inner_mtype", - "X_inner_mtype", - "X-y-must-have-same-index", - "enforce_index_type", - ] + self._set_delegated_tags(forecaster) + + tags_to_clone = ["y_inner_mtype", "X_inner_mtype"] self.clone_tags(forecaster, tags_to_clone) self._extend_to_all_scitypes("y_inner_mtype") self._extend_to_all_scitypes("X_inner_mtype") @@ -186,54 +178,26 @@ def _fit(self, y, X, fh): scoring = check_scoring(self.scoring, obj=self) scoring_name = f"test_{scoring.name}" - # todo 0.26.0: remove this logic and only use backend_params + # todo 0.27.0: remove this logic and only use backend_params backend = self.backend backend_params = self.backend_params if self.backend_params else {} if backend in ["threading", "multiprocessing", "loky"]: n_jobs = self.n_jobs pre_dispatch = self.pre_dispatch - backend_params["n_jobs"] = n_jobs - backend_params["pre_dispatch"] = pre_dispatch + if n_jobs is not None: + backend_params["n_jobs"] = n_jobs + if pre_dispatch is not None: + backend_params["pre_dispatch"] = pre_dispatch if n_jobs is not None or pre_dispatch is not None: warn( f"in {self.__class__.__name__}, n_jobs and pre_dispatch " - "parameters are deprecated and will be removed in 0.26.0. " + "parameters are deprecated and will be removed in 0.27.0. " "Please use n_jobs and pre_dispatch directly in the backend_params " "argument instead.", obj=self, stacklevel=2, ) - def _fit_and_score(params, meta): - # Clone forecaster. - forecaster = self.forecaster.clone() - - # Set parameters. - forecaster.set_params(**params) - - # Evaluate. - out = evaluate( - forecaster, - cv, - y, - X, - strategy=self.strategy, - scoring=scoring, - error_score=self.error_score, - ) - - # Filter columns. - out = out.filter(items=[scoring_name, "fit_time", "pred_time"], axis=1) - - # Aggregate results. - out = out.mean() - out = out.add_prefix("mean_") - - # Add parameters to output table. - out["params"] = params - - return out - def evaluate_candidates(candidate_params): candidate_params = list(candidate_params) @@ -247,9 +211,21 @@ def evaluate_candidates(candidate_params): ) ) + # Set meta variables for parallelization. + meta = {} + meta["forecaster"] = self.forecaster + meta["y"] = y + meta["X"] = X + meta["cv"] = cv + meta["strategy"] = self.strategy + meta["scoring"] = scoring + meta["error_score"] = self.error_score + meta["scoring_name"] = scoring_name + out = parallelize( fun=_fit_and_score, iter=candidate_params, + meta=meta, backend=backend, backend_params=backend_params, ) @@ -386,6 +362,35 @@ def _update(self, y, X=None, update_params=True): return self +def _fit_and_score(params, meta): + """Fit and score forecaster with given parameters. + + Root level function for parallelization, called from + BaseGridSearchCV._fit, evaluate_candidates, within parallelize. + """ + meta = meta.copy() + scoring_name = meta.pop("scoring_name") + + # Set parameters. + forecaster = meta.pop("forecaster").clone() + forecaster.set_params(**params) + + # Evaluate. + out = evaluate(forecaster, **meta) + + # Filter columns. + out = out.filter(items=[scoring_name, "fit_time", "pred_time"], axis=1) + + # Aggregate results. + out = out.mean() + out = out.add_prefix("mean_") + + # Add parameters to output table. + out["params"] = params + + return out + + class ForecastingGridSearchCV(BaseGridSearch): """Perform grid-search cross-validation to find optimal model parameters. @@ -401,16 +406,16 @@ class ForecastingGridSearchCV(BaseGridSearch): Parameters ---------- - forecaster : estimator object - The estimator should implement the sktime or scikit-learn estimator - interface. Either the estimator must contain a "score" function, - or a scoring function must be passed. + forecaster : sktime forecaster, BaseForecaster instance or interface compatible + The forecaster to tune, must implement the sktime forecaster interface. + sklearn regressors can be used, but must first be converted to forecasters + via one of the reduction compositors, e.g., via ``make_reduction`` cv : cross-validation generator or an iterable e.g. SlidingWindowSplitter() strategy : {"refit", "update", "no-update_params"}, optional, default="refit" data ingestion strategy in fitting cv, passed to `evaluate` internally defines the ingestion mode when the forecaster sees new data when window expands - "refit" = forecaster is refitted to each training window + "refit" = a new copy of the forecaster is fitted to each training window "update" = forecaster is updated with training window data, in sequence provided "no-update_params" = fit to first training window, re-used without fit or update update_behaviour : str, optional, default = "full_refit" @@ -464,6 +469,7 @@ class ForecastingGridSearchCV(BaseGridSearch): - "None": executes loop sequentally, simple list comprehension - "loky", "multiprocessing" and "threading": uses ``joblib.Parallel`` loops + - "joblib": custom and 3rd party ``joblib`` backends, e.g., ``spark`` - "dask": uses ``dask``, requires ``dask`` package in environment Recommendation: Use "dask" or "loky" for parallel evaluate. @@ -498,10 +504,16 @@ class ForecastingGridSearchCV(BaseGridSearch): Valid keys depend on the value of ``backend``: - "None": no additional parameters, ``backend_params`` is ignored - - "loky", "multiprocessing" and "threading": - any valid keys for ``joblib.Parallel`` can be passed here, - e.g., ``n_jobs``, with the exception of ``backend`` - which is directly controlled by ``backend`` + - "loky", "multiprocessing" and "threading": default ``joblib`` backends + any valid keys for ``joblib.Parallel`` can be passed here, e.g., ``n_jobs``, + with the exception of ``backend`` which is directly controlled by ``backend``. + If ``n_jobs`` is not passed, it will default to ``-1``, other parameters + will default to ``joblib`` defaults. + - "joblib": custom and 3rd party ``joblib`` backends, e.g., ``spark``. + any valid keys for ``joblib.Parallel`` can be passed here, e.g., ``n_jobs``, + ``backend`` must be passed as a key of ``backend_params`` in this case. + If ``n_jobs`` is not passed, it will default to ``-1``, other parameters + will default to ``joblib`` defaults. - "dask": any valid keys for ``dask.compute`` can be passed, e.g., ``scheduler`` Attributes @@ -537,9 +549,7 @@ class ForecastingGridSearchCV(BaseGridSearch): -------- >>> from sktime.datasets import load_shampoo_sales >>> from sktime.forecasting.model_selection import ForecastingGridSearchCV - >>> from sktime.split import ( - ... ExpandingWindowSplitter, - ... ExpandingWindowSplitter) + >>> from sktime.split import ExpandingWindowSplitter >>> from sktime.forecasting.naive import NaiveForecaster >>> y = load_shampoo_sales() >>> fh = [1,2,3] @@ -723,16 +733,16 @@ class ForecastingRandomizedSearchCV(BaseGridSearch): Parameters ---------- - forecaster : estimator object - The estimator should implement the sktime or scikit-learn estimator - interface. Either the estimator must contain a "score" function, - or a scoring function must be passed. + forecaster : sktime forecaster, BaseForecaster instance or interface compatible + The forecaster to tune, must implement the sktime forecaster interface. + sklearn regressors can be used, but must first be converted to forecasters + via one of the reduction compositors, e.g., via ``make_reduction`` cv : cross-validation generator or an iterable e.g. SlidingWindowSplitter() strategy : {"refit", "update", "no-update_params"}, optional, default="refit" data ingestion strategy in fitting cv, passed to `evaluate` internally defines the ingestion mode when the forecaster sees new data when window expands - "refit" = forecaster is refitted to each training window + "refit" = a new copy of the forecaster is fitted to each training window "update" = forecaster is updated with training window data, in sequence provided "no-update_params" = fit to first training window, re-used without fit or update update_behaviour: str, optional, default = "full_refit" @@ -796,6 +806,7 @@ class ForecastingRandomizedSearchCV(BaseGridSearch): - "None": executes loop sequentally, simple list comprehension - "loky", "multiprocessing" and "threading": uses ``joblib.Parallel`` loops + - "joblib": custom and 3rd party ``joblib`` backends, e.g., ``spark`` - "dask": uses ``dask``, requires ``dask`` package in environment Recommendation: Use "dask" or "loky" for parallel evaluate. @@ -830,10 +841,16 @@ class ForecastingRandomizedSearchCV(BaseGridSearch): Valid keys depend on the value of ``backend``: - "None": no additional parameters, ``backend_params`` is ignored - - "loky", "multiprocessing" and "threading": - any valid keys for ``joblib.Parallel`` can be passed here, - e.g., ``n_jobs``, with the exception of ``backend`` - which is directly controlled by ``backend`` + - "loky", "multiprocessing" and "threading": default ``joblib`` backends + any valid keys for ``joblib.Parallel`` can be passed here, e.g., ``n_jobs``, + with the exception of ``backend`` which is directly controlled by ``backend``. + If ``n_jobs`` is not passed, it will default to ``-1``, other parameters + will default to ``joblib`` defaults. + - "joblib": custom and 3rd party ``joblib`` backends, e.g., ``spark``. + any valid keys for ``joblib.Parallel`` can be passed here, e.g., ``n_jobs``, + ``backend`` must be passed as a key of ``backend_params`` in this case. + If ``n_jobs`` is not passed, it will default to ``-1``, other parameters + will default to ``joblib`` defaults. - "dask": any valid keys for ``dask.compute`` can be passed, e.g., ``scheduler`` Attributes @@ -961,10 +978,10 @@ class ForecastingSkoptSearchCV(BaseGridSearch): Parameters ---------- - forecaster : estimator object. - The estimator should implement the sktime or scikit-learn estimator interface. - Either the estimator must contain a "score" function, - or a scoring function must be passed. + forecaster : sktime forecaster, BaseForecaster instance or interface compatible + The forecaster to tune, must implement the sktime forecaster interface. + sklearn regressors can be used, but must first be converted to forecasters + via one of the reduction compositors, e.g., via ``make_reduction`` cv : cross-validation generator or an iterable Splitter used for generating validation folds. e.g. SlidingWindowSplitter() @@ -1022,7 +1039,7 @@ class ForecastingSkoptSearchCV(BaseGridSearch): strategy : {"refit", "update", "no-update_params"}, optional, default="refit" data ingestion strategy in fitting cv, passed to `evaluate` internally defines the ingestion mode when the forecaster sees new data when window expands - "refit" = forecaster is refitted to each training window + "refit" = a new copy of the forecaster is fitted to each training window "update" = forecaster is updated with training window data, in sequence provided "no-update_params" = fit to first training window, re-used without fit or update update_behaviour: str, optional, default = "full_refit" @@ -1055,6 +1072,7 @@ class ForecastingSkoptSearchCV(BaseGridSearch): - "None": executes loop sequentally, simple list comprehension - "loky", "multiprocessing" and "threading": uses ``joblib.Parallel`` loops + - "joblib": custom and 3rd party ``joblib`` backends, e.g., ``spark`` - "dask": uses ``dask``, requires ``dask`` package in environment Recommendation: Use "dask" or "loky" for parallel evaluate. @@ -1085,10 +1103,16 @@ class ForecastingSkoptSearchCV(BaseGridSearch): Valid keys depend on the value of ``backend``: - "None": no additional parameters, ``backend_params`` is ignored - - "loky", "multiprocessing" and "threading": - any valid keys for ``joblib.Parallel`` can be passed here, - e.g., ``n_jobs``, with the exception of ``backend`` - which is directly controlled by ``backend`` + - "loky", "multiprocessing" and "threading": default ``joblib`` backends + any valid keys for ``joblib.Parallel`` can be passed here, e.g., ``n_jobs``, + with the exception of ``backend`` which is directly controlled by ``backend``. + If ``n_jobs`` is not passed, it will default to ``-1``, other parameters + will default to ``joblib`` defaults. + - "joblib": custom and 3rd party ``joblib`` backends, e.g., ``spark``. + any valid keys for ``joblib.Parallel`` can be passed here, e.g., ``n_jobs``, + ``backend`` must be passed as a key of ``backend_params`` in this case. + If ``n_jobs`` is not passed, it will default to ``-1``, other parameters + will default to ``joblib`` defaults. - "dask": any valid keys for ``dask.compute`` can be passed, e.g., ``scheduler`` Attributes @@ -1141,6 +1165,8 @@ class ForecastingSkoptSearchCV(BaseGridSearch): """ _tags = { + "authors": ["HazrulAkmal"], + "maintainers": ["HazrulAkmal"], "scitype:y": "both", "requires-fh-in-fit": False, "handles-missing-data": False, diff --git a/sktime/forecasting/model_selection/tests/test_tune.py b/sktime/forecasting/model_selection/tests/test_tune.py index fa52904dfb0..e1cb65dd624 100644 --- a/sktime/forecasting/model_selection/tests/test_tune.py +++ b/sktime/forecasting/model_selection/tests/test_tune.py @@ -325,7 +325,7 @@ def test_skoptcv_multiple_forecaster(): assert len(sscv.cv_results_) == 5 -BACKEND_TEST = _get_parallel_test_fixtures() +BACKEND_TEST = _get_parallel_test_fixtures("estimator") @pytest.mark.skipif( diff --git a/sktime/forecasting/naive.py b/sktime/forecasting/naive.py index cf518b1ba3b..df2ec152d34 100644 --- a/sktime/forecasting/naive.py +++ b/sktime/forecasting/naive.py @@ -108,6 +108,20 @@ class NaiveForecaster(_BaseWindowForecaster): """ _tags = { + # packaging info + # -------------- + "authors": [ + "mloning", + "piyush1729", + "sri1419", + "Flix6x", + "aiwalter", + "IlyasMoutawwakil", + "fkiraly", + "bethrice44", + ], + # estimator type + # -------------- "y_inner_mtype": "pd.Series", "requires-fh-in-fit": False, "handles-missing-data": True, @@ -191,7 +205,7 @@ def _fit(self, y, X, fh): ) # check window length - if self.window_length_ > len(self._y): + if self.window_length_ > len(y): param = "sp" if self.strategy == "last" and sp != 1 else "window_length_" raise ValueError( f"The {param}: {self.window_length_} is larger than " @@ -348,7 +362,7 @@ def _predict_naive(self, fh=None, X=None): y_old = lagger.fit_transform(_y) y_new = pd.DataFrame(index=expected_index, columns=[0], dtype="float64") full_y = pd.concat([y_old, y_new], keys=["a", "b"]).sort_index(level=-1) - y_filled = full_y.fillna(method="ffill").fillna(method="bfill") + y_filled = full_y.ffill().bfill() # subset to rows that contain elements we wanted to fill y_pred = y_filled.loc["b"] # convert to pd.Series from pd.DataFrame @@ -361,7 +375,7 @@ def _predict_naive(self, fh=None, X=None): y_new_mask = pd.Series(index=expected_index, dtype="float64") y_new = _pivot_sp(y_new_mask, sp, anchor=_y, anchor_side="end") full_y = pd.concat([y_old, y_new], keys=["a", "b"]).sort_index(level=-1) - y_filled = full_y.fillna(method="ffill").fillna(method="bfill") + y_filled = full_y.ffill().bfill() # subset to rows that contain elements we wanted to fill y_pred = y_filled.loc["b"] # reformat to wide @@ -550,14 +564,19 @@ def _predict_var(self, fh, X=None, cov=False): se_res = np.sqrt(mse_res) window_length = self.window_length or T + + def sqrt_flr(x): + """Square root of x, floored at 1 - to deal with in-sample predictions.""" + return np.sqrt(np.maximum(x, 1)) + # Formulas from: # https://otexts.com/fpp3/prediction-intervals.html (Table 5.2) partial_se_formulas = { - "last": lambda h: np.sqrt(h) + "last": sqrt_flr if sp == 1 - else np.sqrt(np.floor((h - 1) / sp) + 1), - "mean": lambda h: np.repeat(np.sqrt(1 + (1 / window_length)), len(h)), - "drift": lambda h: np.sqrt(h * (1 + (h / (T - 1)))), + else lambda h: sqrt_flr(np.floor((h - 1) / sp) + 1), + "mean": lambda h: np.repeat(sqrt_flr(1 + (1 / window_length)), len(h)), + "drift": lambda h: sqrt_flr(h * (1 + (h / (T - 1)))), } fh_periods = np.array(fh.to_relative(self.cutoff)) @@ -649,6 +668,11 @@ class NaiveVariance(BaseForecaster): """ _tags = { + # packaging info + # -------------- + "authors": ["fkiraly", "bethrice44"], + # estimator type + # -------------- "scitype:y": "univariate", "requires-fh-in-fit": False, "handles-missing-data": False, diff --git a/sktime/forecasting/online_learning/_online_ensemble.py b/sktime/forecasting/online_learning/_online_ensemble.py index 38a60327e1d..e50b9824a23 100644 --- a/sktime/forecasting/online_learning/_online_ensemble.py +++ b/sktime/forecasting/online_learning/_online_ensemble.py @@ -24,6 +24,12 @@ class OnlineEnsembleForecaster(EnsembleForecaster): """ _tags = { + # packaging info + # -------------- + "authors": ["magittan", "mloning"], + "maintainers": ["magittan"], + # estimator type + # -------------- "ignores-exogeneous-X": True, "requires-fh-in-fit": False, "handles-missing-data": False, diff --git a/sktime/forecasting/online_learning/_prediction_weighted_ensembler.py b/sktime/forecasting/online_learning/_prediction_weighted_ensembler.py index b48690c0e3d..b3ab085e433 100644 --- a/sktime/forecasting/online_learning/_prediction_weighted_ensembler.py +++ b/sktime/forecasting/online_learning/_prediction_weighted_ensembler.py @@ -21,6 +21,12 @@ class _PredictionWeightedEnsembler: """ _tags = { + # packaging info + # -------------- + "authors": ["magittan"], + "maintainers": ["magittan"], + # estimator type + # -------------- "ignores-exogeneous-X": True, "requires-fh-in-fit": False, "handles-missing-data": False, diff --git a/sktime/forecasting/reconcile.py b/sktime/forecasting/reconcile.py index 8a34084ae45..49ad6d186f4 100644 --- a/sktime/forecasting/reconcile.py +++ b/sktime/forecasting/reconcile.py @@ -3,9 +3,7 @@ """Implements reconciled forecasters for hierarchical data.""" __all__ = ["ReconcilerForecaster"] -__author__ = [ - "ciaran-g", -] +__author__ = ["ciaran-g"] # todo: top down historical proportions? -> new _get_g_matrix_prop(self) @@ -82,6 +80,12 @@ class ReconcilerForecaster(BaseForecaster): """ _tags = { + # packaging info + # -------------- + "authors": "ciaran-g", + "maintainers": "ciaran-g", + # estimator type + # -------------- "scitype:y": "univariate", # which y are fine? univariate/multivariate/both "ignores-exogeneous-X": False, # does estimator ignore the exogeneous X? "handles-missing-data": False, # can estimator handle missing data? diff --git a/sktime/forecasting/sarimax.py b/sktime/forecasting/sarimax.py index 0f47b5f1f0a..a74437c086d 100644 --- a/sktime/forecasting/sarimax.py +++ b/sktime/forecasting/sarimax.py @@ -85,11 +85,103 @@ class SARIMAX(_StatsModelsAdapter): Whether or not to use exact diffuse initialization for non-stationary states. Default is False (in which case approximate diffuse initialization is used). - random_state : int, RandomState instance or None, optional , - default=None β If int, random_state is the seed used by the random + disp : bool, optional, default=False + Set to True to print convergence messages. + random_state : int, RandomState instance or None, optional, default=None + default=None - If int, random_state is the seed used by the random number generator; If RandomState instance, random_state is the random number generator; If None, the random number generator is the RandomState instance used by np.random. + start_params : array_like, optional + Initial guess of the solution for the loglikelihood maximization. If None, the + default is given by SARIMAX.start_params. + transformed : bool, optional + Whether or not `start_params` is already transformed. Default is True. + includes_fixed : bool, optional + If parameters were previously fixed with the `fix_params` method, this argument + describes whether or not `start_params` also includes the fixed parameters, in + addition to the free parameters. Default is False. + cov_type : str, optional + The `cov_type` keyword governs the method for calculating the covariance matrix + of parameter estimates. Can be one of: + + - 'opg' for the outer product of gradient estimator + - 'oim' for the observed information matrix estimator, calculated + using the method of Harvey (1989) + - 'approx' for the observed information matrix estimator, calculated using a + numerical approximation of the Hessian matrix. + - 'robust' for an approximate (quasi-maximum likelihood) covariance matrix that + may be valid even in the presence of some misspecifications. Intermediate + calculations use the 'oim' method. + - 'robust_approx' is the same as 'robust' except that the intermediate + calculations use the 'approx' method. + - 'none' for no covariance matrix calculation. + + Default is 'opg' unless memory conservation is used to avoid + computing the loglikelihood values for each observation, in which + case the default is 'approx'. + cov_kwds : dict or None, optional + A dictionary of arguments affecting covariance matrix computation. + + **opg, oim, approx, robust, robust_approx** + + - 'approx_complex_step' : bool, optional - If True, numerical + approximations are computed using complex-step methods. If False, numerical + approximations are computed using finite difference methods. Default is + True. + - 'approx_centered' : bool, optional - If True, numerical + approximations computed using finite difference methods use a centered + approximation. Default is False. + method : str, optional + The `method` determines which solver from `scipy.optimize` is used, and it can + be chosen from among the following strings: + + - 'newton' for Newton-Raphson + - 'nm' for Nelder-Mead + - 'bfgs' for Broyden-Fletcher-Goldfarb-Shanno (BFGS) + - 'lbfgs' for limited-memory BFGS with optional box constraints + - 'powell' for modified Powell's method + - 'cg' for conjugate gradient + - 'ncg' for Newton-conjugate gradient + - 'basinhopping' for global basin-hopping solver + + The explicit arguments in `fit` are passed to the solver, with the exception of + the basin-hopping solver. Each solver has several optional arguments that are + not the same across solvers. See the notes section below (or scipy.optimize) for + the available arguments and for the list of explicit arguments that the + basin-hopping solver supports. + maxiter : int, optional + The maximum number of iterations to perform. + full_output : bool, optional + Set to True to have all available output in the Results object's mle_retvals + attribute. The output is dependent on the solver. See LikelihoodModelResults + notes section for more information. + callback : callable callback(xk), optional + Called after each iteration, as callback(xk), where xk is the current parameter + vector. + return_params : bool, optional + Whether or not to return only the array of maximizing parameters. Default is + False. + optim_score : {'harvey', 'approx'} or None, optional + The method by which the score vector is calculated. 'harvey' uses the method + from Harvey (1989), 'approx' uses either finite difference or complex step + differentiation depending upon the value of `optim_complex_step`, and None uses + the built-in gradient approximation of the optimizer. Default is None. This + keyword is only relevant if the optimization method uses the score. + optim_complex_step : bool, optional + Whether or not to use complex step differentiation when approximating the score; + if False, finite difference approximation is used. Default is True. This keyword + is only relevant if `optim_score` is set to 'harvey' or 'approx'. + optim_hessian : {'opg','oim','approx'}, optional + The method by which the Hessian is numerically approximated. 'opg' uses outer + product of gradients, 'oim' uses the information matrix formula from Harvey + (1989), and 'approx' uses numerical approximation. This keyword is only relevant + if the optimization method uses the Hessian matrix. + low_memory : bool, optional + If set to True, techniques are applied to substantially reduce memory usage. If + used, some features of the results object will not be available (including + smoothed results and in-sample prediction), although out-of-sample forecasting + is possible. Default is False. See Also -------- @@ -116,6 +208,13 @@ class SARIMAX(_StatsModelsAdapter): """ _tags = { + # packaging info + # -------------- + "authors": ["TNTran92", "yarnabrina"], + "maintainers": ["TNTran92", "yarnabrina"], + # "python_dependencnies": "statsmodels" - inherited from _StatsModelsAdapter + # estimator type + # -------------- "ignores-exogeneous-X": False, "capability:pred_int": True, "capability:pred_int:insample": True, @@ -140,7 +239,22 @@ def __init__( freq=None, missing="none", validate_specification=True, + disp=False, random_state=None, + start_params=None, + transformed=True, + includes_fixed=False, + cov_type=None, + cov_kwds=None, + method="lbfgs", + maxiter=50, + full_output=1, + callback=None, + return_params=False, + optim_score=None, + optim_complex_step=None, + optim_hessian=None, + low_memory=False, ): self.order = order self.seasonal_order = seasonal_order @@ -160,6 +274,23 @@ def __init__( self.missing = missing self.validate_specification = validate_specification + # Fit params + self.disp = disp + self.start_params = start_params + self.transformed = transformed + self.includes_fixed = includes_fixed + self.cov_type = cov_type + self.cov_kwds = cov_kwds + self.method = method + self.maxiter = maxiter + self.full_output = full_output + self.callback = callback + self.return_params = return_params + self.optim_score = optim_score + self.optim_complex_step = optim_complex_step + self.optim_hessian = optim_hessian + self.low_memory = low_memory + super().__init__(random_state=random_state) def _fit_forecaster(self, y, X=None): @@ -186,7 +317,23 @@ def _fit_forecaster(self, y, X=None): missing=self.missing, validate_specification=self.validate_specification, ) - self._fitted_forecaster = self._forecaster.fit() + self._fitted_forecaster = self._forecaster.fit( + disp=self.disp, + start_params=self.start_params, + transformed=self.transformed, + includes_fixed=self.includes_fixed, + cov_type=self.cov_type, + cov_kwds=self.cov_kwds, + method=self.method, + maxiter=self.maxiter, + full_output=self.full_output, + callback=self.callback, + return_params=self.return_params, + optim_score=self.optim_score, + optim_complex_step=self.optim_complex_step, + optim_hessian=self.optim_hessian, + low_memory=self.low_memory, + ) def summary(self): """Get a summary of the fitted forecaster. @@ -268,4 +415,5 @@ def get_test_params(cls, parameter_set="default"): "hamilton_representation": True, "simple_differencing": True, }, + {"cov_type": "robust", "method": "bfgs", "maxiter": 5}, ] diff --git a/sktime/forecasting/squaring_residuals.py b/sktime/forecasting/squaring_residuals.py index c9333d22ade..7265b8aad00 100644 --- a/sktime/forecasting/squaring_residuals.py +++ b/sktime/forecasting/squaring_residuals.py @@ -80,6 +80,12 @@ class SquaringResiduals(BaseForecaster): """ _tags = { + # packaging info + # -------------- + "authors": ["kcc-lion", "fkiraly"], + "maintainers": ["kcc-lion"], + # estimator type + # -------------- "scitype:y": "univariate", # which y are fine? univariate/multivariate/both "ignores-exogeneous-X": True, # does estimator ignore the exogeneous X? "handles-missing-data": False, # can estimator handle missing data? @@ -91,7 +97,6 @@ class SquaringResiduals(BaseForecaster): "capability:insample": False, "capability:pred_int": True, # does forecaster implement proba forecasts? "capability:pred_int:insample": False, - "python_version": None, # PEP 440 python version specifier to limit versions } def __init__( diff --git a/sktime/forecasting/statsforecast.py b/sktime/forecasting/statsforecast.py index 21493ab634d..237bbb5c2f5 100644 --- a/sktime/forecasting/statsforecast.py +++ b/sktime/forecasting/statsforecast.py @@ -7,6 +7,7 @@ "StatsForecastAutoARIMA", "StatsForecastAutoCES", "StatsForecastAutoETS", + "StatsForecastAutoTBATS", "StatsForecastAutoTheta", "StatsForecastMSTL", ] @@ -23,8 +24,12 @@ class StatsForecastAutoARIMA(_GeneralisedStatsForecastAdapter): """StatsForecast AutoARIMA estimator. - This implementation is inspired by Hyndman's forecast::auto.arima [1]_ - and based on the Python implementation of statsforecast [2]_ by Nixtla. + Direct interface to ``statsforecast.models.AutoARIMA`` by Nixtla. + + This estimator directly interfaces ``AutoARIMA``, + from ``statsforecast`` [2]_ by Nixtla. + The ``statsforecast`` implementation is inspired + by Hyndman's forecast::auto.arima [1]_. Returns best ARIMA model according to either AIC, AICc or BIC value. The function conducts a search over possible model within @@ -103,10 +108,12 @@ class StatsForecastAutoARIMA(_GeneralisedStatsForecastAdapter): is to use conditional-sum-of-squares to find starting values, then maximum likelihood. Can be abbreviated. It can be chosen from among the following strings: + - 'CSS-ML' for conditional sum-of-squares to find starting values and then maximum likelihood. - 'ML' for maximum likelihood. - 'CSS' for conditional sum-of-squares. + offset_test_args: dict optional (default None) Additional arguments to be passed to the unit root test. seasonal_test_args: dict optional (default None) @@ -170,6 +177,14 @@ class StatsForecastAutoARIMA(_GeneralisedStatsForecastAdapter): """ _tags = { + # packaging info + # -------------- + "authors": ["FedericoGarza", "yarnabrina"], + "maintainers": ["FedericoGarza"], + # "python_dependencies": "statsforecast" + # inherited from _GeneralisedStatsForecastAdapter + # estimator type + # -------------- "ignores-exogeneous-X": False, "capability:pred_int": True, "capability:pred_int:insample": True, @@ -317,9 +332,12 @@ def get_test_params(cls, parameter_set="default"): class StatsForecastAutoTheta(_GeneralisedStatsForecastAdapter): - """StatsForecast AutoTheta estimator. + """Statsforecast AutoTheta estimator. - This implementation is a wrapper over Nixtla implementation in statsforecast [1]_. + Direct interface to ``statsforecast.models.AutoTheta`` by Nixtla. + + This estimator directly interfaces ``AutoTheta``, + from ``statsforecast`` [1]_ by Nixtla. AutoTheta model automatically selects the best Theta (Standard Theta Model ("STM"), Optimized Theta Model ("OTM"), Dynamic Standard Theta Model ("DSTM"), Dynamic @@ -327,18 +345,19 @@ class StatsForecastAutoTheta(_GeneralisedStatsForecastAdapter): Parameters ---------- - season_length : int, optional + season_length : int, optional, default=1 number of observations per unit of time (e.g. 24 for hourly data), by default 1 - decomposition_type : str, optional - type of seasonal decomposition, by default "multiplicative" + decomposition_type : str, optional, default="multipliciative" possible values: "additive", "multiplicative" + type of seasonal decomposition, by default "multiplicative" + model : Optional[str], optional controlling Theta Model, by default searches the best model References ---------- - .. [1] https://nixtla.github.io/statsforecast/models.html#autotheta + .. [1] https://nixtlaverse.nixtla.io/statsforecast/src/core/models.html#autotheta See Also -------- @@ -346,6 +365,14 @@ class StatsForecastAutoTheta(_GeneralisedStatsForecastAdapter): """ _tags = { + # packaging info + # -------------- + # "authors": ["yarnabrina"], + # "maintainers": ["yarnabrina"], + # "python_dependencies": "statsforecast" + # inherited from _GeneralisedStatsForecastAdapter + # estimator type + # -------------- "ignores-exogeneous-X": True, "capability:pred_int": True, "capability:pred_int:insample": True, @@ -405,7 +432,9 @@ def get_test_params(cls, parameter_set="default"): class StatsForecastAutoETS(_GeneralisedStatsForecastAdapter): """StatsForecast Automatic Exponential Smoothing model. - This implementation is a wrapper over Nixtla implementation in statsforecast [1]_. + Direct interface to ``statsforecast.models.AutoETS``, + from ``statsforecast`` [1]_ by Nixtla. + The ``statsforecast`` implementation is a mirror of Hyndman's forecast::ets [2]_. Automatically selects the best ETS (Error, Trend, Seasonality) model using an information criterion. Default is Akaike Information Criterion (AICc), while @@ -429,13 +458,9 @@ class StatsForecastAutoETS(_GeneralisedStatsForecastAdapter): damped : bool A parameter that 'dampens' the trend. - Notes - ----- - This implementation is a mirror of Hyndman's forecast::ets [2]_. - References ---------- - .. [1] https://nixtla.github.io/statsforecast/models.html#autoets + .. [1] https://nixtlaverse.nixtla.io/statsforecast/src/core/models.html#autoets .. [2] https://github.com/robjhyndman/forecast See Also @@ -444,6 +469,14 @@ class StatsForecastAutoETS(_GeneralisedStatsForecastAdapter): """ _tags = { + # packaging info + # -------------- + # "authors": ["yarnabrina"], + # "maintainers": ["yarnabrina"], + # "python_dependencies": "statsforecast" + # inherited from _GeneralisedStatsForecastAdapter + # estimator type + # -------------- "ignores-exogeneous-X": True, "capability:pred_int": True, "capability:pred_int:insample": True, @@ -500,7 +533,8 @@ def get_test_params(cls, parameter_set="default"): class StatsForecastAutoCES(_GeneralisedStatsForecastAdapter): """StatsForecast Complex Exponential Smoothing model. - This implementation is a wrapper over Nixtla implementation in statsforecast [1]_. + Direct interface to ``statsforecast.models.AutoCES``, + from ``statsforecast`` [1]_ by Nixtla. Automatically selects the best Complex Exponential Smoothing model using an information criterion. Default is Akaike Information Criterion (AICc), while @@ -523,10 +557,18 @@ class StatsForecastAutoCES(_GeneralisedStatsForecastAdapter): References ---------- - .. [1] https://nixtla.github.io/statsforecast/models.html#autoces + .. [1] https://nixtlaverse.nixtla.io/statsforecast/src/core/models.html#autoces """ _tags = { + # packaging info + # -------------- + # "authors": ["yarnabrina"], + # "maintainers": ["yarnabrina"], + # "python_dependencies": "statsforecast" + # inherited from _GeneralisedStatsForecastAdapter + # estimator type + # -------------- "ignores-exogeneous-X": True, "capability:pred_int": True, "capability:pred_int:insample": True, @@ -576,11 +618,125 @@ def get_test_params(cls, parameter_set="default"): return params +class StatsForecastAutoTBATS(_GeneralisedStatsForecastAdapter): + """StatsForecast TBATS model. + + Direct interface to `statsforecast.models.AutoTBATS`, + from `statsforecast` [1]_ by Nixtla. + + Automatically selects the best TBATS model from all feasible combinations of the + parameters `use_boxcox`, `use_trend`, `use_damped_trend`, and `use_arma_errors`. + Selection is made using the AIC. + + Default value for `use_arma_errors` is `True` since this enables the evaluation of + models with and without ARMA errors. + + Parameters + ---------- + seasonal_periods : int or list of int. + Number of observations per unit of time. Ex: 24 Hourly data. + use_boxcox : bool (default=None) + Whether or not to use a Box-Cox transformation. By default tries both. + bc_lower_bound : float (default=0.0) + Lower bound for the Box-Cox transformation. + bc_upper_bound : float (default=1.5) + Upper bound for the Box-Cox transformation. + use_trend : bool (default=None) + Whether or not to use a trend component. By default tries both. + use_damped_trend : bool (default=None) + Whether or not to dampen the trend component. By default tries both. + use_arma_errors : bool (default=True) + Whether or not to use a ARMA errors. + Default is True and this evaluates both models. + + See Also + -------- + BATS + TBATS + + References + ---------- + .. [1] https://nixtlaverse.nixtla.io/statsforecast/src/core/models.html#autotbats + """ + + _tags = { + # packaging info + # -------------- + # "authors": ["yarnabrina"], + # "maintainers": ["yarnabrina"], + # "python_dependencies": "statsforecast" + # inherited from _GeneralisedStatsForecastAdapter + # estimator type + # -------------- + "ignores-exogeneous-X": True, + "capability:pred_int": True, + "capability:pred_int:insample": True, + "python_dependencies": ["statsforecast>=1.7.2"], + } + + def __init__( + self, + seasonal_periods: Union[int, List[int]], + use_boxcox: Optional[bool] = None, + use_trend: Optional[bool] = None, + use_damped_trend: Optional[bool] = None, + use_arma_errors: bool = True, + ): + self.seasonal_periods = seasonal_periods + self.use_boxcox = use_boxcox + self.use_trend = use_trend + self.use_damped_trend = use_damped_trend + self.use_arma_errors = use_arma_errors + + super().__init__() + + def _get_statsforecast_class(self): + """Create underlying forecaster instance.""" + from statsforecast.models import AutoTBATS + + return AutoTBATS + + def _get_statsforecast_params(self) -> dict: + return { + "seasonal_periods": self.seasonal_periods, + "use_boxcox": self.use_boxcox, + "use_trend": self.use_trend, + "use_damped_trend": self.use_damped_trend, + "use_arma_errors": self.use_arma_errors, + } + + @classmethod + def get_test_params(cls, parameter_set="default"): + """Return testing parameter settings for the estimator. + + Parameters + ---------- + parameter_set : str, default="default" + Name of the set of test parameters to return, for use in tests. If no + special parameters are defined for a value, will return `"default"` set. + There are currently no reserved values for forecasters. + + Returns + ------- + params : dict or list of dict, default = {} + Parameters to create testing instances of the class + Each dict are parameters to construct an "interesting" test instance, i.e., + `MyClass(**params)` or `MyClass(**params[i])` creates a valid test instance. + `create_test_instance` uses the first (or only) dictionary in `params` + """ + del parameter_set # to avoid being detected as unused by `vulture` etc. + + params = [{"seasonal_periods": 3}, {"seasonal_periods": [3, 12]}] + + return params + + class StatsForecastMSTL(_GeneralisedStatsForecastAdapter): """StatsForecast Multiple Seasonal-Trend decomposition using LOESS model. - This implementation is a wrapper over Nixtla implementation in - statsforecast [1]_. + Direct interface to ``statsforecast.models.MSTL``, + from ``statsforecast`` [1]_ by Nixtla, with a back-adapter that allows + to use ``sktime`` forecasters as trend forecasters. The MSTL (Multiple Seasonal-Trend decomposition using LOESS) decomposes the time series in multiple seasonalities using LOESS. Then forecasts the trend using @@ -619,10 +775,17 @@ class StatsForecastMSTL(_GeneralisedStatsForecastAdapter): """ _tags = { + # packaging info + # -------------- + "authors": "luca-miniati", + "maintainers": "luca-miniati", + # "python_dependencies": "statsforecast" + # inherited from _GeneralisedStatsForecastAdapter + # estimator type + # -------------- "ignores-exogeneous-X": True, "capability:pred_int": True, "capability:pred_int:insample": True, - "python_dependencies": ["statsforecast"], } def __init__( diff --git a/sktime/forecasting/stream/_update.py b/sktime/forecasting/stream/_update.py index 43c4b2eb5a3..23c6f55da9b 100644 --- a/sktime/forecasting/stream/_update.py +++ b/sktime/forecasting/stream/_update.py @@ -9,13 +9,6 @@ from sktime.datatypes._utilities import get_window from sktime.forecasting.base._delegate import _DelegatedForecaster -# prepare tags to clone - exceptions are TAGS_TO_KEEP -TAGS_TO_KEEP = ["fit_is_empty", "X_inner_mtype", "y_inner_mtype"] -# fit must be executed to fit the wrapped estimator and remember the cutoff -# mtype tags are set so X/y is passed through, conversions happen in wrapped estimator -TAGS_TO_CLONE = _DelegatedForecaster().get_tags().keys() -TAGS_TO_CLONE = list(set(TAGS_TO_CLONE).difference(TAGS_TO_KEEP)) - class UpdateRefitsEvery(_DelegatedForecaster): """Refits periodically when update is called. @@ -50,6 +43,7 @@ class UpdateRefitsEvery(_DelegatedForecaster): _delegate_name = "forecaster_" _tags = { + "authors": "fkiraly", "fit_is_empty": False, "requires-fh-in-fit": False, "y_inner_mtype": ALL_TIME_SERIES_MTYPES, @@ -68,7 +62,8 @@ def __init__( super().__init__() - self.clone_tags(forecaster, TAGS_TO_CLONE) + self._set_delegated_tags(self.forecaster_) + self.set_tags(**{"fit_is_empty": False}) def _fit(self, y, X, fh): """Fit forecaster to training data. @@ -235,6 +230,7 @@ class UpdateEvery(_DelegatedForecaster): _delegate_name = "forecaster_" _tags = { + "authors": "fkiraly", "fit_is_empty": False, "requires-fh-in-fit": False, "y_inner_mtype": ALL_TIME_SERIES_MTYPES, @@ -249,7 +245,8 @@ def __init__(self, forecaster, update_interval=None): super().__init__() - self.clone_tags(forecaster, TAGS_TO_KEEP) + self._set_delegated_tags(self.forecaster_) + self.set_tags(**{"fit_is_empty": False}) def _fit(self, y, X, fh): """Fit forecaster to training data. @@ -400,6 +397,7 @@ class DontUpdate(_DelegatedForecaster): _delegate_name = "forecaster_" _tags = { + "authors": "fkiraly", "fit_is_empty": False, "requires-fh-in-fit": False, "y_inner_mtype": ALL_TIME_SERIES_MTYPES, @@ -412,7 +410,8 @@ def __init__(self, forecaster): super().__init__() - self.clone_tags(forecaster, TAGS_TO_CLONE) + self._set_delegated_tags(self.forecaster_) + self.set_tags(**{"fit_is_empty": False}) def _update(self, y, X=None, update_params=True): """Update time series to incremental training data. diff --git a/sktime/forecasting/structural.py b/sktime/forecasting/structural.py index fe57dc545fa..bdf74b3e21f 100644 --- a/sktime/forecasting/structural.py +++ b/sktime/forecasting/structural.py @@ -198,6 +198,13 @@ class UnobservedComponents(_StatsModelsAdapter): """ _tags = { + # packaging info + # -------------- + "authors": ["juanitorduz"], + "maintainers": ["juanitorduz"], + # python_dependencies: "statsmodels" - inherited from _StatsModelsAdapter + # estimator type + # -------------- "capability:pred_int": True, "capability:pred_int:insample": True, "handles-missing-data": False, diff --git a/sktime/forecasting/tbats.py b/sktime/forecasting/tbats.py index 9477bf76b0d..79fbaca851f 100644 --- a/sktime/forecasting/tbats.py +++ b/sktime/forecasting/tbats.py @@ -79,6 +79,7 @@ class TBATS(_TbatsAdapter): See Also -------- BATS + StatsForecastAutoTBATS References ---------- @@ -134,7 +135,7 @@ def get_test_params(cls, parameter_set="default"): ------- params : dict or list of dict """ - params = { + params1 = { "use_box_cox": False, "use_trend": False, "use_damped_trend": False, @@ -142,4 +143,12 @@ def get_test_params(cls, parameter_set="default"): "use_arma_errors": False, "n_jobs": 1, } - return params + params2 = { + "use_box_cox": False, + "use_trend": True, + "use_damped_trend": True, + "sp": [], + "use_arma_errors": True, + "n_jobs": 2, + } + return [params1, params2] diff --git a/sktime/forecasting/tests/test_all_forecasters.py b/sktime/forecasting/tests/test_all_forecasters.py index 8fe750a3f41..6b79d547673 100644 --- a/sktime/forecasting/tests/test_all_forecasters.py +++ b/sktime/forecasting/tests/test_all_forecasters.py @@ -13,6 +13,7 @@ from sktime.datatypes._utilities import get_cutoff from sktime.exceptions import NotFittedError from sktime.forecasting.base._delegate import _DelegatedForecaster +from sktime.forecasting.base._fh import ForecastingHorizon from sktime.forecasting.tests._config import ( TEST_ALPHAS, TEST_FHS, @@ -350,7 +351,11 @@ def _check_predict_intervals(self, pred_ints, y_train, fh, coverage): """Check expected interval prediction output.""" # check expected type valid, msg, _ = check_is_mtype( - pred_ints, mtype="pred_interval", scitype="Proba", return_metadata=True + pred_ints, + mtype="pred_interval", + scitype="Proba", + return_metadata=True, + msg_return_dict="list", ) # type: ignore assert valid, msg @@ -379,7 +384,7 @@ def get_expected_columns(): found = pred_ints.columns.to_flat_index() msg = ( - "columns of returned prediction interval DataFrame do not" + "columns of returned prediction interval DataFrame do not " f"match up with expected columns. Expected: {expected}," f"found: {found}" ) @@ -436,6 +441,7 @@ def _check_predict_quantiles(self, pred_quantiles, y_train, fh, alpha): mtype="pred_quantiles", scitype="Proba", return_metadata=True, + msg_return_dict="list", ) # type: ignore assert valid, msg @@ -830,7 +836,7 @@ def test_hierarchical_with_exogeneous(self, estimator_instance, n_columns): y_pred = estimator_instance.predict(X=X_test) assert isinstance(y_pred, pd.DataFrame) - assert check_is_mtype(y_pred, "pd_multiindex_hier") + assert check_is_mtype(y_pred, "pd_multiindex_hier", msg_return_dict="list") msg = ( "returned columns after predict are not as expected. " f"expected: {y_train.columns}. Found: {y_pred.columns}" @@ -851,7 +857,9 @@ def test_hierarchical_with_exogeneous(self, estimator_instance, n_columns): y_pred_int = estimator_instance.predict_interval(X=X_test) assert isinstance(y_pred_int, pd.DataFrame) - assert check_is_mtype(y_pred_int, "pd_multiindex_hier") + assert check_is_mtype( + y_pred_int, "pd_multiindex_hier", msg_return_dict="list" + ) if len(y_pred_int.index) == len(X_test.index): assert np.all(y_pred_int.index == X_test.index) @@ -861,9 +869,32 @@ def test_hierarchical_with_exogeneous(self, estimator_instance, n_columns): y_pred_q = estimator_instance.predict_quantiles(X=X_test) assert isinstance(y_pred_q, pd.DataFrame) - assert check_is_mtype(y_pred_q, "pd_multiindex_hier") + assert check_is_mtype( + y_pred_q, "pd_multiindex_hier", msg_return_dict="list" + ) if len(y_pred_q.index) == len(X_test.index): assert np.all(y_pred_q.index == X_test.index) else: assert set(X_test.index).issubset(y_pred_q.index) + + def test_fit_predict(self, estimator_instance, n_columns): + """Check fit_predict method against interface expectations. + + Does not check directly against fit and predict, as either may + be stochastic and not return the same result each time. + """ + y = _make_series(n_columns=n_columns) + X = _make_series(n_columns=3) + + fh = ForecastingHorizon([1, 2, 3]) + + y_train, _, X_train, X_test = temporal_train_test_split(y, X, fh=fh) + + y_pred = estimator_instance.fit_predict( + y=y_train, X=X_train, fh=fh, X_pred=X_test + ) + + cutoff = get_cutoff(y_train, return_index=True) + _assert_correct_pred_time_index(y_pred.index, cutoff, fh) + _assert_correct_columns(y_pred, y_train) diff --git a/sktime/forecasting/tests/test_conformal.py b/sktime/forecasting/tests/test_conformal.py index a371cc6d634..dc84aeda778 100644 --- a/sktime/forecasting/tests/test_conformal.py +++ b/sktime/forecasting/tests/test_conformal.py @@ -23,7 +23,7 @@ def test_conformal_standard(): conformal_forecaster.fit(y, fh=[1, 2, 3]) pred_int = conformal_forecaster.predict_interval() - assert check_is_mtype(pred_int, "pred_interval", "Proba") + assert check_is_mtype(pred_int, "pred_interval", "Proba", msg_return_dict="list") @pytest.mark.skipif( @@ -60,7 +60,9 @@ def test_conformal_with_gscv(): y_pred_quantiles = gscv_with_conformal.predict_quantiles() - assert check_is_mtype(y_pred_quantiles, "pred_quantiles", "Proba") + assert check_is_mtype( + y_pred_quantiles, "pred_quantiles", "Proba", msg_return_dict="list" + ) @pytest.mark.skipif( diff --git a/sktime/forecasting/tests/test_exp_smoothing.py b/sktime/forecasting/tests/test_exp_smoothing.py index fe1a2cf21d4..2d2633faac8 100644 --- a/sktime/forecasting/tests/test_exp_smoothing.py +++ b/sktime/forecasting/tests/test_exp_smoothing.py @@ -1,11 +1,13 @@ """Test exponential smoothing forecasters.""" -__author__ = ["mloning", "big-o"] +__author__ = ["mloning", "big-o", "ciaran-g"] __all__ = ["test_set_params"] +import pandas as pd import pytest from numpy.testing import assert_array_equal +from sktime.datasets import load_airline from sktime.forecasting.exp_smoothing import ExponentialSmoothing from sktime.split import temporal_train_test_split from sktime.utils._testing.forecasting import make_forecasting_problem @@ -35,3 +37,23 @@ def test_set_params(): y_pred = f.predict() assert_array_equal(y_pred, expected) + + +@pytest.mark.skipif( + not _check_soft_dependencies("statsmodels", severity="none"), + reason="skip test if required soft dependency not available", +) +def check_panel_expsmooth(): + """Test exponential smoothing on panel data with datetime index.""" + # make panel with hour of day panel and datetime index + y = load_airline() + y.index = pd.date_range(start="1960-01-01", periods=len(y.index), freq="H") + y.index.names = ["datetime"] + y.name = "passengers" + y = y.to_frame() + y["hour_of_day"] = y.index.hour + y = y.reset_index().set_index(["hour_of_day", "datetime"]).sort_index() + + forecaster = ExponentialSmoothing(trend="add", sp=1) + forecaster.fit(y) + forecaster.predict(fh=[1, 3]) diff --git a/sktime/forecasting/tests/test_naive.py b/sktime/forecasting/tests/test_naive.py index ef5f37af02a..c003e18bfb8 100644 --- a/sktime/forecasting/tests/test_naive.py +++ b/sktime/forecasting/tests/test_naive.py @@ -213,7 +213,7 @@ def test_strategy_mean_and_last_seasonal_additional_combinations( # For selected cases, remove a redundant data point by making it NaN if window_length > sp: # create a trailing NaN value in the training set - data[window_length - 1] = np.nan + data.iloc[window_length - 1] = np.nan # Split into train and test data train_data = data[:window_length] diff --git a/sktime/forecasting/tests/test_prophet.py b/sktime/forecasting/tests/test_prophet.py index e1cc1012791..82ca8d25279 100644 --- a/sktime/forecasting/tests/test_prophet.py +++ b/sktime/forecasting/tests/test_prophet.py @@ -3,7 +3,9 @@ # copyright: sktime developers, BSD-3-Clause License (see LICENSE file) """ -__author__ = ["fkiraly"] +__author__ = ["fkiraly", "tpvasconcelos"] + +from unittest import mock import pandas as pd import pytest @@ -77,3 +79,28 @@ def test_prophet_period_fh(convert_to_datetime): else: assert isinstance(y_pred.index, pd.PeriodIndex) assert (y_pred.index == fh_index).all() + + +@pytest.mark.skipif( + not run_test_for_class(Prophet), + reason="run test only if softdeps are present and incrementally (if requested)", +) +@pytest.mark.parametrize( + "fit_kwargs", [None, {"foo": "bar"}, {"foo1": "bar1", "foo2": "bar2"}] +) +def test_prophet_fit_kwargs_are_passed_down(fit_kwargs: dict): + """Test that the `fit_kwargs` hyperparameter is passed down to Prophet.fit().""" + from sktime.datasets import load_airline + from sktime.forecasting.fbprophet import Prophet + + y = load_airline() + with mock.patch("prophet.forecaster.Prophet.fit") as mock_fit: + forecaster = Prophet(fit_kwargs=fit_kwargs) + forecaster.fit(y) + mock_fit.assert_called_once() + assert mock_fit.call_args.args == () + call_kwargs = mock_fit.call_args.kwargs + # `df` should always be one of the arguments but + # we don't care about its actual value here + call_kwargs.pop("df") + assert call_kwargs == (fit_kwargs or {}) diff --git a/sktime/forecasting/tests/test_statsforecast.py b/sktime/forecasting/tests/test_statsforecast.py new file mode 100644 index 00000000000..106230898e9 --- /dev/null +++ b/sktime/forecasting/tests/test_statsforecast.py @@ -0,0 +1,43 @@ +"""Tests for StatsForecast. + +# copyright: sktime developers, BSD-3-Clause License (see LICENSE file) +""" + +from unittest.mock import patch + +import numpy as np +import pandas as pd +import pytest + +from sktime.forecasting.base import ForecastingHorizon +from sktime.forecasting.statsforecast import StatsForecastMSTL +from sktime.tests.test_switch import run_test_for_class + + +@pytest.mark.skipif( + not run_test_for_class(StatsForecastMSTL), + reason="run test only if softdeps are present and incrementally (if requested)", +) +@patch("statsforecast.models.AutoETS", autospec=True) +def test_statsforecast_mstl(mock_autoets): + """ + Check that StatsForecast MSTL adapter calls trend forecaster with + the correct arguments. + """ + from sktime.datasets import load_airline + + y = load_airline() + + predict = mock_autoets.return_value.predict + predict.return_value = { + "mean": np.arange(36, dtype=np.float64), + "lo-95.0": np.arange(36, dtype=np.float64), + "hi-95.0": np.arange(36, dtype=np.float64), + } + + model = StatsForecastMSTL(season_length=[12]) + model.fit(y) + fh_index = pd.PeriodIndex(pd.date_range("1961-01", periods=36, freq="M")) + fh = ForecastingHorizon(fh_index, is_relative=False) + model.predict_interval(fh, coverage=0.95) + predict.assert_called_with(36, X=None, level=[95.0]) diff --git a/sktime/forecasting/tests/test_theta.py b/sktime/forecasting/tests/test_theta.py index f6b6685b15b..a31a22fd3de 100644 --- a/sktime/forecasting/tests/test_theta.py +++ b/sktime/forecasting/tests/test_theta.py @@ -1,9 +1,10 @@ """Tests for ThetaForecaster.""" # copyright: sktime developers, BSD-3-Clause License (see LICENSE file) -__author__ = ["big-o", "kejsitake"] +__author__ = ["big-o", "kejsitake", "ciaran-g"] import numpy as np +import pandas as pd import pytest from sktime.datasets import load_airline @@ -121,3 +122,24 @@ def test_theta_and_thetamodular(): y_pred_thetamodular = f1.predict(fh=fh) np.testing.assert_allclose(y_pred_theta, y_pred_thetamodular, rtol=0.06) + + +@pytest.mark.skipif( + not _check_soft_dependencies("statsmodels", severity="none"), + reason="skip test if required soft dependency not available", +) +def check_panel_theta_quantiles(): + """Test predict quantiles with theta on panel data with datetime index.""" + # make panel with hour of day panel and datetime index + y = load_airline() + y.index = pd.date_range(start="1960-01-01", periods=len(y.index), freq="H") + y.index.names = ["datetime"] + y.name = "passengers" + y = y.to_frame() + y["hour_of_day"] = y.index.hour + y = y.reset_index().set_index(["hour_of_day", "datetime"]).sort_index() + + forecaster = ThetaForecaster(sp=1) + forecaster.fit(y) + forecaster.predict(fh=[1, 3]) + forecaster.predict_quantiles(fh=[1, 3], alpha=[0.1, 0.5, 0.9]) diff --git a/sktime/forecasting/theta.py b/sktime/forecasting/theta.py index ce00bf451c4..6f7460579b2 100644 --- a/sktime/forecasting/theta.py +++ b/sktime/forecasting/theta.py @@ -89,7 +89,13 @@ class ThetaForecaster(ExponentialSmoothing): _fitted_param_names = ("initial_level", "smoothing_level") _tags = { + # packaging info + # -------------- + "authors": ["big-o", "mloning", "kejsitake", "fkiraly", "GuzalBulatova"], "scitype:y": "univariate", + # "python_dependencies": "statsmodels" - inherited from _StatsModelsAdapter + # estimator type + # -------------- "ignores-exogeneous-X": True, "capability:pred_int": True, "capability:pred_int:insample": True, @@ -411,6 +417,7 @@ class ThetaModularForecaster(BaseForecaster): """ _tags = { + "authors": ["GuzalBulatova", "fkiraly"], "univariate-only": False, "y_inner_mtype": "pd.Series", "requires-fh-in-fit": False, diff --git a/sktime/forecasting/trend/__init__.py b/sktime/forecasting/trend/__init__.py index f1df869127b..70703a83205 100644 --- a/sktime/forecasting/trend/__init__.py +++ b/sktime/forecasting/trend/__init__.py @@ -7,11 +7,15 @@ "PolynomialTrendForecaster", "STLForecaster", "CurveFitForecaster", + "ProphetPiecewiseLinearTrendForecaster", ] from sktime.forecasting.trend._curve_fit_forecaster import CurveFitForecaster from sktime.forecasting.trend._polynomial_trend_forecaster import ( PolynomialTrendForecaster, ) +from sktime.forecasting.trend._pwl_trend_forecaster import ( + ProphetPiecewiseLinearTrendForecaster, +) from sktime.forecasting.trend._stl_forecaster import STLForecaster from sktime.forecasting.trend._trend_forecaster import TrendForecaster diff --git a/sktime/forecasting/trend/_curve_fit_forecaster.py b/sktime/forecasting/trend/_curve_fit_forecaster.py index 741b6d13298..0d7803e9846 100644 --- a/sktime/forecasting/trend/_curve_fit_forecaster.py +++ b/sktime/forecasting/trend/_curve_fit_forecaster.py @@ -74,6 +74,8 @@ class CurveFitForecaster(BaseForecaster): """ _tags = { + "authors": ["benheid"], + "maintainers": ["benheid"], "scitype:y": "univariate", "y_inner_mtype": "pd.Series", "X_inner_mtype": "pd.DataFrame", diff --git a/sktime/forecasting/trend/_polynomial_trend_forecaster.py b/sktime/forecasting/trend/_polynomial_trend_forecaster.py index 2da6bc927de..fcafac102ed 100644 --- a/sktime/forecasting/trend/_polynomial_trend_forecaster.py +++ b/sktime/forecasting/trend/_polynomial_trend_forecaster.py @@ -18,28 +18,30 @@ class PolynomialTrendForecaster(BaseForecaster): r"""Forecast time series data with a polynomial trend. - Uses a `sklearn` regressor specified by the `regressor` parameter + Uses an ``sklearn`` regressor specified by the ``regressor`` parameter to perform regression on time series values against their corresponding indices, after extraction of polynomial features. - Same as `TrendForecaster` where `regressor` is pipelined with transformation step - `PolynomialFeatures(degree, with_intercept)` applied to time index, at the start. + Same as ``TrendForecaster`` where ``regressor`` is pipelined with transformation + step ``PolynomialFeatures(degree, with_intercept)`` applied to time index, + at the start. - In `fit`, for input time series :math:`(v_i, p(t_i)), i = 1, \dots, T`, + In ``fit``, for input time series :math:`(v_i, p(t_i)), i = 1, \dots, T`, where :math:`v_i` are values, :math:`t_i` are time stamps, and :math:`p` is the polynomial feature transform with degree `degree`, and with/without intercept depending on `with_intercept`, - fits an `sklearn` model :math:`v_i = f(p(t_i)) + \epsilon_i`, where `f` is - the model fitted when `regressor.fit` is passed `X` = vector of :math:`p(t_i)`, - and `y` = vector of :math:`v_i`. + fits an `sklearn` model :math:`v_i = f(p(t_i)) + \epsilon_i`, where :math:`f` is + the model fitted when ``regressor.fit`` is passed ``X`` = vector of :math:`p(t_i)`, + and ``y`` = vector of :math:`v_i`. - In `predict`, for a new time point :math:`t_*`, predicts :math:`f(p(t_*))`, - where :math:`f` is the function as fitted above in `fit`, + In ``predict``, for a new time point :math:`t_*`, predicts :math:`f(p(t_*))`, + where :math:`f` is the function as fitted above in ``fit``, and :math:`p` is the same polynomial feature transform as above. - Default for `regressor` is linear regression = `sklearn` `LinearRegression` default. + Default for ``regressor`` is linear regression = ``sklearn`` ``LinearRegression``, + with default parameters. Default for `degree` is 1. If time stamps are `pd.DatetimeIndex`, fitted coefficients are in units - of days since start of 1970. If time stamps are `pd.PeriodIndex`, + of days since start of 1970. If time stamps are ``pd.PeriodIndex``, coefficients are in units of (full) periods since start of 1970. Parameters @@ -54,6 +56,14 @@ class PolynomialTrendForecaster(BaseForecaster): zero. (i.e. a column of ones, acts as an intercept term in a linear model) + Attributes + ---------- + regressor_ : sklearn regressor estimator object + The fitted regressor object. + This is a fitted ``sklearn`` pipeline with steps + ``PolynomialFeatures(degree, with_intercept)``, + followed by a clone of ``regressor``. + Examples -------- >>> from sktime.datasets import load_airline @@ -66,6 +76,8 @@ class PolynomialTrendForecaster(BaseForecaster): """ _tags = { + "authors": ["tensorflow-as-tf", "mloning", "aiwalter", "fkiraly"], + "maintainers": ["tensorflow-as-tf"], "ignores-exogeneous-X": True, "requires-fh-in-fit": False, "handles-missing-data": False, diff --git a/sktime/forecasting/trend/_pwl_trend_forecaster.py b/sktime/forecasting/trend/_pwl_trend_forecaster.py new file mode 100644 index 00000000000..e5620f2747c --- /dev/null +++ b/sktime/forecasting/trend/_pwl_trend_forecaster.py @@ -0,0 +1,209 @@ +#!/usr/bin/env python3 -u +# copyright: sktime developers, BSD-3-Clause License (see LICENSE file) +"""Implements a piecewise linear trend forecaster by wrapping fbprophet.""" + +__author__ = ["sbuse"] + +import pandas as pd + +from sktime.forecasting.base._base import DEFAULT_ALPHA +from sktime.forecasting.base.adapters import _ProphetAdapter + + +class ProphetPiecewiseLinearTrendForecaster(_ProphetAdapter): + """ + Forecast time series data with a piecewise linear trend, fitted via prophet. + + The forecaster uses Facebook's prophet algorithm [1]_ and extracts the piecewise + linear trend from it. Only hyper-parameters relevant for the trend modelling are + exposed via the constructor. + + Seasonalities are set to additive and "auto" detection in prophet, + which means that yearly, weekly and daily seasonality are automatically detected, + and included in the model if present, using prophet's default settings. + + For more granular control of components or seasonality, use + ``sktime.forecasting.fbprophet.Prophet`` directly. + + Data can be passed in one of the sktime compatible formats, + naming a column `ds` such as in the prophet package is not necessary. + + Unlike vanilla `prophet`, also supports integer/range and period index: + * integer/range index is interpreted as days since Jan 1, 2000 + * `PeriodIndex` is converted using the `pandas` method `to_timestamp` + + Parameters + ---------- + changepoints: list or None, default=None + List of dates at which to include potential changepoints. If + not specified, potential changepoints are selected automatically. + n_changepoints: int, default=25 + Number of potential changepoints to include. Not used + if input `changepoints` is supplied. If `changepoints` is not supplied, + then n_changepoints potential changepoints are selected uniformly from + the first `changepoint_range` proportion of the history. + changepoint_range: float, default=0.8 + Proportion of history in which trend changepoints will + be estimated. Defaults to 0.8 for the first 80%. Not used if + `changepoints` is specified. + changepoint_prior_scale: float, default=0.05 + Parameter modulating the flexibility of the + automatic changepoint selection. Large values will allow many + changepoints, small values will allow few changepoints. + Recommended to take values within [0.001,0.5]. + + References + ---------- + .. [1] https://facebook.github.io/prophet + + Examples + -------- + >>> from sktime.datasets import load_airline + >>> from sktime.forecasting.trend import ProphetPiecewiseLinearTrendForecaster + >>> from sktime.forecasting.base import ForecastingHorizon + >>> from sktime.split import temporal_train_test_split + >>> y =load_airline().to_timestamp(freq='M') + >>> y_train, y_test = temporal_train_test_split(y) + >>> fh = ForecastingHorizon(y.index, is_relative=False) + >>> forecaster = ProphetPiecewiseLinearTrendForecaster() # doctest: +SKIP + >>> forecaster.fit(y_train) # doctest: +SKIP + ProphetPiecewiseLinearTrendForecaster(...) + >>> y_pred = forecaster.predict(fh) # doctest: +SKIP + """ + + _tags = { + "authors": ["sbuse"], + "maintainers": ["sbuse"], + "scitype:y": "univariate", + "y_inner_mtype": "pd.DataFrame", + "X_inner_mtype": "pd.DataFrame", + "ignores-exogeneous-X": True, + "requires-fh-in-fit": False, + "python_dependencies": "prophet", + } + + def __init__( + self, + changepoints=None, + n_changepoints=25, + changepoint_range=0.8, + changepoint_prior_scale=0.05, + verbose=0, + ): + self.freq = None + self.add_seasonality = None + self.add_country_holidays = None + self.growth = "linear" + self.growth_floor = 0.0 + self.growth_cap = None + self.changepoints = changepoints + self.n_changepoints = n_changepoints + self.changepoint_range = changepoint_range + self.yearly_seasonality = "auto" + self.weekly_seasonality = "auto" + self.daily_seasonality = "auto" + self.holidays = None + self.seasonality_mode = "additive" + self.seasonality_prior_scale = 10.0 + self.changepoint_prior_scale = changepoint_prior_scale + self.holidays_prior_scale = 10.0 + self.mcmc_samples = 0 + self.alpha = DEFAULT_ALPHA + self.uncertainty_samples = 1000 + self.stan_backend = None + self.verbose = verbose + + super().__init__() + + # import inside method to avoid hard dependency + from prophet.forecaster import Prophet as _Prophet + + self._ModelClass = _Prophet + + def _instantiate_model(self): + self._forecaster = self._ModelClass( + growth=self.growth, + changepoints=self.changepoints, + n_changepoints=self.n_changepoints, + changepoint_range=self.changepoint_range, + yearly_seasonality=self.yearly_seasonality, + weekly_seasonality=self.weekly_seasonality, + daily_seasonality=self.daily_seasonality, + holidays=self.holidays, + seasonality_mode=self.seasonality_mode, + seasonality_prior_scale=float(self.seasonality_prior_scale), + holidays_prior_scale=float(self.holidays_prior_scale), + changepoint_prior_scale=float(self.changepoint_prior_scale), + mcmc_samples=self.mcmc_samples, + interval_width=1 - self.alpha, + uncertainty_samples=self.uncertainty_samples, + stan_backend=self.stan_backend, + ) + return self + + # _fit is defined in the superclass and is fine as it is. + + def _predict(self, fh, X=None): + """Forecast time series trend at future horizon. + + private _predict containing the core logic, called from predict + + State required: + Requires state to be "fitted". + + Accesses in self: + Fitted model attributes ending in "_" + self.cutoff + + Parameters + ---------- + fh : guaranteed to be ForecastingHorizon or None, optional (default=None) + The forecasting horizon with the steps ahead to to predict. + X : pd.DataFrame, optional (default=None) + Exogenous time series + + Returns + ------- + y_pred : pd.DataFrame + Point predictions + """ + fh = self._get_prophet_fh() + future = pd.DataFrame({"ds": fh}, index=fh) + + out = self._forecaster.setup_dataframe(future.copy()) + out["trend"] = self._forecaster.predict_trend(out) + + y_pred = out.loc[:, "trend"] + y_pred.index = future.index + + if isinstance(self._y.columns[0], str): + y_pred.name = self._y.columns[0] + else: + y_pred.name = None + + if self.y_index_was_int_ or self.y_index_was_period_: + y_pred.index = self.fh.to_absolute_index(cutoff=self.cutoff) + + return y_pred + + @classmethod + def get_test_params(cls, parameter_set="default"): + """Return testing parameter settings for the estimator. + + Parameters + ---------- + parameter_set : str, default="default" + Name of the set of test parameters to return, for use in tests. If no + special parameters are defined for a value, will return `"default"` set. + + + Returns + ------- + params : dict or list of dict + """ + params0 = { + "changepoint_range": 0.8, + "changepoint_prior_scale": 0.05, + } + + return params0 diff --git a/sktime/forecasting/trend/_stl_forecaster.py b/sktime/forecasting/trend/_stl_forecaster.py index 41843579626..2b7dc4ad747 100644 --- a/sktime/forecasting/trend/_stl_forecaster.py +++ b/sktime/forecasting/trend/_stl_forecaster.py @@ -134,6 +134,8 @@ class STLForecaster(BaseForecaster): """ _tags = { + "authors": ["tensorflow-as-tf", "mloning", "aiwalter", "fkiraly"], + "maintainers": ["tensorflow-as-tf"], "scitype:y": "univariate", # which y are fine? univariate/multivariate/both "ignores-exogeneous-X": False, # does estimator ignore the exogeneous X? "handles-missing-data": False, # can estimator handle missing data? diff --git a/sktime/forecasting/trend/_trend_forecaster.py b/sktime/forecasting/trend/_trend_forecaster.py index 0377e821b20..c0fe92d8dc6 100644 --- a/sktime/forecasting/trend/_trend_forecaster.py +++ b/sktime/forecasting/trend/_trend_forecaster.py @@ -16,23 +16,24 @@ class TrendForecaster(BaseForecaster): r"""Trend based forecasts of time series data, regressing values on index. - Uses a `sklearn` regressor specified by the `regressor` parameter + Uses an ``sklearn`` regressor specified by the ``regressor`` parameter to perform regression on time series values against their corresponding indices, providing trend-based forecasts: - In `fit`, for input time series :math:`(v_i, t_i), i = 1, \dots, T`, + In ``fit``, for input time series :math:`(v_i, t_i), i = 1, \dots, T`, where :math:`v_i` are values and :math:`t_i` are time stamps, - fits an `sklearn` model :math:`v_i = f(t_i) + \epsilon_i`, where `f` is - the model fitted when `regressor.fit` is passed `X` = vector of :math:`t_i`, - and `y` = vector of :math:`v_i`. + fits an `sklearn` model :math:`v_i = f(t_i) + \epsilon_i`, where :math:`f` is + the model fitted when ``regressor.fit`` is passed ``X`` = vector of :math:`t_i`, + and ``y`` = vector of :math:`v_i`. - In `predict`, for a new time point :math:`t_*`, predicts :math:`f(t_*)`, - where :math:`f` is the function as fitted above in `fit`. + In ``predict``, for a new time point :math:`t_*`, predicts :math:`f(t_*)`, + where :math:`f` is the function as fitted above in ``fit``. - Default for `regressor` is linear regression = `sklearn` `LinearRegression` default. + Default for ``regressor`` is linear regression = ``sklearn`` ``LinearRegression``, + with default parameters. - If time stamps are `pd.DatetimeIndex`, fitted coefficients are in units - of days since start of 1970. If time stamps are `pd.PeriodIndex`, + If time stamps are ``pd.DatetimeIndex``, fitted coefficients are in units + of days since start of 1970. If time stamps are ``pd.PeriodIndex``, coefficients are in units of (full) periods since start of 1970. Parameters @@ -41,6 +42,11 @@ class TrendForecaster(BaseForecaster): Define the regression model type. If not set, will default to sklearn.linear_model.LinearRegression + Attributes + ---------- + regressor_ : sklearn regressor estimator object + The fitted regressor object. Clone of ``regressor``. + Examples -------- >>> from sktime.datasets import load_airline @@ -53,6 +59,8 @@ class TrendForecaster(BaseForecaster): """ _tags = { + "authors": ["tensorflow-as-tf", "mloning", "aiwalter", "fkiraly"], + "maintainers": ["tensorflow-as-tf"], "ignores-exogeneous-X": True, "requires-fh-in-fit": False, "handles-missing-data": False, diff --git a/sktime/forecasting/trend/tests/test_pwl_trend.py b/sktime/forecasting/trend/tests/test_pwl_trend.py new file mode 100644 index 00000000000..01275098c47 --- /dev/null +++ b/sktime/forecasting/trend/tests/test_pwl_trend.py @@ -0,0 +1,130 @@ +"""Test piecewise linear trend forecasters. + +# copyright: sktime developers, BSD-3-Clause License (see LICENSE file) +""" + +__author__ = ["sbuse"] + +import numpy as np +import pandas as pd +import pytest + +from sktime.datasets import load_airline +from sktime.forecasting.base import ForecastingHorizon +from sktime.forecasting.trend import ( + PolynomialTrendForecaster, + ProphetPiecewiseLinearTrendForecaster, +) +from sktime.split import temporal_train_test_split +from sktime.tests.test_switch import run_test_for_class + + +@pytest.mark.skipif( + not run_test_for_class(ProphetPiecewiseLinearTrendForecaster), + reason="run test only if softdeps are present and incrementally (if requested)", +) +def test_for_changes_in_original(): + """Check if the original prophet implementation returns the same result as the + sktime wrapper for the airline dataset. + + Raises + ------ + AssertionError - if the predictions are not exactly the same + """ + from prophet import Prophet + + from sktime.forecasting.fbprophet import Prophet as skProphet + + y = load_airline().to_timestamp(freq="M") + + # ------original Prophet--------- + prophet = Prophet() + prophet.fit(pd.DataFrame(data={"ds": y.index, "y": y.values})) + future = prophet.make_future_dataframe(periods=12, freq="M", include_history=False) + forecast = prophet.predict(future)[["ds", "yhat"]] + y_pred_original = forecast["yhat"] + y_pred_original.index = forecast["ds"].values + + # ------sktime Prophet----------- + skprophet = skProphet() + y_pred_sktime = skprophet.fit_predict(y, fh=np.arange(1, 13)) + + np.testing.assert_array_equal(y_pred_original.values, y_pred_sktime.values) # exact + + +@pytest.mark.skipif( + not run_test_for_class(ProphetPiecewiseLinearTrendForecaster), + reason="run test only if softdeps are present and incrementally (if requested)", +) +def test_pred_errors_against_linear(): + """Check prediction performance on airline dataset. + + For a small value of changepoint_prior_scale like 0.001 the + ProphetPiecewiseLinearTrendForecaster must return a single straight trendline. + + Raises + ------ + AssertionError - if the trend forecast is not compatible with a linear trend. + """ + y = load_airline().to_timestamp(freq="M") + fh = ForecastingHorizon(y.index, is_relative=False) + + pwl = ProphetPiecewiseLinearTrendForecaster(changepoint_prior_scale=0.001) + y_pred_pwl = pwl.fit(y).predict(fh) + + linear = PolynomialTrendForecaster(degree=1) + y_pred_linear = linear.fit(y).predict(fh) + + np.testing.assert_allclose(y_pred_pwl, y_pred_linear, rtol=0.04) + + +@pytest.mark.skipif( + not run_test_for_class(ProphetPiecewiseLinearTrendForecaster), + reason="run test only if softdeps are present and incrementally (if requested)", +) +def test_pred_with_explicit_changepoints(): + """Check functionality with explicit changepoints. + + When changepoints are passed to the ProphetPiecewiseLinearTrendForecaster + the prediction has to be different then the automatic detection because the + changepoints are forcefully added. + + Raises + ------ + AssertionError - if adding a changepoint has no effect on the trend prediction. + """ + y = load_airline().to_timestamp(freq="M") + y_train, y_test = temporal_train_test_split(y) + + fh = ForecastingHorizon(y_test.index, is_relative=False) + a = ProphetPiecewiseLinearTrendForecaster(changepoints=["1953-05-31"]) + b = ProphetPiecewiseLinearTrendForecaster() + + slope_a = a.fit(y_train).predict(fh).diff().mean() + slope_b = b.fit(y_train).predict(fh).diff().mean() + + assert not np.allclose(slope_a, slope_b, rtol=0.1) + + +@pytest.mark.skipif( + not run_test_for_class(ProphetPiecewiseLinearTrendForecaster), + reason="run test only if softdeps are present and incrementally (if requested)", +) +@pytest.mark.parametrize("indextype", ["range", "period"]) +def test_pwl_trend_nonnative_index(indextype): + """Check pwl detrend with RangeIndex and PeriodIndex.""" + y = pd.DataFrame({"a": [1, 2, 3, 4]}) + + if indextype == "period": + y.index = pd.period_range("2000-01-01", periods=4) + + fh = [1, 2] + + f = ProphetPiecewiseLinearTrendForecaster() + f.fit(y) + y_pred = f.predict(fh=fh) + + if indextype == "range": + assert pd.api.types.is_integer_dtype(y_pred.index) + if indextype == "period": + assert isinstance(y_pred.index, pd.PeriodIndex) diff --git a/sktime/forecasting/var.py b/sktime/forecasting/var.py index dd0dab8dbe3..c370cd6bbfa 100644 --- a/sktime/forecasting/var.py +++ b/sktime/forecasting/var.py @@ -72,6 +72,13 @@ class VAR(_StatsModelsAdapter): _fitted_param_names = ("aic", "fpe", "hqic", "bic") _tags = { + # packaging info + # -------------- + "authors": ["thayeylolu", "aiwalter", "lbventura"], + "maintainers": "lbventura", + # "python_dependencies": "statsmodels" - inherited from _StatsModelsAdapter + # estimator type + # -------------- "scitype:y": "multivariate", "y_inner_mtype": "pd.DataFrame", "requires-fh-in-fit": False, diff --git a/sktime/forecasting/varmax.py b/sktime/forecasting/varmax.py index 91c603cd077..25755f25555 100644 --- a/sktime/forecasting/varmax.py +++ b/sktime/forecasting/varmax.py @@ -206,6 +206,12 @@ class VARMAX(_StatsModelsAdapter): """ _tags = { + # packaging info + # -------------- + "authors": "KatieBuc", + # "python_dependencies": "statsmodels" - inherited from _StatsModelsAdapter + # estimator type + # -------------- "scitype:y": "multivariate", "ignores-exogeneous-X": False, "handles-missing-data": False, diff --git a/sktime/forecasting/vecm.py b/sktime/forecasting/vecm.py index 4b4c55b9ab6..de59d51eb3b 100644 --- a/sktime/forecasting/vecm.py +++ b/sktime/forecasting/vecm.py @@ -77,6 +77,12 @@ class VECM(_StatsModelsAdapter): """ _tags = { + # packaging info + # -------------- + "authors": ["thayeylolu", "AurumnPegasus"], + # "python_dependencies": "statsmodels" - inherited from _StatsModelsAdapter + # estimator type + # -------------- "scitype:y": "multivariate", "y_inner_mtype": "pd.DataFrame", "X_inner_mtype": "pd.DataFrame", diff --git a/sktime/networks/base.py b/sktime/networks/base.py index 4cf14fff7d9..7d04a18fb84 100644 --- a/sktime/networks/base.py +++ b/sktime/networks/base.py @@ -4,7 +4,15 @@ from abc import ABC, abstractmethod +import numpy as np +import pandas as pd + from sktime.base import BaseObject +from sktime.forecasting.base import BaseForecaster +from sktime.utils.validation._dependencies import _check_soft_dependencies + +if _check_soft_dependencies("torch", severity="none"): + import torch class BaseDeepNetwork(BaseObject, ABC): @@ -27,3 +35,244 @@ def build_network(self, input_shape, **kwargs): output_layer : a keras layer """ ... + + +class BaseDeepNetworkPyTorch(BaseForecaster, ABC): + """Abstract base class for deep learning networks using torch.nn.""" + + _tags = { + "python_dependencies": "torch", + "y_inner_mtype": "pd.DataFrame", + "capability:insample": False, + "capability:pred_int:insample": False, + "scitype:y": "both", + "ignores-exogeneous-X": True, + } + + def __init__( + self, + num_epochs=16, + batch_size=8, + in_channels=1, + individual=False, + criterion_kwargs=None, + optimizer=None, + optimizer_kwargs=None, + lr=0.001, + ): + self.num_epochs = num_epochs + self.batch_size = batch_size + self.in_channels = in_channels + self.individual = individual + self.criterion_kwargs = criterion_kwargs + self.optimizer = optimizer + self.optimizer_kwargs = optimizer_kwargs + self.lr = lr + + super().__init__() + + def _fit(self, y, fh, X=None): + """Fit the network. + + Changes to state: + writes to self._network.state_dict + + Parameters + ---------- + X : iterable-style or map-style dataset + see (https://pytorch.org/docs/stable/data.html) for more information + """ + from sktime.forecasting.base import ForecastingHorizon + + # save fh and y for prediction later + if fh.is_relative: + self._fh = fh + else: + fh = fh.to_relative(self.cutoff) + self._fh = fh + + self._y = y + + if type(fh) is ForecastingHorizon: + self.network = self._build_network(fh._values[-1]) + else: + self.network = self._build_network(fh) + + if self.criterion: + if self.criterion in self.criterions.keys(): + if self.criterion_kwargs: + self._criterion = self.criterions[self.criterion]( + **self.criterion_kwargs + ) + else: + self._criterion = self.criterions[self.criterion]() + else: + raise TypeError( + f"Please pass one of {self.criterions.keys()} for `criterion`." + ) + else: + # default criterion + self._criterion = torch.nn.MSELoss() + + if self.optimizer: + if self.optimizer in self.optimizers.keys(): + if self.optimizer_kwargs: + self._optimizer = self.optimizers[self.optimizer]( + self.network.parameters(), lr=self.lr, **self.optimizer_kwargs + ) + else: + self._optimizer = self.optimizers[self.optimizer]( + self.network.parameters(), lr=self.lr + ) + else: + raise TypeError( + f"Please pass one of {self.optimizers.keys()} for `optimizer`." + ) + else: + # default optimizer + self._optimizer = torch.optim.Adam(self.network.parameters(), lr=self.lr) + + dataloader = self.build_pytorch_train_dataloader(y) + self.network.train() + + for _ in range(self.num_epochs): + for x, y in dataloader: + y_pred = self.network(x) + loss = self._criterion(y_pred, y) + self._optimizer.zero_grad() + loss.backward() + self._optimizer.step() + + def _predict(self, X=None, fh=None): + """Predict with fitted model.""" + from torch import cat + + if fh is None: + fh = self._fh + + if max(fh._values) > self.network.pred_len or min(fh._values) < 0: + raise ValueError( + f"fh of {fh} passed to {self.__class__.__name__} is not " + "within `pred_len`. Please use a fh that aligns with the `pred_len` of " + "the forecaster." + ) + + if X is None: + dataloader = self.build_pytorch_pred_dataloader(self._y, fh) + else: + dataloader = self.build_pytorch_pred_dataloader(X, fh) + + y_pred = [] + for x, _ in dataloader: + y_pred.append(self.network(x).detach()) + y_pred = cat(y_pred, dim=0).view(-1, y_pred[0].shape[-1]).numpy() + y_pred = y_pred[fh._values.values - 1] + y_pred = pd.DataFrame( + y_pred, columns=self._y.columns, index=fh.to_absolute_index(self.cutoff) + ) + + return y_pred + + def build_pytorch_train_dataloader(self, y): + """Build PyTorch DataLoader for training.""" + from torch.utils.data import DataLoader + + if self.custom_dataset_train: + if hasattr(self.custom_dataset_train, "build_dataset") and callable( + self.custom_dataset_train.build_dataset + ): + self.custom_dataset_train.build_dataset(y) + dataset = self.custom_dataset_train + else: + raise NotImplementedError( + "Custom Dataset `build_dataset` method is not available. Please " + f"refer to the {self.__class__.__name__}.build_dataset " + "documentation." + ) + else: + dataset = PyTorchTrainDataset( + y=y, + seq_len=self.network.seq_len, + fh=self._fh._values[-1], + ) + + return DataLoader( + dataset, + self.batch_size, + ) + + def build_pytorch_pred_dataloader(self, y, fh): + """Build PyTorch DataLoader for prediction.""" + from torch.utils.data import DataLoader + + if self.custom_dataset_pred: + if hasattr(self.custom_dataset_pred, "build_dataset") and callable( + self.custom_dataset_pred.build_dataset + ): + self.custom_dataset_train.build_dataset(y) + dataset = self.custom_dataset_train + else: + raise NotImplementedError( + "Custom Dataset `build_dataset` method is not available. Please" + f"refer to the {self.__class__.__name__}.build_dataset" + "documentation." + ) + else: + dataset = PyTorchPredDataset( + y=y[-self.network.seq_len :], + seq_len=self.network.seq_len, + ) + + return DataLoader( + dataset, + self.batch_size, + ) + + def get_y_true(self, y): + """Get y_true values for validation.""" + dataloader = self.build_pytorch_pred_dataloader(y) + y_true = [y.flatten().numpy() for _, y in dataloader] + return np.concatenate(y_true, axis=0) + + +class PyTorchTrainDataset: + """Dataset for use in sktime deep learning forecasters.""" + + def __init__(self, y, seq_len, fh): + self.y = y.values + self.seq_len = seq_len + self.fh = fh + + def __len__(self): + """Return length of dataset.""" + return len(self.y) - self.seq_len - self.fh + 1 + + def __getitem__(self, i): + """Return data point.""" + from torch import from_numpy, tensor + + return ( + tensor(self.y[i : i + self.seq_len]).float(), + from_numpy(self.y[i + self.seq_len : i + self.seq_len + self.fh]).float(), + ) + + +class PyTorchPredDataset: + """Dataset for use in sktime deep learning forecasters.""" + + def __init__(self, y, seq_len): + self.y = y.values + self.seq_len = seq_len + + def __len__(self): + """Return length of dataset.""" + return 1 + + def __getitem__(self, i): + """Return data point.""" + from torch import from_numpy, tensor + + return ( + tensor(self.y[i : i + self.seq_len]).float(), + from_numpy(self.y[i + self.seq_len : i + self.seq_len]).float(), + ) diff --git a/sktime/networks/cntc.py b/sktime/networks/cntc.py new file mode 100644 index 00000000000..d94ed71ec58 --- /dev/null +++ b/sktime/networks/cntc.py @@ -0,0 +1,193 @@ +"""a combination of CCNN and CLSTM as feature extractors.""" + +__author__ = ["James-Large", "Withington", "TonyBagnall", "AurumnPegasus"] + +from sktime.networks.base import BaseDeepNetwork +from sktime.utils.validation._dependencies import ( + _check_dl_dependencies, + _check_soft_dependencies, +) + + +class CNTCNetwork(BaseDeepNetwork): + """Combining contextual neural networks for time series classification. + + Adapted from the implementation used in [1] + + Parameters + ---------- + kernel_size : int, default = 7 + specifying the length of the 1D convolution window + avg_pool_size : int, default = 3 + size of the average pooling windows + n_conv_layers : int, default = 2 + the number of convolutional plus average pooling layers + filter_sizes : array of int, shape = (nb_conv_layers) + activation : string, default = sigmoid + keras activation function + random_state : int, default = 0 + seed to any needed random actions + + Notes + ----- + Adapted from the implementation from Fullah et. al + https://github.com/AmaduFullah/CNTC_MODEL/blob/master/cntc.ipynb + + References + ---------- + .. [1] Network originally defined in: + @article{FULLAHKAMARA202057, + title = {Combining contextual neural networks for time series classification}, + journal = {Neurocomputing}, + volume = {384}, + pages = {57-66}, + year = {2020}, + issn = {0925-2312}, + doi = {https://doi.org/10.1016/j.neucom.2019.10.113}, + url = {https://www.sciencedirect.com/science/article/pii/S0925231219316364}, + author = {Amadu {Fullah Kamara} and Enhong Chen and Qi Liu and Zhen Pan}, + keywords = {Time series classification, Contextual convolutional neural + networks, Contextual long short-term memory, Attention, Multilayer + perceptron}, + } + """ + + _tags = { + "authors": ["James-Large", "Withington", "TonyBagnall", "AurumnPegasus"], + "maintainers": ["James-Large", "Withington", "AurumnPegasus"], + "python_dependencies": ["tensorflow", "keras-self-attention"], + } + + def __init__( + self, + random_state=0, + rnn_layer=64, + filter_sizes=(16, 8), + kernel_sizes=(1, 1), + lstm_size=8, + dense_size=64, + ): + _check_soft_dependencies( + "keras-self-attention", + package_import_alias={"keras-self-attention": "keras_self_attention"}, + severity="error", + ) + _check_dl_dependencies(severity="error") + + self.random_state = random_state + self.rnn_layer = rnn_layer + self.filter_sizes = filter_sizes + self.kernel_sizes = kernel_sizes + self.lstm_size = lstm_size + self.dense_size = dense_size + + super().__init__() + + def build_network(self, input_shape, **kwargs): + """Construct a network and return its input and output layers. + + Arguments + --------- + input_shape: tuple + The shape of the data fed into the input layer, should be (m,d) + + Returns + ------- + input_layer: a keras layer + output_layer: a keras layer + """ + from keras_self_attention import SeqSelfAttention + from tensorflow import keras + + input_layers = [] + + # CNN Arm + input_layers.append(keras.layers.Input(input_shape)) + input_layers.append(keras.layers.Input(input_shape)) + self.dropout = 0.2 + + conv1 = keras.layers.Conv1D( + self.filter_sizes[0], + self.kernel_sizes[0], + activation="relu", + use_bias=True, + kernel_initializer="glorot_uniform", + )(input_layers[0]) + conv1 = keras.layers.BatchNormalization()(conv1) + conv1 = keras.layers.Dropout(self.dropout)(conv1) + conv1 = keras.layers.Dense( + input_shape[1], + input_shape=(input_shape[0], keras.backend.int_shape(conv1)[2]), + )(conv1) + + # RNN for CNN Arm (CCNN) + rnn1 = keras.layers.SimpleRNN( + self.rnn_layer * input_shape[1], + activation="relu", + use_bias=True, + kernel_initializer="glorot_uniform", + )(input_layers[1]) + rnn1 = keras.layers.BatchNormalization()(rnn1) + rnn1 = keras.layers.Dropout(self.dropout)(rnn1) + rnn1 = keras.layers.Reshape((64, input_shape[1]))(rnn1) + + # Combining CNN and RNN + conc1 = keras.layers.Concatenate( + axis=-2, name="contextual_convolutional_layer1" + )([conv1, rnn1]) + + # Final CNN for C-CNN (WHY) + conv2 = keras.layers.Conv1D( + self.filter_sizes[1], + self.kernel_sizes[1], + activation="relu", + kernel_initializer="glorot_uniform", + name="standard_cnn_layer", + )(conc1) + conv2 = keras.layers.Dense( + input_shape[1], + input_shape=(input_shape[0], keras.backend.int_shape(conv2)[2]), + )(conv2) + conv2 = keras.layers.BatchNormalization()(conv2) + conv2 = keras.layers.Dropout(0.1)(conv2) + + # CLSTM Arm + input_layers.append(keras.layers.Input(input_shape)) + lstm1 = keras.layers.LSTM( + self.lstm_size * input_shape[1], + return_sequences=False, + kernel_initializer="glorot_uniform", + activation="relu", + )(input_layers[2]) + lstm1 = keras.layers.Reshape((self.lstm_size, input_shape[1]))(lstm1) + lstm1 = keras.layers.Dropout(self.dropout)(lstm1) + merge = keras.layers.Concatenate( + axis=-2, name="contextual_convolutional_layer2" + )([conv2, lstm1]) + + # Output calculation based on combination + avg = keras.layers.MaxPooling1D(pool_size=1, strides=None, padding="valid")( + merge + ) + avg = keras.layers.Dropout(0.1)(avg) + + # Adding self attention + att = SeqSelfAttention( + attention_width=10, + attention_activation="sigmoid", + name="Attention", + attention_type="multiplicative", + )(avg) + att = keras.layers.Dropout(0.1)(att) + + # Adding ouutput MLP Layer + mlp1 = keras.layers.Dense( + self.dense_size, kernel_initializer="glorot_uniform", activation="relu" + )(att) + mlp1 = keras.layers.Dropout(0.1)(mlp1) + mlp2 = keras.layers.Dense( + self.dense_size, kernel_initializer="glorot_uniform", activation="relu" + )(mlp1) + mlp2 = keras.layers.Dropout(0.1)(mlp2) + flat = keras.layers.Flatten()(mlp2) + return input_layers, flat diff --git a/sktime/networks/ltsf/__init__.py b/sktime/networks/ltsf/__init__.py new file mode 100644 index 00000000000..403b83db0df --- /dev/null +++ b/sktime/networks/ltsf/__init__.py @@ -0,0 +1 @@ +"""Init LTSF.""" diff --git a/sktime/networks/ltsf/_ltsf.py b/sktime/networks/ltsf/_ltsf.py new file mode 100644 index 00000000000..43b2a0fb332 --- /dev/null +++ b/sktime/networks/ltsf/_ltsf.py @@ -0,0 +1,316 @@ +"""Deep Learning Forecasters using LTSF-Linear Models.""" +from sktime.utils.validation._dependencies import _check_soft_dependencies + +if _check_soft_dependencies("torch", severity="none"): + import torch.nn as nn + + nn_module = nn.Module +else: + + class nn_module: + """Dummy class if torch is unavailable.""" + + pass + + +class LTSFLinearNetwork: + """LTSF-Linear Forecaster. + + Implementation of the Long-Term Short-Term Feature (LTSF) linear forecaster, + aka LTSF-Linear, by Zeng et al [1]_. + + Core logic is directly copied from the cure-lab LTSF-Linear implementation [2]_, + which is unfortunately not available as a package. + + Parameters + ---------- + seq_len : int + length of input sequence + pred_len : int + length of prediction (forecast horizon) + in_channels : int, default=None + number of input channels passed to network + individual : bool, default=False + boolean flag that controls whether the network treats each channel individually" + "or applies a single linear layer across all channels. If individual=True, the" + "a separate linear layer is created for each input channel. If" + "individual=False, a single shared linear layer is used for all channels." + + References + ---------- + .. [1] Zeng A, Chen M, Zhang L, Xu Q. 2023. + Are transformers effective for time series forecasting? + Proceedings of the AAAI conference on artificial intelligence 2023 + (Vol. 37, No. 9, pp. 11121-11128). + .. [2] https://github.com/cure-lab/LTSF-Linear + """ + + class _LTSFLinearNetwork(nn_module): + def __init__( + self, + seq_len, + pred_len, + in_channels, + individual, + ): + super().__init__() + + self.seq_len = seq_len + self.pred_len = pred_len + self.in_channels = in_channels + self.individual = individual + + if self.individual: + self.Linear = nn.ModuleList() + for _ in range(self.in_channels): + self.Linear.append(nn.Linear(self.seq_len, self.pred_len)) + else: + self.Linear = nn.Linear(self.seq_len, self.pred_len) + + def forward(self, x): + """Forward pass for LSTF-Linear Network. + + Parameters + ---------- + x : torch.Tensor + torch.Tensor of shape [Batch, Input Sequence Length, Channel] + + Returns + ------- + x : torch.Tensor + output of Linear Model. x.shape = [Batch, Output Length, Channel] + """ + from torch import zeros + + if self.individual: + output = zeros([x.size(0), self.pred_len, x.size(2)], dtype=x.dtype).to( + x.device + ) + for i in range(self.in_channels): + output[:, :, i] = self.Linear[i](x[:, :, i]) + x = output + else: + x = self.Linear(x.permute(0, 2, 1)).permute(0, 2, 1) + return x # [Batch, Output Length, Channel] + + def __init__(self, seq_len, pred_len, in_channels=1, individual=False): + self.seq_len = seq_len + self.pred_len = pred_len + self.in_channels = in_channels + self.individual = individual + + def _build(self): + return self._LTSFLinearNetwork( + self.seq_len, self.pred_len, self.in_channels, self.individual + ) + + +class LTSFDLinearNetwork: + """LTSF-DLinear Forecaster. + + Implementation of the Long-Term Short-Term Feature (LTSF) decomposition linear + forecaster, aka LTSF-DLinear, by Zeng et al [1]_. + + Core logic is directly copied from the cure-lab LTSF-Linear implementation [2]_, + which is unfortunately not available as a package. + + Parameters + ---------- + seq_len : int + length of input sequence + pred_len : int + length of prediction (forecast horizon) + in_channels : int, default=None + number of input channels passed to network + individual : bool, default=False + boolean flag that controls whether the network treats each channel individually" + "or applies a single linear layer across all channels. If individual=True, the" + "a separate linear layer is created for each input channel. If" + "individual=False, a single shared linear layer is used for all channels." + + References + ---------- + .. [1] Zeng A, Chen M, Zhang L, Xu Q. 2023. + Are transformers effective for time series forecasting? + Proceedings of the AAAI conference on artificial intelligence 2023 + (Vol. 37, No. 9, pp. 11121-11128). + .. [2] https://github.com/cure-lab/LTSF-Linear + """ + + class _LTSFDLinearNetwork(nn_module): + def __init__( + self, + seq_len, + pred_len, + in_channels, + individual, + ): + from sktime.networks.ltsf.layers import SeriesDecomposer + + super().__init__() + self.seq_len = seq_len + self.pred_len = pred_len + + # Decompsition Kernel Size + kernel_size = 25 + self.decompsition = SeriesDecomposer(kernel_size)._build() + self.individual = individual + self.in_channels = in_channels + + if self.individual: + self.Linear_Seasonal = nn.ModuleList() + self.Linear_Trend = nn.ModuleList() + for _ in range(self.in_channels): + self.Linear_Seasonal.append(nn.Linear(self.seq_len, self.pred_len)) + self.Linear_Trend.append(nn.Linear(self.seq_len, self.pred_len)) + else: + self.Linear_Seasonal = nn.Linear(self.seq_len, self.pred_len) + self.Linear_Trend = nn.Linear(self.seq_len, self.pred_len) + + def forward(self, x): + """Forward pass for LSTF-DLinear Network. + + Parameters + ---------- + x : torch.Tensor + torch.Tensor of shape [Batch, Input Sequence Length, Channel] + + Returns + ------- + x : torch.Tensor + output of Linear Model. x.shape = [Batch, Output Length, Channel] + """ + from torch import zeros + + # x: [Batch, Input length, Channel] + seasonal_init, trend_init = self.decompsition(x) + seasonal_init, trend_init = seasonal_init.permute( + 0, 2, 1 + ), trend_init.permute(0, 2, 1) + if self.individual: + seasonal_output = zeros( + [seasonal_init.size(0), seasonal_init.size(1), self.pred_len], + dtype=seasonal_init.dtype, + ).to(seasonal_init.device) + trend_output = zeros( + [trend_init.size(0), trend_init.size(1), self.pred_len], + dtype=trend_init.dtype, + ).to(trend_init.device) + for i in range(self.in_channels): + seasonal_output[:, i, :] = self.Linear_Seasonal[i]( + seasonal_init[:, i, :] + ) + trend_output[:, i, :] = self.Linear_Trend[i](trend_init[:, i, :]) + else: + seasonal_output = self.Linear_Seasonal(seasonal_init) + trend_output = self.Linear_Trend(trend_init) + + x = seasonal_output + trend_output + return x.permute(0, 2, 1) # to [Batch, Output length, Channel] + + def __init__(self, seq_len, pred_len, in_channels=1, individual=False): + self.seq_len = seq_len + self.pred_len = pred_len + self.in_channels = in_channels + self.individual = individual + + def _build(self): + return self._LTSFDLinearNetwork( + self.seq_len, self.pred_len, self.in_channels, self.individual + ) + + +class LTSFNLinearNetwork: + """LTSF-NLinear Forecaster. + + Implementation of the Long-Term Short-Term Feature (LTSF) normalization linear + forecaster, aka LTSF-NLinear, by Zeng et al [1]_. + + Core logic is directly copied from the cure-lab LTSF-Linear implementation [2]_, + which is unfortunately not available as a package. + + Parameters + ---------- + seq_len : int + length of input sequence + pred_len : int + length of prediction (forecast horizon) + in_channels : int, default=None + number of input channels passed to network + individual : bool, default=False + boolean flag that controls whether the network treats each channel individually" + "or applies a single linear layer across all channels. If individual=True, the" + "a separate linear layer is created for each input channel. If" + "individual=False, a single shared linear layer is used for all channels." + + References + ---------- + .. [1] Zeng A, Chen M, Zhang L, Xu Q. 2023. + Are transformers effective for time series forecasting? + Proceedings of the AAAI conference on artificial intelligence 2023 + (Vol. 37, No. 9, pp. 11121-11128). + .. [2] https://github.com/cure-lab/LTSF-Linear + """ + + class _LTSFNLinearNetwork(nn_module): + def __init__( + self, + seq_len, + pred_len, + in_channels, + individual, + ): + super().__init__() + self.seq_len = seq_len + self.pred_len = pred_len + + self.in_channels = in_channels + self.individual = individual + + if self.individual: + self.Linear = nn.ModuleList() + for _ in range(self.in_channels): + self.Linear.append(nn.Linear(self.seq_len, self.pred_len)) + else: + self.Linear = nn.Linear(self.seq_len, self.pred_len) + + def forward(self, x): + """Forward pass for LSTF-NLinear Network. + + Parameters + ---------- + x : torch.Tensor + torch.Tensor of shape [Batch, Input Sequence Length, Channel] + + Returns + ------- + x : torch.Tensor + output of Linear Model. x.shape = [Batch, Output Length, Channel] + """ + from torch import zeros + + # x: [Batch, Input length, Channel] + seq_last = x[:, -1:, :].detach() + x = x - seq_last + if self.individual: + output = zeros([x.size(0), self.pred_len, x.size(2)], dtype=x.dtype).to( + x.device + ) + for i in range(self.in_channels): + output[:, :, i] = self.Linear[i](x[:, :, i]) + x = output + else: + x = self.Linear(x.permute(0, 2, 1)).permute(0, 2, 1) + x = x + seq_last + return x # [Batch, Output length, Channel] + + def __init__(self, seq_len, pred_len, in_channels=1, individual=False): + self.seq_len = seq_len + self.pred_len = pred_len + self.in_channels = in_channels + self.individual = individual + + def _build(self): + return self._LTSFNLinearNetwork( + self.seq_len, self.pred_len, self.in_channels, self.individual + ) diff --git a/sktime/networks/ltsf/layers.py b/sktime/networks/ltsf/layers.py new file mode 100644 index 00000000000..dd6d87769dd --- /dev/null +++ b/sktime/networks/ltsf/layers.py @@ -0,0 +1,577 @@ +"""Extra LTSF-Linear Model Layers.""" +from sktime.utils.validation._dependencies import _check_soft_dependencies + +if _check_soft_dependencies("torch", severity="none"): + import torch.nn as nn + + nn_module = nn.Module +else: + + class nn_module: + """Dummy class if torch is unavailable.""" + + pass + + +import math + + +class SeriesDecomposer: + """Series decomposition block.""" + + def __init__(self, kernel_size): + self.kernel_size = kernel_size + + def _build(self): + return self._SeriesDecomposer(self.kernel_size) + + class _SeriesDecomposer(nn_module): + """Series decomposition block.""" + + def __init__(self, kernel_size): + super().__init__() + self.moving_avg = MovingAverage(kernel_size, stride=1)._build() + + def forward(self, x): + moving_mean = self.moving_avg(x) + res = x - moving_mean + return res, moving_mean + + +class MovingAverage: + """Moving average block to highlight the trend of time series.""" + + def __init__(self, kernel_size, stride): + self.kernel_size = kernel_size + self.stride = stride + + def _build(self): + return self._MovingAverage(self.kernel_size, self.stride) + + class _MovingAverage(nn_module): + """Moving average block to highlight the trend of time series.""" + + def __init__(self, kernel_size, stride): + super().__init__() + self.kernel_size = kernel_size + self.avg = nn.AvgPool1d(kernel_size=kernel_size, stride=stride, padding=0) + + def forward(self, x): + from torch import cat + + # padding on the both ends of time series + front = x[:, 0:1, :].repeat(1, (self.kernel_size - 1) // 2, 1) + end = x[:, -1:, :].repeat(1, (self.kernel_size - 1) // 2, 1) + x = cat([front, x, end], dim=1) + x = self.avg(x.permute(0, 2, 1)) + x = x.permute(0, 2, 1) + return x + + +class AutoCorrelation: + """AutoCorrelation layer. + + AutoCorrelation Mechanism with the following two phases: + (1) period-based dependencies discovery + (2) time delay aggregation + This block can replace the self-attention family mechanism seamlessly. + """ + + class _AutoCorrelation(nn_module): + def __init__( + self, + mask_flag=True, + factor=1, + scale=None, + attention_dropout=0.1, + output_attention=False, + ): + super().__init__() + self.factor = factor + self.scale = scale + self.mask_flag = mask_flag + self.output_attention = output_attention + self.dropout = nn.Dropout(attention_dropout) + + def time_delay_agg_training(self, values, corr): + """Normalize. + + SpeedUp version of Autocorrelation (a batch-normalization style design) + This is for the training phase. + """ + from torch import mean, roll, softmax, stack, topk, zeros_like + + head = values.shape[1] + channel = values.shape[2] + length = values.shape[3] + # find top k + top_k = int(self.factor * math.log(length)) + mean_value = mean(mean(corr, dim=1), dim=1) + index = topk(mean(mean_value, dim=0), top_k, dim=-1)[1] + weights = stack([mean_value[:, index[i]] for i in range(top_k)], dim=-1) + # update corr + tmp_corr = softmax(weights, dim=-1) + # aggregation + tmp_values = values + delays_agg = zeros_like(values).float() + for i in range(top_k): + pattern = roll(tmp_values, -int(index[i]), -1) + delays_agg = delays_agg + pattern * ( + tmp_corr[:, i] + .unsqueeze(1) + .unsqueeze(1) + .unsqueeze(1) + .repeat(1, head, channel, length) + ) + return delays_agg + + def time_delay_agg_inference(self, values, corr): + """Normalize data. + + SpeedUp version of Autocorrelation (a batch-normalization style design) + This is for the inference phase. + """ + from torch import arange, gather, mean, softmax, topk, zeros_like + + batch = values.shape[0] + head = values.shape[1] + channel = values.shape[2] + length = values.shape[3] + # index init + init_index = ( + arange(length) + .unsqueeze(0) + .unsqueeze(0) + .unsqueeze(0) + .repeat(batch, head, channel, 1) + .cuda() + ) + # find top k + top_k = int(self.factor * math.log(length)) + mean_value = mean(mean(corr, dim=1), dim=1) + weights = topk(mean_value, top_k, dim=-1)[0] + delay = topk(mean_value, top_k, dim=-1)[1] + # update corr + tmp_corr = softmax(weights, dim=-1) + # aggregation + tmp_values = values.repeat(1, 1, 1, 2) + delays_agg = zeros_like(values).float() + for i in range(top_k): + tmp_delay = init_index + ( + delay[:, i] + .unsqueeze(1) + .unsqueeze(1) + .unsqueeze(1) + .repeat(1, head, channel, length) + ) + pattern = gather(tmp_values, dim=-1, index=tmp_delay) + delays_agg = delays_agg + pattern * ( + tmp_corr[:, i] + .unsqueeze(1) + .unsqueeze(1) + .unsqueeze(1) + .repeat(1, head, channel, length) + ) + return delays_agg + + def time_delay_agg_full(self, values, corr): + """Normalize Data. + + Standard version of Autocorrelation + """ + from torch import arange, gather, softmax, topk, zeros_like + + batch = values.shape[0] + head = values.shape[1] + channel = values.shape[2] + length = values.shape[3] + # index init + init_index = ( + arange(length) + .unsqueeze(0) + .unsqueeze(0) + .unsqueeze(0) + .repeat(batch, head, channel, 1) + .cuda() + ) + # find top k + top_k = int(self.factor * math.log(length)) + weights = topk(corr, top_k, dim=-1)[0] + delay = topk(corr, top_k, dim=-1)[1] + # update corr + tmp_corr = softmax(weights, dim=-1) + # aggregation + tmp_values = values.repeat(1, 1, 1, 2) + delays_agg = zeros_like(values).float() + for i in range(top_k): + tmp_delay = init_index + delay[..., i].unsqueeze(-1) + pattern = gather(tmp_values, dim=-1, index=tmp_delay) + delays_agg = delays_agg + pattern * (tmp_corr[..., i].unsqueeze(-1)) + return delays_agg + + def forward(self, queries, keys, values, attn_mask): + """Call model.""" + from torch import cat, conj, fft, zeros_like + + B, L, H, E = queries.shape + _, S, _, D = values.shape + if L > S: + zeros = zeros_like(queries[:, : (L - S), :]).float() + values = cat([values, zeros], dim=1) + keys = cat([keys, zeros], dim=1) + else: + values = values[:, :L, :, :] + keys = keys[:, :L, :, :] + + # period-based dependencies + q_fft = fft.rfft(queries.permute(0, 2, 3, 1).contiguous(), dim=-1) + k_fft = fft.rfft(keys.permute(0, 2, 3, 1).contiguous(), dim=-1) + res = q_fft * conj(k_fft) + corr = fft.irfft(res, dim=-1) + + # time delay agg + if self.training: + V = self.time_delay_agg_training( + values.permute(0, 2, 3, 1).contiguous(), corr + ).permute(0, 3, 1, 2) + else: + V = self.time_delay_agg_inference( + values.permute(0, 2, 3, 1).contiguous(), corr + ).permute(0, 3, 1, 2) + + if self.output_attention: + return (V.contiguous(), corr.permute(0, 3, 1, 2)) + else: + return (V.contiguous(), None) + + def __init__( + self, + mask_flag=True, + factor=1, + scale=None, + attention_dropout=0.1, + output_attention=False, + ): + self.mask_flag = mask_flag + self.factor = factor + self.scale = scale + self.attention_dropout = attention_dropout + self.output_attention = output_attention + + def _build(self): + return self._AutoCorrelation( + self.mask_flag, + self.factor, + self.scale, + self.attention_dropout, + self.output_attention, + ) + + +class AutoCorrelationLayer: + """Layer for AutoCorrelation.""" + + class _AutoCorrelationLayer(nn_module): + def __init__(self, correlation, d_model, n_heads, d_keys=None, d_values=None): + super().__init__() + + d_keys = d_keys or (d_model // n_heads) + d_values = d_values or (d_model // n_heads) + + self.inner_correlation = correlation + self.query_projection = nn.Linear(d_model, d_keys * n_heads) + self.key_projection = nn.Linear(d_model, d_keys * n_heads) + self.value_projection = nn.Linear(d_model, d_values * n_heads) + self.out_projection = nn.Linear(d_values * n_heads, d_model) + self.n_heads = n_heads + + def forward(self, queries, keys, values, attn_mask): + """Call layer.""" + B, L, _ = queries.shape + _, S, _ = keys.shape + H = self.n_heads + + queries = self.query_projection(queries).view(B, L, H, -1) + keys = self.key_projection(keys).view(B, S, H, -1) + values = self.value_projection(values).view(B, S, H, -1) + + out, attn = self.inner_correlation(queries, keys, values, attn_mask) + out = out.view(B, L, -1) + + return self.out_projection(out), attn + + def __init__(self, correlation, d_model, n_heads, d_keys=None, d_values=None): + self.correlation = correlation + self.d_model = d_model + self.n_heads = n_heads + self.d_keys = d_keys + self.d_values = d_values + + def _build(self): + return self._AutoCorrelationLayer( + self.correlation, self.d_model, self.n_heads, self.d_keys, self.d_values + ) + + +class LTSFLayerNorm: + """LayerNorm.""" + + class _LTSFLayernorm(nn_module): + def __init__(self, channels): + super().__init__() + self.layernorm = nn.LayerNorm(channels) + + def forward(self, x): + """Call layer.""" + from torch import mean + + x_hat = self.layernorm(x) + bias = mean(x_hat, dim=1).unsqueeze(1).repeat(1, x.shape[1], 1) + return x_hat - bias + + def __init__(self, channels): + self.channels = channels + + def _build(self): + return self._LTSFLayernorm(self.channels) + + +class EncoderLayer: + """Encoder Layer. + + Autoformer encoder layer with the progressive decomposition architecture + """ + + class _EncoderLayer(nn_module): + def __init__( + self, + attention, + d_model, + d_ff=None, + moving_avg=25, + dropout=0.1, + activation="relu", + ): + super().__init__() + from torch.nn.functional import gelu, relu + + d_ff = d_ff or 4 * d_model + self.attention = attention + self.conv1 = nn.Conv1d( + in_channels=d_model, out_channels=d_ff, kernel_size=1, bias=False + ) + self.conv2 = nn.Conv1d( + in_channels=d_ff, out_channels=d_model, kernel_size=1, bias=False + ) + self.decomp1 = SeriesDecomposer(moving_avg) + self.decomp2 = SeriesDecomposer(moving_avg) + self.dropout = nn.Dropout(dropout) + self.activation = relu if activation == "relu" else gelu + + def forward(self, x, attn_mask=None): + new_x, attn = self.attention(x, x, x, attn_mask=attn_mask) + x = x + self.dropout(new_x) + x, _ = self.decomp1(x) + y = x + y = self.dropout(self.activation(self.conv1(y.transpose(-1, 1)))) + y = self.dropout(self.conv2(y).transpose(-1, 1)) + res, _ = self.decomp2(x + y) + return res, attn + + def __init__( + self, + attention, + d_model, + d_ff=None, + moving_avg=25, + dropout=0.1, + activation="relu", + ): + self.attention = attention + self.d_model = d_model + self.d_ff = d_ff + self.moving_avg = moving_avg + self.dropout = dropout + self.activation = activation + + def _build(self): + return self._EncoderLayer( + self.attention, + self.d_model, + self.d_ff, + self.moving_avg, + self.dropout, + self.activation, + ) + + +class Encoder: + """Autoformer encoder.""" + + class _Encoder(nn_module): + def __init__(self, attn_layers, conv_layers=None, norm_layer=None): + super(Encoder, self).__init__() + self.attn_layers = nn.ModuleList(attn_layers) + self.conv_layers = ( + nn.ModuleList(conv_layers) if conv_layers is not None else None + ) + self.norm = norm_layer + + def forward(self, x, attn_mask=None): + """Call encoder.""" + attns = [] + if self.conv_layers is not None: + for attn_layer, conv_layer in zip(self.attn_layers, self.conv_layers): + x, attn = attn_layer(x, attn_mask=attn_mask) + x = conv_layer(x) + attns.append(attn) + x, attn = self.attn_layers[-1](x) + attns.append(attn) + else: + for attn_layer in self.attn_layers: + x, attn = attn_layer(x, attn_mask=attn_mask) + attns.append(attn) + + if self.norm is not None: + x = self.norm(x) + + return x, attns + + def __init__(self, attn_layers, conv_layers=None, norm_layer=None): + self.attn_layers = attn_layers + self.conv_layers = conv_layers + self.norm_layer = norm_layer + + def _build(self): + return self._Encoder(self.attn_layers, self.conv_layers, self.norm_layer) + + +class DecoderLayer: + """Decoder Layer. + + Autoformer decoder layer with the progressive decomposition architecture + """ + + class _DecoderLayer(nn_module): + def __init__( + self, + self_attention, + cross_attention, + d_model, + c_out, + d_ff=None, + moving_avg=25, + dropout=0.1, + activation="relu", + ): + super().__init__() + from torch.nn.functional import gelu, relu + + d_ff = d_ff or 4 * d_model + self.self_attention = self_attention + self.cross_attention = cross_attention + self.conv1 = nn.Conv1d( + in_channels=d_model, out_channels=d_ff, kernel_size=1, bias=False + ) + self.conv2 = nn.Conv1d( + in_channels=d_ff, out_channels=d_model, kernel_size=1, bias=False + ) + self.decomp1 = SeriesDecomposer(moving_avg) + self.decomp2 = SeriesDecomposer(moving_avg) + self.decomp3 = SeriesDecomposer(moving_avg) + self.dropout = nn.Dropout(dropout) + self.projection = nn.Conv1d( + in_channels=d_model, + out_channels=c_out, + kernel_size=3, + stride=1, + padding=1, + padding_mode="circular", + bias=False, + ) + self.activation = relu if activation == "relu" else gelu + + def forward(self, x, cross, x_mask=None, cross_mask=None): + """Call decoder.""" + x = x + self.dropout(self.self_attention(x, x, x, attn_mask=x_mask)[0]) + x, trend1 = self.decomp1(x) + x = x + self.dropout( + self.cross_attention(x, cross, cross, attn_mask=cross_mask)[0] + ) + x, trend2 = self.decomp2(x) + y = x + y = self.dropout(self.activation(self.conv1(y.transpose(-1, 1)))) + y = self.dropout(self.conv2(y).transpose(-1, 1)) + x, trend3 = self.decomp3(x + y) + + residual_trend = trend1 + trend2 + trend3 + residual_trend = self.projection(residual_trend.permute(0, 2, 1)).transpose( + 1, 2 + ) + return x, residual_trend + + def __init__( + self, + self_attention, + cross_attention, + d_model, + c_out, + d_ff=None, + moving_avg=25, + dropout=0.1, + activation="relu", + ): + self.self_attention = self_attention + self.cross_attention = cross_attention + self.d_model = d_model + self.c_out = c_out + self.d_ff = d_ff + self.moving_avg = moving_avg + self.dropout = dropout + self.activation = activation + + def _build(self): + return self._DecoderLayer( + self.self_attention, + self.cross_attention, + self.d_model, + self.c_out, + self.d_ff, + self.moving_avg, + self.dropout, + self.activation, + ) + + +class Decoder: + """Autoformer decoder.""" + + class _Decoder(nn_module): + def __init__(self, layers, norm_layer=None, projection=None): + super().__init__() + self.layers = nn.ModuleList(layers) + self.norm = norm_layer + self.projection = projection + + def forward(self, x, cross, x_mask=None, cross_mask=None, trend=None): + """Call decoder.""" + for layer in self.layers: + x, residual_trend = layer( + x, cross, x_mask=x_mask, cross_mask=cross_mask + ) + trend = trend + residual_trend + + if self.norm is not None: + x = self.norm(x) + + if self.projection is not None: + x = self.projection(x) + return x, trend + + def __init__(self, layers, norm_layer=None, projection=None): + self.layers = layers + self.norm_layer = norm_layer + self.projection = projection + + def _build(self): + return self._Decoder(self.layers, self.norm_layer, self.projection) diff --git a/sktime/networks/rnn.py b/sktime/networks/rnn.py index 099193e33a5..836bef65072 100644 --- a/sktime/networks/rnn.py +++ b/sktime/networks/rnn.py @@ -1,6 +1,6 @@ """Time Recurrent Neural Network (RNN) (minus the final output layer).""" -__authors__ = ["JamesLarge", "Withington", "TonyBagnall", "achieveordie"] +__authors__ = ["James-Large", "Withington", "TonyBagnall", "achieveordie"] from sktime.networks.base import BaseDeepNetwork diff --git a/sktime/networks/tests/test_all_networks.py b/sktime/networks/tests/test_all_networks.py new file mode 100644 index 00000000000..c11fb889e5c --- /dev/null +++ b/sktime/networks/tests/test_all_networks.py @@ -0,0 +1,33 @@ +"""Unit tests for all neural networks.""" + +from sktime.tests.test_all_estimators import BaseFixtureGenerator, QuickTester + + +class NetworkFixtureGenerator(BaseFixtureGenerator): + """Fixture generator for classifier tests. + + Fixtures parameterized + ---------------------- + estimator_class: estimator inheriting from BaseObject + ranges over estimator classes not excluded by EXCLUDE_ESTIMATORS, EXCLUDED_TESTS + estimator_instance: instance of estimator inheriting from BaseObject + ranges over estimator classes not excluded by EXCLUDE_ESTIMATORS, EXCLUDED_TESTS + instances are generated by create_test_instance class method + scenario: instance of TestScenario + ranges over all scenarios returned by retrieve_scenarios + """ + + # note: this should be separate from TestAllNetworks + # additional fixtures, parameters, etc should be added here + # TestAllNetworks should contain the tests only + + estimator_type_filter = "network" + + +class TestAllNetworks(NetworkFixtureGenerator, QuickTester): + """Module level tests for all sktime neural networks.""" + + def test_dummy(self, estimator_instance): + """Dummy test to act as placeholder.""" + # check if multivariate input raises error for univariate regressors + assert 42 == 42 diff --git a/sktime/param_est/base.py b/sktime/param_est/base.py index d1a430e7603..55a6af4324e 100644 --- a/sktime/param_est/base.py +++ b/sktime/param_est/base.py @@ -60,6 +60,8 @@ class BaseParamFitter(BaseEstimator): "capability:multivariate": False, # can estimator handle multivariate data? "python_version": None, # PEP 440 python version specifier to limit versions "python_dependencies": None, # string or str list of pkg soft dependencies + "authors": "sktime developers", # author(s) of the object + "maintainers": "sktime developers", # current maintainer(s) of the object } def __init__(self): @@ -70,11 +72,46 @@ def __init__(self): super().__init__() _check_estimator_deps(self) + def __mul__(self, other): + """Magic * method, for estimators on the right. + + Overloaded multiplication operation for parameter fitters. + Implemented for ``other`` being: + + * a forecaster, results in ``PluginParamsForecaster`` + * a transformer, results in ``PluginParamsTransformer`` + * otherwise returns `NotImplemented`. + + Parameters + ---------- + other: `sktime` estimator, must be one of the types specified above + otherwise, `NotImplemented` is returned + + Returns + ------- + one of the plugin estimator objects, + concatenation of `self` (first) with `other` (last). + """ + from sktime.forecasting.base import BaseForecaster + from sktime.param_est.plugin import ( + PluginParamsForecaster, + PluginParamsTransformer, + ) + from sktime.transformations.base import BaseTransformer + + if isinstance(other, BaseForecaster): + return PluginParamsForecaster(param_est=self, forecaster=other) + elif isinstance(other, BaseTransformer): + return PluginParamsTransformer(param_est=self, transformer=other) + else: + return NotImplemented + def __rmul__(self, other): """Magic * method, return concatenated ParamFitterPipeline, trafos on left. - Overloaded multiplication operation for classifiers. Implemented for `other` - being a transformer, otherwise returns `NotImplemented`. + Overloaded multiplication operation for parameter fitters. + Implemented for ``other`` being a transformer, + otherwise returns `NotImplemented`. Parameters ---------- @@ -381,23 +418,3 @@ def _update(self, X): # but looping to self.fit for now to avoid interface break return self - - def _get_fitted_params(self): - """Get fitted parameters. - - private _get_fitted_params, called from get_fitted_params - - State required: - Requires state to be "fitted". - - Returns - ------- - fitted_params : dict - """ - # default retrieves all self attributes ending in "_" - # and returns them with keys that have the "_" removed - fitted_params = [attr for attr in dir(self) if attr.endswith("_")] - fitted_params = [x for x in fitted_params if not x.startswith("_")] - fitted_param_dict = {p[:-1]: getattr(self, p) for p in fitted_params} - - return fitted_param_dict diff --git a/sktime/param_est/compose/__init__.py b/sktime/param_est/compose/__init__.py new file mode 100644 index 00000000000..10423cfb2b0 --- /dev/null +++ b/sktime/param_est/compose/__init__.py @@ -0,0 +1,7 @@ +"""Composition involving parameter estimators.""" +# copyright: sktime developers, BSD-3-Clause License (see LICENSE file) +from sktime.param_est.compose._func_fitter import FunctionParamFitter +from sktime.param_est.compose._pipeline import ParamFitterPipeline + +__author__ = ["fkiraly", "tpvasconcelos"] +__all__ = ["ParamFitterPipeline", "FunctionParamFitter"] diff --git a/sktime/param_est/compose/_func_fitter.py b/sktime/param_est/compose/_func_fitter.py new file mode 100644 index 00000000000..32807df4848 --- /dev/null +++ b/sktime/param_est/compose/_func_fitter.py @@ -0,0 +1,164 @@ +"""Implements FunctionParamFitter, a class to create custom parameter fitters.""" +# copyright: sktime developers, BSD-3-Clause License (see LICENSE file) +from sktime.datatypes import ALL_TIME_SERIES_MTYPES +from sktime.param_est.base import BaseParamFitter + +__author__ = ["tpvasconcelos"] +__all__ = ["FunctionParamFitter"] + + +class FunctionParamFitter(BaseParamFitter): + r"""Constructs a parameter fitter from an arbitrary callable. + + A FunctionParamFitter forwards its X argument to a user-defined + function (or callable object) and sets the result of this function + to the ``param`` attribute. This can be useful for stateless + estimators such as simple conditional parameter selectors. + + Note: If a lambda function is used as the ``func``, then the + resulting estimator will not be pickleable. + + Parameters + ---------- + param : str + The name of the parameter to set. + func : callable (X: X_type, **kwargs) -> Any + The callable to use for the parameter estimation. This will be + passed the same arguments as estimator, with args and kwargs + forwarded. + kw_args : dict, default=None + Dictionary of additional keyword arguments to pass to func. + X_type : str, one of "pd.DataFrame, pd.Series, np.ndarray", or list thereof + default = ["pd.DataFrame", "pd.Series", "np.ndarray"] + list of types that func is assumed to allow for X (see signature above) + if X passed to transform/inverse_transform is not on the list, + it will be converted to the first list element before passed to funcs + + See Also + -------- + sktime.param_est.plugin.PluginParamsForecaster : + Plugs parameters from a parameter estimator into a forecaster. + sktime.forecasting.compose.MultiplexForecaster : + MultiplexForecaster for selecting among different models. + + Examples + -------- + This class could be used to construct a parameter estimator that + selects a forecaster based on the input data's length. The + selected forecaster can be stored in the ``selected_forecaster_`` + attribute, which can be then passed down to a + :class:`~sktime.forecasting.compose.MultiplexForecaster` via a + :class:`~sktime.param_est.plugin.PluginParamsForecaster`. + + >>> import numpy as np + >>> param_est = FunctionParamFitter( + ... param="selected_forecaster", + ... func=( + ... lambda X, threshold: "naive-seasonal" + ... if len(X) >= threshold + ... else "naive-last" + ... ), + ... kw_args={"threshold": 7}, + ... ) + >>> param_est.fit(np.asarray([1, 2, 3, 4])) + FunctionParamFitter(...) + >>> param_est.get_fitted_params() + {'selected_forecaster': 'naive-last'} + >>> param_est.fit(np.asarray([1, 2, 3, 4, 5, 6, 7])) + FunctionParamFitter(...) + >>> param_est.get_fitted_params() + {'selected_forecaster': 'naive-seasonal'} + + The full conditional forecaster selection pipeline could look + like this: + + >>> from sktime.forecasting.compose import MultiplexForecaster + >>> from sktime.forecasting.naive import NaiveForecaster + >>> from sktime.param_est.plugin import PluginParamsForecaster + >>> forecaster = PluginParamsForecaster( + ... param_est=param_est, + ... forecaster=MultiplexForecaster( + ... forecasters=[ + ... ("naive-last", NaiveForecaster()), + ... ("naive-seasonal", NaiveForecaster(sp=7)), + ... ] + ... ), + ... ) + >>> forecaster.fit(np.asarray([1, 2, 3, 4])) + PluginParamsForecaster(...) + >>> forecaster.predict(fh=[1,2,3]) + array([[4.], + [4.], + [4.]]) + >>> forecaster.fit(np.asarray([1, 2, 3, 4, 5, 6, 7])) + PluginParamsForecaster(...) + >>> forecaster.predict(fh=[1,2,3]) + array([[1.], + [2.], + [3.]]) + """ + + _tags = { + "authors": ["tpvasconcelos"], + "maintainers": ["tpvasconcelos"], + "X_inner_mtype": ALL_TIME_SERIES_MTYPES, + "scitype:X": ["Series", "Panel", "Hierarchical"], + "capability:missing_values": True, + "capability:multivariate": False, + } + + def __init__(self, param, func, kw_args=None, X_type=None): + self.param = param + self.func = func + self.kw_args = kw_args + self.X_type = X_type + super().__init__() + + if X_type is not None: + self.set_tags(X_inner_mtype=X_type) + + def _fit(self, X): + """Fit estimator and estimate parameters. + + private _fit containing the core logic, called from fit + + Writes to self: + Sets fitted model attributes ending in "_". + + Parameters + ---------- + X : guaranteed to be of a type in self.get_tag("X_inner_mtype") + Time series to which to fit the estimator. + + Returns + ------- + self : reference to self + """ + param = self.param.rstrip("_") + "_" + setattr(self, param, self.func(X, **(self.kw_args or {}))) + return self + + @classmethod + def get_test_params(cls, parameter_set="default"): + """Return testing parameter settings for the estimator. + + Parameters + ---------- + parameter_set : str, default="default" + Name of the set of test parameters to return, for use in tests. If no + special parameters are defined for a value, will return `"default"` set. + There are no reserved values for parameter estimators. + + Returns + ------- + params : dict or list of dict, default = {} + Parameters to create testing instances of the class + Each dict are parameters to construct an "interesting" test instance, i.e., + `MyClass(**params)` or `MyClass(**params[i])` creates a valid test instance. + `create_test_instance` uses the first (or only) dictionary in `params` + """ + params = [ + {"param": "param", "func": lambda X: "foo"}, + {"param": "param", "func": lambda X, kwarg: "foo", "kw_args": {"kwarg": 1}}, + ] + return params diff --git a/sktime/param_est/compose.py b/sktime/param_est/compose/_pipeline.py similarity index 98% rename from sktime/param_est/compose.py rename to sktime/param_est/compose/_pipeline.py index 7c4d3d8e47b..662441f7966 100644 --- a/sktime/param_est/compose.py +++ b/sktime/param_est/compose/_pipeline.py @@ -1,4 +1,7 @@ -"""Composition involving parameter estimators.""" +"""Implements ParamFitterPipeline. + +A class to create a pipeline of transformers and a parameter estimator. +""" # copyright: sktime developers, BSD-3-Clause License (see LICENSE file) from sktime.base import _HeterogenousMetaEstimator from sktime.param_est.base import BaseParamFitter @@ -8,7 +11,6 @@ __author__ = ["fkiraly"] __all__ = ["ParamFitterPipeline"] - # we ensure that internally we convert to pandas for now SUPPORTED_MTYPES = ["pd.DataFrame", "pd.Series", "pd-multiindex", "pd_multiindex_hier"] @@ -86,6 +88,7 @@ class ParamFitterPipeline(_HeterogenousMetaEstimator, BaseParamFitter): """ _tags = { + "authors": "fkiraly", "X_inner_mtype": SUPPORTED_MTYPES, # which types do _fit/_predict, support for X? "scitype:X": ["Series", "Panel", "Hierarchical"], diff --git a/sktime/param_est/fixed.py b/sktime/param_est/fixed.py index 88092cd9a92..bfb4e806a59 100644 --- a/sktime/param_est/fixed.py +++ b/sktime/param_est/fixed.py @@ -25,6 +25,7 @@ class FixedParams(BaseParamFitter): """ _tags = { + "authors": "fkiraly", "X_inner_mtype": ALL_TIME_SERIES_MTYPES, # which types do _fit/_predict, support for X? "scitype:X": ["Series", "Panel", "Hierarchical"], diff --git a/sktime/param_est/plugin/__init__.py b/sktime/param_est/plugin/__init__.py new file mode 100644 index 00000000000..04cf864a7e4 --- /dev/null +++ b/sktime/param_est/plugin/__init__.py @@ -0,0 +1,10 @@ +# copyright: sktime developers, BSD-3-Clause License (see LICENSE file) +"""Plugin composites for parameter estimators.""" + +__all__ = [ + "PluginParamsForecaster", + "PluginParamsTransformer", +] + +from sktime.param_est.plugin._forecaster import PluginParamsForecaster +from sktime.param_est.plugin._transformer import PluginParamsTransformer diff --git a/sktime/param_est/plugin/_common.py b/sktime/param_est/plugin/_common.py new file mode 100644 index 00000000000..7d71f8bd49b --- /dev/null +++ b/sktime/param_est/plugin/_common.py @@ -0,0 +1,52 @@ +# copyright: sktime developers, BSD-3-Clause License (see LICENSE file) +"""Common routines for plugin estimators.""" + +__author__ = ["fkiraly"] + + +def _resolve_param_map(param_est, estimator, params=None): + """Resolve parameter map from params for parameter plugin compositors. + + Parameters + ---------- + param_est : sktime estimator object with a fit method, inheriting from BaseEstimator + e.g., estimator inheriting from BaseParamFitter + assumed to be fitted and have ``get_fitted_params`` method + estimator : sktime object, inheriting from BaseObject or Basestimator + assumed to have ``get_params`` method, not assumed to be fitted + params : None, str, list of str, dict with str values/keys, optional, default=None + determines which parameters from param_est are plugged into estimator and where + None: all parameters of param_est are plugged into estimator + only parameters present in both ``estimator`` and ``param_est`` are plugged in + list of str: parameters in the list are plugged into parameters of the same name + only parameters present in both ``estimator`` and ``param_est`` are plugged in + str: considered as a one-element list of str with the string as single element + dict: parameter with name of value is plugged into parameter with name of key + only keys present in ``param_est`` and values in ``estimator`` are plugged in + + Returns + ------- + param_map : dict with str keys and str values + mapping of parameters from ``param_est_`` to estimator used in ``fit``, + after filtering for parameters present in both + to be used as ``param_map`` attribute of plugin compositor + """ + fitted_params = param_est.get_fitted_params() + + # normalize params to a dict with str keys and str values + if params is None: + param_map = {x: x for x in fitted_params} + elif isinstance(params, str): + param_map = {params: params} + elif isinstance(params, list): + param_map = {x: x for x in params} + elif isinstance(params, dict): + param_map = params + else: + raise TypeError("params must be None, a str, a list of str, or a dict") + + # obtain the mapping restricted to param names that are available in both + param_map = {x: param_map[x] for x in param_map if x in estimator.get_params()} + param_map = {x: param_map[x] for x in param_map if param_map[x] in fitted_params} + + return param_map diff --git a/sktime/param_est/plugin.py b/sktime/param_est/plugin/_forecaster.py similarity index 78% rename from sktime/param_est/plugin.py rename to sktime/param_est/plugin/_forecaster.py index 8b9aa0ee1ed..fac00d660c6 100644 --- a/sktime/param_est/plugin.py +++ b/sktime/param_est/plugin/_forecaster.py @@ -1,5 +1,5 @@ # copyright: sktime developers, BSD-3-Clause License (see LICENSE file) -"""Parameter estimators for seasonality.""" +"""Plugin composite for substituting parameter estimator fit into forecasters.""" __author__ = ["fkiraly"] __all__ = ["PluginParamsForecaster"] @@ -7,56 +7,63 @@ from inspect import signature from sktime.forecasting.base._delegate import _DelegatedForecaster +from sktime.param_est.plugin._common import _resolve_param_map class PluginParamsForecaster(_DelegatedForecaster): """Plugs parameters from a parameter estimator into a forecaster. - In `fit`, first fits `param_est` to data passed: + In ``fit``, first fits ``param_est`` to data passed: - * `y` of `fit` is passed as the first arg to `param_est.fit` - * `X` of `fit` is passed as the second arg, if `param_est.fit` has a second arg - * `fh` of `fit` is passed as `fh`, if any remaining arg of `param_est.fit` is `fh` + * ``y`` of ``fit`` is passed as the first arg to ``param_est.fit`` + * ``X`` of ``fit`` is passed as the second arg, + if ``param_est.fit`` has a second arg + * ``fh`` of ``fit`` is passed as ``fh``, + if any remaining arg of ``param_est.fit`` is ``fh`` - Then, does `forecaster.set_params` with desired/selected parameters. - Parameters of the fitted `param_est` are passed on to `forecaster`, - from/to pairs are as specified by the `params` parameter of `self`, see below. + Then, does ``forecaster.set_params`` with desired/selected parameters. + Parameters of the fitted ``param_est`` are passed on to ``forecaster``, + from/to pairs are as specified by the ``params`` parameter of ``self``, see below. - Then, fits `forecaster` to the data passed in `fit`. + Then, fits ``forecaster`` to the data passed in ``fit``. After that, behaves identically to `forecaster` with those parameters set. `update` behaviour is controlled by the `update_params` parameter. - Example: `param_est` seasonality test to determine `sp` parameter; - `forecaster` being any forecaster with an `sp` parameter. + Example: ``param_est`` seasonality test to determine ``sp`` parameter; + ``forecaster`` a forecaster with an ``sp`` parameter, + e.g., ``ExponentialSmoothing``. Parameters ---------- param_est : sktime estimator object with a fit method, inheriting from BaseEstimator e.g., estimator inheriting from BaseParamFitter or forecaster this is a "blueprint" estimator, state does not change when `fit` is called + forecaster : sktime forecaster, i.e., estimator inheriting from BaseForecaster this is a "blueprint" estimator, state does not change when `fit` is called + params : None, str, list of str, dict with str values/keys, optional, default=None - determines which parameters from param_est are plugged into forecaster and where + determines which parameters from ``param_est`` are plugged into forecaster where None: all parameters of param_est are plugged into forecaster - only parameters present in both `forecaster` and `param_est` are plugged in + only parameters present in both ``forecaster`` and ``param_est`` are plugged in list of str: parameters in the list are plugged into parameters of the same name - only parameters present in both `forecaster` and `param_est` are plugged in + only parameters present in both ``forecaster`` and ``param_est`` are plugged in str: considered as a one-element list of str with the string as single element dict: parameter with name of value is plugged into parameter with name of key - only keys present in `param_est` and values in `forecaster` are plugged in + only keys present in ``param_est`` and values in ``forecaster`` are plugged in + update_params : bool, optional, default=False whether fitted parameters by param_est_ are to be updated in self.update Attributes ---------- - param_est_ : sktime parameter estimator, clone of estimator in `param_est` - this clone is fitted in the pipeline when `fit` is called - forecaster_ : sktime forecaster, clone of forecaster in `forecaster` - this clone is fitted in the pipeline when `fit` is called + param_est_ : sktime parameter estimator, clone of estimator in ``param_est`` + this clone is fitted in the pipeline when ``fit`` is called + forecaster_ : sktime forecaster, clone of ``forecaster`` + this clone is fitted in the pipeline when ``fit`` is called param_map_ : dict - mapping of parameters from `param_est_` to `forecaster_` used in `fit`, + mapping of parameters from ``param_est_`` to ``forecaster_`` used in ``fit``, after filtering for parameters present in both Examples @@ -68,24 +75,38 @@ class PluginParamsForecaster(_DelegatedForecaster): >>> from sktime.transformations.series.difference import Differencer >>> >>> y = load_airline() # doctest: +SKIP + >>> + >>> # sp_est is a seasonality estimator + >>> # ACF assumes stationarity so we concat with differencing first >>> sp_est = Differencer() * SeasonalityACF() # doctest: +SKIP + >>> + >>> # fcst is a forecaster with a "sp" parameter which we want to tune >>> fcst = NaiveForecaster() # doctest: +SKIP + >>> + >>> # sp_auto is auto-tuned via PluginParamsForecaster >>> sp_auto = PluginParamsForecaster(sp_est, fcst) # doctest: +SKIP + >>> + >>> # fit sp_auto to data, predict, and inspect the tuned sp parameter >>> sp_auto.fit(y, fh=[1, 2, 3]) # doctest: +SKIP PluginParamsForecaster(...) >>> y_pred = sp_auto.predict() # doctest: +SKIP >>> sp_auto.forecaster_.get_params()["sp"] # doctest: +SKIP 12 + >>> # shorthand ways to specify sp_auto, via dunder, does the same + >>> sp_auto = sp_est * fcst # doctest: +SKIP + >>> # or entire pipeline in one go + >>> sp_auto = Differencer() * SeasonalityACF() * NaiveForecaster() # doctest: +SKIP using dictionary to plug "foo" parameter into "sp" >>> from sktime.param_est.fixed import FixedParams >>> sp_plugin = PluginParamsForecaster( - ... FixedParams({"foo": 12}), NaiveForecaster(), params={"foo": "sp"} + ... FixedParams({"foo": 12}), NaiveForecaster(), params={"sp": "foo"} ... ) # doctest: +SKIP """ _tags = { + "authors": "fkiraly", "requires-fh-in-fit": False, "handles-missing-data": False, "scitype:y": "both", @@ -107,10 +128,19 @@ def __init__(self, param_est, forecaster, params=None, update_params=False): self.update_params = update_params super().__init__() - self.clone_tags(self.forecaster_) + + self._set_delegated_tags(self.forecaster_) + + # parameter estimators that are univariate do not broadcast, + # so broadcasting needs to be done by the composite (i.e., self) + if param_est.get_tags()["object_type"] == "param_est": + if not param_est.get_tags()["capability:multivariate"]: + self.set_tags(**{"scitype:y": "univariate"}) + self.set_tags(**{"fit_is_empty": False}) # todo: only works for single series now # think about how to deal with vectorization later + self.set_tags(**{"X_inner_mtype": ["pd.DataFrame", "pd.Series", "np.ndarray"]}) self.set_tags(**{"y_inner_mtype": ["pd.DataFrame", "pd.Series", "np.ndarray"]}) def _fit(self, y, X, fh): @@ -162,26 +192,7 @@ def _fit(self, y, X, fh): param_est.fit(y, **fit_kwargs) fitted_params = param_est.get_fitted_params() - # obtain the mapping restricted to param names that are available - fc_par_names = forecaster.get_params().keys() - pe_par_names = fitted_params.keys() - - params = self.params - if params is None: - param_map = {x: x for x in fitted_params.keys()} - elif isinstance(params, str): - param_map = {params: params} - elif isinstance(params, list): - param_map = {x: x for x in params} - elif isinstance(params, dict): - param_map = params - else: - raise TypeError("params must be None, a str, a list of str, or a dict") - - param_map = {x: param_map[x] for x in param_map.keys() if x in fc_par_names} - param_map = { - x: param_map[x] for x in param_map.keys() if param_map[x] in pe_par_names - } + param_map = _resolve_param_map(param_est, forecaster, self.params) self.param_map_ = param_map # obtain the values of fitted params, and set forecaster to those @@ -259,7 +270,7 @@ def get_test_params(cls, parameter_set="default"): parameter_set : str, default="default" Name of the set of test parameters to return, for use in tests. If no special parameters are defined for a value, will return `"default"` set. - There are currently no reserved values for transformers. + There are currently no reserved values for forecasters. Returns ------- @@ -278,7 +289,7 @@ def get_test_params(cls, parameter_set="default"): params1 = { "forecaster": NaiveForecaster(), "param_est": FixedParams({"foo": 12}), - "params": {"foo": "sp"}, + "params": {"sp": "foo"}, } params = [params1] diff --git a/sktime/param_est/plugin/_transformer.py b/sktime/param_est/plugin/_transformer.py new file mode 100644 index 00000000000..aa02c51987c --- /dev/null +++ b/sktime/param_est/plugin/_transformer.py @@ -0,0 +1,241 @@ +# copyright: sktime developers, BSD-3-Clause License (see LICENSE file) +"""Plugin composite for substituting parameter estimator fit into transformers.""" + +__author__ = ["fkiraly"] +__all__ = ["PluginParamsTransformer"] + +from inspect import signature + +from sktime.param_est.plugin._common import _resolve_param_map +from sktime.transformations._delegate import _DelegatedTransformer + + +class PluginParamsTransformer(_DelegatedTransformer): + """Plugs parameters from a parameter estimator into a transformer. + + In ``fit``, first fits ``param_est`` to data passed: + + * ``X`` of ``fit`` is passed as the first arg to ``param_est.fit`` + * ``y`` of ``fit`` is passed as the second arg to ``param_est.fit``, + if ``param_est.fit`` has a second arg + + Then, does ``transformer.set_params`` with desired/selected parameters. + Parameters of the fitted `param_est` are passed on to ``transformer``, + from/to pairs are as specified by the `params` parameter of ``self``, see below. + + Then, fits ``transformer`` to the data passed in ``fit``. + + After that, behaves identically to ``transformer`` with those parameters set. + + Example: ``param_est`` seasonality test to determine ``sp`` parameter; + ``transformer`` a transformer with an ``sp`` parameter, e.g., ``Deseasonalizer``. + + Parameters + ---------- + param_est : sktime estimator object with a fit method, inheriting from BaseEstimator + e.g., estimator inheriting from BaseParamFitter or transformer + this is a "blueprint" estimator, state does not change when `fit` is called + + transformer : sktime transformer, i.e., estimator inheriting from BaseTransformer + this is a "blueprint" estimator, state does not change when `fit` is called + + params : None, str, list of str, dict with str values/keys, optional, default=None + determines which parameters from ``param_est`` are plugged into trafo and where + None: all parameters of param_est are plugged into transformer + only parameters present in both ``transformer`` and ``param_est`` are plugged in + list of str: parameters in the list are plugged into parameters of the same name + only parameters present in both ``transformer`` and ``param_est`` are plugged in + str: considered as a one-element list of str with the string as single element + dict: parameter with name of value is plugged into parameter with name of key + only keys present in ``param_est`` and values in ``transformer`` are plugged in + + Attributes + ---------- + param_est_ : sktime parameter estimator, clone of estimator in ``param_est`` + this clone is fitted in the pipeline when ``fit`` is called + transformer_ : sktime transformer, clone of ``transformer`` + this clone is fitted in the pipeline when ``fit`` is called + param_map_ : dict + mapping of parameters from `param_est_` to `transformer_` used in `fit`, + after filtering for parameters present in both + + Examples + -------- + >>> from sktime.datasets import load_airline + >>> from sktime.param_est.plugin import PluginParamsTransformer + >>> from sktime.param_est.seasonality import SeasonalityACF + >>> from sktime.transformations.series.detrend import Deseasonalizer + >>> from sktime.transformations.series.difference import Differencer + >>> + >>> X = load_airline() # doctest: +SKIP + >>> + >>> # sp_est is a seasonality estimator + >>> # ACF assumes stationarity so we concat with differencing first + >>> sp_est = Differencer() * SeasonalityACF() # doctest: +SKIP + + >>> # trafo is a forecaster with a "sp" parameter which we want to tune + >>> trafo = Deseasonalizer() # doctest: +SKIP + >>> sp_auto = PluginParamsTransformer(sp_est, trafo) # doctest: +SKIP + >>> + >>> # fit sp_auto to data, transform, and inspect the tuned sp parameter + >>> sp_auto.fit(X) # doctest: +SKIP + PluginParamsTransformer(...) + >>> Xt = sp_auto.transform(X) # doctest: +SKIP + >>> sp_auto.transformer_.get_params()["sp"] # doctest: +SKIP + 12 + >>> # shorthand ways to specify sp_auto, via dunder, does the same + >>> sp_auto = sp_est * trafo # doctest: +SKIP + >>> # or entire pipeline in one go + >>> sp_auto = Differencer() * SeasonalityACF() * Deseasonalizer() # doctest: +SKIP + + using dictionary to plug "foo" parameter into "sp" + + >>> from sktime.param_est.fixed import FixedParams + >>> sp_plugin = PluginParamsTransformer( + ... FixedParams({"foo": 12}), Deseasonalizer(), params={"sp": "foo"} + ... ) # doctest: +SKIP + """ + + _tags = { + "authors": "fkiraly", + "capability:inverse_transform": True, # can the transformer inverse transform? + "univariate-only": False, # can the transformer handle multivariate X? + "fit_is_empty": False, # is fit empty and can be skipped? Yes = True + "remember_data": False, # whether all data seen is remembered as self._X + } + + # attribute for _DelegatedTransformer, which then delegates + # all non-overridden methods to those of same name in self.transformer_ + # see further details in _DelegatedTransformer docstring + _delegate_name = "transformer_" + + def __init__(self, param_est, transformer, params=None, update_params=False): + self.param_est = param_est + self.param_est_ = param_est.clone() + self.transformer = transformer + self.transformer_ = transformer.clone() + self.params = params + self.update_params = update_params + + super().__init__() + + TAGS_TO_CLONE = [ + "scitype:transform-input", + "scitype:transform-output", + "scitype:transform-labels", + "scitype:instancewise", + "capability:inverse_transform", + "capability:inverse_transform:range", + "capability:inverse_transform:exact", + "univariate-only", + "y_inner_mtype", + "requires_y", + "enforce_index_type", + "X-y-must-have-same-index", + "transform-returns-same-time-index", + "skip-inverse-transform", + "capability:unequal_length", + "capability:unequal_length:removes", + "handles-missing-data", + "capability:missing_values:removes", + ] + + self.clone_tags(self.transformer_, TAGS_TO_CLONE) + + # todo: only works for single series now + # think about how to deal with vectorization later + SERIES_MTYPES = ["pd.DataFrame", "pd.Series", "np.ndarray"] + self.set_tags(**{"X_inner_mtype": SERIES_MTYPES}) + + if self.get_tags()["y_inner_mtype"] not in [None, "None"]: + self.set_tags(**{"y_inner_mtype": SERIES_MTYPES}) + + def _fit(self, X, y=None): + """Fit transformer to X and y. + + private _fit containing the core logic, called from fit + + Parameters + ---------- + X : Series or Panel of mtype X_inner_mtype + if X_inner_mtype is list, _fit must support all types in it + Data to fit transform to + y : Series or Panel of mtype y_inner_mtype, default=None + Additional data, e.g., labels for transformation + + Returns + ------- + self: reference to self + """ + # reference to delegate + transformer = self._get_delegate() + + # fit the parameter estimator to X + param_est = self.param_est_ + + # map args X, y onto inner signature + # X is passed always + # y is passed if param_est fit has at least two arguments + inner_params = list(signature(param_est.fit).parameters.keys()) + fit_kwargs = {} + if len(inner_params) > 1: + fit_kwargs[inner_params[1]] = y + + param_est.fit(X, **fit_kwargs) + fitted_params = param_est.get_fitted_params() + + param_map = _resolve_param_map(param_est, transformer, self.params) + self.param_map_ = param_map + + # obtain the values of fitted params, and set transformer to those + new_params = {k: fitted_params[v] for k, v in param_map.items()} + transformer.set_params(**new_params) + + # fit the transformer, with the fitted parameter values + transformer.fit(X=X, y=y) + return self + + @classmethod + def get_test_params(cls, parameter_set="default"): + """Return testing parameter settings for the estimator. + + Parameters + ---------- + parameter_set : str, default="default" + Name of the set of test parameters to return, for use in tests. If no + special parameters are defined for a value, will return `"default"` set. + There are currently no reserved values for transformers. + + Returns + ------- + params : dict or list of dict, default = {} + Parameters to create testing instances of the class + Each dict are parameters to construct an "interesting" test instance, i.e., + `MyClass(**params)` or `MyClass(**params[i])` creates a valid test instance. + `create_test_instance` uses the first (or only) dictionary in `params` + """ + from sktime.param_est.fixed import FixedParams + from sktime.param_est.seasonality import SeasonalityACF + from sktime.transformations.series.detrend import Deseasonalizer + from sktime.transformations.series.exponent import ExponentTransformer + from sktime.utils.validation._dependencies import _check_estimator_deps + + # use of dictionary to plug "foo" parameter into "power", uses mock param_est + params1 = { + "transformer": ExponentTransformer(), + "param_est": FixedParams({"foo": 12}), + "params": {"power": "foo"}, + } + params = [params1] + + # uses a "real" param est that depends on statsmodels, requires statsmodels + if _check_estimator_deps(SeasonalityACF, severity="none"): + # explicit reference to a parameter "sp", present in both estimators + params2 = { + "transformer": Deseasonalizer(), + "param_est": SeasonalityACF(), + "params": "sp", + } + params = params + [params2] + + return params diff --git a/sktime/param_est/plugin/tests/__init__.py b/sktime/param_est/plugin/tests/__init__.py new file mode 100644 index 00000000000..8f604b2023b --- /dev/null +++ b/sktime/param_est/plugin/tests/__init__.py @@ -0,0 +1,2 @@ +# copyright: sktime developers, BSD-3-Clause License (see LICENSE file) +"""Tests for plugin composites for parameter estimators.""" diff --git a/sktime/param_est/plugin/tests/test_plugin.py b/sktime/param_est/plugin/tests/test_plugin.py new file mode 100644 index 00000000000..ca608c37d71 --- /dev/null +++ b/sktime/param_est/plugin/tests/test_plugin.py @@ -0,0 +1,107 @@ +# copyright: sktime developers, BSD-3-Clause License (see LICENSE file) +"""Tests for plugin composites for parameter estimators.""" + +import pandas as pd +import pytest + +from sktime.param_est.seasonality import SeasonalityACF +from sktime.utils.validation._dependencies import _check_estimator_deps + + +@pytest.mark.skipif( + not _check_estimator_deps(SeasonalityACF, severity="none"), + reason="skip test if required soft dependencies not available", +) +def test_plugin_fcst(): + """Test PluginParamsForecaster - same as docstring.""" + from sktime.datasets import load_airline + from sktime.forecasting.naive import NaiveForecaster + from sktime.param_est.fixed import FixedParams + from sktime.param_est.plugin import PluginParamsForecaster + from sktime.transformations.series.difference import Differencer + + y = load_airline() + + # sp_est is a seasonality estimator + # ACF assumes stationarity so we concat with differencing first + sp_est = Differencer() * SeasonalityACF() + + # fcst is a forecaster with a "sp" parameter which we want to tune + fcst = NaiveForecaster() + + # sp_auto is auto-tuned via PluginParamsForecaster + sp_auto = PluginParamsForecaster(sp_est, fcst) + + # fit sp_auto to data, predict, and inspect the tuned sp parameter + sp_auto.fit(y, fh=[1, 2, 3]) + y_pred = sp_auto.predict() + assert isinstance(y_pred, pd.Series) + + assert sp_auto.forecaster_.get_params()["sp"] == 12 + + # shorthand ways to specify sp_auto, via dunder, does the same + sp_auto2 = sp_est * fcst + assert isinstance(sp_auto2, PluginParamsForecaster) + assert sp_auto2 == sp_auto + + # or entire pipeline in one go + sp_auto3 = Differencer() * SeasonalityACF() * NaiveForecaster() + assert isinstance(sp_auto3, PluginParamsForecaster) + assert sp_auto3 == sp_auto + + # plugin with dict + sp_plugin = PluginParamsForecaster( + FixedParams({"foo": 42}), NaiveForecaster(), params={"sp": "foo"} + ) + + sp_plugin.fit(y, fh=[1, 2, 3]) + assert sp_plugin.forecaster_.get_params()["sp"] == 42 + + +@pytest.mark.skipif( + not _check_estimator_deps(SeasonalityACF, severity="none"), + reason="skip test if required soft dependencies not available", +) +def test_plugin_trafo(): + """Test PluginParamsTransformer - same as docstring.""" + from sktime.datasets import load_airline + from sktime.param_est.fixed import FixedParams + from sktime.param_est.plugin import PluginParamsTransformer + from sktime.param_est.seasonality import SeasonalityACF + from sktime.transformations.series.detrend import Deseasonalizer + from sktime.transformations.series.difference import Differencer + + X = load_airline() + + # sp_est is a seasonality estimator + # ACF assumes stationarity so we concat with differencing first + sp_est = Differencer() * SeasonalityACF() + + # trafo is a forecaster with a "sp" parameter which we want to tune + trafo = Deseasonalizer() + sp_auto = PluginParamsTransformer(sp_est, trafo) + + # fit sp_auto to data, transform, and inspect the tuned sp parameter + sp_auto.fit(X) + + Xt = sp_auto.transform(X) + assert isinstance(Xt, pd.Series) + assert sp_auto.transformer_.get_params()["sp"] == 12 + + # shorthand ways to specify sp_auto, via dunder, does the same + sp_auto2 = sp_est * trafo + assert isinstance(sp_auto2, PluginParamsTransformer) + assert sp_auto2 == sp_auto + + # or entire pipeline in one go + sp_auto3 = Differencer() * SeasonalityACF() * Deseasonalizer() + assert isinstance(sp_auto3, PluginParamsTransformer) + assert sp_auto3 == sp_auto + + # plugin with dict + sp_plugin = PluginParamsTransformer( + FixedParams({"foo": 42}), Deseasonalizer(), params={"sp": "foo"} + ) + + sp_plugin.fit(X) + assert sp_plugin.transformer_.get_params()["sp"] == 42 diff --git a/sktime/param_est/seasonality.py b/sktime/param_est/seasonality.py index a6174c302c8..b6540d22b9a 100644 --- a/sktime/param_est/seasonality.py +++ b/sktime/param_est/seasonality.py @@ -87,6 +87,7 @@ class SeasonalityACF(BaseParamFitter): """ _tags = { + "authors": "fkiraly", "X_inner_mtype": "pd.Series", # which types do _fit/_predict, support for X? "scitype:X": "Series", # which X scitypes are supported natively? "capability:missing_values": True, # can estimator handle missing data? @@ -273,6 +274,7 @@ class SeasonalityACFqstat(BaseParamFitter): """ _tags = { + "authors": "fkiraly", "X_inner_mtype": "pd.Series", # which types do _fit/_predict, support for X? "scitype:X": "Series", # which X scitypes are supported natively? "capability:missing_values": True, # can estimator handle missing data? @@ -452,6 +454,8 @@ class SeasonalityPeriodogram(BaseParamFitter): """ _tags = { + "authors": ["blazingbhavneek"], + "maintainers": ["blaingbhavneek"], "X_inner_mtype": "pd.Series", "scitype:X": "Series", "capability:missing_values": True, diff --git a/sktime/param_est/stationarity/__init__.py b/sktime/param_est/stationarity/__init__.py new file mode 100644 index 00000000000..ef6f4343cb6 --- /dev/null +++ b/sktime/param_est/stationarity/__init__.py @@ -0,0 +1,23 @@ +"""Module for parameter estimators of stationarity tests.""" + +__author__ = ["fkiraly", "Vasudeva-bit"] +__all__ = [ + "StationarityADF", + "StationarityKPSS", + "StationarityADFArch", + "StationarityDFGLS", + "StationarityPhillipsPerron", + "StationarityKPSSArch", + "StationarityZivotAndrews", + "StationarityVarianceRatio", +] + +from sktime.param_est.stationarity._arch import ( + StationarityADFArch, + StationarityDFGLS, + StationarityKPSSArch, + StationarityPhillipsPerron, + StationarityVarianceRatio, + StationarityZivotAndrews, +) +from sktime.param_est.stationarity._statsmodels import StationarityADF, StationarityKPSS diff --git a/sktime/param_est/stationarity/_arch.py b/sktime/param_est/stationarity/_arch.py new file mode 100644 index 00000000000..24af0c96f6d --- /dev/null +++ b/sktime/param_est/stationarity/_arch.py @@ -0,0 +1,895 @@ +# copyright: sktime developers, BSD-3-Clause License (see LICENSE file) +"""Parameter estimators for stationarity.""" + +__author__ = ["Vasudeva-bit"] +__all__ = [ + "StationarityADFArch", + "StationarityDFGLS", + "StationarityPhillipsPerron", + "StationarityKPSSArch", + "StationarityZivotAndrews", + "StationarityVarianceRatio", +] + +from sktime.param_est.base import BaseParamFitter + + +class StationarityADFArch(BaseParamFitter): + """Test for stationarity via the Augmented Dickey-Fuller Unit Root Test (ADF). + + Direct interface to ``DFGLS`` test from the ``arch`` package. + Does not assume ARCH process, naming is due to the use of the ``arch`` package. + + Uses ``arch.unitroot.ADF`` as a test for unit roots, + and derives a boolean statement whether a series is stationary. + + Also returns test results for the unit root test as fitted parameters. + + Parameters + ---------- + lags : int, optional + The number of lags to use in the ADF regression. If omitted or None, + ``method`` is used to automatically select the lag length with no more + than ``max_lags`` are included. + trend : {"n", "c", "ct", "ctt"}, optional + The trend component to include in the test + + - "n" - No trend components + - "c" - Include a constant (Default) + - "ct" - Include a constant and linear time trend + - "ctt" - Include a constant and linear and quadratic time trends + + max_lags : int, optional + The maximum number of lags to use when selecting lag length + method : {"AIC", "BIC", "t-stat"}, optional + The method to use when selecting the lag length + + - "AIC" - Select the minimum of the Akaike IC + - "BIC" - Select the minimum of the Schwarz/Bayesian IC + - "t-stat" - Select the minimum of the Schwarz/Bayesian IC + + low_memory : bool + Flag indicating whether to use a low memory implementation of the + lag selection algorithm. The low memory algorithm is slower than + the standard algorithm but will use 2-4% of the memory required for + the standard algorithm. This options allows automatic lag selection + to be used in very long time series. If None, use automatic selection + of algorithm. + + Attributes + ---------- + stationary_ : bool + whether the series in ``fit`` is stationary according to the test + more precisely, whether the null of the ADF test is rejected at ``p_threshold`` + test_statistic_ : float + The ADF test statistic, of running ``adfuller`` on ``y`` in ``fit`` + pvalue_ : float : float + MacKinnon's approximate p-value based on MacKinnon (1994, 2010), + obtained when running ``adfuller`` on ``y`` in ``fit`` + usedlag_ : int + The number of lags used in the test. + + Examples + -------- + >>> from sktime.datasets import load_airline + >>> from sktime.param_est.stationarity import StationarityADFArch + >>> + >>> X = load_airline() # doctest: +SKIP + >>> sty_est = StationarityADFArch() # doctest: +SKIP + >>> sty_est.fit(X) # doctest: +SKIP + StationarityADFArch(...) + >>> sty_est.get_fitted_params()["stationary"] # doctest: +SKIP + False + """ + + _tags = { + "authors": ["Vasudeva-bit"], + "maintainers": ["Vasudeva-bit"], + "X_inner_mtype": ["pd.Series", "np.ndarray"], + "scitype:X": "Series", + "python_dependencies": "arch", + } + + def __init__( + self, + lags=None, + trend="c", + max_lags=None, + method="aic", + low_memory=None, + p_threshold=0.05, + ): + self.lags = lags + self.trend = trend + self.max_lags = max_lags + self.method = method + self.low_memory = low_memory + self.p_threshold = p_threshold + super().__init__() + + def _fit(self, X): + """Fit estimator and estimate parameters. + + private _fit containing the core logic, called from fit + + Writes to self: + Sets fitted model attributes ending in "_". + + Parameters + ---------- + X : {ndarray, Series} + The data to test for a unit root + + Returns + ------- + self : reference to self + """ + from arch.unitroot import ADF + + p_threshold = self.p_threshold + + result = ADF( + y=X, + lags=self.lags, + trend=self.trend, + max_lags=self.max_lags, + method=self.method, + low_memory=self.low_memory, + ) + self.test_statistic_ = result.stat + self.pvalue = result.pvalue + self.stationary_ = result.pvalue <= p_threshold + self.used_lag_ = result._lags + + return self + + @classmethod + def get_test_params(cls, parameter_set="default"): + """Return testing parameter settings for the estimator. + + Parameters + ---------- + parameter_set : str, default="default" + Name of the set of test parameters to return, for use in tests. If no + special parameters are defined for a value, will return `"default"` set. + There are no reserved values for parameter estimators. + + Returns + ------- + params : dict or list of dict, default = {} + Parameters to create testing instances of the class + Each dict are parameters to construct an "interesting" test instance, i.e., + `MyClass(**params)` or `MyClass(**params[i])` creates a valid test instance. + `create_test_instance` uses the first (or only) dictionary in `params` + """ + params1 = {} + params2 = { + "lags": 5, + "trend": "ctt", + "max_lags": 10, + "method": "t-stat", + "low_memory": True, + "p_threshold": 0.1, + } + + return [params1, params2] + + +class StationarityDFGLS(BaseParamFitter): + """Test for stationarity via the Dickey-Fuller GLS (DFGLS) Unit Root Test. + + Direct interface to ``DFGLS`` test from the ``arch`` package. + + Uses ``arch.unitroot.DFGLS`` as a test for unit roots, + and derives a boolean statement whether a series is stationary. + + Also returns test results for the unit root test as fitted parameters. + + Parameters + ---------- + lags : int, optional + The number of lags to use in the ADF regression. If omitted or None, + ``method`` is used to automatically select the lag length with no more + than ``max_lags`` are included. + trend : {"c", "ct"}, optional + The trend component to include in the test + + - "c" - Include a constant (Default) + - "ct" - Include a constant and linear time trend + + max_lags : int, optional + The maximum number of lags to use when selecting lag length. When using + automatic lag length selection, the lag is selected using OLS + detrending rather than GLS detrending ([2]_). + method : {"AIC", "BIC", "t-stat"}, optional + The method to use when selecting the lag length + + - "AIC" - Select the minimum of the Akaike IC + - "BIC" - Select the minimum of the Schwarz/Bayesian IC + - "t-stat" - Select the minimum of the Schwarz/Bayesian IC + + Attributes + ---------- + stationary_ : bool + whether the series in ``fit`` is stationary according to the test + more precisely, whether the null of the Dickey-Fuller-GLS test is rejected at + ``p_threshold`` + test_statistic_ : float + The DFGLS test statistic, of running ``DFGLS`` on ``y`` in ``fit`` + pvalue_ : float : float + p-value obtained when running ``DFGLS`` on ``y`` in ``fit`` + usedlag_ : int + The number of lags used in the test. + + Examples + -------- + >>> from sktime.datasets import load_airline + >>> from sktime.param_est.stationarity import StationarityDFGLS + >>> + >>> X = load_airline() # doctest: +SKIP + >>> sty_est = StationarityDFGLS() # doctest: +SKIP + >>> sty_est.fit(X) # doctest: +SKIP + StationarityDFGLS(...) + >>> sty_est.get_fitted_params()["stationary"] # doctest: +SKIP + False + """ + + _tags = { + "authors": ["Vasudeva-bit"], + "maintainers": ["Vasudeva-bit"], + "X_inner_mtype": ["pd.Series", "np.ndarray"], + "scitype:X": "Series", + "python_dependencies": "arch", + } + + def __init__( + self, + lags=None, + trend="c", + max_lags=None, + method="aic", + p_threshold=0.05, + ): + self.lags = lags + self.trend = trend + self.max_lags = max_lags + self.method = method + self.p_threshold = p_threshold + super().__init__() + + def _fit(self, X): + """Fit estimator and estimate parameters. + + private _fit containing the core logic, called from fit + + Writes to self: + Sets fitted model attributes ending in "_". + + Parameters + ---------- + X : {ndarray, Series} + The data to test for a unit root + + Returns + ------- + self : reference to self + """ + from arch.unitroot import DFGLS + + p_threshold = self.p_threshold + + result = DFGLS( + y=X, + lags=self.lags, + trend=self.trend, + max_lags=self.max_lags, + method=self.method, + ) + self.test_statistic_ = result.stat + self.pvalue = result.pvalue + self.stationary_ = result.pvalue <= p_threshold + self.used_lag_ = result._lags + + return self + + @classmethod + def get_test_params(cls, parameter_set="default"): + """Return testing parameter settings for the estimator. + + Parameters + ---------- + parameter_set : str, default="default" + Name of the set of test parameters to return, for use in tests. If no + special parameters are defined for a value, will return `"default"` set. + There are no reserved values for parameter estimators. + + Returns + ------- + params : dict or list of dict, default = {} + Parameters to create testing instances of the class + Each dict are parameters to construct an "interesting" test instance, i.e., + `MyClass(**params)` or `MyClass(**params[i])` creates a valid test instance. + `create_test_instance` uses the first (or only) dictionary in `params` + """ + params1 = {} + params2 = { + "lags": 5, + "trend": "ct", + "max_lags": 10, + "method": "t-stat", + "p_threshold": 0.1, + } + + return [params1, params2] + + +class StationarityPhillipsPerron(BaseParamFitter): + """Test for unit root order 1 via the Phillips-Perron Unit Root Test. + + Direct interface to ``PhillipsPerron`` test from the ``arch`` package. + + Uses ``arch.unitroot.PhillipsPerron`` as a test for unit roots, + and derives a boolean statement whether a series is stationary. + + Also returns test results for the unit root test as fitted parameters. + + Parameters + ---------- + lags : int, optional + The number of lags to use in the Newey-West estimator of the long-run + covariance. If omitted or None, the lag length is set automatically to + ``12 * (nobs/100) ** (1/4)`` + trend : {"n", "c", "ct"}, optional + The trend component to include in the test + + - "n" - No trend components + - "c" - Include a constant (Default) + - "ct" - Include a constant and linear time trend + + test_type : {"tau", "rho"} + The test to use when computing the test statistic. "tau" is based on + the t-stat and "rho" uses a test based on nobs times the re-centered + regression coefficient + + Attributes + ---------- + stationary_ : bool + whether the series in ``fit`` is integrated of order 1 + more precisely, whether the null of the Phillips-Perron test is rejected at + ``p_threshold`` + test_statistic_ : float + The PP test statistic, of running ``PhillipsPerron`` on ``y`` in ``fit`` + pvalue_ : float : float + p-value obtained when running ``PhillipsPerron`` on ``y`` in ``fit`` + usedlag_ : int + The number of lags used in the test. + + Examples + -------- + >>> from sktime.datasets import load_airline + >>> from sktime.param_est.stationarity import StationarityPhillipsPerron + >>> + >>> X = load_airline() # doctest: +SKIP + >>> sty_est = StationarityPhillipsPerron() # doctest: +SKIP + >>> sty_est.fit(X) # doctest: +SKIP + StationarityPhillipsPerron(...) + >>> sty_est.get_fitted_params()["stationary"] # doctest: +SKIP + False + """ + + _tags = { + "authors": ["Vasudeva-bit"], + "maintainers": ["Vasudeva-bit"], + "X_inner_mtype": ["pd.Series", "np.ndarray"], + "scitype:X": "Series", + "python_dependencies": "arch", + } + + def __init__( + self, + lags=None, + trend="c", + test_type="tau", + p_threshold=0.05, + ): + self.lags = lags + self.trend = trend + self.test_type = test_type + self.p_threshold = p_threshold + super().__init__() + + def _fit(self, X): + """Fit estimator and estimate parameters. + + private _fit containing the core logic, called from fit + + Writes to self: + Sets fitted model attributes ending in "_". + + Parameters + ---------- + X : {ndarray, Series} + The data to test for a unit root + + Returns + ------- + self : reference to self + """ + from arch.unitroot import PhillipsPerron + + p_threshold = self.p_threshold + + result = PhillipsPerron( + y=X, + lags=self.lags, + trend=self.trend, + test_type=self.test_type, + ) + self.test_statistic_ = result.stat + self.pvalue = result.pvalue + self.stationary_ = result.pvalue <= p_threshold + self.used_lag_ = result._lags + + return self + + @classmethod + def get_test_params(cls, parameter_set="default"): + """Return testing parameter settings for the estimator. + + Parameters + ---------- + parameter_set : str, default="default" + Name of the set of test parameters to return, for use in tests. If no + special parameters are defined for a value, will return `"default"` set. + There are no reserved values for parameter estimators. + + Returns + ------- + params : dict or list of dict, default = {} + Parameters to create testing instances of the class + Each dict are parameters to construct an "interesting" test instance, i.e., + `MyClass(**params)` or `MyClass(**params[i])` creates a valid test instance. + `create_test_instance` uses the first (or only) dictionary in `params` + """ + params1 = {} + params2 = { + "lags": 5, + "trend": "ct", + "test_type": "rho", + "p_threshold": 0.1, + } + + return [params1, params2] + + +class StationarityKPSSArch(BaseParamFitter): + """Test for stationarity via the Kwiatkowski-Phillips-Schmidt-Shin Unit Root Test. + + Direct interface to ``KPSS`` test from the ``arch`` package. + Does not assume ARCH process, naming is due to the use of the ``arch`` package. + + Uses ``arch.unitroot.KPSS`` as a test for trend-stationarity, + and derives a boolean statement whether a series is (trend-)stationary. + + Also returns test results for the unit root test as fitted parameters. + + Parameters + ---------- + lags : int, optional + The number of lags to use in the Newey-West estimator of the long-run + covariance. If omitted or None, the number of lags is calculated + with the data-dependent method of Hobijn et al. (1998). See also + Andrews (1991), Newey & West (1994), and Schwert (1989). + Set ``lags=-1`` to use the old method that only depends on the sample + size, ``12 * (nobs/100) ** (1/4)``. + trend : {"c", "ct"}, optional + The trend component to include in the ADF test + "c" - Include a constant (Default) + "ct" - Include a constant and linear time trend + + Attributes + ---------- + stationary_ : bool + whether the series in ``fit`` is stationary according to the test + more precisely, whether the null of the KPSS test is accepted at ``p_threshold`` + test_statistic_ : float + The KPSS test statistic, of running ``KPSS`` on ``y`` in ``fit`` + pvalue_ : float : float + p-value obtained when running ``KPSS`` on ``y`` in ``fit`` + usedlag_ : int + The number of lags used in the test. + + Examples + -------- + >>> from sktime.datasets import load_airline + >>> from sktime.param_est.stationarity import StationarityKPSSArch + >>> + >>> X = load_airline() # doctest: +SKIP + >>> sty_est = StationarityKPSSArch() # doctest: +SKIP + >>> sty_est.fit(X) # doctest: +SKIP + StationarityKPSSArch(...) + >>> sty_est.get_fitted_params()["stationary"] # doctest: +SKIP + True + """ + + _tags = { + "authors": ["Vasudeva-bit"], + "maintainers": ["Vasudeva-bit"], + "X_inner_mtype": ["pd.Series", "np.ndarray"], + "scitype:X": "Series", + "python_dependencies": "arch", + } + + def __init__( + self, + lags=None, + trend="c", + p_threshold=0.05, + ): + self.lags = lags + self.trend = trend + self.p_threshold = p_threshold + super().__init__() + + def _fit(self, X): + """Fit estimator and estimate parameters. + + private _fit containing the core logic, called from fit + + Writes to self: + Sets fitted model attributes ending in "_". + + Parameters + ---------- + X : {ndarray, Series} + The data to test for a unit root + + Returns + ------- + self : reference to self + """ + from arch.unitroot import KPSS + + p_threshold = self.p_threshold + + result = KPSS( + y=X, + lags=self.lags, + trend=self.trend, + ) + self.test_statistic_ = result.stat + self.pvalue = result.pvalue + self.stationary_ = result.pvalue > p_threshold + self.used_lag_ = result._lags + + return self + + @classmethod + def get_test_params(cls, parameter_set="default"): + """Return testing parameter settings for the estimator. + + Parameters + ---------- + parameter_set : str, default="default" + Name of the set of test parameters to return, for use in tests. If no + special parameters are defined for a value, will return `"default"` set. + There are no reserved values for parameter estimators. + + Returns + ------- + params : dict or list of dict, default = {} + Parameters to create testing instances of the class + Each dict are parameters to construct an "interesting" test instance, i.e., + `MyClass(**params)` or `MyClass(**params[i])` creates a valid test instance. + `create_test_instance` uses the first (or only) dictionary in `params` + """ + params1 = {} + params2 = { + "lags": 5, + "trend": "ct", + "p_threshold": 0.1, + } + + return [params1, params2] + + +class StationarityZivotAndrews(BaseParamFitter): + """Test for stationarity via the Zivot-Andrews Unit Root Test. + + Direct interface to ``ZivotAndrews`` test from the `arch` package. + + Uses ``arch.unitroot.ZivotAndrews`` as a test for unit roots, + and derives a boolean statement whether a series is stationary. + + Also returns test results for the unit root test as fitted parameters. + + Parameters + ---------- + lags : int, optional + The number of lags to use in the ADF regression. If omitted or None, + ``method`` is used to automatically select the lag length with no more + than `max_lags` are included. + trend : {"c", "t", "ct"}, optional + The trend component to include in the test + + - "c" - Include a constant (Default) + - "t" - Include a linear time trend + - "ct" - Include a constant and linear time trend + + trim : float + percentage of series at begin/end to exclude from break-period + calculation in range [0, 0.333] (default=0.15) + max_lags : int, optional + The maximum number of lags to use when selecting lag length + method : {"AIC", "BIC", "t-stat"}, optional + The method to use when selecting the lag length + + - "AIC" - Select the minimum of the Akaike IC + - "BIC" - Select the minimum of the Schwarz/Bayesian IC + - "t-stat" - Select the minimum of the Schwarz/Bayesian IC + + Attributes + ---------- + stationary_ : bool, whether the series in `fit` has a unit root + (with structural break) + more precisely, whether the null of the Zivot-Andrews test is rejected at + ``p_threshold`` + test_statistic_ : float + The ZA test statistic, of running ``ZivotAndrews`` on ``y`` in ``fit`` + pvalue_ : float : float + p-value obtained when running ``ZivotAndrews`` on ``y`` in ``fit`` + usedlag_ : int + The number of lags used in the test. + + Examples + -------- + >>> from sktime.datasets import load_airline + >>> from sktime.param_est.stationarity import StationarityZivotAndrews + >>> + >>> X = load_airline() # doctest: +SKIP + >>> sty_est = StationarityZivotAndrews() # doctest: +SKIP + >>> sty_est.fit(X) # doctest: +SKIP + StationarityZivotAndrews(...) + >>> sty_est.get_fitted_params()["stationary"] # doctest: +SKIP + False + """ + + _tags = { + "authors": ["Vasudeva-bit"], + "maintainers": ["Vasudeva-bit"], + "X_inner_mtype": ["pd.Series", "np.ndarray"], + "scitype:X": "Series", + "python_dependencies": "arch", + } + + def __init__( + self, + lags=None, + trend="c", + trim=0.15, + max_lags=None, + method="aic", + p_threshold=0.05, + ): + self.lags = lags + self.trend = trend + self.trim = trim + self.max_lags = max_lags + self.method = method + self.p_threshold = p_threshold + super().__init__() + + def _fit(self, X): + """Fit estimator and estimate parameters. + + private _fit containing the core logic, called from fit + + Writes to self: + Sets fitted model attributes ending in "_". + + Parameters + ---------- + X : {ndarray, Series} + The data to test for a unit root + + Returns + ------- + self : reference to self + """ + from arch.unitroot import ZivotAndrews + + p_threshold = self.p_threshold + + result = ZivotAndrews( + y=X, + lags=self.lags, + trend=self.trend, + trim=self.trim, + max_lags=self.max_lags, + method=self.method, + ) + self.test_statistic_ = result.stat + self.pvalue = result.pvalue + self.stationary_ = result.pvalue <= p_threshold + self.used_lag_ = result._lags + + return self + + @classmethod + def get_test_params(cls, parameter_set="default"): + """Return testing parameter settings for the estimator. + + Parameters + ---------- + parameter_set : str, default="default" + Name of the set of test parameters to return, for use in tests. If no + special parameters are defined for a value, will return `"default"` set. + There are no reserved values for parameter estimators. + + Returns + ------- + params : dict or list of dict, default = {} + Parameters to create testing instances of the class + Each dict are parameters to construct an "interesting" test instance, i.e., + `MyClass(**params)` or `MyClass(**params[i])` creates a valid test instance. + `create_test_instance` uses the first (or only) dictionary in `params` + """ + params1 = {} + params2 = { + "lags": 5, + "trend": "ct", + "trim": 0.1, + "max_lags": 10, + "method": "t-stat", + "p_threshold": 0.1, + } + + return [params1, params2] + + +class StationarityVarianceRatio(BaseParamFitter): + """Test for stationarity via the variance ratio test for random walks. + + Direct interface to ``VarianceRatio`` test from the `arch` package. + + Uses ``arch.unitroot.VarianceRatio`` as a test for unit roots, + and derives a boolean statement whether a series is stationary. + + Also returns test results for the unit root test as fitted parameters. + + Parameters + ---------- + lags : int + The number of periods to used in the multi-period variance, which is + the numerator of the test statistic. Must be at least 2 + trend : {"n", "c"}, optional + "c" allows for a non-zero drift in the random walk, while "n" requires + that the increments to y are mean 0 + overlap : bool, optional + Indicates whether to use all overlapping blocks. Default is True. If + False, the number of observations in y minus 1 must be an exact + multiple of lags. If this condition is not satisfied, some values at + the end of y will be discarded. + robust : bool, optional + Indicates whether to use heteroskedasticity robust inference. Default + is True. + debiased : bool, optional + Indicates whether to use a debiased version of the test. Default is + True. Only applicable if overlap is True. + + Attributes + ---------- + stationary_ : bool + whether the series in ``fit`` is stationary according to the test + more precisely, whether the null of the variance ratio test is accepted at + ``p_threshold`` + test_statistic_ : float + The VR test statistic, of running ``VarianceRatio`` on ``y`` in ``fit`` + pvalue_ : float : float + p-value obtained when running ``VarianceRatio`` on ``y`` in ``fit`` + usedlag_ : int + The number of lags used in the test. + + Examples + -------- + >>> from sktime.datasets import load_airline + >>> from sktime.param_est.stationarity import StationarityVarianceRatio + >>> + >>> X = load_airline() # doctest: +SKIP + >>> sty_est = StationarityVarianceRatio() # doctest: +SKIP + >>> sty_est.fit(X) # doctest: +SKIP + StationarityVarianceRatio(...) + >>> sty_est.get_fitted_params()["stationary"] # doctest: +SKIP + True + """ + + _tags = { + "authors": ["Vasudeva-bit"], + "maintainers": ["Vasudeva-bit"], + "X_inner_mtype": ["pd.Series", "np.ndarray"], + "scitype:X": "Series", + "python_dependencies": "arch", + } + + def __init__( + self, + lags=2, + trend="c", + overlap=True, + robust=True, + debiased=True, + p_threshold=0.05, + ): + self.lags = lags + self.trend = trend + self.overlap = overlap + self.robust = robust + self.debiased = debiased + self.p_threshold = p_threshold + super().__init__() + + def _fit(self, X): + """Fit estimator and estimate parameters. + + private _fit containing the core logic, called from fit + + Writes to self: + Sets fitted model attributes ending in "_". + + Parameters + ---------- + X : {ndarray, Series} + The data to test for a unit root + + Returns + ------- + self : reference to self + """ + from arch.unitroot import VarianceRatio + + p_threshold = self.p_threshold + + result = VarianceRatio( + y=X, + lags=self.lags, + trend=self.trend, + overlap=self.overlap, + robust=self.robust, + debiased=self.debiased, + ) + self.test_statistic_ = result.stat + self.pvalue = result.pvalue + self.stationary_ = result.pvalue > p_threshold + self.used_lag_ = result._lags + + return self + + @classmethod + def get_test_params(cls, parameter_set="default"): + """Return testing parameter settings for the estimator. + + Parameters + ---------- + parameter_set : str, default="default" + Name of the set of test parameters to return, for use in tests. If no + special parameters are defined for a value, will return `"default"` set. + There are no reserved values for parameter estimators. + + Returns + ------- + params : dict or list of dict, default = {} + Parameters to create testing instances of the class + Each dict are parameters to construct an "interesting" test instance, i.e., + `MyClass(**params)` or `MyClass(**params[i])` creates a valid test instance. + `create_test_instance` uses the first (or only) dictionary in `params` + """ + params1 = {} + params2 = { + "lags": 5, + "overlap": False, + "robust": False, + "debiased": False, + "p_threshold": 0.1, + } + + return [params1, params2] diff --git a/sktime/param_est/stationarity.py b/sktime/param_est/stationarity/_statsmodels.py similarity index 90% rename from sktime/param_est/stationarity.py rename to sktime/param_est/stationarity/_statsmodels.py index 8eef716134d..49523100568 100644 --- a/sktime/param_est/stationarity.py +++ b/sktime/param_est/stationarity/_statsmodels.py @@ -2,7 +2,10 @@ """Parameter estimators for stationarity.""" __author__ = ["fkiraly"] -__all__ = ["StationarityADF", "StationarityKPSS"] +__all__ = [ + "StationarityADF", + "StationarityKPSS", +] from sktime.param_est.base import BaseParamFitter @@ -10,7 +13,7 @@ class StationarityADF(BaseParamFitter): """Test for stationarity via the Augmented Dickey-Fuller Unit Root Test (ADF). - Uses `statsmodels.tsa.stattools.adfuller` as a test for unit roots, + Uses ``statsmodels.tsa.stattools.adfuller`` as a test for unit roots, and derives a boolean statement whether a series is stationary. Also returns test results for the unit root test as fitted parameters. @@ -43,13 +46,14 @@ class StationarityADF(BaseParamFitter): Attributes ---------- - stationary_ : bool, whether the series in `fit` is stationary according to the test - more precisely, whether the null of the ADF test is rejected at `p_threshold` + stationary_ : bool + whether the series in ``fit`` is stationary according to the test + more precisely, whether the null of the ADF test is rejected at ``p_threshold`` test_statistic_ : float - The ADF test statistic, of running `adfuller` on `y` in `fit` + The ADF test statistic, of running ``adfuller`` on ``y`` in ``fit`` pvalue_ : float : float MacKinnon's approximate p-value based on MacKinnon (1994, 2010), - obtained when running `adfuller` on `y` in `fit` + obtained when running `adfuller` on ``y`` in ``fit`` usedlag_ : int The number of lags used in the test. @@ -62,11 +66,12 @@ class StationarityADF(BaseParamFitter): >>> sty_est = StationarityADF() # doctest: +SKIP >>> sty_est.fit(X) # doctest: +SKIP StationarityADF(...) - >>> sp_est.get_fitted_params()["stationary"] # doctest: +SKIP + >>> sty_est.get_fitted_params()["stationary"] # doctest: +SKIP False """ _tags = { + "authors": "fkiraly", "X_inner_mtype": "pd.Series", # which types do _fit/_predict, support for X? "scitype:X": "Series", # which X scitypes are supported natively? "capability:missing_values": False, # can estimator handle missing data? @@ -154,7 +159,7 @@ def get_test_params(cls, parameter_set="default"): class StationarityKPSS(BaseParamFitter): """Test for stationarity via the Kwiatkowski-Phillips-Schmidt-Shin Test. - Uses `statsmodels.tsa.stattools.kpss` as a test for trend-stationairty, + Uses ``statsmodels.tsa.stattools.kpss`` as a test for trend-stationarity, and derives a boolean statement whether a series is (trend-)stationary. Also returns test results for the trend-stationarity test as fitted parameters. @@ -180,12 +185,13 @@ class StationarityKPSS(BaseParamFitter): Attributes ---------- - stationary_ : bool, whether the series in `fit` is stationary according to the test - more precisely, whether the null of the KPSS test is accepted at `p_threshold` + stationary_ : bool + whether the series in ``fit`` is stationary according to the test + more precisely, whether the null of the KPSS test is accepted at ``p_threshold`` test_statistic_ : float - The KPSS test statistic, of running `kpss` on `y` in `fit` + The KPSS test statistic, of running ``kpss`` on ``y`` in ``fit`` pvalue_ : float : float - The p-value of the KPSS test, of running `kpss` on `y` in `fit`. + The p-value of the KPSS test, of running ``kpss`` on ``y`` in ``fit``. The p-value is interpolated from Table 1 in Kwiatkowski et al. (1992), and a boundary point is returned if the test statistic is outside the table of critical values, that is, if the p-value is outside the interval (0.01, 0.1). @@ -201,11 +207,12 @@ class StationarityKPSS(BaseParamFitter): >>> sty_est = StationarityKPSS() # doctest: +SKIP >>> sty_est.fit(X) # doctest: +SKIP StationarityKPSS(...) - >>> sp_est.get_fitted_params()["stationary"] # doctest: +SKIP + >>> sty_est.get_fitted_params()["stationary"] # doctest: +SKIP False """ _tags = { + "authors": "fkiraly", "X_inner_mtype": "pd.Series", # which types do _fit/_predict, support for X? "scitype:X": "Series", # which X scitypes are supported natively? "capability:missing_values": False, # can estimator handle missing data? diff --git a/sktime/performance_metrics/base/_base.py b/sktime/performance_metrics/base/_base.py index cc90b1cca02..8d2840b8960 100644 --- a/sktime/performance_metrics/base/_base.py +++ b/sktime/performance_metrics/base/_base.py @@ -13,7 +13,11 @@ class BaseMetric(BaseObject): Extends sktime BaseObject. """ - _tags = {"object_type": "metric"} + _tags = { + "object_type": "metric", + "authors": "sktime developers", # author(s) of the object + "maintainers": "sktime developers", # current maintainer(s) of the object + } def __init__(self): super().__init__() diff --git a/sktime/performance_metrics/forecasting/_classes.py b/sktime/performance_metrics/forecasting/_classes.py index 08d3313837f..9e4eec76ddb 100644 --- a/sktime/performance_metrics/forecasting/_classes.py +++ b/sktime/performance_metrics/forecasting/_classes.py @@ -273,12 +273,17 @@ def _evaluate_vectorized(self, y_true, y_pred, **kwargs): y_pred : VectorizedDF non-time-like instances of y_true, y_pred must be identical """ + backend = dict() + backend["backend"] = self.get_config()["backend:parallel"] + backend["backend_params"] = self.get_config()["backend:parallel:params"] + eval_result = y_true.vectorize_est( estimator=self.clone(), method="_evaluate", varname_of_self="y_true", args={**kwargs, "y_pred": y_pred}, colname_default=self.name, + **backend, ) if isinstance(self.multioutput, str) and self.multioutput == "raw_values": @@ -305,6 +310,10 @@ def _evaluate_by_index_vectorized(self, y_true, y_pred, **kwargs): y_pred : VectorizedDF non-time-like instances of y_true, y_pred must be identical """ + backend = dict() + backend["backend"] = self.get_config()["backend:parallel"] + backend["backend_params"] = self.get_config()["backend:parallel:params"] + eval_result = y_true.vectorize_est( estimator=self.clone().set_params(**{"multilevel": "uniform_average"}), method="_evaluate_by_index", @@ -312,6 +321,7 @@ def _evaluate_by_index_vectorized(self, y_true, y_pred, **kwargs): args={**kwargs, "y_pred": y_pred}, colname_default=self.name, return_type="list", + **backend, ) eval_result = y_true.reconstruct(eval_result) @@ -573,6 +583,10 @@ def _evaluate(self, y_true, y_pred, **kwargs): else: func = self.func + return self._evaluate_func(func=func, y_true=y_true, y_pred=y_pred, **params) + + def _evaluate_func(self, func, y_true, y_pred, **params): + """Call func with kwargs subset to func parameters.""" # import here for now to avoid interaction with getmembers in tests # todo: clean up ancient getmembers in test_metrics_classes from functools import partial @@ -584,6 +598,15 @@ def _evaluate(self, y_true, y_pred, **kwargs): func_params = func_params.intersection(params.keys()) params = {key: params[key] for key in func_params} + # deal with sklearn specific parameter constraints + # as these are a decorator, they obfuscate python native inspection + # via signature, so have to be dealt with separately + if hasattr(func, "_skl_parameter_constraints"): + constr = func._skl_parameter_constraints + if isinstance(constr, dict): + constr_params = set(constr.keys()).intersection(params.keys()) + params = {key: params[key] for key in constr_params} + res = func(y_true=y_true, y_pred=y_pred, **params) return res @@ -609,6 +632,16 @@ def __init__( self.set_tags(**{"lower_is_better": lower_is_better}) + def _evaluate(self, y_true, y_pred, **kwargs): + """Evaluate the desired metric on given inputs.""" + # this dict should contain all parameters + params = kwargs + params.update({"multioutput": self.multioutput, "multilevel": self.multilevel}) + + func = self.func + + return self._evaluate_func(func=func, y_true=y_true, y_pred=y_pred, **params) + @classmethod def get_test_params(cls, parameter_set="default"): """Return testing parameter settings for the estimator. @@ -1830,6 +1863,56 @@ def __init__( self.symmetric = symmetric super().__init__(multioutput=multioutput, multilevel=multilevel) + def _evaluate_by_index(self, y_true, y_pred, **kwargs): + """Return the metric evaluated at each time point. + + private _evaluate_by_index containing core logic, called from evaluate_by_index + + Parameters + ---------- + y_true : time series in sktime compatible pandas based data container format + Ground truth (correct) target values + y can be in one of the following formats: + Series scitype: pd.DataFrame + Panel scitype: pd.DataFrame with 2-level row MultiIndex + Hierarchical scitype: pd.DataFrame with 3 or more level row MultiIndex + y_pred :time series in sktime compatible data container format + Forecasted values to evaluate + must be of same format as y_true, same indices and columns if indexed + + Returns + ------- + loss : pd.Series or pd.DataFrame + Calculated metric, by time point (default=jackknife pseudo-values). + pd.Series if self.multioutput="uniform_average" or array-like + index is equal to index of y_true + entry at index i is metric at time i, averaged over variables + pd.DataFrame if self.multioutput="raw_values" + index and columns equal to those of y_true + i,j-th entry is metric at time i, at variable j + """ + multioutput = self.multioutput + symmetric = self.symmetric + + numer_values = (y_true - y_pred).abs() + + if symmetric: + denom_values = (y_true.abs() + y_pred.abs()) / 2 + else: + denom_values = y_true.abs() + + raw_values = numer_values / denom_values + + if isinstance(multioutput, str): + if multioutput == "raw_values": + return raw_values + + if multioutput == "uniform_average": + return raw_values.mean(axis=1) + + # else, we expect multioutput to be array-like + return raw_values.dot(multioutput) + @classmethod def get_test_params(cls, parameter_set="default"): """Return testing parameter settings for the estimator. diff --git a/sktime/performance_metrics/forecasting/_functions.py b/sktime/performance_metrics/forecasting/_functions.py index 67742950c25..2e8966be22e 100644 --- a/sktime/performance_metrics/forecasting/_functions.py +++ b/sktime/performance_metrics/forecasting/_functions.py @@ -8,6 +8,7 @@ """ import numpy as np +import sklearn from scipy.stats import gmean from sklearn.metrics import mean_absolute_error as _mean_absolute_error from sklearn.metrics import mean_squared_error as _mean_squared_error @@ -18,6 +19,9 @@ from sktime.utils.stats import _weighted_geometric_mean +if sklearn.__version__ >= "1.4.0": + from sklearn.metrics import root_mean_squared_error as _root_mean_squared_error + __author__ = ["mloning", "tch", "RNKuhns"] __all__ = [ "relative_loss", @@ -998,18 +1002,24 @@ def mean_squared_error( >>> mean_squared_error(y_true, y_pred, multioutput=[0.3, 0.7], square_root=True) 0.8936491673103708 """ - # Scikit-learn argument `squared` returns MSE when True and RMSE when False - # Scikit-time argument `square_root` returns RMSE when True and MSE when False - # Therefore need to pass the opposite of square_root as squared argument - # to the scikit-learn function being wrapped - squared = not square_root - return _mean_squared_error( - y_true, - y_pred, - sample_weight=horizon_weight, - multioutput=multioutput, - squared=squared, - ) + metric_args = (y_true, y_pred) + metric_kwargs = {"sample_weight": horizon_weight, "multioutput": multioutput} + + if not square_root: + metric_function = _mean_squared_error + elif sklearn.__version__ < "1.4.0": + # Scikit-learn argument `squared` returns MSE when True and RMSE when False + # Scikit-time argument `square_root` returns RMSE when True and MSE when False + # Therefore need to pass the opposite of square_root as squared argument + # to the scikit-learn function being wrapped + metric_function = _mean_squared_error + metric_kwargs["squared"] = False + else: + # sklearn 1.4 introduced ``root_mean_squared_error`` function, so we can + # use that directly to avoid DeprecationWarning from sklearn 1.6 + metric_function = _root_mean_squared_error + + return metric_function(*metric_args, **metric_kwargs) def median_absolute_error( diff --git a/sktime/performance_metrics/forecasting/probabilistic/_classes.py b/sktime/performance_metrics/forecasting/probabilistic/_classes.py index 2c6d07223c1..9208d73c318 100644 --- a/sktime/performance_metrics/forecasting/probabilistic/_classes.py +++ b/sktime/performance_metrics/forecasting/probabilistic/_classes.py @@ -119,16 +119,16 @@ def evaluate(self, y_true, y_pred, multioutput=None, **kwargs): if isinstance(multioutput, str): if self.score_average and multioutput == "uniform_average": - out = float(out.mean(axis=1).iloc[0]) # average over all + out = out.mean(axis=1).iloc[0] # average over all if self.score_average and multioutput == "raw_values": - out = out.groupby(axis=1, level=0).mean() # average over scores + out = out.T.groupby(level=0).mean().T # average over scores if not self.score_average and multioutput == "uniform_average": - out = out.groupby(axis=1, level=1).mean() # average over variables + out = out.T.groupby(level=1).mean().T # average over variables if not self.score_average and multioutput == "raw_values": out = out # don't average else: # is np.array with weights if self.score_average: - out_raw = out.groupby(axis=1, level=0).mean() + out_raw = out.T.groupby(level=0).mean().T out = out_raw.dot(multioutput)[0] else: out = _groupby_dot(out, multioutput) @@ -215,14 +215,14 @@ def evaluate_by_index(self, y_true, y_pred, multioutput=None, **kwargs): if self.score_average and multioutput == "uniform_average": out = out.mean(axis=1) # average over all if self.score_average and multioutput == "raw_values": - out = out.groupby(axis=1, level=0).mean() # average over scores + out = out.T.groupby(level=0).mean().T # average over scores if not self.score_average and multioutput == "uniform_average": - out = out.groupby(axis=1, level=1).mean() # average over variables + out = out.T.groupby(level=1).mean().T # average over variables if not self.score_average and multioutput == "raw_values": out = out # don't average else: # numpy array if self.score_average: - out_raw = out.groupby(axis=1, level=0).mean() + out_raw = out.T.groupby(level=0).mean().T out = out_raw.dot(multioutput) else: out = _groupby_dot(out, multioutput) diff --git a/sktime/performance_metrics/forecasting/tests/test_metrics.py b/sktime/performance_metrics/forecasting/tests/test_metrics.py index 1b2d07c4fcd..12f9709bf8f 100644 --- a/sktime/performance_metrics/forecasting/tests/test_metrics.py +++ b/sktime/performance_metrics/forecasting/tests/test_metrics.py @@ -129,3 +129,18 @@ def test_make_scorer(): scorer = make_forecasting_scorer(rmsle, name="RMSLE") scorer.evaluate(pd.Series([1, 2, 3]), pd.Series([1, 2, 4])) + + +def test_make_scorer_sklearn(): + """Test make_forecasting_scorer and the failure case in #5715. + + Naive adaptation fails on newer sklearn versions due to + decoration with sklearn's custom input constraint wrapper. + """ + from sklearn.metrics import mean_absolute_error + + from sktime.performance_metrics.forecasting import make_forecasting_scorer + + scorer = make_forecasting_scorer(mean_absolute_error, name="RMSLE") + + scorer.evaluate(pd.Series([1, 2, 3]), pd.Series([1, 2, 4])) diff --git a/sktime/performance_metrics/tests/test_metrics_classes.py b/sktime/performance_metrics/tests/test_metrics_classes.py index 1b5d8a6aeab..1eeda271982 100644 --- a/sktime/performance_metrics/tests/test_metrics_classes.py +++ b/sktime/performance_metrics/tests/test_metrics_classes.py @@ -14,6 +14,7 @@ from sktime.utils._testing.hierarchical import _make_hierarchical from sktime.utils._testing.panel import _make_panel from sktime.utils._testing.series import _make_series +from sktime.utils.parallel import _get_parallel_test_fixtures metric_classes = getmembers(_classes, isclass) @@ -24,6 +25,9 @@ MULTIOUTPUT = ["uniform_average", "raw_values", "numpy"] +# list of parallelization backends to test +BACKENDS = _get_parallel_test_fixtures("config") + @pytest.mark.parametrize("n_columns", [1, 2]) @pytest.mark.parametrize("multioutput", MULTIOUTPUT) @@ -76,12 +80,13 @@ def test_metric_output_direct(metric, multioutput, n_columns): assert np.allclose(res[1], res[2]) +@pytest.mark.parametrize("backend", BACKENDS) @pytest.mark.parametrize("n_columns", [1, 2]) @pytest.mark.parametrize( "multilevel", ["uniform_average", "uniform_average_time", "raw_values"] ) @pytest.mark.parametrize("multioutput", MULTIOUTPUT) -def test_metric_hierarchical(multioutput, multilevel, n_columns): +def test_metric_hierarchical(multioutput, multilevel, n_columns, backend): """Test hierarchical input for metrics.""" # create numpy weights based on n_columns if multioutput == "numpy": @@ -94,6 +99,7 @@ def test_metric_hierarchical(multioutput, multilevel, n_columns): y_true = _make_hierarchical(random_state=42, n_columns=n_columns) metric = MeanSquaredError(multioutput=multioutput, multilevel=multilevel) + metric.set_config(**backend) res = metric( y_true=y_true, @@ -183,10 +189,11 @@ def test_metric_output_by_instance(metric, multioutput, n_columns): assert (res.index == y_true.index).all() +@pytest.mark.parametrize("backend", BACKENDS) @pytest.mark.parametrize("n_columns", [1, 2]) @pytest.mark.parametrize("multilevel", ["uniform_average", "raw_values"]) @pytest.mark.parametrize("multioutput", MULTIOUTPUT) -def test_metric_hierarchical_by_index(multioutput, multilevel, n_columns): +def test_metric_hierarchical_by_index(multioutput, multilevel, n_columns, backend): """Test hierarchical input for metrics.""" # create numpy weights based on n_columns if multioutput == "numpy": @@ -199,6 +206,7 @@ def test_metric_hierarchical_by_index(multioutput, multilevel, n_columns): y_true = _make_hierarchical(random_state=42, n_columns=n_columns) metric = MeanSquaredError(multioutput=multioutput, multilevel=multilevel) + metric.set_config(**backend) res = metric.evaluate_by_index( y_true=y_true, diff --git a/sktime/pipeline/pipeline.py b/sktime/pipeline/pipeline.py index 94a243a8131..8f23b6cb009 100644 --- a/sktime/pipeline/pipeline.py +++ b/sktime/pipeline/pipeline.py @@ -1,6 +1,5 @@ """class that implements a graph pipeline.""" import warnings -import weakref from copy import copy, deepcopy from sktime.base import BaseEstimator @@ -183,6 +182,17 @@ def __init__(self, steps=None): self.steps = steps self._steps = steps if steps is not None else [] + object_types = [step["skobject"].get_tag("object_type") for step in self._steps] + if len(set(object_types)) == 1: + self.set_tags(**{"object_type": object_types[0]}) + elif len(set(object_types) - {"transformer"}) == 1: + self.set_tags( + **{"object_type": list(set(object_types) - {"transformer"})[0]} + ) + else: + # Mixture of different object types + pass + for step_information in self._steps: if "method" not in step_information: step_information["method"] = None @@ -194,12 +204,10 @@ def _get_unique_id(self, skobject): self.counter += 1 # Check if not already an skobject cloned from the provided # skobject is part of the pipeline - if (id(skobject) not in self.id_to_obj) or self.id_to_obj[ - id(skobject) - ]() is None: - # In this case set a weakref of that skobject to id_to_obj to prevent that + if id(skobject) not in self.id_to_obj: + # In this case store that skobject to id_to_obj to prevent that # the garbage collector reassigns the id. - self.id_to_obj[id(skobject)] = weakref.ref(skobject) + self.id_to_obj[id(skobject)] = skobject self.id_to_true_id[id(skobject)] = self.counter return self.id_to_true_id[id(skobject)] @@ -358,7 +366,7 @@ def _assemble_steps(self): self._last_step_name = name self._assembled = True - def fit(self, X, y=None, **kwargs): + def fit(self, X=None, y=None, **kwargs): """Fit graph pipeline to training data. Parameters @@ -377,6 +385,8 @@ def fit(self, X, y=None, **kwargs): """ self._assembled = False self._initiate_call(X, y, kwargs) + + assert (X is not None) or (y is not None), "Either X or y must be provided." self._y = y self._X = X @@ -452,7 +462,7 @@ def transform(self, X, y=None, **kwargs): .result ) - def predict(self, X, y=None, **kwargs): + def predict(self, X=None, y=None, **kwargs): """Perform a prediction. I.e. calls predict or transform on each element in the graph pipeline. @@ -588,10 +598,18 @@ def predict_residuals(self, X, y=None, **kwargs): def _initiate_call(self, X, y, kwargs): if not self._assembled: self._assemble_steps() - for step in self.assembled_steps.values(): - step.reset() - self.assembled_steps["X"].buffer = X - self.assembled_steps["y"].buffer = y + for key, step in self.assembled_steps.items(): + # Empty the buffer of all steps except for the dummy + # steps X and y (input steps) + if key in ["X", "y"]: + step.reset(reset_buffer=False) + else: + step.reset() + # Overwrite the buffer of X and y if data are provided + if X is not None: + self.assembled_steps["X"].buffer = X + if y is not None: + self.assembled_steps["y"].buffer = y self.kwargs.update(kwargs) def _method_allowed(self, method): @@ -620,3 +638,72 @@ def _create_subsetter(self, edg): params={}, ) self.assembled_steps[edg] = step + + @classmethod + def get_test_params(cls, parameter_set="default"): + """Return testing parameter settings for the estimator. + + Parameters + ---------- + parameter_set : str, default="default" + Name of the set of test parameters to return, for use in tests. If no + special parameters are defined for a value, will return `"default"` set. + There are currently no reserved values for forecasters. + + Returns + ------- + params : dict or list of dict, default = {} + Parameters to create testing instances of the class + Each dict are parameters to construct an "interesting" test instance, i.e., + `MyClass(**params)` or `MyClass(**params[i])` creates a valid test instance. + `create_test_instance` uses the first (or only) dictionary in `params` + """ + from sktime.classification.distance_based import KNeighborsTimeSeriesClassifier + from sktime.forecasting.naive import NaiveForecaster + from sktime.transformations.series.boxcox import BoxCoxTransformer + from sktime.transformations.series.exponent import ExponentTransformer + + return [ + { + "steps": [ + { + "skobject": ExponentTransformer(), + "name": "exp", + "edges": {"X": "X"}, + }, + { + "skobject": BoxCoxTransformer(), + "name": "box", + "edges": {"X": "exp"}, + }, + ] + }, + { + "steps": [ + { + "skobject": ExponentTransformer(), + "name": "exp", + "edges": {"X": "X"}, + }, + { + "skobject": KNeighborsTimeSeriesClassifier(), + "name": "knnclassifier", + "edges": {"X": "exp", "y": "y"}, + }, + ] + }, + { + "steps": [ + { + "skobject": ExponentTransformer(), + "name": "exp", + "edges": {"X": "y"}, + }, + { + "skobject": NaiveForecaster(), + "name": "naive", + "edges": {"X": "exp", "y": "y"}, + }, + ] + }, + ] diff --git a/sktime/pipeline/step.py b/sktime/pipeline/step.py index 0810d422db0..01d2274f075 100644 --- a/sktime/pipeline/step.py +++ b/sktime/pipeline/step.py @@ -73,9 +73,10 @@ def __init__( self.input_edges = input_edges self.params = params - def reset(self): + def reset(self, reset_buffer=True): """Reset the step.""" - self.buffer = None + if reset_buffer: + self.buffer = None self.mode = "" def get_allowed_method(self): @@ -212,7 +213,7 @@ def _fetch_input_data(self, fit, required_method, mro, kwargs): self.mode = result.mode if result.result is not None: all_none = False - if len(results) != 0 and not results[0] is None: + if len(results) != 0 and results[0] is not None: if len(results) > 1: input_data[step_name] = pd.concat( results, axis=1, keys=transformer_names diff --git a/sktime/pipeline/tests/regression_tests/test_pipeline_regression.py b/sktime/pipeline/tests/regression_tests/test_pipeline_regression.py index e32b398d1ce..c555139edd8 100644 --- a/sktime/pipeline/tests/regression_tests/test_pipeline_regression.py +++ b/sktime/pipeline/tests/regression_tests/test_pipeline_regression.py @@ -2,10 +2,11 @@ import pandas as pd import pytest from skbase.utils.dependencies import _check_soft_dependencies +from sklearn.linear_model import Ridge from sktime.classification.dummy import DummyClassifier -from sktime.datasets import load_arrow_head, load_longley -from sktime.forecasting.compose import ForecastX +from sktime.datasets import load_airline, load_arrow_head, load_longley +from sktime.forecasting.compose import ForecastX, make_reduction from sktime.forecasting.naive import NaiveForecaster from sktime.forecasting.sarimax import SARIMAX from sktime.pipeline.pipeline import Pipeline @@ -317,3 +318,20 @@ def test_forecasterX_regression(): general_pipeline.fit(y=y, X=X, fh=[1, 2, 3]) result_general = general_pipeline.predict(None, None) pd.testing.assert_series_equal(result, result_general) + + +def test_lagged_y_prediction(): + # regression test for issue 5830 + y = load_airline() + y_train, y_test = temporal_train_test_split(y) + + forecaster = make_reduction(Ridge(), window_length=12, strategy="recursive") + + pipe = Pipeline() + pipe = pipe.add_step(Differencer(lags=[1, 3]), "differencer", edges={"X": "y"}) + pipe = pipe.add_step( + forecaster, name="forecaster", edges={"X": "differencer", "y": "y"} + ) + pipe.fit(y=y_train) + y_pred = pipe.predict(fh=y_test.index) + assert y_pred.shape == y_test.shape diff --git a/sktime/proba/base.py b/sktime/proba/base.py index 11ca9f97313..06d7550e9f6 100644 --- a/sktime/proba/base.py +++ b/sktime/proba/base.py @@ -11,6 +11,7 @@ import pandas as pd from sktime.base import BaseObject +from sktime.utils.pandas import df_map from sktime.utils.validation._dependencies import _check_estimator_deps @@ -19,15 +20,23 @@ class BaseDistribution(BaseObject): # default tag values - these typically make the "safest" assumption _tags = { + # packaging info + # -------------- + "authors": "sktime developers", # author(s) of the object + "maintainers": "sktime developers", # current maintainer(s) of the object + "python_version": None, # PEP 440 python version specifier to limit versions + "python_dependencies": None, # str or list of str, package soft dependencies + # estimator type + # -------------- "object_type": "distribution", # type of object, e.g., 'distribution' "python_version": None, # PEP 440 python version specifier to limit versions - "python_dependencies": None, # string or str list of pkg soft dependencies "reserved_params": ["index", "columns"], "capabilities:approx": ["energy", "mean", "var", "pdfnorm"], "approx_mean_spl": 1000, # sample size used in MC estimates of mean "approx_var_spl": 1000, # sample size used in MC estimates of var "approx_energy_spl": 1000, # sample size used in MC estimates of energy "approx_spl": 1000, # sample size used in other MC estimates + "bisect_iter": 1000, # max iters for bisection method in ppf } def __init__(self, index=None, columns=None): @@ -41,15 +50,15 @@ def __init__(self, index=None, columns=None): def loc(self): """Location indexer. - Use `my_distribution.loc[index]` for `pandas`-like row/column subsetting of - `BaseDistribution` descendants. + Use `my_distribution.loc[index]` for `pandas`-like row/column subsetting + of `BaseDistribution` descendants. `index` can be any `pandas` `loc` compatible index subsetter. `my_distribution.loc[index]` or `my_distribution.loc[row_index, col_index]` - subset `my_distribution` to rows defined by `row_index`, cols by `col_index`, to - exactly the same/cols rows as `pandas` `loc` would subset rows in - `my_distribution.index` and columns in `my_distribution.columns`. + subset `my_distribution` to rows defined by `row_index`, cols by `col_index`, + to exactly the same/cols rows as `pandas` `loc` would subset + rows in `my_distribution.index` and columns in `my_distribution.columns`. """ return _Indexer(ref=self, method="_loc") @@ -57,15 +66,15 @@ def loc(self): def iloc(self): """Integer location indexer. - Use `my_distribution.iloc[index]` for `pandas`-like row/column subsetting of - `BaseDistribution` descendants. + Use `my_distribution.iloc[index]` for `pandas`-like row/column subsetting + of `BaseDistribution` descendants. `index` can be any `pandas` `iloc` compatible index subsetter. `my_distribution.iloc[index]` or `my_distribution.iloc[row_index, col_index]` - subset `my_distribution` to rows defined by `row_index`, cols by `col_index`, to - exactly the same/cols rows as `pandas` `iloc` would subset rows in - `my_distribution.index` and columns in `my_distribution.columns`. + subset `my_distribution` to rows defined by `row_index`, cols by `col_index`, + to exactly the same/cols rows as `pandas` `iloc` would subset + rows in `my_distribution.index` and columns in `my_distribution.columns`. """ return _Indexer(ref=self, method="_iloc") @@ -90,9 +99,12 @@ def _subset_params(self, rowidx, colidx): subset_param_dict = {} for param, val in params.items(): - arr = np.array(val) - if len(arr.shape) == 0: - subset_param_dict + if val is not None: + arr = np.array(val) + else: + arr = None + # if len(arr.shape) == 0: + # do nothing with arr if len(arr.shape) >= 1 and rowidx is not None: arr = arr[rowidx] if len(arr.shape) >= 2 and colidx is not None: @@ -225,7 +237,7 @@ def pdf(self, x): "this may be numerically unstable" ) warn(self._method_error_msg("pdf", fill_in=approx_method)) - return self.log_pdf(x=x).applymap(np.exp) + return df_map(self.log_pdf(x=x))(np.exp) raise NotImplementedError(self._method_error_msg("pdf", "error")) @@ -265,7 +277,7 @@ def log_pdf(self, x): ) warn(self._method_error_msg("log_pdf", fill_in=approx_method)) - return self.pdf(x=x).applymap(np.log) + return df_map(self.pdf(x=x))(np.log) raise NotImplementedError(self._method_error_msg("log_pdf", "error")) @@ -282,10 +294,42 @@ def cdf(self, x): spl = self.sample(N) ind = splx <= spl - return ind.groupby(level=1).mean() + return ind.groupby(level=1, sort=False).mean() def ppf(self, p): """Quantile function = percent point function = inverse cdf.""" + if self._has_implementation_of("cdf"): + from scipy.optimize import bisect + + max_iter = self.get_tag("bisect_iter") + approx_method = ( + "by using the bisection method (scipy.optimize.bisect) on " + f"the cdf, at {max_iter} maximum iterations" + ) + warn(self._method_error_msg("cdf", fill_in=approx_method)) + + result = pd.DataFrame(index=p.index, columns=p.columns, dtype="float") + for ix in p.index: + for col in p.columns: + d_ix = self.loc[[ix], [col]] + p_ix = p.loc[ix, col] + + def opt_fun(x): + """Optimization function, to find x s.t. cdf(x) = p_ix.""" + x = pd.DataFrame(x, index=[ix], columns=[col]) # noqa: B023 + return d_ix.cdf(x).values[0][0] - p_ix # noqa: B023 + + left_bd = -1e6 + right_bd = 1e6 + while opt_fun(left_bd) > 0: + left_bd *= 10 + while opt_fun(right_bd) < 0: + right_bd *= 10 + result.loc[ix, col] = bisect( + opt_fun, left_bd, right_bd, maxiter=max_iter + ) + return result + raise NotImplementedError(self._method_error_msg("ppf", "error")) def energy(self, x=None): @@ -330,7 +374,8 @@ def energy(self, x=None): # approx E[abs(X-Y)] via mean of samples of abs(X-Y) obtained from splx, sply spl = splx - sply - energy = spl.apply(np.linalg.norm, axis=1, ord=1).groupby(level=1).mean() + energy = spl.apply(np.linalg.norm, axis=1, ord=1) + energy = energy.groupby(level=1, sort=False).mean() energy = pd.DataFrame(energy, index=self.index, columns=["energy"]) return energy @@ -353,7 +398,7 @@ def mean(self): warn(self._method_error_msg("mean", fill_in=approx_method)) spl = self.sample(approx_spl_size) - return spl.groupby(level=1).mean() + return spl.groupby(level=1, sort=False).mean() def var(self): r"""Return element/entry-wise variance of the distribution. @@ -376,10 +421,10 @@ def var(self): spl1 = self.sample(approx_spl_size) spl2 = self.sample(approx_spl_size) spl = (spl1 - spl2) ** 2 - return spl.groupby(level=1).mean() + return spl.groupby(level=1, sort=False).mean() def pdfnorm(self, a=2): - r"""A-norm of pdf, defaults to 2-norm. + r"""a-norm of pdf, defaults to 2-norm. computes a-norm of the entry marginal pdf, i.e., :math:`\mathbb{E}[p_X(X)^{a-1}] = \int p(x)^a dx`, @@ -408,7 +453,7 @@ def pdfnorm(self, a=2): # uses formula int p(x)^a dx = E[p(X)^{a-1}], and MC approximates the RHS spl = [self.pdf(self.sample()) ** (a - 1) for _ in range(approx_spl_size)] - return pd.concat(spl, axis=0).groupby(level=1).mean() + return pd.concat(spl, axis=0).groupby(level=1, sort=False).mean() def _coerce_to_self_index_df(self, x): x = np.array(x) @@ -430,7 +475,7 @@ def quantile(self, alpha): The `ppf` method also computes quantiles, but broadcasts differently, in `numpy` style closer to `tensorflow`. In contrast, this `quantile` method broadcasts - as forecaster `predict_quantiles`, i.e., columns first. + as ``sktime`` forecaster `predict_quantiles`, i.e., columns first. Parameters ---------- diff --git a/sktime/proba/empirical.py b/sktime/proba/empirical.py index b19ab5f0e6d..dfb20a141ab 100644 --- a/sktime/proba/empirical.py +++ b/sktime/proba/empirical.py @@ -16,12 +16,12 @@ class Empirical(BaseDistribution): ---------- spl : pd.DataFrame with pd.MultiIndex empirical sample - last (highest) index is time, first (lowest) index is sample + last (highest) index is instance, first (lowest) index is sample weights : pd.Series, with same index and length as spl, optional, default=None if not passed, ``spl`` is assumed to be unweighted time_indep : bool, optional, default=True - if True, ``sample`` will sample individual time indices independently - if False, ``sample`` will sample etire instances from ``spl`` + if True, ``sample`` will sample individual instance indices independently + if False, ``sample`` will sample entire instances from ``spl`` index : pd.Index, optional, default = RangeIndex columns : pd.Index, optional, default = RangeIndex @@ -43,6 +43,7 @@ class Empirical(BaseDistribution): """ _tags = { + "authors": ["fkiraly"], "capabilities:approx": [], "capabilities:exact": ["mean", "var", "energy", "cdf", "ppf"], "distr:measuretype": "discrete", @@ -79,7 +80,6 @@ def _init_sorted(self): sorted = {} weights = {} - weights for t in times: sorted[t] = {} weights[t] = {} @@ -124,8 +124,42 @@ def _apply_per_ix(self, func, params, x=None): x_t = x.loc[ix, col] else: x_t = x - res.loc[ix, col] = func(spl=spl_t, weights=weights_t, x=x_t, **params) - return res.convert_dtypes() + res.at[ix, col] = func(spl=spl_t, weights=weights_t, x=x_t, **params) + return res.apply(pd.to_numeric) + + def _iloc(self, rowidx=None, colidx=None): + index = self.index + columns = self.columns + weights = self.weights + + spl_subset = self.spl + + if rowidx is not None: + rowidx_loc = index[rowidx] + # subset multiindex to rowidx by last level + spl_subset = self.spl.loc[(slice(None), rowidx_loc), :] + if weights is not None: + weights_subset = weights.loc[(slice(None), rowidx_loc)] + else: + weights_subset = None + subs_rowidx = index[rowidx] + else: + subs_rowidx = index + weights_subset = weights + + if colidx is not None: + spl_subset = spl_subset.iloc[:, colidx] + subs_colidx = columns[colidx] + else: + subs_colidx = columns + + return Empirical( + spl_subset, + weights=weights_subset, + time_indep=self.time_indep, + index=subs_rowidx, + columns=subs_colidx, + ) def energy(self, x=None): r"""Energy of self, w.r.t. self or a constant frame x. @@ -162,9 +196,9 @@ def mean(self): """ spl = self.spl if self.weights is None: - mean_df = spl.groupby(level=-1).mean() + mean_df = spl.groupby(level=-1, sort=False).mean() else: - mean_df = spl.groupby(level=-1).apply( + mean_df = spl.groupby(level=-1, sort=False).apply( lambda x: np.average(x, weights=self.weights.loc[x.index], axis=0) ) mean_df = pd.DataFrame(mean_df.tolist(), index=mean_df.index) @@ -186,11 +220,11 @@ def var(self): spl = self.spl N = self._N if self.weights is None: - var_df = spl.groupby(level=-1).var(ddof=0) + var_df = spl.groupby(level=-1, sort=False).var(ddof=0) else: mean = self.mean() means = pd.concat([mean] * N, axis=0, keys=self._spl_instances) - var_df = spl.groupby(level=-1).apply( + var_df = spl.groupby(level=-1, sort=False).apply( lambda x: np.average( (x - means.loc[x.index]) ** 2, weights=self.weights.loc[x.index], @@ -334,7 +368,7 @@ def _energy_np(spl, x=None, weights=None, assume_sorted=False): if x is None: cum_fwd = np.cumsum(weights[:-1]) - cum_back = np.cumsum(weights[1::-1])[::-1] + cum_back = np.cumsum(weights[1:][::-1])[::-1] energy = 2 * np.sum(cum_fwd * cum_back * spl_diff) else: spl_diff = np.abs(spl - x) diff --git a/sktime/proba/laplace.py b/sktime/proba/laplace.py index 8f8345c6162..11158d7abc1 100644 --- a/sktime/proba/laplace.py +++ b/sktime/proba/laplace.py @@ -29,6 +29,7 @@ class Laplace(BaseDistribution): """ _tags = { + "authors": ["fkiraly"], "capabilities:approx": ["pdfnorm"], "capabilities:exact": ["mean", "var", "energy", "pdf", "log_pdf", "cdf", "ppf"], "distr:measuretype": "continuous", @@ -43,7 +44,7 @@ def __init__(self, mu, scale, index=None, columns=None): # todo: untangle index handling # and broadcast of parameters. # move this functionality to the base class - self._mu, self._scale = self._get_bc_params() + self._mu, self._scale = self._get_bc_params(self.mu, self.scale) shape = self._mu.shape if index is None: @@ -54,16 +55,6 @@ def __init__(self, mu, scale, index=None, columns=None): super().__init__(index=index, columns=columns) - def _get_bc_params(self): - """Fully broadcast parameters of self, given param shapes and index, columns.""" - to_broadcast = [self.mu, self.scale] - if hasattr(self, "index") and self.index is not None: - to_broadcast += [self.index.to_numpy().reshape(-1, 1)] - if hasattr(self, "columns") and self.columns is not None: - to_broadcast += [self.columns.to_numpy()] - bc = np.broadcast_arrays(*to_broadcast) - return bc[0], bc[1] - def energy(self, x=None): r"""Energy of self, w.r.t. self or a constant frame x. diff --git a/sktime/proba/normal.py b/sktime/proba/normal.py index 6de59c06e33..8edbc5ac0a8 100644 --- a/sktime/proba/normal.py +++ b/sktime/proba/normal.py @@ -30,6 +30,7 @@ class Normal(BaseDistribution): """ _tags = { + "authors": ["fkiraly"], "capabilities:approx": ["pdfnorm"], "capabilities:exact": ["mean", "var", "energy", "pdf", "log_pdf", "cdf", "ppf"], "distr:measuretype": "continuous", @@ -44,7 +45,6 @@ def __init__(self, mu, sigma, index=None, columns=None): # todo: untangle index handling # and broadcast of parameters. # move this functionality to the base class - # 0.19.0? self._mu, self._sigma = self._get_bc_params(self.mu, self.sigma) shape = self._mu.shape diff --git a/sktime/proba/t.py b/sktime/proba/t.py index e273cbeecf6..c290f553fc3 100644 --- a/sktime/proba/t.py +++ b/sktime/proba/t.py @@ -32,6 +32,8 @@ class TDistribution(BaseDistribution): """ _tags = { + "authors": ["Alex-JG3"], + "maintainers": ["Alex-JG3"], "capabilities:approx": ["pdfnorm", "energy"], "capabilities:exact": ["mean", "var", "pdf", "log_pdf", "cdf", "ppf"], "distr:measuretype": "continuous", diff --git a/sktime/proba/tests/test_all_distrs.py b/sktime/proba/tests/test_all_distrs.py index 5808d9d2cbc..0a12fc98c70 100644 --- a/sktime/proba/tests/test_all_distrs.py +++ b/sktime/proba/tests/test_all_distrs.py @@ -1,7 +1,7 @@ # copyright: sktime developers, BSD-3-Clause License (see LICENSE file) """Tests for BaseDistribution API points.""" -__author__ = ["fkiraly"] +__author__ = ["fkiraly", "Alex-JG3"] import numpy as np import pandas as pd @@ -9,6 +9,7 @@ from sktime.datatypes import check_is_mtype from sktime.tests.test_all_estimators import BaseFixtureGenerator, QuickTester +from sktime.utils.index import random_ss_ix class DistributionFixtureGenerator(BaseFixtureGenerator): @@ -54,12 +55,16 @@ def _has_capability(distr, method): class TestAllDistributions(DistributionFixtureGenerator, QuickTester): - """Module level tests for all sktime parameter fitters.""" + """Module level tests for all sktime distributions.""" - def test_sample(self, estimator_instance): + @pytest.mark.parametrize("shuffled", [False, True]) + def test_sample(self, estimator_instance, shuffled): """Test sample expected return.""" d = estimator_instance + if shuffled: + d = _shuffle_distr(d) + res = d.sample() assert d.shape == res.shape @@ -72,36 +77,50 @@ def test_sample(self, estimator_instance): assert (res_panel.index == dummy_panel.index).all() assert (res_panel.columns == dummy_panel.columns).all() + @pytest.mark.parametrize("shuffled", [False, True]) @pytest.mark.parametrize("method", METHODS_SCALAR, ids=METHODS_SCALAR) - def test_methods_scalar(self, estimator_instance, method): + def test_methods_scalar(self, estimator_instance, method, shuffled): """Test expected return of scalar methods.""" if not _has_capability(estimator_instance, method): return None d = estimator_instance - res = getattr(estimator_instance, method)() + if shuffled: + d = _shuffle_distr(d) + + res = getattr(d, method)() _check_output_format(res, d, method) + @pytest.mark.parametrize("shuffled", [False, True]) @pytest.mark.parametrize("method", METHODS_X, ids=METHODS_X) - def test_methods_x(self, estimator_instance, method): + def test_methods_x(self, estimator_instance, method, shuffled): """Test expected return of methods that take sample-like argument.""" if not _has_capability(estimator_instance, method): return None d = estimator_instance + + if shuffled: + d = _shuffle_distr(d) + x = d.sample() - res = getattr(estimator_instance, method)(x) + res = getattr(d, method)(x) _check_output_format(res, d, method) + @pytest.mark.parametrize("shuffled", [False, True]) @pytest.mark.parametrize("method", METHODS_P, ids=METHODS_P) - def test_methods_p(self, estimator_instance, method): + def test_methods_p(self, estimator_instance, method, shuffled): """Test expected return of methods that take percentage-like argument.""" if not _has_capability(estimator_instance, method): return None d = estimator_instance + + if shuffled: + d = _shuffle_distr(d) + np_unif = np.random.uniform(size=d.shape) p = pd.DataFrame(np_unif, index=d.index, columns=d.columns) res = getattr(estimator_instance, method)(p) @@ -117,7 +136,9 @@ def test_quantile(self, estimator_instance, q): d = estimator_instance def _check_quantile_output(obj, q): - assert check_is_mtype(obj, "pred_quantiles", "Proba") + assert check_is_mtype( + obj, "pred_quantiles", "Proba", msg_return_dict="list" + ) assert (obj.index == d.index).all() if not isinstance(q, list): @@ -128,6 +149,63 @@ def _check_quantile_output(obj, q): res = d.quantile(q) _check_quantile_output(res, q) + @pytest.mark.parametrize("subset_row", [True, False]) + @pytest.mark.parametrize("subset_col", [True, False]) + def test_subsetting(self, estimator_instance, subset_row, subset_col): + """Test subsetting of distribution.""" + d = estimator_instance + + if subset_row: + ix_loc = random_ss_ix(d.index, 3) + ix_iloc = d.index.get_indexer(ix_loc) + else: + ix_loc = d.index + ix_iloc = pd.RangeIndex(len(d.index)) + + if subset_col: + iy_loc = random_ss_ix(d.columns, 1) + iy_iloc = d.columns.get_indexer(iy_loc) + else: + iy_loc = d.columns + iy_iloc = pd.RangeIndex(len(d.columns)) + + res_loc = d.loc[ix_loc, iy_loc] + + assert isinstance(res_loc, type(d)) + assert res_loc.shape == (len(ix_loc), len(iy_loc)) + assert (res_loc.index == ix_loc).all() + assert (res_loc.columns == iy_loc).all() + + res_iloc = d.iloc[ix_iloc, iy_iloc] + + assert isinstance(res_iloc, type(d)) + assert res_iloc.shape == (len(ix_iloc), len(iy_iloc)) + assert (res_iloc.index == ix_loc).all() + assert (res_iloc.columns == iy_loc).all() + + def test_log_pdf_and_pdf(self, estimator_instance): + """Test that the log of the pdf and log_pdf function are similar.""" + d = estimator_instance + capabilities_exact = d.get_tags()["capabilities:exact"] + + if "log_pdf" not in capabilities_exact or "pdf" not in capabilities_exact: + return + x = d.sample() + pdf = d.pdf(x) + log_pdf = d.log_pdf(x) + assert np.allclose(np.log(pdf), log_pdf) + + def test_ppf_and_cdf(self, estimator_instance): + """Test that the ppf is the inverse of the cdf.""" + d = estimator_instance + capabilities_exact = d.get_tags()["capabilities:exact"] + + if "ppf" not in capabilities_exact or "cdf" not in capabilities_exact: + return + x = d.sample() + x_approx = d.ppf(d.cdf(x)) + assert np.allclose(x.values, x_approx.values) + def _check_output_format(res, dist, method): """Check output format expectations for BaseDistribution tests.""" @@ -142,3 +220,16 @@ def _check_output_format(res, dist, method): if method in METHODS_SCALAR_POS or method in METHODS_X_POS: assert (res >= 0).all().all() + + if isinstance(res, pd.DataFrame): + assert res.apply(pd.api.types.is_numeric_dtype).all() + elif isinstance(res, pd.Series): + assert pd.api.types.is_numeric_dtype(res) + else: + raise TypeError("res must be a pandas DataFrame or Series.") + + +def _shuffle_distr(d): + """Shuffle distribution row index.""" + shuffled_index = pd.DataFrame(d.index).sample(frac=1).index + return d.loc[shuffled_index] diff --git a/sktime/proba/tfp.py b/sktime/proba/tfp.py index 3d363a2d236..9b3674c544a 100644 --- a/sktime/proba/tfp.py +++ b/sktime/proba/tfp.py @@ -30,6 +30,7 @@ class TFNormal(_BaseTFDistribution): """ _tags = { + "authors": ["fkiraly"], "python_dependencies": "tensorflow_probability", "capabilities:approx": ["pdfnorm"], "capabilities:exact": ["mean", "var", "energy", "pdf", "log_pdf", "cdf"], diff --git a/sktime/registry/_lookup.py b/sktime/registry/_lookup.py index aa0694c3c86..6c484d32096 100644 --- a/sktime/registry/_lookup.py +++ b/sktime/registry/_lookup.py @@ -62,35 +62,46 @@ def all_estimators( Which kind of estimators should be returned. if None, no filter is applied and all estimators are returned. if str or list of str, strings define scitypes specified in search - only estimators that are of (at least) one of the scitypes are returned - possible str values are entries of registry.BASE_CLASS_REGISTER (first col) - for instance 'classifier', 'regressor', 'transformer', 'forecaster' + only estimators that are of (at least) one of the scitypes are returned + possible str values are entries of registry.BASE_CLASS_REGISTER (first col) + for instance 'classifier', 'regressor', 'transformer', 'forecaster' + return_names: bool, optional (default=True) - if True, estimator class name is included in the all_estimators() - return in the order: name, estimator class, optional tags, either as - a tuple or as pandas.DataFrame columns - if False, estimator class name is removed from the all_estimators() - return. + + if True, estimator class name is included in the ``all_estimators`` + return in the order: name, estimator class, optional tags, either as + a tuple or as pandas.DataFrame columns + + if False, estimator class name is removed from the ``all_estimators`` return. + filter_tags: dict of (str or list of str), optional (default=None) For a list of valid tag strings, use the registry.all_tags utility. - subsets the returned estimators as follows: - each key/value pair is statement in "and"/conjunction - key is tag name to sub-set on - value str or list of string are tag values - condition is "key must be equal to value, or in set(value)" + + ``filter_tags`` subsets the returned estimators as follows: + + * each key/value pair is statement in "and"/conjunction + * key is tag name to sub-set on + * value str or list of string are tag values + * condition is "key must be equal to value, or in set(value)" + exclude_estimators: str, list of str, optional (default=None) Names of estimators to exclude. + as_dataframe: bool, optional (default=False) - if True, all_estimators will return a pandas.DataFrame with named - columns for all of the attributes being returned. - if False, all_estimators will return a list (either a list of - estimators or a list of tuples, see Returns) + + True: ``all_estimators`` will return a pandas.DataFrame with named + columns for all of the attributes being returned. + + False: ``all_estimators`` will return a list (either a list of + estimators or a list of tuples, see Returns) + return_tags: str or list of str, optional (default=None) Names of tags to fetch and return each estimator's value of. For a list of valid tag strings, use the registry.all_tags utility. if str or list of str, - the tag values named in return_tags will be fetched for each - estimator and will be appended as either columns or tuple entries. + the tag values named in return_tags will be fetched for each + estimator and will be appended as either columns or tuple entries. + suppress_import_stdout : bool, optional. Default=True whether to suppress stdout printout upon import. @@ -99,7 +110,7 @@ def all_estimators( all_estimators will return one of the following: 1. list of estimators, if return_names=False, and return_tags is None 2. list of tuples (optional estimator name, class, ~optional estimator - tags), if return_names=True or return_tags is not None. + tags), if return_names=True or return_tags is not None. 3. pandas.DataFrame if as_dataframe = True if list of estimators: entries are estimators matching the query, @@ -109,10 +120,10 @@ def all_estimators( tags) matching the query, in alphabetical order of estimator name, where ``name`` is the estimator name as string, and is an - optional return + optional return ``estimator`` is the actual estimator ``tags`` are the estimator's values for each tag in return_tags - and is an optional return. + and is an optional return. if dataframe: all_estimators will return a pandas.DataFrame. column names represent the attributes contained in each column. @@ -271,15 +282,16 @@ def all_tags( Parameters ---------- estimator_types: string, list of string, optional (default=None) - Which kind of estimators should be returned. - - If None, no filter is applied and all estimators are returned. + Ta gs for hich kind of estimators should be returned. + + - If None, no filter is applied and tags for all estimators are returned. - Possible values are 'classifier', 'regressor', 'transformer' and - 'forecaster' to get estimators only of these specific types, or a list of - these to get the estimators that fit at least one of the types. + 'forecaster' to get estimator tags only fo these specific types, or a list of + these to get tags for estimators that fit at least one of the types. + as_dataframe: bool, optional (default=False) - if False, return is as described below; - if True, return is converted into a pandas.DataFrame for pretty - display + if False, return is as described below; + if True, return is converted into a pandas.DataFrame for pretty display Returns ------- @@ -287,14 +299,16 @@ def all_tags( in alphabetical order by a a : string - name of the tag as used in the _tags dictionary b : string - name of the scitype this tag applies to - must be in _base_classes.BASE_CLASS_SCITYPE_LIST + must be in _base_classes.BASE_CLASS_SCITYPE_LIST c : string - expected type of the tag value - should be one of: - "bool" - valid values are True/False - "int" - valid values are all integers - "str" - valid values are all strings - ("str", list_of_string) - any string in list_of_string is valid - ("list", list_of_string) - any individual string and sub-list is valid + should be one of: + + * ``"bool"`` - valid values are True/False + * ``"int"`` - valid values are all integers + * ``"str"`` - valid values are all strings + * ``("str", "list_of_string")`` - any string in ``list_of_string`` is valid + * ``("list", "list_of_string")`` - any string element or sub-list is valid + d : string - plain English description of the tag """ @@ -305,6 +319,10 @@ def is_tag_for_type(tag, estimator_types): if isinstance(estimator_types, str): estimator_types = [estimator_types] + # also retrieve all tags for topmost base classes + # "estimator" has also been used for object tags, so is always included + estimator_types += ["estimator", "object"] + tag_types = set(tag_types) estimator_types = set(estimator_types) is_valid_tag_for_type = len(tag_types.intersection(estimator_types)) > 0 diff --git a/sktime/registry/_scitype.py b/sktime/registry/_scitype.py index 054dcd4babe..b140ccde848 100644 --- a/sktime/registry/_scitype.py +++ b/sktime/registry/_scitype.py @@ -7,7 +7,9 @@ from sktime.registry._base_classes import BASE_CLASS_REGISTER -def scitype(obj, force_single_scitype=True, coerce_to_list=False): +def scitype( + obj, force_single_scitype=True, coerce_to_list=False, raise_on_unknown=True +): """Determine scitype string of obj. Parameters @@ -18,15 +20,19 @@ def scitype(obj, force_single_scitype=True, coerce_to_list=False): if True, only the *first* scitype found will be returned order is determined by the order in BASE_CLASS_REGISTER coerce_to_list : bool, optional, default = False - whether return should be coerced to list, even if only one scitype is identified + determines the return type: if True, returns a single str, + if False, returns a list of str + raise_on_unknown : bool, optional, default = True + if True, raises an error if no scitype can be determined for obj + if False, returns "object" scitype Returns ------- scitype : str, or list of str of sktime scitype strings from BASE_CLASS_REGISTER str, sktime scitype string, if exactly one scitype can be determined for obj - or force_single_scitype is True, and if coerce_to_list is False + or force_single_scitype is True, and if coerce_to_list is False list of str, of scitype strings, if more than one scitype are determined, - or if coerce_to_list is True + or if coerce_to_list is True obj has scitype if it inherits from class in same row of BASE_CLASS_REGISTER Raises @@ -40,7 +46,14 @@ def scitype(obj, force_single_scitype=True, coerce_to_list=False): else: tag_type = obj.get_tag("object_type", None, raise_error=False) if tag_type is not None: - return tag_type + if not isinstance(tag_type, list): + tag_type = [tag_type] + if force_single_scitype and len(tag_type) > 1: + tag_type = [tag_type[0]] + if coerce_to_list: + return tag_type + else: + return tag_type[0] # if the tag is not present, determine scitype from legacy base class logic if isclass(obj): @@ -49,7 +62,10 @@ def scitype(obj, force_single_scitype=True, coerce_to_list=False): scitypes = [sci[0] for sci in BASE_CLASS_REGISTER if isinstance(obj, sci[1])] if len(scitypes) == 0: - raise TypeError("Error, no scitype could be determined for obj") + if raise_on_unknown: + raise TypeError("Error, no scitype could be determined for obj") + else: + scitypes = ["object"] if len(scitypes) > 1 and "object" in scitypes: scitypes = list(set(scitypes).difference(["object"])) diff --git a/sktime/registry/_tags.py b/sktime/registry/_tags.py index 512204ca180..99dab113665 100644 --- a/sktime/registry/_tags.py +++ b/sktime/registry/_tags.py @@ -135,7 +135,7 @@ [ "pd.Series", "pd.DataFrame", - "np.array", + "np.ndarray", "nested_univ", "pd-multiindex", "numpy3D", @@ -158,7 +158,7 @@ [ "pd.Series", "pd.DataFrame", - "np.array", + "np.ndarray", "nested_univ", "pd-multiindex", "numpy3D", @@ -492,6 +492,30 @@ "int", "sample size used in approximating other statistics if not available", ), + ( + "bisect_iter", + "distribution", + "int", + "max iters for bisection method in ppf", + ), + ( + "capability:multioutput", + ["classifier", "regressor"], # might need to add "early_classifier" here + "bool", + "can the estimator handle multioutput data?", + ), + ( + "maintainers", + "object", + ("list", "str"), + "list of current maintainers of the object, each maintainer a GitHub handle", + ), + ( + "authors", + "object", + ("list", "str"), + "list of authors of the object, each author a GitHub handle", + ), ] ESTIMATOR_TAG_TABLE = pd.DataFrame(ESTIMATOR_TAG_REGISTER) diff --git a/sktime/registry/tests/test_lookup.py b/sktime/registry/tests/test_lookup.py index 3d58b02c322..e37380db78b 100644 --- a/sktime/registry/tests/test_lookup.py +++ b/sktime/registry/tests/test_lookup.py @@ -15,18 +15,9 @@ from sktime.registry._lookup import _check_estimator_types VALID_SCITYPES_SET = set( - BASE_CLASS_SCITYPE_LIST + TRANSFORMER_MIXIN_SCITYPE_LIST + ["estimator"] + BASE_CLASS_SCITYPE_LIST + TRANSFORMER_MIXIN_SCITYPE_LIST + ["estimator", "object"] ) -# some scitypes have no associated tags yet -SCITYPES_WITHOUT_TAGS = [ - "series-annotator", - "clusterer", - "object", - "splitter", - "network", -] - # shorthands for easy reading b = BASE_CLASS_SCITYPE_LIST n = len(b) @@ -101,14 +92,8 @@ def test_all_tags(estimator_scitype): assert isinstance(tags, list) # there should be at least one tag returned - # exception: scitypes which we know don't have tags associated - est_list = ( - estimator_scitype - if isinstance(estimator_scitype, list) - else [estimator_scitype] - ) - if not set(est_list).issubset(SCITYPES_WITHOUT_TAGS): - assert len(tags) > 0 + # even scitypes without tags should return those for "object" + assert len(tags) > 0 # checks return type specification (see docstring) for tag in tags: @@ -122,6 +107,11 @@ def test_all_tags(estimator_scitype): assert isinstance(tag[2][1], (str, list)) assert isinstance(tag[3], str) + # check some tags that all object types should have + tags_strs = [tag[0] for tag in tags] + assert "python_dependencies" in tags_strs + assert "python_version" in tags_strs + @pytest.mark.parametrize("return_names", [True, False]) def test_all_estimators_return_names(return_names): diff --git a/sktime/registry/tests/test_scitype.py b/sktime/registry/tests/test_scitype.py new file mode 100644 index 00000000000..c3370a07c45 --- /dev/null +++ b/sktime/registry/tests/test_scitype.py @@ -0,0 +1,94 @@ +"""Tests for scitype typipng function.""" + +import pytest + +from sktime.registry._scitype import scitype + + +@pytest.mark.parametrize("coerce_to_list", [True, False]) +def test_scitype(coerce_to_list): + """Test that the scitype function recovers the correct scitype(s).""" + from sktime.forecasting.arima import ARIMA + from sktime.forecasting.naive import NaiveForecaster + from sktime.transformations.series.exponent import ExponentTransformer + + # test that scitype works for classes with soft dependencies + result_arima = scitype(ARIMA, coerce_to_list=coerce_to_list) + if coerce_to_list: + assert isinstance(result_arima, list) + assert "forecaster" == result_arima[0] + else: + assert "forecaster" == result_arima + + # test that scitype works for instances + result_naive = scitype(NaiveForecaster(), coerce_to_list=coerce_to_list) + if coerce_to_list: + assert isinstance(result_naive, list) + assert "forecaster" == result_naive[0] + else: + assert "forecaster" == result_naive + + # test transformer object + result_transformer = scitype(ExponentTransformer, coerce_to_list=coerce_to_list) + if coerce_to_list: + assert isinstance(result_transformer, list) + assert "transformer" == result_transformer[0] + else: + assert "transformer" == result_transformer + + +@pytest.mark.parametrize("force_single_scitype", [True, False]) +@pytest.mark.parametrize("coerce_to_list", [True, False]) +def test_scitype_generic(force_single_scitype, coerce_to_list): + """Test that the scitype function recovers the correct scitype(s).""" + from sktime.base import BaseObject + + class _DummyClass(BaseObject): + _tags = {"object_type": ["foo", "bar"]} + + scitype_inferred = scitype( + _DummyClass(), + force_single_scitype=force_single_scitype, + coerce_to_list=coerce_to_list, + ) + + if force_single_scitype and coerce_to_list: + expected = ["foo"] + if not force_single_scitype and coerce_to_list: + expected = ["foo", "bar"] + if not coerce_to_list: + expected = "foo" + + assert scitype_inferred == expected + + class _DummyClass2(BaseObject): + _tags = {"object_type": "foo"} + + scitype_inferred = scitype( + _DummyClass2(), + force_single_scitype=force_single_scitype, + coerce_to_list=coerce_to_list, + ) + + if coerce_to_list: + expected = ["foo"] + if not coerce_to_list: + expected = "foo" + + assert scitype_inferred == expected + + class _DummyClass3(BaseObject): + _tags = {"object_type": ["foo"]} + + scitype_inferred = scitype( + _DummyClass3(), + force_single_scitype=force_single_scitype, + coerce_to_list=coerce_to_list, + ) + + if coerce_to_list: + expected = ["foo"] + if not coerce_to_list: + expected = "foo" + + assert scitype_inferred == expected diff --git a/sktime/registry/tests/test_tags.py b/sktime/registry/tests/test_tags.py index e3e73816359..74064d28637 100644 --- a/sktime/registry/tests/test_tags.py +++ b/sktime/registry/tests/test_tags.py @@ -1,4 +1,4 @@ -"""Tests for tag register an tag functionality.""" +"""Tests for tag register and tag functionality.""" from sktime.registry._tags import ESTIMATOR_TAG_REGISTER diff --git a/sktime/regression/base.py b/sktime/regression/base.py index 1ac7cdad25f..ef95794ea0e 100644 --- a/sktime/regression/base.py +++ b/sktime/regression/base.py @@ -23,19 +23,16 @@ class name: BaseRegressor __author__ = ["mloning", "fkiraly"] import time -from abc import ABC, abstractmethod import numpy as np -import pandas as pd -from sktime.base import BaseEstimator -from sktime.datatypes import check_is_scitype, convert_to +from sktime.base import BasePanelMixin +from sktime.datatypes import VectorizedDF from sktime.utils.sklearn import is_sklearn_transformer from sktime.utils.validation import check_n_jobs -from sktime.utils.warnings import warn -class BaseRegressor(BaseEstimator, ABC): +class BaseRegressor(BasePanelMixin): """Abstract base class for time series regressors. The base regressor specifies the methods and method signatures that all @@ -52,13 +49,17 @@ class BaseRegressor(BaseEstimator, ABC): _tags = { "object_type": "regressor", # type of object "X_inner_mtype": "numpy3D", # which type do _fit/_predict, support for X? + "y_inner_mtype": "numpy1D", # which type do _fit/_predict, support for y? # it should be either "numpy3D" or "nested_univ" (nested pd.DataFrame) + "capability:multioutput": False, # whether regressor supports multioutput "capability:multivariate": False, "capability:unequal_length": False, "capability:missing_values": False, "capability:train_estimate": False, "capability:contractable": False, "capability:multithreading": False, + "authors": "sktime developers", # author(s) of the object + "maintainers": "sktime developers", # current maintainer(s) of the object } # convenience constant to control which metadata of input data @@ -70,6 +71,14 @@ class BaseRegressor(BaseEstimator, ABC): "is_equal_length", ] + # attribute name where vectorized estimators are stored + VECTORIZATION_ATTR = "regressors_" # e.g., classifiers_, regressors_ + + # used in error messages + TASK = "regression" # e.g., classification, regression + EST_TYPE = "regressor" # e.g., classifier, regressor + EST_TYPE_PLURAL = "regressors" # e.g., classifiers, regressors + def __init__(self): self.fit_time_ = 0 self._class_dictionary = {} @@ -79,6 +88,9 @@ def __init__(self): # required for compatibility with some sklearn interfaces # i.e. CalibratedRegressorCV self._estimator_type = "regressor" + self._is_vectorized = False + self._is_timed = False + self._converter_store_y = {} super().__init__() @@ -123,19 +135,31 @@ def __rmul__(self, other): def fit(self, X, y): """Fit time series regressor to training data. + State change: + Changes state to "fitted". + + Writes to self: + Sets self.is_fitted to True. + Sets fitted model attributes ending in "_". + Parameters ---------- - X : 3D np.array (any number of dimensions, equal length series) - of shape [n_instances, n_dimensions, series_length] - or 2D np.array (univariate, equal length series) - of shape [n_instances, series_length] - or pd.DataFrame with each column a dimension, each cell a pd.Series - (any number of dimensions, equal or unequal length series) + X : sktime compatible time series panel data container, Panel scitype, e.g., + pd-multiindex: pd.DataFrame with columns = variables, + index = pd.MultiIndex with first level = instance indices, + second level = time indices + numpy3D: 3D np.array (any number of dimensions, equal length series) + of shape [n_instances, n_dimensions, series_length] or of any other supported Panel mtype - for list of mtypes, see datatypes.SCITYPE_REGISTER - for specifications, see examples/AA_datatypes_and_datasets.ipynb - y : 1D np.array of float, of shape [n_instances] - regression labels for fitting - indices correspond to instance indices in X + for list of mtypes, see datatypes.SCITYPE_REGISTER + for specifications, see examples/AA_datatypes_and_datasets.ipynb + y : sktime compatible tabular data container, Table scitype + 1D iterable, of shape [n_instances] + or 2D iterable, of shape [n_instances, n_dimensions] + class labels for fitting + 0-th indices correspond to instance indices in X + 1-st indices (if applicable) correspond to multioutput vector indices in X + supported sktime types: np.ndarray (1D, 2D), pd.Series, pd.DataFrame Returns ------- @@ -146,21 +170,47 @@ def fit(self, X, y): Changes state by creating a fitted model that updates attributes ending in "_" and sets is_fitted flag to True. """ + self.reset() + + # fit timer start start = int(round(time.time() * 1000)) + + # check and convert y for multioutput vectorization + y, y_metadata, y_inner_mtype = self._check_y(y, return_to_mtype=True) + self._y_metadata = y_metadata + self._y_inner_mtype = y_inner_mtype + self._is_vectorized = isinstance(y, VectorizedDF) + + if self._is_vectorized: + self._vectorize("fit", X=X, y=y) + # fit timer end + self.fit_time_ = int(round(time.time() * 1000)) - start + # this should happen last: fitted state is set to True + self._is_fitted = True + return self + + # no vectorization needed, proceed with normal fit + # convenience conversions to allow user flexibility: # if X is 2D array, convert to 3D, if y is Series, convert to numpy - X, y = _internal_convert(X, y) - X_metadata = _check_regressor_input( + X, y = self._internal_convert(X, y) + + # y float coercion + if y is not None and isinstance(y, np.ndarray): + y = y.astype("float") + + # input checks + X_metadata = self._check_input( X, y, return_metadata=self.METADATA_REQ_IN_CHECKS ) self._X_metadata = X_metadata - missing = X_metadata["has_nans"] - multivariate = not X_metadata["is_univariate"] - unequal = not X_metadata["is_equal_length"] + X_mtype = X_metadata["mtype"] + # Check this regressor can handle characteristics - self._check_capabilities(missing, multivariate, unequal) + self._check_capabilities(X_metadata) + # Convert data as dictated by the regressor tags - X = self._convert_X(X) + X = self._convert_X(X, X_mtype) multithread = self.get_tag("capability:multithreading") if multithread: try: @@ -172,7 +222,8 @@ def fit(self, X, y): self._fit(X, y) self.fit_time_ = int(round(time.time() * 1000)) - start - # this should happen last + + # this should happen last: fitted state is set to True self._is_fitted = True return self @@ -181,44 +232,64 @@ def predict(self, X) -> np.ndarray: Parameters ---------- - X : 3D np.array (any number of dimensions, equal length series) - of shape [n_instances, n_dimensions, series_length] - or 2D np.array (univariate, equal length series) - of shape [n_instances, series_length] - or pd.DataFrame with each column a dimension, each cell a pd.Series - (any number of dimensions, equal or unequal length series) + X : sktime compatible time series panel data container, Panel scitype, e.g., + pd-multiindex: pd.DataFrame with columns = variables, + index = pd.MultiIndex with first level = instance indices, + second level = time indices + numpy3D: 3D np.array (any number of dimensions, equal length series) + of shape [n_instances, n_dimensions, series_length] or of any other supported Panel mtype - for list of mtypes, see datatypes.SCITYPE_REGISTER - for specifications, see examples/AA_datatypes_and_datasets.ipynb + for list of mtypes, see datatypes.SCITYPE_REGISTER + for specifications, see examples/AA_datatypes_and_datasets.ipynb Returns ------- - y : 1D np.array of float, of shape [n_instances] - predicted regression labels - indices correspond to instance indices in X + y_pred : sktime compatible tabular data container, Table scitype + 1D iterable, of shape [n_instances] + or 2D iterable, of shape [n_instances, n_dimensions] + predicted class labels + 0-th indices correspond to instance indices in X + 1-st indices (if applicable) correspond to multioutput vector indices in X + 1D np.npdarray, if y univariate (one dimension) + otherwise, same type as y passed in fit """ self.check_is_fitted() + # enter vectorized mode if needed + if self._is_vectorized: + return self._vectorize("predict", X=X) + # boilerplate input checks for predict-like methods X = self._check_convert_X_for_predict(X) - return self._predict(X) + # call internal _predict, convert output + y_pred_inner = self._predict(X) + y_pred = self._convert_output_y(y_pred_inner) + return y_pred - def score(self, X, y) -> float: + def score(self, X, y, multioutput="uniform_average") -> float: """Scores predicted labels against ground truth labels on X. Parameters ---------- - X : 3D np.array (any number of dimensions, equal length series) - of shape [n_instances, n_dimensions, series_length] - or 2D np.array (univariate, equal length series) - of shape [n_instances, series_length] - or pd.DataFrame with each column a dimension, each cell a pd.Series - (any number of dimensions, equal or unequal length series) + X : sktime compatible time series panel data container, Panel scitype, e.g., + pd-multiindex: pd.DataFrame with columns = variables, + index = pd.MultiIndex with first level = instance indices, + second level = time indices + numpy3D: 3D np.array (any number of dimensions, equal length series) + of shape [n_instances, n_dimensions, series_length] or of any other supported Panel mtype - for list of mtypes, see datatypes.SCITYPE_REGISTER - for specifications, see examples/AA_datatypes_and_datasets.ipynb - y : 1D np.array of float, of shape [n_instances] - regression labels (gnr truth) - indices correspond to instance indices in X + for list of mtypes, see datatypes.SCITYPE_REGISTER + for specifications, see examples/AA_datatypes_and_datasets.ipynb + y : 2D np.array of int, of shape [n_instances, n_dimensions] - regression labels + for fitting indices correspond to instance indices in X + or 1D np.array of int, of shape [n_instances] - regression labels for + fitting indices correspond to instance indices in X + multioutput : str, optional (default="uniform_average") + {"raw_values", "uniform_average", "variance_weighted"}, array-like of shape + (n_outputs,) or None, default="uniform_average". + Defines aggregating of multiple output scores. Array-like value defines + weights used to average scores. Returns ------- @@ -228,9 +299,8 @@ def score(self, X, y) -> float: self.check_is_fitted() - return r2_score(y, self.predict(X), normalize=True) + return r2_score(y, self.predict(X), normalize=True, multioutput=multioutput) - @abstractmethod def _fit(self, X, y): """Fit time series regressor to training data. @@ -240,13 +310,21 @@ def _fit(self, X, y): ---------- X : guaranteed to be of a type in self.get_tag("X_inner_mtype") if self.get_tag("X_inner_mtype") = "numpy3D": - 3D np.ndarray of shape = [n_instances, n_dimensions, series_length] + 3D np.ndarray of shape = [n_instances, n_dimensions, series_length] + if self.get_tag("X_inner_mtype") = "pd-multiindex": + pd.DataFrame with columns = variables, + index = pd.MultiIndex with first level = instance indices, + second level = time indices if self.get_tag("X_inner_mtype") = "nested_univ": - pd.DataFrame with each column a dimension, each cell a pd.Series + pd.DataFrame with each column a dimension, each cell a pd.Series for list of other mtypes, see datatypes.SCITYPE_REGISTER for specifications, see examples/AA_datatypes_and_datasets.ipynb - y : 1D np.array of float, of shape [n_instances] - regression labels for fitting - indices correspond to instance indices in X + y : guaranteed to be of a type in self.get_tag("y_inner_mtype") + 1D iterable, of shape [n_instances] + or 2D iterable, of shape [n_instances, n_dimensions] + class labels for fitting + if self.get_tag("capaility:multioutput") = False, guaranteed to be 1D + if self.get_tag("capaility:multioutput") = True, guaranteed to be 2D Returns ------- @@ -256,9 +334,8 @@ def _fit(self, X, y): ----- Changes state by creating a fitted model that updates attributes ending in "_" """ - ... + raise NotImplementedError("abstract method") - @abstractmethod def _predict(self, X) -> np.ndarray: """Predicts labels for sequences in X. @@ -279,207 +356,4 @@ def _predict(self, X) -> np.ndarray: y : 1D np.array of float, of shape [n_instances] - predicted regression labels indices correspond to instance indices in X """ - ... - - def _check_convert_X_for_predict(self, X): - """Input checks, capability checks, repeated in all predict/score methods. - - Parameters - ---------- - X : any object (to check/convert) - should be of a supported Panel mtype or 2D numpy.ndarray - - Returns - ------- - X: an object of a supported Panel mtype, numpy3D if X was a 2D numpy.ndarray - - Raises - ------ - ValueError if X is of invalid input data type, or there is not enough data - ValueError if the capabilities in self._tags do not handle the data. - """ - X = _internal_convert(X) - X_metadata = _check_regressor_input( - X, return_metadata=self.METADATA_REQ_IN_CHECKS - ) - missing = X_metadata["has_nans"] - multivariate = not X_metadata["is_univariate"] - unequal = not X_metadata["is_equal_length"] - # Check this regressor can handle characteristics - self._check_capabilities(missing, multivariate, unequal) - # Convert data as dictated by the regressor tags - X = self._convert_X(X) - - return X - - def _check_capabilities(self, missing, multivariate, unequal): - """Check whether this regressor can handle the data characteristics. - - Parameters - ---------- - missing : boolean, does the data passed to fit contain missing values? - multivariate : boolean, does the data passed to fit contain missing values? - unequal : boolea, do the time series passed to fit have variable lengths? - - Raises - ------ - ValueError if the capabilities in self._tags do not handle the data. - """ - allow_multivariate = self.get_tag("capability:multivariate") - allow_missing = self.get_tag("capability:missing_values") - allow_unequal = self.get_tag("capability:unequal_length") - - self_name = type(self).__name__ - - # identify problems, mismatch of capability and inputs - problems = [] - if missing and not allow_missing: - problems += ["missing values"] - if multivariate and not allow_multivariate: - problems += ["multivariate series"] - if unequal and not allow_unequal: - problems += ["unequal length series"] - - # construct error message - problems_and = " and ".join(problems) - problems_or = " or ".join(problems) - msg = ( - f"Data seen by {self_name} instance has {problems_and}, " - f"but this {self_name} instance cannot handle {problems_or}. " - f"Calls with {problems_or} may result in error or unreliable results." - ) - - # raise exception or warning with message - # if self is composite, raise a warning, since passing could be fine - # see discussion in PR 2366 why - if len(problems) > 0: - if self.is_composite(): - warn(msg, obj=self) - else: - raise ValueError(msg) - - def _convert_X(self, X): - """Convert equal length series from DataFrame to numpy array or vice versa. - - Parameters - ---------- - self : this regressor - X : pd.DataFrame or np.ndarray. Input attribute data - - Returns - ------- - X : input X converted to type in "X_inner_mtype" tag - usually a pd.DataFrame (nested) or 3D np.ndarray - Checked and possibly converted input data - """ - inner_type = self.get_tag("X_inner_mtype") - # convert pd.DataFrame - X = convert_to( - X, - to_type=inner_type, - as_scitype="Panel", - ) - return X - - -def _check_regressor_input( - X, - y=None, - enforce_min_instances=1, - return_metadata=True, -): - """Check whether input X and y are valid formats with minimum data. - - Raises a ValueError if the input is not valid. - - Parameters - ---------- - X : check whether conformant with any sktime Panel mtype specification - y : check whether a pd.Series or np.array - enforce_min_instances : int, optional (default=1) - check there are a minimum number of instances. - return_metadata : bool, str, or list of str - metadata fields to return with X_metadata, input to check_is_scitype - - Returns - ------- - metadata : dict with metadata for X returned by datatypes.check_is_scitype - - Raises - ------ - ValueError - If y or X is invalid input data type, or there is not enough data - """ - # Check X is valid input type and recover the data characteristics - X_valid, _, X_metadata = check_is_scitype( - X, scitype="Panel", return_metadata=return_metadata - ) - if not X_valid: - raise TypeError( - f"X is not of a supported input data type." - f"X must be in a supported mtype format for Panel, found {type(X)}" - f"Use datatypes.check_is_mtype to check conformance with specifications." - ) - n_cases = X_metadata["n_instances"] - if n_cases < enforce_min_instances: - raise ValueError( - f"Minimum number of cases required is {enforce_min_instances} but X " - f"has : {n_cases}" - ) - - # Check y if passed - if y is not None: - # Check y valid input - if not isinstance(y, (pd.Series, np.ndarray)) and not ( - isinstance(y, pd.DataFrame) and y.shape[1] == 1 - ): - raise ValueError( - f"y must be a np.array, pd.Series or a 1-D pd.DataFrame," - f"but found type: {type(y)}" - ) - # Check matching number of labels - n_labels = y.shape[0] - if n_cases != n_labels: - raise ValueError( - f"Mismatch in number of cases. Number in X = {n_cases} nos in y = " - f"{n_labels}" - ) - if isinstance(y, np.ndarray): - if y.ndim > 1: - raise ValueError( - f"np.ndarray y must be 1-dimensional, " - f"but found {y.ndim} dimensions" - ) - return X_metadata - - -def _internal_convert(X, y=None): - """Convert X and y if necessary as a user convenience. - - Convert X to a 3D numpy array if already a 2D and convert y into an 1D numpy - array if passed as a Series. - - Parameters - ---------- - X : an object of a supported Panel mtype, or 2D numpy.ndarray - y : np.ndarray or pd.Series - - Returns - ------- - X: an object of a supported Panel mtype, numpy3D if X was a 2D numpy.ndarray - y: np.ndarray - """ - if isinstance(X, np.ndarray): - # Temporary fix to insist on 3D numpy. For univariate problems, - # most regressors simply convert back to 2D. This squeezing should be - # done here, but touches a lot of files, so will get this to work first. - if X.ndim == 2: - X = X.reshape(X.shape[0], 1, X.shape[1]) - if y is not None and isinstance(y, pd.Series): - # y should be a numpy array, although we allow Series for user convenience - y = pd.Series.to_numpy(y) - if y is not None and isinstance(y, np.ndarray): - y = y.astype("float") - if y is None: - return X - return X, y + raise NotImplementedError("abstract method") diff --git a/sktime/regression/compose/_ensemble.py b/sktime/regression/compose/_ensemble.py index ff5e84ff595..4b4c3d2964e 100644 --- a/sktime/regression/compose/_ensemble.py +++ b/sktime/regression/compose/_ensemble.py @@ -18,8 +18,8 @@ from sklearn.pipeline import Pipeline from sklearn.tree import DecisionTreeRegressor +from sktime.base._panel.forest._composable import BaseTimeSeriesForest from sktime.regression.base import BaseRegressor -from sktime.series_as_features.base.estimators._ensemble import BaseTimeSeriesForest from sktime.transformations.panel.summarize import RandomIntervalFeatureExtractor from sktime.utils.slope_and_trend import _slope from sktime.utils.validation.panel import check_X, check_X_y diff --git a/sktime/regression/deep_learning/__init__.py b/sktime/regression/deep_learning/__init__.py index cb7052b8a9a..7e144d0c03b 100644 --- a/sktime/regression/deep_learning/__init__.py +++ b/sktime/regression/deep_learning/__init__.py @@ -1,8 +1,14 @@ """Deep learning based regressors.""" __all__ = [ "CNNRegressor", + "MCDCNNRegressor", + "ResNetRegressor", + "SimpleRNNRegressor", "TapNetRegressor", ] from sktime.regression.deep_learning.cnn import CNNRegressor +from sktime.regression.deep_learning.mcdcnn import MCDCNNRegressor +from sktime.regression.deep_learning.resnet import ResNetRegressor +from sktime.regression.deep_learning.rnn import SimpleRNNRegressor from sktime.regression.deep_learning.tapnet import TapNetRegressor diff --git a/sktime/regression/deep_learning/cnn.py b/sktime/regression/deep_learning/cnn.py index c440c39f0cf..02d418ea7df 100644 --- a/sktime/regression/deep_learning/cnn.py +++ b/sktime/regression/deep_learning/cnn.py @@ -55,6 +55,15 @@ class CNNRegressor(BaseDeepRegressor): https://github.com/hfawaz/dl-4-tsc/blob/master/classifiers/cnn.py """ + _tags = { + # packaging info + # -------------- + "authors": ["AurumnPegasus", "achieveordie"], + "maintainers": ["AurumnPegasus", "achieveordie"], + "python_dependencies": "tensorflow", + # estimator type handled by parent class + } + def __init__( self, n_epochs=2000, diff --git a/sktime/regression/deep_learning/mcdcnn.py b/sktime/regression/deep_learning/mcdcnn.py index 1b667ed2766..2cf62b677fc 100644 --- a/sktime/regression/deep_learning/mcdcnn.py +++ b/sktime/regression/deep_learning/mcdcnn.py @@ -1,8 +1,6 @@ """Multi Channel Deep Convolutional Neural Regressor (MCDCNN).""" -__author__ = [ - "JamesLarge", -] +__author__ = ["James-Large"] from copy import deepcopy @@ -79,7 +77,13 @@ class MCDCNNRegressor(BaseDeepRegressor): MCDCNRegressor(...) """ - _tags = {"python_dependencies": "tensorflow"} + _tags = { + # packaging info + # -------------- + "authors": ["James-Large"], + "python_dependencies": "tensorflow", + # estimator type handled by parent class + } def __init__( self, diff --git a/sktime/regression/deep_learning/resnet.py b/sktime/regression/deep_learning/resnet.py index 684d9635757..ccc48120aaa 100644 --- a/sktime/regression/deep_learning/resnet.py +++ b/sktime/regression/deep_learning/resnet.py @@ -59,7 +59,14 @@ class ResNetRegressor(BaseDeepRegressor): ResNetRegressor(...) """ - _tags = {"python_dependencies": ["tensorflow"]} + _tags = { + # packaging info + # -------------- + "authors": ["James-Large", "Withington"], + "maintainers": ["Withington"], + "python_dependencies": "tensorflow", + # estimator type handled by parent class + } def __init__( self, diff --git a/sktime/regression/deep_learning/rnn.py b/sktime/regression/deep_learning/rnn.py index c32bd81999a..de2e773f5b0 100644 --- a/sktime/regression/deep_learning/rnn.py +++ b/sktime/regression/deep_learning/rnn.py @@ -49,9 +49,17 @@ class SimpleRNNRegressor(BaseDeepRegressor): https://github.com/Mcompetitions/M4-methods """ + _tags = { + # packaging info + # -------------- + "authors": ["mloning"], + "python_dependencies": "tensorflow", + # estimator type handled by parent class + } + def __init__( self, - num_epochs=100, + n_epochs=100, batch_size=1, units=6, callbacks=None, @@ -65,8 +73,9 @@ def __init__( optimizer=None, ): _check_dl_dependencies(severity="error") + super().__init__() - self.num_epochs = num_epochs + self.batch_size = batch_size self.verbose = verbose self.units = units @@ -80,6 +89,7 @@ def __init__( self.optimizer = optimizer self.history = None self._network = RNNNetwork(random_state=random_state, units=units) + self.n_epochs = n_epochs def build_model(self, input_shape, **kwargs): """Construct a compiled, un-trained, keras model that is ready for training. @@ -183,7 +193,7 @@ def _fit(self, X, y): X, y, batch_size=self.batch_size, - epochs=self.num_epochs, + epochs=self.n_epochs, verbose=self.verbose, callbacks=self.callbacks_, ) @@ -212,7 +222,7 @@ def get_test_params(cls, parameter_set="default"): """ params1 = {} params2 = { - "num_epochs": 50, + "n_epochs": 50, "batch_size": 2, "units": 5, "use_bias": False, diff --git a/sktime/regression/deep_learning/tapnet.py b/sktime/regression/deep_learning/tapnet.py index 73ac382e4db..7903ae69abf 100644 --- a/sktime/regression/deep_learning/tapnet.py +++ b/sktime/regression/deep_learning/tapnet.py @@ -1,11 +1,7 @@ """Time Convolutional Neural Network (CNN) for classification.""" -__author__ = [ - "Jack Russon", -] -__all__ = [ - "TapNetRegressor", -] +__author__ = ["jnrusson1"] +__all__ = ["TapNetRegressor"] from copy import deepcopy @@ -77,7 +73,14 @@ class TapNetRegressor(BaseDeepRegressor): or class based self attention. """ - _tags = {"python_dependencies": "tensorflow"} + _tags = { + # packaging info + # -------------- + "authors": ["jnrusson1"], + "maintainers": ["jnrusson1"], + "python_dependencies": "tensorflow", + # estimator type handled by parent class + } def __init__( self, diff --git a/sktime/regression/distance_based/_time_series_neighbors.py b/sktime/regression/distance_based/_time_series_neighbors.py index c801b938a24..824561b0891 100644 --- a/sktime/regression/distance_based/_time_series_neighbors.py +++ b/sktime/regression/distance_based/_time_series_neighbors.py @@ -90,11 +90,16 @@ class KNeighborsTimeSeriesRegressor(BaseRegressor): """ _tags = { + # packaging info + # -------------- + "authors": ["fkiraly"], + "python_dependencies": "numba", + # estimator type + # -------------- "capability:multivariate": True, "capability:unequal_length": True, "capability:missing_values": True, "X_inner_mtype": ["pd-multiindex", "numpy3D"], - "python_dependencies": "numba", } def __init__( diff --git a/sktime/regression/dummy/_dummy.py b/sktime/regression/dummy/_dummy.py index f6864f51ce2..878f452b1d4 100644 --- a/sktime/regression/dummy/_dummy.py +++ b/sktime/regression/dummy/_dummy.py @@ -1,6 +1,6 @@ """Dummy time series regressor.""" -__author__ = ["Badr-Eddine Marani"] +__author__ = ["badrmarani"] __all__ = ["DummyRegressor"] import numpy as np @@ -58,6 +58,12 @@ class DummyRegressor(BaseRegressor): """ _tags = { + # packaging info + # -------------- + "authors": "badrmarani", + "maintainers": "badrmarani", + # estimator type + # -------------- "X_inner_mtype": "nested_univ", "capability:missing_values": True, "capability:unequal_length": True, diff --git a/sktime/regression/interval_based/_tsf.py b/sktime/regression/interval_based/_tsf.py index 597fba1f853..b3685f5edb0 100644 --- a/sktime/regression/interval_based/_tsf.py +++ b/sktime/regression/interval_based/_tsf.py @@ -8,11 +8,8 @@ from sklearn.ensemble._forest import ForestRegressor from sklearn.tree import DecisionTreeRegressor +from sktime.base._panel.forest._tsf import BaseTimeSeriesForest, _transform from sktime.regression.base import BaseRegressor -from sktime.series_as_features.base.estimators.interval_based._tsf import ( - BaseTimeSeriesForest, - _transform, -) class TimeSeriesForestRegressor(BaseTimeSeriesForest, ForestRegressor, BaseRegressor): @@ -77,6 +74,12 @@ class TimeSeriesForestRegressor(BaseTimeSeriesForest, ForestRegressor, BaseRegre """ _tags = { + # packaging info + # -------------- + "authors": ["TonyBagnall", "kkoziara", "luiszugasti", "kanand77", "mloning"], + "maintainers": ["kkoziara", "luiszugasti", "kanand77"], + # estimator type + # -------------- "capability:multivariate": False, "X_inner_mtype": "numpy3D", } diff --git a/sktime/regression/kernel_based/_rocket_regressor.py b/sktime/regression/kernel_based/_rocket_regressor.py index c6b99294d0e..1d9e65305c4 100644 --- a/sktime/regression/kernel_based/_rocket_regressor.py +++ b/sktime/regression/kernel_based/_rocket_regressor.py @@ -100,9 +100,14 @@ class RocketRegressor(_DelegatedRegressor, BaseRegressor): """ _tags = { + # packaging info + # -------------- + "authors": "fkiraly", + "python_dependencies": "numba", + # estimator type + # -------------- "capability:multivariate": True, "capability:multithreading": True, - "python_dependencies": "numba", } # valid rocket strings for input validity checking diff --git a/sktime/regression/tests/test_all_regressors.py b/sktime/regression/tests/test_all_regressors.py index 3dfea1768f6..c3e8b1c4ab9 100644 --- a/sktime/regression/tests/test_all_regressors.py +++ b/sktime/regression/tests/test_all_regressors.py @@ -4,10 +4,12 @@ import numpy as np +import pandas as pd import pytest from sktime.datatypes import check_is_scitype from sktime.tests.test_all_estimators import BaseFixtureGenerator, QuickTester +from sktime.utils._testing.panel import make_regression_problem from sktime.utils._testing.scenarios_classification import ( ClassifierFitPredictMultivariate, ) @@ -80,3 +82,25 @@ def test_regressor_output(self, estimator_instance, scenario): assert isinstance(y_pred, np.ndarray) assert y_pred.shape == (X_new_instances,) assert np.issubdtype(y_pred.dtype, np.floating) + + def test_multioutput(self, estimator_instance): + """Test multioutput regression for all classifiers. + + All classifiers should follow the same interface, + those that do not genuinely should vectorize/broadcast over y. + """ + n_instances = 20 + X, y = make_regression_problem(n_instances=n_instances) + y_mult = pd.DataFrame({"a": y, "b": y}) + + estimator_instance.fit(X, y_mult) + y_pred = estimator_instance.predict(X) + + assert isinstance(y_pred, pd.DataFrame) + assert y_pred.shape == y_mult.shape + + vectorized = estimator_instance.get_tag("capability:multioutput") + if vectorized: + assert hasattr(estimator_instance, "regressors_") + assert isinstance(estimator_instance.regressors_, pd.DataFrame) + assert estimator_instance.regressors_.shape == (1, 2) diff --git a/sktime/series_as_features/base/__init__.py b/sktime/series_as_features/base/__init__.py deleted file mode 100644 index 4ebe8357503..00000000000 --- a/sktime/series_as_features/base/__init__.py +++ /dev/null @@ -1 +0,0 @@ -"""sktime.series_as_features.base module.""" diff --git a/sktime/series_as_features/base/estimators/__init__.py b/sktime/series_as_features/base/estimators/__init__.py deleted file mode 100644 index 7ddff84176e..00000000000 --- a/sktime/series_as_features/base/estimators/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -"""sktime.series_as_features.base.estimators module.""" -__all__ = ["BaseTimeSeriesForest"] - -from ._ensemble import BaseTimeSeriesForest diff --git a/sktime/series_as_features/base/estimators/interval_based/__init__.py b/sktime/series_as_features/base/estimators/interval_based/__init__.py deleted file mode 100644 index 7ccd4695c4c..00000000000 --- a/sktime/series_as_features/base/estimators/interval_based/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -"""Interval based series-as-features estimators.""" - -__all__ = ["BaseTimeSeriesForest"] - -from sktime.series_as_features.base.estimators.interval_based._tsf import ( - BaseTimeSeriesForest, -) diff --git a/sktime/series_as_features/base/estimators/tests/__init__.py b/sktime/series_as_features/base/estimators/tests/__init__.py deleted file mode 100644 index 5e0f0cc7f01..00000000000 --- a/sktime/series_as_features/base/estimators/tests/__init__.py +++ /dev/null @@ -1 +0,0 @@ -"""Tests for sktime.series_as_features.base.estimators module.""" diff --git a/sktime/split/__init__.py b/sktime/split/__init__.py index 78e8d81a84b..a872aaf8fa2 100644 --- a/sktime/split/__init__.py +++ b/sktime/split/__init__.py @@ -2,6 +2,7 @@ __all__ = [ "CutoffSplitter", + "CutoffFhSplitter", "ExpandingGreedySplitter", "ExpandingWindowSplitter", "SameLocSplitter", @@ -12,7 +13,7 @@ "temporal_train_test_split", ] -from sktime.split.cutoff import CutoffSplitter +from sktime.split.cutoff import CutoffFhSplitter, CutoffSplitter from sktime.split.expandinggreedy import ExpandingGreedySplitter from sktime.split.expandingwindow import ExpandingWindowSplitter from sktime.split.sameloc import SameLocSplitter diff --git a/sktime/split/base/_base_splitter.py b/sktime/split/base/_base_splitter.py index b1325307eb7..07ceb8e6d39 100644 --- a/sktime/split/base/_base_splitter.py +++ b/sktime/split/base/_base_splitter.py @@ -95,6 +95,8 @@ class BaseSplitter(BaseObject): # split_series_uses: "iloc" or "loc", whether split_series under the hood # calls split ("iloc") or split_loc ("loc"). Setting this can give # performance advantages, e.g., if "loc" is faster to obtain. + "authors": "sktime developers", # author(s) of the object + "maintainers": "sktime developers", # current maintainer(s) of the object } def __init__( @@ -113,10 +115,10 @@ def split(self, y: ACCEPTED_Y_TYPES) -> SPLIT_GENERATOR_TYPE: Parameters ---------- y : pd.Index or time series in sktime compatible time series format, - time series can be in any Series, Panel, or Hierarchical mtype format + time series can be in any Series, Panel, or Hierarchical mtype format Index of time series to split, or time series to split If time series, considered as index of equivalent pandas type container: - pd.DataFrame, pd.Series, pd-multiindex, or pd_multiindex_hier mtype + pd.DataFrame, pd.Series, pd-multiindex, or pd_multiindex_hier mtype Yields ------ @@ -144,8 +146,8 @@ def _split(self, y: pd.Index) -> SPLIT_GENERATOR_TYPE: Parameters ---------- - y : pd.Index or time series in sktime compatible time series format - Time series to split, or index of time series to split + y : pd.Index + Index of time series to split Yields ------ @@ -197,8 +199,10 @@ def split_loc(self, y: ACCEPTED_Y_TYPES) -> Iterator[Tuple[pd.Index, pd.Index]]: Parameters ---------- y : pd.Index or time series in sktime compatible time series format, - time series can be in any Series, Panel, or Hierarchical mtype format - Time series to split, or index of time series to split + time series can be in any Series, Panel, or Hierarchical mtype format + Index of time series to split, or time series to split + If time series, considered as index of equivalent pandas type container: + pd.DataFrame, pd.Series, pd-multiindex, or pd_multiindex_hier mtype Yields ------ @@ -240,10 +244,11 @@ def split_series(self, y: ACCEPTED_Y_TYPES) -> Iterator[SPLIT_TYPE]: Parameters ---------- - y : time series in sktime compatible time series format, - time series can be in any Series, Panel, or Hierarchical mtype format - e.g., pd.Series, pd.DataFrame, np.ndarray - Time series to split, or index of time series to split + y : pd.Index or time series in sktime compatible time series format, + time series can be in any Series, Panel, or Hierarchical mtype format + Index of time series to split, or time series to split + If time series, considered as index of equivalent pandas type container: + pd.DataFrame, pd.Series, pd-multiindex, or pd_multiindex_hier mtype Yields ------ @@ -387,8 +392,11 @@ def get_n_splits(self, y: Optional[ACCEPTED_Y_TYPES] = None) -> int: Parameters ---------- - y : pd.Series or pd.Index, optional (default=None) - Time series to split + y : pd.Index or time series in sktime compatible time series format, + time series can be in any Series, Panel, or Hierarchical mtype format + Index of time series to split, or time series to split + If time series, considered as index of equivalent pandas type container: + pd.DataFrame, pd.Series, pd-multiindex, or pd_multiindex_hier mtype Returns ------- diff --git a/sktime/split/compose/__init__.py b/sktime/split/compose/__init__.py new file mode 100644 index 00000000000..1799457e131 --- /dev/null +++ b/sktime/split/compose/__init__.py @@ -0,0 +1,6 @@ +"""Compositors for splitters.""" +# copyright: sktime developers, BSD-3-Clause License (see LICENSE file) + +__all__ = ["Repeat"] + +from sktime.split.compose._repeat import Repeat diff --git a/sktime/split/compose/_repeat.py b/sktime/split/compose/_repeat.py new file mode 100644 index 00000000000..30e816b3c69 --- /dev/null +++ b/sktime/split/compose/_repeat.py @@ -0,0 +1,197 @@ +"""Splitter obtained from repeating another splitter.""" +# copyright: sktime developers, BSD-3-Clause License (see LICENSE file) + +__author__ = ["fkiraly"] + + +from sktime.split.base import BaseSplitter + + +class Repeat(BaseSplitter): + """Add repetitions to a splitter, element-wise or sequence-wise. + + Element-wise means: if the original splitter splits a series s into s1, s2, s3, + then a 2-times repeat splits s into s1, s1, s2, s2, s3, s3. + Sequence-wise means: if the original splitter splits a series s into s1, s2, s3, + then a 2-times repeat splits s into s1, s2, s3, s1, s2, s3. + + This splitter also allows to control whether repetitions are exact + or independent pseuo-random, for stochastic splitters. + + Parameters + ---------- + splitter : sktime splitter object, BaseSplitter descendant instance + splitter to repeat + times : int, default=1 + number of times to repeat the splitter + mode : str, one of "entry" and "sequence", default="entry" + mode of repetition + "entry" repeats each entry of the split ``times`` times + "sequence" repeats the entire sequence of splits ``times`` times + random_repeat : bool, default=False + whether repetitions should be exact or independent pseudo-random + If False, repetitions are exact (default) + If True, repetitions are random, ``splitter`` is cloned for each repetition. + Note: if a random seed is set in ``splitter``, the effect is the same + as setting ``random_repeat`` to False, even if ``random_repeat`` is True. + """ + + _tags = { + "split_hierarchical": True, + "split_series_uses": "iloc", + } + + def __init__(self, splitter, times=1, mode="entry", random_repeat=False): + self.splitter = splitter + self.times = times + self.mode = mode + self.random_repeat = random_repeat + + super().__init__() + + ALLOWED_MODES = ["entry", "sequence"] + if mode not in ALLOWED_MODES: + raise ValueError( + f"Mode in Repeat splitter should be one of {ALLOWED_MODES}, " + f"but found {mode}" + ) + + tags_to_clone = ["split_series_uses"] + + self.clone_tags(splitter, tags_to_clone) + + def _split(self, y): + """Get iloc references to train/test splits of `y`. + + private _split containing the core logic, called from split + + Parameters + ---------- + y : pd.Index + Index of time series to split + + Yields + ------ + train : 1D np.ndarray of dtype int + Training window indices, iloc references to training indices in y + test : 1D np.ndarray of dtype int + Test window indices, iloc references to test indices in y + """ + yield from self._repeat(y, method="split") + + def _split_loc(self, y): + """Get loc references to train/test splits of `y`. + + private _split containing the core logic, called from split_loc + + Default implements using split and y.index to look up the loc indices. + Can be overridden for faster implementation. + + Parameters + ---------- + y : pd.Index + index of time series to split + + Yields + ------ + train : pd.Index + Training window indices, loc references to training indices in y + test : pd.Index + Test window indices, loc references to test indices in y + """ + yield from self._repeat(y, method="split_loc") + + def _repeat(self, y, method="split"): + """Repeat the splitter. + + Parameters + ---------- + method : str, one of "split" and "split_loc", default="split" + method to repeat + + Yields + ------ + train : 1D np.ndarray of dtype int or pd.Index + Training window indices, iloc or loc references to training indices in y + test : 1D np.ndarray of dtype int or pd.Index + Test window indices, iloc or loc references to test indices in y + """ + random_repeat = self.random_repeat + + if random_repeat: + spl_clones = [self.splitter.clone() for _ in range(self.times)] + spl_gens = [getattr(spl, method)(y) for spl in spl_clones] + else: + one_clone = self.splitter.clone() + one_gen = getattr(one_clone, method)(y) + + if self.mode == "entry" and not random_repeat: + for train, test in one_gen: + for _ in range(self.times): + yield train, test + elif self.mode == "entry" and random_repeat: + for _ in range(self.times): + for spl_gen in spl_gens: + yield next(spl_gen) + elif self.mode == "sequence" and not random_repeat: + all_res = list(one_gen) + for _ in range(self.times): + for train, test in all_res: + yield train, test + elif self.mode == "sequence" and random_repeat: + for spl_gen in spl_gens: + yield from spl_gen + + def get_n_splits(self, y) -> int: + """Return the number of splits. + + Parameters + ---------- + y : pd.Series or pd.Index, optional (default=None) + Time series to split + + Returns + ------- + n_splits : int + The number of splits. + """ + return self._splitter.get_n_splits(y) * self.times + + @classmethod + def get_test_params(cls, parameter_set="default"): + """Return testing parameter settings for the splitter. + + Parameters + ---------- + parameter_set : str, default="default" + Name of the set of test parameters to return, for use in tests. If no + special parameters are defined for a value, will return `"default"` set. + + Returns + ------- + params : dict or list of dict, default = {} + Parameters to create testing instances of the class + Each dict are parameters to construct an "interesting" test instance, i.e., + `MyClass(**params)` or `MyClass(**params[i])` creates a valid test instance. + `create_test_instance` uses the first (or only) dictionary in `params` + """ + from sktime.split import ExpandingWindowSplitter + + modes = ["entry", "sequence"] + repeats = [True, False] + timess = [2, 3, 7] + + params = [] + for mode in modes: + for random_repeat in repeats: + for times in timess: + params.append( + { + "mode": mode, + "random_repeat": random_repeat, + "times": times, + "splitter": ExpandingWindowSplitter(), + } + ) + + return params diff --git a/sktime/split/cutoff.py b/sktime/split/cutoff.py index 8191514accb..b1b4554198e 100644 --- a/sktime/split/cutoff.py +++ b/sktime/split/cutoff.py @@ -6,6 +6,7 @@ __all__ = [ "CutoffSplitter", + "CutoffFhSplitter", ] from typing import Optional @@ -125,7 +126,7 @@ class CutoffSplitter(BaseSplitter): relative to the end of the training window. It will contain as many indices as there are forecasting horizons provided to the `fh` argument. - For a forecasating horizon :math:`(h_1,\ldots,h_H)`, the test window will + For a forecasting horizon :math:`(h_1,\ldots,h_H)`, the test window will consist of the indices :math:`(k_n+h_1,\ldots, k_n+h_H)`. The number of splits returned by `.get_n_splits` @@ -240,3 +241,117 @@ def get_test_params(cls, parameter_set="default"): `create_test_instance` uses the first (or only) dictionary in `params` """ return [{"cutoffs": np.array([3, 7, 10])}, {"cutoffs": [21, 22]}] + + +class CutoffFhSplitter(BaseSplitter): + r"""Temporal train-test splitter, based on cutoff and forecasting horizon. + + Train and test splits are determied as follows: + + for each cutoff point `k=cutoff[i]`, in `split`: + + * training fold is all loc indices up to and including `k` + * if `fh` is not passed, test fold is all loc indices strictly after `k` + * if `fh is passed`, test fold is all loc indices in `k + fh`, if `fh` is relative. + More precisely, `fh.to_absolute_index(cutoff=k) + If `fh` is absolute, then the test window is `fh` itself. + + It should be noted that, unlike in `CutoffSplitter`, + test folds are not determined by a window length, + but by indices of the forecasting horizon `fh`, i.e., test folds can be + non-contiguous, even if the data index is regular. + + Parameters + ---------- + cutoff : np.array or pd.Index + Cutoff points, positive and integer- or datetime-index like. + Type should match the type of `fh` input. + fh : None, ForecastingHorizon, int, timedelta, iterable of ints or timedeltas + Forecasting horizon, relative or absolute, to determine test folds. + Type should match the type of `cutoffs` input. + If not ForecastingHorizon, is coerced. + """ + + _tags = { + "split_hierarchical": False, + "split_series_uses": "loc", + } + + def __init__(self, cutoff, fh=None): + self.cutoff = cutoff + self.fh = fh + super().__init__(fh=fh) + + def _split_loc(self, y): + """Get loc references to train/test splits of `y`. + + private _split containing the core logic, called from split_loc + + Parameters + ---------- + y : pd.Index + index of time series to split + + Yields + ------ + train : pd.Index + Training window indices, loc references to training indices in y + test : pd.Index + Test window indices, loc references to test indices in y + """ + cutoff = self.cutoff + fh = self.fh + + if fh is not None: + from sktime.forecasting.base import ForecastingHorizon + + if not isinstance(fh, ForecastingHorizon): + fh = ForecastingHorizon(fh) + + for k in cutoff: + train = y[y <= k] + if fh is not None: + test = fh.to_absolute_index(cutoff=k) + else: + test = y[y > k] + yield train, test + + def get_n_splits(self, y=None) -> int: + """Return the number of splits. + + Since this splitter returns a single train/test split, + this number is trivially 1. + + Parameters + ---------- + y : pd.Series or pd.Index, optional (default=None) + Time series to split + + Returns + ------- + n_splits : int + The number of splits. + """ + return len(self.cutoffs) + + @classmethod + def get_test_params(cls, parameter_set="default"): + """Return testing parameter settings for the splitter. + + Parameters + ---------- + parameter_set : str, default="default" + Name of the set of test parameters to return, for use in tests. If no + special parameters are defined for a value, will return `"default"` set. + + Returns + ------- + params : dict or list of dict, default = {} + Parameters to create testing instances of the class + Each dict are parameters to construct an "interesting" test instance, i.e., + `MyClass(**params)` or `MyClass(**params[i])` creates a valid test instance. + `create_test_instance` uses the first (or only) dictionary in `params` + """ + params1 = {"cutoff": 3} + params2 = {"cutoff": [3, 4], "fh": [1, 2]} + return [params1, params2] diff --git a/sktime/split/expandinggreedy.py b/sktime/split/expandinggreedy.py index 5baf1f7bf66..b67e16a5440 100644 --- a/sktime/split/expandinggreedy.py +++ b/sktime/split/expandinggreedy.py @@ -7,6 +7,7 @@ __all__ = [ "ExpandingGreedySplitter", ] +__author__ = ["davidgilbertson"] import numpy as np import pandas as pd @@ -18,21 +19,28 @@ class ExpandingGreedySplitter(BaseSplitter): """Splitter that successively cuts test folds off the end of the series. - Takes an integer `test_size` that defines the number of steps included in the + Takes an integer ``test_size`` that defines the number of steps included in the test set of each fold. The train set of each fold will contain all data before - the test set. If the data contains multiple instances, `test_size` is + the test set. If the data contains multiple instances, ``test_size`` is _per instance_. - If no `step_length` is defined, the test sets (one for each fold) will be - adjacent, taken from the end of the dataset. + If no ``step_length`` is defined, the test sets (one for each fold) will be + adjacent and disjoint, taken from the end of the dataset. - For example, with `test_size=7` and `folds=5`, the test sets in total will cover + For example, with ``test_size=7`` and ``folds=5``, the test sets in total will cover the last 35 steps of the data with no overlap. Parameters ---------- - test_size : int - The number of steps included in the test set of each fold. + test_size : int or float + If int: the number of steps included in the test set of each fold. + Formally, steps are consecutive ``iloc`` indices. + If float: the proportion of steps included in the test set of each fold, + as a proportion of the total number of consecutive ``iloc`` indices. + Must be between 0.0 and 1.0. Proportions are rounded to the + next higher integer count of samples (ceil). + Cave: not the ``loc`` proportion between start and end locations, + but a proportion of total number of consecutive ``iloc`` indices. folds : int, default = 5 The number of folds. step_length : int, optional @@ -61,18 +69,29 @@ def __init__(self, test_size: int, folds: int = 5, step_length: int = None): self.step_length = step_length self.fh = np.arange(test_size) + 1 + # no algorithm implemented that is faster for float than naive iteration + if isinstance(test_size, float): + self.set_tags(**{"split_hierarchical": False}) + def _split(self, y: pd.Index) -> SPLIT_GENERATOR_TYPE: + test_size = self.test_size + + if isinstance(test_size, float): + _test_size = np.ceil(len(y) * test_size) + else: + _test_size = test_size + if isinstance(y, pd.MultiIndex): - groups = pd.Series(index=y).groupby(y.names[:-1]) + groups = pd.Series(index=y, dtype="float64").groupby(y.names[:-1]) reverse_idx = groups.transform("size") - groups.cumcount() - 1 else: reverse_idx = np.arange(len(y))[::-1] - step_length = self.step_length or self.test_size + step_length = self.step_length or _test_size for i in reversed(range(self.folds)): tst_end = i * step_length - trn_end = tst_end + self.test_size + trn_end = tst_end + _test_size trn_indices = np.flatnonzero(reverse_idx >= trn_end) tst_indices = np.flatnonzero( (reverse_idx < trn_end) & (reverse_idx >= tst_end) @@ -99,4 +118,5 @@ def get_test_params(cls, parameter_set="default"): """ params1 = {"test_size": 1} params2 = {"test_size": 3, "folds": 2, "step_length": 2} - return [params1, params2] + params3 = {"test_size": 0.2, "folds": 2} + return [params1, params2, params3] diff --git a/sktime/split/sameloc.py b/sktime/split/sameloc.py index 040c748d301..c94e1bb6cb6 100644 --- a/sktime/split/sameloc.py +++ b/sktime/split/sameloc.py @@ -141,10 +141,7 @@ def get_test_params(cls, parameter_set="default"): `create_test_instance` uses the first (or only) dictionary in `params` """ from sktime.datasets import load_airline - from sktime.forecasting.model_selection import ( - ExpandingWindowSplitter, - SingleWindowSplitter, - ) + from sktime.split import ExpandingWindowSplitter, SingleWindowSplitter y = load_airline() y_temp = y[:60] diff --git a/sktime/split/slidingwindow.py b/sktime/split/slidingwindow.py index 031ffe6e28f..ed55667e4ed 100644 --- a/sktime/split/slidingwindow.py +++ b/sktime/split/slidingwindow.py @@ -51,9 +51,9 @@ class SlidingWindowSplitter(BaseWindowSplitter): Parameters ---------- - fh : int, list or np.array + fh : int, list or np.array, optional (default=1) Forecasting horizon - window_length : int or timedelta or pd.DateOffset + window_length : int or timedelta or pd.DateOffset, optional (default=10) Window length step_length : int or timedelta or pd.DateOffset, optional (default=1) Step length between windows diff --git a/sktime/split/temporal_train_test_split.py b/sktime/split/temporal_train_test_split.py index 2509b0ec9af..55e7e91956d 100644 --- a/sktime/split/temporal_train_test_split.py +++ b/sktime/split/temporal_train_test_split.py @@ -91,8 +91,8 @@ def temporal_train_test_split( Examples -------- - >>> from sktime.forecasting.model_selection import temporal_train_test_split >>> from sktime.datasets import load_airline, load_osuleaf + >>> from sktime.split import temporal_train_test_split >>> from sktime.utils._testing.panel import _make_panel >>> # univariate time series >>> y = load_airline() diff --git a/sktime/split/testplustrain.py b/sktime/split/testplustrain.py index 73981a9c661..a803a52c9f0 100644 --- a/sktime/split/testplustrain.py +++ b/sktime/split/testplustrain.py @@ -131,10 +131,7 @@ def get_test_params(cls, parameter_set="default"): `MyClass(**params)` or `MyClass(**params[i])` creates a valid test instance. `create_test_instance` uses the first (or only) dictionary in `params` """ - from sktime.forecasting.model_selection import ( - ExpandingWindowSplitter, - SingleWindowSplitter, - ) + from sktime.split import ExpandingWindowSplitter, SingleWindowSplitter cv_1 = ExpandingWindowSplitter(fh=[2, 4], initial_window=24, step_length=12) cv_2 = SingleWindowSplitter(fh=[2, 4], window_length=24) diff --git a/sktime/split/tests/test_cutoff.py b/sktime/split/tests/test_cutoff.py index ddd3aa94b4c..8afe761153e 100644 --- a/sktime/split/tests/test_cutoff.py +++ b/sktime/split/tests/test_cutoff.py @@ -2,6 +2,7 @@ """Tests for cutoff splitter.""" import numpy as np +import pandas as pd import pytest from sktime.forecasting.tests._config import ( @@ -11,7 +12,7 @@ TEST_WINDOW_LENGTHS, TEST_YS, ) -from sktime.split import CutoffSplitter +from sktime.split import CutoffFhSplitter, CutoffSplitter from sktime.split.base._common import _inputs_are_supported from sktime.split.tests.test_split import _check_cv @@ -30,3 +31,30 @@ def test_cutoff_window_splitter(y, cutoffs, fh, window_length): match = "Unsupported combination of types" with pytest.raises(TypeError, match=match): CutoffSplitter(cutoffs, fh=fh, window_length=window_length) + + +def test_cutoff_fh_splitter(): + """Test CutoffFhSplitter.""" + from sktime.forecasting.base import ForecastingHorizon + from sktime.utils._testing.series import _make_series + + y = _make_series() + cutoff = y.index[[10]] + cutoff.freq = y.index.freq + fh = ForecastingHorizon([1, 2, 3], freq=y.index.freq) + + spl = CutoffFhSplitter(cutoff, fh) + + spl_tt = list(spl.split_loc(y))[0] + spl_train = spl_tt[0] + spl_test = spl_tt[1] + + assert isinstance(spl_train, pd.DatetimeIndex) + assert isinstance(spl_test, pd.DatetimeIndex) + + assert np.all(spl_train == y.index[:11]) + + expected_test = pd.DatetimeIndex( + ["2000-01-12", "2000-01-13", "2000-01-14"], dtype="datetime64[ns]", freq="D" + ) + assert np.all(spl_test == expected_test) diff --git a/sktime/tests/_config.py b/sktime/tests/_config.py index 989d1335bd0..7a9aabbf502 100644 --- a/sktime/tests/_config.py +++ b/sktime/tests/_config.py @@ -77,6 +77,7 @@ "test_persistence_via_pickle", "test_fit_does_not_overwrite_hyper_params", "test_save_estimators_to_file", + "test_multiprocessing_idempotent", # see 5658 ], "ProximityForest": [ "test_persistence_via_pickle", @@ -116,6 +117,11 @@ "MLPClassifier": [ "test_fit_idempotent", ], + "CNTCClassifier": [ + "test_fit_idempotent", + "test_persistence_via_pickle", + "test_save_estimators_to_file", + ], "InceptionTimeClassifier": [ "test_fit_idempotent", ], @@ -178,6 +184,11 @@ "test_hierarchical_with_exogeneous", # refer to #4743 ], "Pipeline": ["test_inheritance"], # does not inherit from intermediate base classes + # networks do not support negative fh + "LTSFLinearForecaster": ["test_predict_time_index_in_sample_full"], + "LTSFDLinearForecaster": ["test_predict_time_index_in_sample_full"], + "LTSFNLinearForecaster": ["test_predict_time_index_in_sample_full"], + "WEASEL": ["test_multiprocessing_idempotent"], # see 5658 } # We use estimator tags in addition to class hierarchies to further distinguish diff --git a/sktime/tests/test_all_estimators.py b/sktime/tests/test_all_estimators.py index 6d6b35bcbe5..f7ad4ef4a78 100644 --- a/sktime/tests/test_all_estimators.py +++ b/sktime/tests/test_all_estimators.py @@ -42,7 +42,6 @@ from sktime.utils._testing._conditional_fixtures import ( create_conditional_fixtures_and_names, ) -from sktime.utils._testing.deep_equals import deep_equals from sktime.utils._testing.estimator_checks import ( _assert_array_almost_equal, _assert_array_equal, @@ -51,6 +50,7 @@ _list_required_methods, ) from sktime.utils._testing.scenarios_getter import retrieve_scenarios +from sktime.utils.deep_equals import deep_equals from sktime.utils.random_state import set_random_state from sktime.utils.sampling import random_partition from sktime.utils.validation._dependencies import _check_soft_dependencies @@ -876,6 +876,10 @@ def test_inheritance(self, estimator_class): f"estimator: {estimator_class} has fit method, but" f"is not a sub-class of BaseEstimator." ) + from sktime.pipeline import Pipeline + + if issubclass(estimator_class, Pipeline): + return # Usually estimators inherit only from one BaseEstimator type, but in some cases # they may be predictor and transformer at the same time (e.g. pipelines) @@ -1428,8 +1432,6 @@ def test_save_estimators_to_file( err_msg=msg, ) - # todo: this needs to be diagnosed and fixed - temporary skip - @pytest.mark.skip(reason="hangs on mac and unix remote tests") def test_multiprocessing_idempotent( self, estimator_instance, scenario, method_nsc_arraylike ): @@ -1443,29 +1445,39 @@ def test_multiprocessing_idempotent( method_nsc = method_nsc_arraylike params = estimator_instance.get_params() - if "n_jobs" in params: - # run on a single process - # ----------------------- - estimator = deepcopy(estimator_instance) - estimator.set_params(n_jobs=1) - set_random_state(estimator) - result_single_process = scenario.run( - estimator, method_sequence=["fit", method_nsc] - ) + # test runs only if n_jobs is a parameter of the estimator + if "n_jobs" not in params: + return None - # run on multiple processes - # ------------------------- - estimator = deepcopy(estimator_instance) - estimator.set_params(n_jobs=-1) - set_random_state(estimator) - result_multiple_process = scenario.run( - estimator, method_sequence=["fit", method_nsc] - ) - _assert_array_equal( - result_single_process, - result_multiple_process, - err_msg="Results are not equal for n_jobs=1 and n_jobs=-1", - ) + # skip test for predict_proba + # this produces a BaseDistribution object, for which no ready + # equality check is implemented + if method_nsc == "predict_proba": + return None + + # run on a single process + # ----------------------- + estimator = deepcopy(estimator_instance) + estimator.set_params(n_jobs=1) + set_random_state(estimator) + result_single_process = scenario.run( + estimator, method_sequence=["fit", method_nsc] + ) + + # run on multiple processes + # ------------------------- + estimator = deepcopy(estimator_instance) + estimator.set_params(n_jobs=-1) + set_random_state(estimator) + result_multiple_process = scenario.run( + estimator, method_sequence=["fit", method_nsc] + ) + + _assert_array_equal( + result_single_process, + result_multiple_process, + err_msg="Results are not equal for n_jobs=1 and n_jobs=-1", + ) def test_dl_constructor_initializes_deeply(self, estimator_class): """Test DL estimators that they pass custom parameters to underlying Network.""" diff --git a/sktime/tests/test_class_register.py b/sktime/tests/test_class_register.py new file mode 100644 index 00000000000..f7839bb358f --- /dev/null +++ b/sktime/tests/test_class_register.py @@ -0,0 +1,116 @@ +# copyright: sktime developers, BSD-3-Clause License (see LICENSE file) +"""Registry and dispatcher for test classes. + +Module does not contain tests, only test utilities. +""" + +__author__ = ["fkiraly"] + +from inspect import isclass + + +def get_test_class_registry(): + """Return test class registry. + + Wrapped in a function to avoid circular imports. + + Returns + ------- + testclass_dict : dict + test class registry + keys are scitypes, values are test classes TestAll[Scitype] + """ + from sktime.alignment.tests.test_all_aligners import TestAllAligners + from sktime.classification.early_classification.tests.test_all_early_classifiers import ( # noqa E501 + TestAllEarlyClassifiers, + ) + from sktime.classification.tests.test_all_classifiers import TestAllClassifiers + from sktime.dists_kernels.tests.test_all_dist_kernels import ( + TestAllPairwiseTransformers, + TestAllPanelTransformers, + ) + from sktime.forecasting.tests.test_all_forecasters import TestAllForecasters + from sktime.param_est.tests.test_all_param_est import TestAllParamFitters + from sktime.proba.tests.test_all_distrs import TestAllDistributions + from sktime.regression.tests.test_all_regressors import TestAllRegressors + from sktime.tests.test_all_estimators import TestAllEstimators, TestAllObjects + from sktime.transformations.tests.test_all_transformers import TestAllTransformers + + testclass_dict = dict() + # every object in sktime inherits from BaseObject + # "object" tests are run for all objects + testclass_dict["object"] = TestAllObjects + # fittable objects inherit from BaseEstimator + # "estimator" tests are run for all estimators + # estimators are also objects + testclass_dict["estimator"] = TestAllEstimators + # more specific base classes + # these inherit either from BaseEstimator or BaseObject, + # so also imply estimator and object tests, or only object tests + testclass_dict["aligner"] = TestAllAligners + testclass_dict["classifier"] = TestAllClassifiers + testclass_dict["distribution"] = TestAllDistributions + testclass_dict["early_classifier"] = TestAllEarlyClassifiers + testclass_dict["forecaster"] = TestAllForecasters + testclass_dict["param_est"] = TestAllParamFitters + testclass_dict["regressor"] = TestAllRegressors + testclass_dict["transformer"] = TestAllTransformers + testclass_dict["transformer-pairwise"] = TestAllPairwiseTransformers + testclass_dict["transformer-pairwise-panel"] = TestAllPanelTransformers + + return testclass_dict + + +def get_test_classes_for_obj(obj): + """Get all test classes relevant for an object or estimator. + + Parameters + ---------- + obj : object or estimator, descendant of sktime BaseObject or BaseEstimator + object or estimator for which to get test classes + + Returns + ------- + test_classes : list of test classes + list of test classes relevant for obj + these are references to the actual classes, not strings + if obj was not a descendant of BaseObject or BaseEstimator, returns empty list + """ + from sktime.base import BaseEstimator, BaseObject + from sktime.registry import scitype + + def is_object(obj): + """Return whether obj is an estimator class or estimator object.""" + if isclass(obj): + return issubclass(obj, BaseObject) + else: + return isinstance(obj, BaseObject) + + def is_estimator(obj): + """Return whether obj is an estimator class or estimator object.""" + if isclass(obj): + return issubclass(obj, BaseEstimator) + else: + return isinstance(obj, BaseEstimator) + + if not is_object(obj): + return [] + + testclass_dict = get_test_class_registry() + + # we always need to run "object" tests + test_clss = [testclass_dict["object"]] + + if is_estimator(obj): + test_clss += [testclass_dict["estimator"]] + + try: + obj_scitypes = scitype(obj, force_single_scitype=False, coerce_to_list=True) + except Exception: + obj_scitypes = [] + + for obj_scitype in obj_scitypes: + if obj_scitype in testclass_dict: + test_clss += [testclass_dict[obj_scitype]] + + return test_clss diff --git a/sktime/tests/test_switch.py b/sktime/tests/test_switch.py index 1daf04cd692..742cc457d03 100644 --- a/sktime/tests/test_switch.py +++ b/sktime/tests/test_switch.py @@ -1,5 +1,8 @@ # copyright: sktime developers, BSD-3-Clause License (see LICENSE file) -"""Switch utility for determining whether tests for a class should be run or not.""" +"""Switch utility for determining whether tests for a class should be run or not. + +Module does not contain tests, only test utilities. +""" __author__ = ["fkiraly"] @@ -13,17 +16,36 @@ def run_test_for_class(cls): 1. whether all required soft dependencies are present. If not, does not run the test. - 2. If yes: - * if ONLY_CHANGED_MODULES setting is on, runs the test if and only - if the module containing the class/func has changed according to is_class_changed - * if ONLY_CHANGED_MODULES if off, always runs the test if all soft dependencies - are present. + If yes, behaviour depends on ONLY_CHANGED_MODULES setting: + if off (False), always runs the test (return True); + if on (True), runs test if and only if + at least one of conditions 2, 3, 4 below are met. + + 2. Condition 2: + + If the module containing the class/func has changed according to is_class_changed, + or one of the modules containing any parent classes in sktime, + then condition 2 is met. + + 3. Condition 3: + + If the object is an sktime ``BaseObject``, and one of the test classes + covering the class have changed, then condition 3 is met. + + 4. Condition 4: + + If the object is an sktime ``BaseObject``, and the package requirements + for any of its dependencies have changed in ``pyproject.toml``, + condition 4 is met. cls can also be a list of classes or functions, - in this case the test is run if and only if: + in this case the test is run if and only if both of the following are True: * all required soft dependencies are present - * if yes, if any of the estimators in the list should be tested by criterion 2 above + * if ``ONLY_CHANGED_MODULES`` is True, additionally, + if any of the estimators in the list should be tested by + at least one of criteria 2-4 above. + If ``ONLY_CHANGED_MODULES`` is False, this condition is always True. Parameters ---------- @@ -39,9 +61,11 @@ class for which to determine whether it should be tested cls = [cls] from sktime.tests.test_all_estimators import ONLY_CHANGED_MODULES - from sktime.utils.git_diff import is_class_changed + from sktime.utils.git_diff import get_packages_with_changed_specs, is_class_changed from sktime.utils.validation._dependencies import _check_estimator_deps + PACKAGE_REQ_CHANGED = get_packages_with_changed_specs() + def _required_deps_present(obj): """Check if all required soft dependencies are present, return bool.""" if hasattr(obj, "get_class_tag"): @@ -62,15 +86,53 @@ def _is_class_changed_or_sktime_parents(cls): ] return any(is_class_changed(x) for x in cls_and_sktime_parents) + def _tests_covering_class_changed(cls): + """Check if any of the tests covering cls have changed, return bool.""" + from sktime.tests.test_class_register import get_test_classes_for_obj + + test_classes = get_test_classes_for_obj(cls) + return any(is_class_changed(x) for x in test_classes) + + def _is_impacted_by_pyproject_change(cls): + """Check if the dep specifcations of cls have changed, return bool.""" + from packaging.requirements import Requirement + + if not isclass(cls) or not hasattr(cls, "get_class_tags"): + return False + + cls_reqs = cls.get_class_tag("python_dependencies", []) + if cls_reqs is None: + cls_reqs = [] + if not isinstance(cls_reqs, list): + cls_reqs = [cls_reqs] + package_deps = [Requirement(req).name for req in cls_reqs] + + return any(x in PACKAGE_REQ_CHANGED for x in package_deps) + + # Condition 1: # if any of the required soft dependencies are not present, do not run the test if not all(_required_deps_present(x) for x in cls): return False + # otherwise, continue + + # if ONLY_CHANGED_MODULES is off: always True + # tests are always run if soft dependencies are present + if not ONLY_CHANGED_MODULES: + return True - # if ONLY_CHANGED_MODULES is on, run the test if and only if + # Condition 2: # any of the modules containing any of the classes in the list have changed - if ONLY_CHANGED_MODULES: - return any(_is_class_changed_or_sktime_parents(x) for x in cls) + # or any of the modules containing any parent classes in sktime have changed + cond2 = any(_is_class_changed_or_sktime_parents(x) for x in cls) + + # Condition 3: + # if the object is an sktime BaseObject, and one of the test classes + # covering the class have changed, then run the test + cond3 = any(_tests_covering_class_changed(x) for x in cls) + + # Condition 4: + # the package requirements for any dependency in pyproject.toml have changed + cond4 = any(_is_impacted_by_pyproject_change(x) for x in cls) - # otherwise - # i.e., dependencies are present, and differential testing is disabled - return True + # run the test if and only if at least one of the conditions 2-4 are met + return cond2 or cond3 or cond4 diff --git a/sktime/transformations/base.py b/sktime/transformations/base.py index 430037f06d4..7bc2c0577db 100644 --- a/sktime/transformations/base.py +++ b/sktime/transformations/base.py @@ -56,6 +56,7 @@ class name: BaseTransformer from sktime.base import BaseEstimator from sktime.datatypes import ( VectorizedDF, + check_is_error_msg, check_is_mtype, check_is_scitype, convert, @@ -132,11 +133,14 @@ class BaseTransformer(BaseEstimator): # todo: rename to capability:missing_values "capability:missing_values:removes": False, # is transform result always guaranteed to contain no missing values? - "python_version": None, # PEP 440 python version specifier to limit versions "remember_data": False, # whether all data seen is remembered as self._X + "python_version": None, # PEP 440 python version specifier to limit versions + "authors": "sktime developers", # author(s) of the object + "maintainers": "sktime developers", # current maintainer(s) of the object } # default config values + # see set_config documentation for details _config = { "input_conversion": "on", # controls input checks and conversions, @@ -155,46 +159,31 @@ class BaseTransformer(BaseEstimator): # {None, "dask", "loky", "multiprocessing", "threading"} # None: no parallelization # "loky", "multiprocessing" and "threading": uses `joblib` Parallel loops + # "joblib": uses custom joblib backend, set via `joblib_backend` tag # "dask": uses `dask`, requires `dask` package in environment "backend:parallel:params": None, # params for parallelization backend } _config_doc = { - "backend:parallel": """ - backend:parallel : str, optional, default="None" - backend to use for parallelization when broadcasting/vectorizing, one of - - - "None": executes loop sequentally, simple list comprehension - - "loky", "multiprocessing" and "threading": uses ``joblib`` ``Parallel`` - - "dask": uses ``dask``, requires ``dask`` package in environment - """, - "backend:parallel:params": """ - backend:parallel:params : dict, optional, default={} (no parameters passed) - additional parameters passed to the parallelization backend as config. - Valid keys depend on the value of ``backend:parallel``: - - - "None": no additional parameters, ``backend_params`` is ignored - - "loky", "multiprocessing" and "threading": - any valid keys for ``joblib.Parallel`` can be passed here, - e.g., ``n_jobs``, with the exception of ``backend`` which is directly - controlled by ``backend:parallel`` - - "dask": any valid keys for ``dask.compute`` - can be passed, e.g., ``scheduler`` - """, "input_conversion": """ - input_conversion : str, one of "on", "off", valid mtype string + input_conversion : str, one of "on" (default), "off", or valid mtype string controls input checks and conversions, - for _fit, _transform, _inverse_transform, _update - "on" - input check and conversion is carried out - "off" - input check and conversion not done before passing to inner methods - valid mtype string - input is assumed to specified mtype + for ``_fit``, ``_transform``, ``_inverse_transform``, ``_update`` + + * ``"on"`` - input check and conversion is carried out + * ``"off"`` - input check and conversion are not carried out + before passing data to inner methods + * valid mtype string - input is assumed to specified mtype, + conversion is carried out but no check """, "output_conversion": """ output_conversion : str, one of "on", "off", valid mtype string - controls output conversion for _transform, _inverse_transform - "on" - if input_conversion is "on", output conversion is carried out - "off" - output of _transform, _inverse_transform is directly returned - valid mtype string - output is converted to specified mtype + controls output conversion for ``_transform``, ``_inverse_transform`` + + * ``"on"`` - if input_conversion is "on", output conversion is carried out + * ``"off"`` - output of ``_transform``, ``_inverse_transform`` + is directly returned + * valid mtype string - output is converted to specified mtype """, } @@ -219,6 +208,24 @@ def __init__(self): super().__init__() _check_estimator_deps(self) + def _is_transformer(self, other): + """Check whether other is a transformer - sklearn or sktime. + + Returns True iff at least one of the following is True: + + * ``is_sklearn_transformer(other)`` + * ``scitype(other) == "transformer"`` + + Parameters + ---------- + other : object + object to check + """ + from sktime.registry import scitype + + is_sktime_transformr = scitype(other, raise_on_unknown=False) == "transformer" + return is_sklearn_transformer(other) or is_sktime_transformr + def __mul__(self, other): """Magic * method, return (right) concatenated TransformerPipeline. @@ -226,7 +233,7 @@ def __mul__(self, other): Parameters ---------- - other: `sktime` transformer, must inherit from BaseTransformer + other: ``sktime`` or ``sklearn`` compatible transformer otherwise, `NotImplemented` is returned Returns @@ -239,11 +246,10 @@ def __mul__(self, other): # we wrap self in a pipeline, and concatenate with the other # the TransformerPipeline does the rest, e.g., case distinctions on other if ( - isinstance(other, BaseTransformer) + self._is_transformer(other) or is_sklearn_classifier(other) or is_sklearn_clusterer(other) or is_sklearn_regressor(other) - or is_sklearn_transformer(other) ): self_as_pipeline = TransformerPipeline(steps=[self]) return self_as_pipeline * other @@ -257,7 +263,7 @@ def __rmul__(self, other): Parameters ---------- - other: `sktime` transformer, must inherit from BaseTransformer + other: ``sktime`` or ``sklearn`` compatible transformer otherwise, `NotImplemented` is returned Returns @@ -269,7 +275,7 @@ def __rmul__(self, other): # we wrap self in a pipeline, and concatenate with the other # the TransformerPipeline does the rest, e.g., case distinctions on other - if isinstance(other, BaseTransformer) or is_sklearn_transformer(other): + if self._is_transformer(other): self_as_pipeline = TransformerPipeline(steps=[self]) return other * self_as_pipeline else: @@ -282,7 +288,8 @@ def __or__(self, other): Parameters ---------- - other: `sktime` transformer or sktime MultiplexTransformer + other: ``sktime`` or ``sklearn`` compatible transformer + otherwise, `NotImplemented` is returned Returns ------- @@ -290,7 +297,7 @@ def __or__(self, other): """ from sktime.transformations.compose import MultiplexTransformer - if isinstance(other, BaseTransformer): + if self._is_transformer(other): multiplex_self = MultiplexTransformer([self]) return multiplex_self | other else: @@ -303,7 +310,7 @@ def __add__(self, other): Parameters ---------- - other: `sktime` transformer, must inherit from BaseTransformer + other: ``sktime`` or ``sklearn`` compatible transformer otherwise, `NotImplemented` is returned Returns @@ -315,7 +322,7 @@ def __add__(self, other): # we wrap self in a pipeline, and concatenate with the other # the FeatureUnion does the rest, e.g., case distinctions on other - if isinstance(other, BaseTransformer): + if self._is_transformer(other): self_as_pipeline = FeatureUnion(transformer_list=[self]) return self_as_pipeline + other else: @@ -328,7 +335,7 @@ def __radd__(self, other): Parameters ---------- - other: `sktime` transformer, must inherit from BaseTransformer + other: ``sktime`` or ``sklearn`` compatible transformer otherwise, `NotImplemented` is returned Returns @@ -340,7 +347,7 @@ def __radd__(self, other): # we wrap self in a pipeline, and concatenate with the other # the TransformerPipeline does the rest, e.g., case distinctions on other - if isinstance(other, BaseTransformer): + if self._is_transformer(other): self_as_pipeline = FeatureUnion(transformer_list=[self]) return other + self_as_pipeline else: @@ -591,7 +598,7 @@ def transform(self, X, y=None): output_conv = configs["output_conversion"] # convert to output mtype - if X is None: + if X is None or Xt is None: X_out = Xt elif input_conv and output_conv: X_out = self._convert_output(Xt, metadata=metadata) @@ -952,7 +959,11 @@ def _most_complex_scitype(scitypes, smaller_equal_than=None): elif smaller_equal_than is not None: return _most_complex_scitype(scitypes) else: - raise ValueError("no series scitypes supported, bug in estimator") + raise ValueError( + f"Error in {type(self).__name__}, no series scitypes supported, " + "likely a bug in estimator: scitypes arg passed to " + f"_most_complex_scitype are {scitypes}" + ) def _scitype_A_higher_B(scitypeA, scitypeB): """Compare two scitypes regarding complexity.""" @@ -983,32 +994,27 @@ def _scitype_A_higher_B(scitypeA, scitypeB): var_name="X", ) - msg_invalid_input = ( - f"must be in an sktime compatible format, " - f"of scitype Series, Panel or Hierarchical, " - f"for instance a pandas.DataFrame with sktime compatible time indices, " - f"or with MultiIndex and last(-1) level an sktime compatible time index. " - f"Allowed compatible mtype format specifications are: {ALLOWED_MTYPES} ." - # f"See the transformers tutorial examples/05_transformers.ipynb, or" - f" See the data format tutorial examples/AA_datatypes_and_datasets.ipynb. " - f"If you think the data is already in an sktime supported input format, " - f"run sktime.datatypes.check_raise(data, mtype) to diagnose the error, " - f"where mtype is the string of the type specification you want. " - f"Error message for checked mtypes, in format [mtype: message], as follows:" - ) - if not X_valid: - for mtype, err in msg.items(): - msg_invalid_input += f" [{mtype}: {err}] " - raise TypeError("X " + msg_invalid_input) - X_scitype = X_metadata["scitype"] X_mtype = X_metadata["mtype"] # remember these for potential back-conversion (in transform etc) metadata["_X_mtype_last_seen"] = X_mtype metadata["_X_input_scitype"] = X_scitype - if X_mtype not in ALLOWED_MTYPES: - raise TypeError("X " + msg_invalid_input) + # raise informative error message if X is in wrong format + allowed_msg = ( + f"Allowed scitypes for X in transformations are " + f"Series, Panel or Hierarchical, " + f"for instance a pandas.DataFrame with sktime compatible time indices, " + f"or with MultiIndex and last(-1) level an sktime compatible time index. " + f"Allowed compatible mtype format specifications are: {ALLOWED_MTYPES} ." + ) + msg_start = f"Unsupported input data type in {self.__class__.__name__}, input " + msg_X = msg_start + "X" + if not X_valid or X_mtype not in ALLOWED_MTYPES: + msg = {k: v for k, v in msg.items() if k in ALLOWED_MTYPES} + check_is_error_msg( + msg, var_name=msg_X, allowed_msg=allowed_msg, raise_exception=True + ) if X_scitype in X_inner_scitype: case = "case 1: scitype supported" @@ -1041,14 +1047,21 @@ def _scitype_A_higher_B(scitypeA, scitypeB): y_valid, msg, y_metadata = check_is_scitype( y, scitype=y_possible_scitypes, return_metadata=[], var_name="y" ) - if not y_valid: - for mtype, err in msg.items(): - msg_invalid_input += f" [{mtype}: {err}] " - raise TypeError("y " + msg_invalid_input) - y_scitype = y_metadata["scitype"] y_mtype = y_metadata["mtype"] + # raise informative error message if y is is in wrong format + if not y_valid: + allowed_msg = ( + f"Allowed scitypes for y in transformations depend on X passed. " + f"Passed X scitype was {X_scitype}, " + f"so allowed scitypes for y are {y_possible_scitypes}. " + ) + msg_y = msg_start + "y" + check_is_error_msg( + msg, var_name=msg_y, allowed_msg=allowed_msg, raise_exception=True + ) + else: # y_scitype is used below - set to None if y is None y_scitype = None @@ -1177,10 +1190,20 @@ def _convert_output(self, X, metadata, inverse=False): # skipped for output_scitype = "Primitives" # since then the output always is a pd.DataFrame if case == "case 2: higher scitype supported" and output_scitype == "Series": - Xt = convert_to( - Xt, - to_type=["pd-multiindex", "numpy3D", "df-list", "pd_multiindex_hier"], - ) + if self.get_tags()["scitype:transform-input"] == "Panel": + # Conversion from Series to Panel done for being compatible with + # algorithm. Thus, the returned Series should stay a Series. + pass + else: + Xt = convert_to( + Xt, + to_type=[ + "pd-multiindex", + "numpy3D", + "df-list", + "pd_multiindex_hier", + ], + ) Xt = convert_to_scitype(Xt, to_scitype=X_input_scitype) # now, in all cases, Xt is in the right scitype, @@ -1194,7 +1217,6 @@ def _convert_output(self, X, metadata, inverse=False): if output_scitype == "Series": # output mtype is input mtype X_output_mtype = X_input_mtype - # exception to this: if the transformer outputs multivariate series, # we cannot convert back to pd.Series, do pd.DataFrame instead then # this happens only for Series, not Panel @@ -1207,6 +1229,7 @@ def _convert_output(self, X, metadata, inverse=False): valid, msg, metadata = check_is_mtype( Xt, ["pd.DataFrame", "pd.Series", "np.ndarray"], + msg_return_dict="list", return_metadata=Xt_metadata_required, ) @@ -1219,6 +1242,13 @@ def _convert_output(self, X, metadata, inverse=False): ) if X_input_mtype == "pd.Series" and not metadata["is_univariate"]: X_output_mtype = "pd.DataFrame" + elif self.get_tags()["scitype:transform-input"] == "Panel": + # Input has always to be Panel + X_output_mtype = "pd.DataFrame" + else: + # Input can be Panel or Hierarchical, since it is supported + # by the used mtype + output_scitype = X_input_scitype # Xt_mtype = metadata["mtype"] # else: # Xt_mtype = X_input_mtype @@ -1231,10 +1261,10 @@ def _convert_output(self, X, metadata, inverse=False): # store=_converter_store_X, # store_behaviour="freeze", # ) - Xt = convert_to( + return convert_to( Xt, to_type=X_output_mtype, - as_scitype=X_input_scitype, + as_scitype=output_scitype, store=_converter_store_X, store_behaviour="freeze", ) @@ -1250,7 +1280,7 @@ def _convert_output(self, X, metadata, inverse=False): # else this is only zeros and should be reset to RangeIndex else: Xt = Xt.reset_index(drop=True) - Xt = convert_to( + return convert_to( Xt, to_type="pd_DataFrame_Table", as_scitype="Table", @@ -1305,9 +1335,19 @@ def _vectorize(self, methodname, **kwargs): n_fit = n * m if n_trafos != n_fit: raise RuntimeError( + f"{type(self).__name__} is a transformer that applies per " + "individual time series, and broadcasts across instances. " + f"In fit, {type(self).__name__} makes one fit per instance, " + "and applies that fit to the instance with the same index in " + "transform. Vanilla use therefore requires the same number " + "of instances in fit and transform, but" "found different number of instances in transform than in fit. " f"number of instances seen in fit: {n_fit}; " - f"number of instances seen in transform: {n_trafos}" + f"number of instances seen in transform: {n_trafos}. " + "For fit/transforming per instance, e.g., for pre-processinng " + "in a time series classification, regression or clustering " + "pipeline, wrap this transformer in " + "FitInTransform, from sktime.transformations.compose." ) transformers_ = self.transformers_ @@ -1441,6 +1481,10 @@ def _update(self, X, y=None): return self +# initialize dynamic docstrings +BaseTransformer._init_dynamic_doc() + + class _SeriesToPrimitivesTransformer(BaseTransformer): """Transformer base class for series to primitive(s) transforms.""" diff --git a/sktime/transformations/bootstrap/_mbb.py b/sktime/transformations/bootstrap/_mbb.py index 53bef156d80..574e6b6be1b 100644 --- a/sktime/transformations/bootstrap/_mbb.py +++ b/sktime/transformations/bootstrap/_mbb.py @@ -40,7 +40,7 @@ class STLBootstrapTransformer(BaseTransformer): Parameters ---------- n_series : int, optional - The number of bootstraped time series that will be generated, by default 10. + The number of bootstrapped time series that will be generated, by default 10. sp : int, optional Seasonal periodicity of the data in integer form, by default 12. Must be an integer >= 2 @@ -165,6 +165,12 @@ class STLBootstrapTransformer(BaseTransformer): """ _tags = { + # packaging info + # -------------- + "authors": "ltsaprounis", + "python_dependencies": "statsmodels", + # estimator type + # -------------- # todo: what is the scitype of X: Series, or Panel "scitype:transform-input": "Series", # todo: what scitype is returned: Primitives, Series, Panel @@ -183,7 +189,6 @@ class STLBootstrapTransformer(BaseTransformer): "enforce_index_type": None, # index type that needs to be enforced in X/y "fit_is_empty": False, # is fit empty and can be skipped? Yes = True "transform-returns-same-time-index": False, - "python_dependencies": "statsmodels", } def __init__( @@ -422,7 +427,7 @@ class MovingBlockBootstrapTransformer(BaseTransformer): Parameters ---------- n_series : int, optional - The number of bootstraped time series that will be generated, by default 10 + The number of bootstrapped time series that will be generated, by default 10 block_length : int, optional The length of the block in the MBB method, by default None. If not provided, the following heuristic is used, the block length will the diff --git a/sktime/transformations/bootstrap/_splitterboot.py b/sktime/transformations/bootstrap/_splitterboot.py index 4804eca2c1b..7c4b13cbf92 100644 --- a/sktime/transformations/bootstrap/_splitterboot.py +++ b/sktime/transformations/bootstrap/_splitterboot.py @@ -86,11 +86,11 @@ class SplitterBootstrapTransformer(BaseTransformer): """ _tags = { - # todo: what is the scitype of X: Series, or Panel + # what is the scitype of X: Series, or Panel "scitype:transform-input": "Series", - # todo: what scitype is returned: Primitives, Series, Panel + # what scitype is returned: Primitives, Series, Panel "scitype:transform-output": "Panel", - # todo: what is the scitype of y: None (not needed), Primitives, Series, Panel + # what is the scitype of y: None (not needed), Primitives, Series, Panel "scitype:transform-labels": "None", "scitype:instancewise": True, # is this an instance-wise transform? "X_inner_mtype": "pd.DataFrame", # which mtypes do _fit/_predict support for X? diff --git a/sktime/transformations/compose/_column.py b/sktime/transformations/compose/_column.py index e0c1af86bbe..5e09e2a71c2 100644 --- a/sktime/transformations/compose/_column.py +++ b/sktime/transformations/compose/_column.py @@ -120,6 +120,7 @@ class ColumnEnsembleTransformer( """ _tags = { + "authors": ["fkiraly", "mloning"], "X_inner_mtype": PANDAS_MTYPES, "y_inner_mtype": PANDAS_MTYPES, "fit_is_empty": False, diff --git a/sktime/transformations/compose/_featureunion.py b/sktime/transformations/compose/_featureunion.py index db6e80937d9..d2c83880d4c 100644 --- a/sktime/transformations/compose/_featureunion.py +++ b/sktime/transformations/compose/_featureunion.py @@ -8,6 +8,7 @@ from sktime.base._meta import _HeterogenousMetaEstimator from sktime.transformations.base import BaseTransformer +from sktime.transformations.compose._common import _coerce_to_sktime from sktime.utils.multiindex import flatten_multiindex @@ -42,6 +43,7 @@ class FeatureUnion(_HeterogenousMetaEstimator, BaseTransformer): """ _tags = { + "authors": ["fkiraly", "mloning"], "scitype:transform-input": "Series", "scitype:transform-output": "Series", "scitype:transform-labels": "None", @@ -132,6 +134,7 @@ def __add__(self, other): TransformerPipeline object, concatenation of `self` (first) with `other` (last). not nested, contains only non-FeatureUnion `sktime` transformers """ + other = _coerce_to_sktime(other) return self._dunder_concat( other=other, base_class=BaseTransformer, @@ -155,6 +158,7 @@ def __radd__(self, other): TransformerPipeline object, concatenation of `self` (last) with `other` (first). not nested, contains only non-FeatureUnion `sktime` transformers """ + other = _coerce_to_sktime(other) return self._dunder_concat( other=other, base_class=BaseTransformer, diff --git a/sktime/transformations/compose/_fitintransform.py b/sktime/transformations/compose/_fitintransform.py index 190b1638240..d5d2b28bdf6 100644 --- a/sktime/transformations/compose/_fitintransform.py +++ b/sktime/transformations/compose/_fitintransform.py @@ -58,6 +58,8 @@ class FitInTransform(BaseTransformer): >>> y_pred = pipe.predict(fh=fh, X=X_test) """ + _tags = {"authors": ["aiwalter", "fkiraly"]} + def __init__(self, transformer, skip_inverse_transform=True): self.transformer = transformer self.skip_inverse_transform = skip_inverse_transform diff --git a/sktime/transformations/compose/_grouped.py b/sktime/transformations/compose/_grouped.py index 6dcfbf6ebca..ff6664cf711 100644 --- a/sktime/transformations/compose/_grouped.py +++ b/sktime/transformations/compose/_grouped.py @@ -57,6 +57,7 @@ class TransformByLevel(_DelegatedTransformer): """ _tags = { + "authors": ["fkiraly"], "requires-fh-in-fit": False, "handles-missing-data": True, "X_inner_mtype": ALL_TIME_SERIES_MTYPES, diff --git a/sktime/transformations/compose/_id.py b/sktime/transformations/compose/_id.py index b60026461b3..7466ac094a8 100644 --- a/sktime/transformations/compose/_id.py +++ b/sktime/transformations/compose/_id.py @@ -16,6 +16,7 @@ class Id(BaseTransformer): """Identity transformer, returns data unchanged in transform/inverse_transform.""" _tags = { + "authors": "fkiraly", "capability:inverse_transform": True, # can the transformer inverse transform? "univariate-only": False, # can the transformer handle multivariate X? "X_inner_mtype": CORE_MTYPES, # which mtypes do _fit/_predict support for X? diff --git a/sktime/transformations/compose/_invert.py b/sktime/transformations/compose/_invert.py index 686b459ef2e..3282d197210 100644 --- a/sktime/transformations/compose/_invert.py +++ b/sktime/transformations/compose/_invert.py @@ -35,6 +35,7 @@ class InvertTransform(_DelegatedTransformer): """ _tags = { + "authors": ["fkiraly"], "scitype:transform-input": "Series", # what is the scitype of X: Series, or Panel "scitype:transform-output": "Series", diff --git a/sktime/transformations/compose/_ixtox.py b/sktime/transformations/compose/_ixtox.py index 26a98c58eeb..3715bdc2b50 100644 --- a/sktime/transformations/compose/_ixtox.py +++ b/sktime/transformations/compose/_ixtox.py @@ -4,7 +4,8 @@ __author__ = ["fkiraly"] __all__ = ["IxToX"] -from pandas.api.types import is_datetime64_any_dtype, is_period_dtype +import pandas as pd +from pandas.api.types import is_datetime64_any_dtype from sktime.transformations.base import BaseTransformer @@ -63,6 +64,7 @@ class IxToX(BaseTransformer): """ _tags = { + "authors": "fkiraly", "transform-returns-same-time-index": True, "skip-inverse-transform": False, "univariate-only": False, @@ -101,7 +103,7 @@ def _transform(self, X, y=None): level = self.level def is_date_like(x): - return is_datetime64_any_dtype(x) or is_period_dtype(x) + return is_datetime64_any_dtype(x) or isinstance(x, pd.PeriodDtype) if ix_source == "y" and y is not None: X = y diff --git a/sktime/transformations/compose/_multiplex.py b/sktime/transformations/compose/_multiplex.py index a23bce396bc..731eb4ca44a 100644 --- a/sktime/transformations/compose/_multiplex.py +++ b/sktime/transformations/compose/_multiplex.py @@ -90,6 +90,7 @@ class MultiplexTransformer(_HeterogenousMetaEstimator, _DelegatedTransformer): # tags will largely be copied from selected_transformer _tags = { + "authors": ["miraep8", "fkiraly"], "fit_is_empty": False, "univariate-only": False, "X_inner_mtype": ALL_TIME_SERIES_MTYPES, diff --git a/sktime/transformations/compose/_optional.py b/sktime/transformations/compose/_optional.py index c54ed2d5921..4df2416c61f 100644 --- a/sktime/transformations/compose/_optional.py +++ b/sktime/transformations/compose/_optional.py @@ -71,6 +71,7 @@ class OptionalPassthrough(_DelegatedTransformer): """ _tags = { + "authors": ["aiwalter", "fkiraly"], "scitype:transform-input": "Series", # what is the scitype of X: Series, or Panel "scitype:transform-output": "Series", diff --git a/sktime/transformations/compose/_pipeline.py b/sktime/transformations/compose/_pipeline.py index 2da71e4d1f5..47a0d682667 100644 --- a/sktime/transformations/compose/_pipeline.py +++ b/sktime/transformations/compose/_pipeline.py @@ -118,6 +118,7 @@ class TransformerPipeline(_HeterogenousMetaEstimator, BaseTransformer): """ _tags = { + "authors": "fkiraly", # we let all X inputs through to be handled by first transformer "X_inner_mtype": CORE_MTYPES, "univariate-only": False, @@ -145,7 +146,6 @@ def __init__(self, steps): # abbreviate for readability ests = self.steps_ first_trafo = ests[0][1] - last_trafo = ests[-1][1] # input mtype and input type are as of the first estimator self.clone_tags(first_trafo, ["scitype:transform-input"]) @@ -153,7 +153,7 @@ def __init__(self, steps): # if "Primitives" occur in the middle, then output is set to that too # this is in a case where "Series-to-Series" is applied to primitive df # e.g., in a case of pipelining with scikit-learn transformers - last_out = last_trafo.get_tag("scitype:transform-output") + last_out = self._trafo_out() self._anytagis_then_set( "scitype:transform-output", "Primitives", last_out, ests ) @@ -397,3 +397,63 @@ def get_test_params(cls, parameter_set="default"): params3 = {"steps": [("foo", t1), ("foo", t2), ("foo_1", t3)]} return [params1, params2, params3] + + def _to_dim(self, x): + """Translate scitype:transform-input or output tag to data dimension. + + Parameters + ---------- + x : str, one of "Series", "Panel", "Hierarchical" + scitype:transform-input or output tag + + Returns + ------- + int + data dimension corresponding to x + """ + if x == "Series": + return 1 + elif x == "Panel": + return 2 + else: + return 3 + + def _dim_diff(self, obj): + """Compute difference between input and output dimension.""" + inp = obj.get_tag("scitype:transform-input") + out = obj.get_tag("scitype:transform-output") + return self._to_dim(out) - self._to_dim(inp) + + def _dim_to_sci(self, d): + """Translate data dimension to scitype:transform-output tag. + + Parameters + ---------- + d : int + data dimension + + Returns + ------- + str + scitype:transform-output tag corresponding to data dimension + """ + if d <= 1: + return "Series" + elif d == 2: + return "Panel" + else: + return "Hierarchical" + + def _trafo_out(self): + """Infer scitype:transform-output tag. + + Uses the self.steps_ attribute, assumes it is initialized already. + """ + ests = self.steps_ + est_list = [x[1] for x in ests] + inp_dim = self._to_dim(est_list[0].get_tag("scitype:transform-input")) + out_dim = inp_dim + for est in est_list: + dim_diff = self._dim_diff(est) + out_dim = out_dim + dim_diff + return self._dim_to_sci(out_dim) diff --git a/sktime/transformations/compose/_transformif.py b/sktime/transformations/compose/_transformif.py index 5d77b3f4bc3..f0807ca14c2 100644 --- a/sktime/transformations/compose/_transformif.py +++ b/sktime/transformations/compose/_transformif.py @@ -82,6 +82,7 @@ class TransformIf(_DelegatedTransformer): """ _tags = { + "authors": ["fkiraly"], "scitype:transform-input": "Series", # what is the scitype of X: Series, or Panel "scitype:transform-output": "Series", diff --git a/sktime/transformations/compose/_ytox.py b/sktime/transformations/compose/_ytox.py index 002b3ccc3e0..2f50b8c5733 100644 --- a/sktime/transformations/compose/_ytox.py +++ b/sktime/transformations/compose/_ytox.py @@ -22,6 +22,7 @@ class YtoX(BaseTransformer): """ _tags = { + "authors": ["fkiraly"], "transform-returns-same-time-index": True, "skip-inverse-transform": False, "univariate-only": False, diff --git a/sktime/transformations/hierarchical/aggregate.py b/sktime/transformations/hierarchical/aggregate.py index bf4334b38a1..cb3f6ba9b71 100644 --- a/sktime/transformations/hierarchical/aggregate.py +++ b/sktime/transformations/hierarchical/aggregate.py @@ -49,6 +49,12 @@ class Aggregator(BaseTransformer): """ _tags = { + # packaging info + # -------------- + "authors": "ciaran-g", + "maintainers": "ciaran-g", + # estimator type + # -------------- "scitype:transform-input": "Series", "scitype:transform-output": "Series", "scitype:transform-labels": "None", diff --git a/sktime/transformations/hierarchical/reconcile.py b/sktime/transformations/hierarchical/reconcile.py index 50e7b827da8..6a742ca29aa 100644 --- a/sktime/transformations/hierarchical/reconcile.py +++ b/sktime/transformations/hierarchical/reconcile.py @@ -73,6 +73,12 @@ class Reconciler(BaseTransformer): """ _tags = { + # packaging info + # -------------- + "authors": ["ciaran-g", "eenticott-shell", "k1m190r"], + "maintainers": "ciaran-g", + # estimator type + # -------------- "scitype:transform-input": "Series", "scitype:transform-output": "Series", "scitype:transform-labels": "None", diff --git a/sktime/transformations/merger.py b/sktime/transformations/merger.py new file mode 100644 index 00000000000..ea6b55e381c --- /dev/null +++ b/sktime/transformations/merger.py @@ -0,0 +1,140 @@ +# copyright: sktime developers, BSD-3-Clause License (see LICENSE file) +"""Implements a merger for panel data.""" +import numpy as np + +__author__ = ["benHeid"] + + +from sktime.transformations.base import BaseTransformer + + +class Merger(BaseTransformer): + """Aggregates Panel data containing overlapping windows of one time series. + + The input data contains multiple overlapping time series elements that could + arranged as follows: + xxxx..... + .xxxx.... + ..xxxx... + ...xxxx.. + ....xxxx. + .....xxxx + ......xxxx + .......xxxx + ........xxxx + .........xxxx + The merger aggregates the data by aligning the time series windows as shown above + and applying a aggregation function to the overlapping data points. + The aggregation function can be one of "mean" or "median". I.e., the `mean` or + `median` of each column is calculated, resulting in a univariate time series. + + Parameters + ---------- + method : {`median`, `mean`}, default="median" + The method to use for aggregation. Can be one of "mean" or "median". + stride : int, default=0 + The stride to use for the aggregation. The stride determines the number of + shifts between consecutive instances. A stride of 0 means no shift. A + stride of 1 means that the time series is aggregated as above. + + Examples + -------- + >>> from sktime.transformations.merger import Merger + >>> from sktime.utils._testing.panel import _make_panel + >>> y = _make_panel(n_instances=10, n_columns=3, n_timepoints=5) + >>> result = Merger(method="median").fit_transform(y) + >>> result.shape + (3, 5) + + >>> from sktime.transformations.merger import Merger + >>> from sktime.utils._testing.panel import _make_panel + >>> y = _make_panel(n_instances=10, n_columns=3, n_timepoints=5) + >>> result = Merger(method="median", stride=1).fit_transform(y) + >>> result.shape + (14, 3) + """ + + _tags = { + # packaging info + # -------------- + "authors": "benHeid", + "maintainers": "benHeid", + # estimator type + # --------------- + "scitype:transform-input": "Panel", + "scitype:transform-output": "Series", + "X_inner_mtype": "numpy3D", + "fit_is_empty": True, + } + + def __init__(self, method="median", stride=0): + self.stride = stride + if method not in ["median", "mean"]: + raise ValueError(f"{method} must be 'mean' or 'median'.") + self.method = method + super().__init__() + + def _transform(self, X=None, y=None): + """Merge the Panel data by aligning them temporally. + + Parameters + ---------- + X : pd.DataFrame + The input panel data. + y : pd.Series + ignored + + Returns + ------- + returns a single time series + """ + horizon = X.shape[-1] + + if self.method == "mean": + result = np.nanmean(self._align_temporal(horizon, X), axis=0) + elif self.method == "median": + result = np.nanmedian(self._align_temporal(horizon, X), axis=0) + else: + raise ValueError(f"{self.method} must be 'mean' or 'median'.") + return result + + def _align_temporal(self, horizon, x): + x = x.astype(float) + if self.stride > 0: + x = np.insert( + x, np.arange(1, x.shape[0]).repeat(self.stride - 1), np.nan, axis=0 + ) + elif self.stride == 0: + return x + r = [] + for i in range(horizon): + _res = np.concatenate( + [ + np.full(fill_value=np.nan, shape=(i, x.shape[1])), + x[:, :, i], + np.full(fill_value=np.nan, shape=((horizon - 1 - i), x.shape[1])), + ] + ) + r.append(_res) + return np.stack(r) + + @classmethod + def get_test_params(cls, parameter_set="default"): + """Return testing parameter settings for the estimator. + + Parameters + ---------- + parameter_set : str, default="default" + Name of the set of test parameters to return, for use in tests. If no + special parameters are defined for a value, will return `"default"` set. + There are currently no reserved values for forecasters. + + Returns + ------- + params : dict or list of dict, default = {} + Parameters to create testing instances of the class + Each dict are parameters to construct an "interesting" test instance, i.e., + `MyClass(**params)` or `MyClass(**params[i])` creates a valid test instance. + `create_test_instance` uses the first (or only) dictionary in `params` + """ + return [{"method": "mean"}, {"method": "median"}] diff --git a/sktime/transformations/panel/catch22.py b/sktime/transformations/panel/catch22.py index 53ac4e29058..2339a9f6020 100644 --- a/sktime/transformations/panel/catch22.py +++ b/sktime/transformations/panel/catch22.py @@ -159,13 +159,18 @@ class Catch22(BaseTransformer): """ _tags = { + # packaging info + # -------------- + "authors": ["MatthewMiddlehurst"], + "python_dependencies": "numba", + # estimator type + # -------------- "scitype:transform-input": "Series", "scitype:transform-output": "Primitives", "scitype:instancewise": True, "X_inner_mtype": "nested_univ", "y_inner_mtype": "None", "fit_is_empty": True, - "python_dependencies": "numba", } def __init__( @@ -323,7 +328,7 @@ def _transform_case(self, X, f_idx): f_count = -1 for i in range(len(X)): - series = np.array(X[i]) + series = np.array(X.iloc[i]) dim = i * len(f_idx) outlier_series = None smin = None @@ -403,9 +408,7 @@ def _transform_case(self, X, f_idx): return c22 def _transform_single_feature(self, X, feature, case_id=None): - if isinstance(feature, (int, np.integer)) or isinstance( - feature, (float, float) - ): + if isinstance(feature, (int, np.integer)) or isinstance(feature, float): if feature > 21 or feature < 0: raise ValueError("Invalid catch22 feature ID") elif isinstance(feature, str): diff --git a/sktime/transformations/panel/catch22wrapper.py b/sktime/transformations/panel/catch22wrapper.py index 2a6cadfa2fd..62a389a68c9 100644 --- a/sktime/transformations/panel/catch22wrapper.py +++ b/sktime/transformations/panel/catch22wrapper.py @@ -71,13 +71,19 @@ class Catch22Wrapper(BaseTransformer): """ _tags = { + # packaging info + # -------------- + "authors": ["MatthewMiddlehurst"], + "maintainers": "benfulcher", + "python_dependencies": "pycatch22", + # estimator type + # -------------- "scitype:transform-input": "Series", "scitype:transform-output": "Primitives", "scitype:instancewise": True, "X_inner_mtype": "nested_univ", "y_inner_mtype": "None", "fit_is_empty": True, - "python_dependencies": "pycatch22", } def __init__( @@ -192,7 +198,7 @@ def _transform_case(self, X, f_idx, features): f_count = -1 for i in range(len(X)): dim = i * len(f_idx) - series = list(X[i]) + series = list(X.iloc[i]) if self.outlier_norm and (3 in f_idx or 4 in f_idx): outlier_series = np.array(series) diff --git a/sktime/transformations/panel/channel_selection.py b/sktime/transformations/panel/channel_selection.py index 8da0ee2fc3b..906e6fb8fd8 100644 --- a/sktime/transformations/panel/channel_selection.py +++ b/sktime/transformations/panel/channel_selection.py @@ -177,6 +177,8 @@ class pair across each channel. """ _tags = { + "authors": ["haskarb", "a-pasos-ruiz", "TonyBagnall", "fkiraly"], + "maintainers": ["haskarb"], "scitype:transform-input": "Series", # what is the scitype of X: Series, or Panel # "scitype:transform-output": "Primitives", diff --git a/sktime/transformations/panel/compose.py b/sktime/transformations/panel/compose.py index 6b17de3ebe7..65ec33eae8e 100644 --- a/sktime/transformations/panel/compose.py +++ b/sktime/transformations/panel/compose.py @@ -15,6 +15,7 @@ from sktime.transformations.base import BaseTransformer, _PanelToPanelTransformer from sktime.utils.multiindex import flatten_multiindex +from sktime.utils.validation._dependencies import _check_soft_dependencies from sktime.utils.validation.panel import check_X @@ -106,7 +107,10 @@ class ColumnTransformer(_ColumnTransformer, _PanelToPanelTransformer): of the individual transformations and the `sparse_threshold` keyword. """ - _tags = {"python_dependencies": "scipy"} + _tags = { + "authors": ["mloning", "sajaysurya", "fkiraly"], + "python_dependencies": ["scipy", "sklearn<1.4"], + } def __init__( self, @@ -117,6 +121,8 @@ def __init__( transformer_weights=None, preserve_dataframe=True, ): + self.preserve_dataframe = preserve_dataframe + warn( "ColumnTransformer is not fully compliant with the sktime interface " "and will be replaced by sktime.transformations.ColumnEnsembleTransformer " @@ -127,6 +133,18 @@ def __init__( "ColumnTransformer can simply be replaced by ColumnEnsembleTransformer." ) + if not _check_soft_dependencies("sklearn<1.4", severity="none"): + raise ModuleNotFoundError( + "ColumnTransformer is not fully compliant with the sktime interface " + "and distributed only for reasons of downwards compatibility. " + "ColumnTransformer requires scikit-learn<1.4 " + "to be present in the python environment, with version, " + "due to reliance on sklearn.compose.ColumnTransformer, " + "and is not compatible with scikit-learn>=1.4. " + "Please use sktime.transformations.ColumnEnsembleTransformer instead, " + "if you have scikit-learn>=1.4 installed." + ) + super().__init__( transformers=transformers, remainder=remainder, @@ -135,7 +153,7 @@ def __init__( transformer_weights=transformer_weights, ) BaseTransformer.__init__(self) - self.preserve_dataframe = preserve_dataframe + self._is_fitted = False def _hstack(self, Xs): diff --git a/sktime/transformations/panel/compose_distance.py b/sktime/transformations/panel/compose_distance.py index 6e28f38e64e..576f82f4f49 100644 --- a/sktime/transformations/panel/compose_distance.py +++ b/sktime/transformations/panel/compose_distance.py @@ -45,6 +45,7 @@ class DistanceFeatures(BaseTransformer): """ _tags = { + "authors": "fkiraly", "X_inner_mtype": ["pd-multiindex", "pd_multiindex_hier"], "y_inner_mtype": "None", "scitype:transform-input": "Series", diff --git a/sktime/transformations/panel/dictionary_based/__init__.py b/sktime/transformations/panel/dictionary_based/__init__.py index aca0dbda125..8e3b9c56dc6 100644 --- a/sktime/transformations/panel/dictionary_based/__init__.py +++ b/sktime/transformations/panel/dictionary_based/__init__.py @@ -1,7 +1,10 @@ """Transformers.""" -__all__ = ["PAA", "SFA", "SFAFast", "SAX"] -from sktime.transformations.panel.dictionary_based._paa import PAA -from sktime.transformations.panel.dictionary_based._sax import SAX +# TODO 0.28.0 - remove exports of PAA, SAX +__all__ = ["PAA", "SFA", "SFAFast", "SAX", "PAAlegacy", "SAXlegacy"] + +# TODO 0.28.0 - remove exports of PAA, SAX +from sktime.transformations.panel.dictionary_based._paa import PAA, PAAlegacy +from sktime.transformations.panel.dictionary_based._sax import SAX, SAXlegacy from sktime.transformations.panel.dictionary_based._sfa import SFA from sktime.transformations.panel.dictionary_based._sfa_fast import SFAFast diff --git a/sktime/transformations/panel/dictionary_based/_paa.py b/sktime/transformations/panel/dictionary_based/_paa.py index 42059ed92bb..b0ce795d765 100644 --- a/sktime/transformations/panel/dictionary_based/_paa.py +++ b/sktime/transformations/panel/dictionary_based/_paa.py @@ -3,10 +3,12 @@ from sktime.datatypes._panel._convert import from_nested_to_2d_array from sktime.transformations.base import BaseTransformer +from sktime.utils.warnings import warn __author__ = ["MatthewMiddlehurst"] +# TODO 0.27.0: rename the class PAA to PAAlegacy class PAA(BaseTransformer): """Piecewise Aggregate Approximation Transformer (PAA). @@ -29,6 +31,7 @@ class PAA(BaseTransformer): """ _tags = { + "authors": ["MatthewMiddlehurst"], "scitype:transform-input": "Series", # what is the scitype of X: Series, or Panel "scitype:transform-output": "Series", @@ -42,6 +45,23 @@ def __init__(self, num_intervals=8): self.num_intervals = num_intervals super().__init__() + warn( + "panel.dictionary_based.PAA will be renamed to PAAlegacy in sktime 0.27.0, " + "while sktime.transformations.series.PAA2 will be renamed to PAA. " + "PAA2 will become the primary PAA implementation in sktime, " + "while the current PAA will continue to be available as PAAlegacy. " + "Both estimators are also available under their future name at their " + "current location, and will be available under their deprecated name " + "until 0.28.0. " + "To prepare for the name change, do one of the following: " + "1. replace use of PAA from sktime.transformations.panel.dictionary_based " + "by use of PAA2 from sktime.transformations.series.paa, or " + "2. replace use of PAA from sktime.transformations.panel.dictionary_based " + "by use of PAAlegacy from sktime.transformations.panel.dictionary_based. ", + DeprecationWarning, + obj=self, + ) + def set_num_intervals(self, n): """Set self.num_intervals to n.""" self.num_intervals = n @@ -151,3 +171,8 @@ def _check_parameters(self, num_atts): + type(self.num_intervals).__name__ + "' instead." ) + + +# TODO 0.27.0: switch the line to PAA = PAAlegacy +# TODO 0.28.0: remove this alias altogether +PAAlegacy = PAA diff --git a/sktime/transformations/panel/dictionary_based/_sax.py b/sktime/transformations/panel/dictionary_based/_sax.py index 8346100326b..a3c754b064a 100644 --- a/sktime/transformations/panel/dictionary_based/_sax.py +++ b/sktime/transformations/panel/dictionary_based/_sax.py @@ -7,17 +7,13 @@ import scipy.stats from sktime.transformations.base import BaseTransformer -from sktime.transformations.panel.dictionary_based import PAA - -# TO DO: verify this returned pandas is consistent with sktime -# definition. Timestamps? - -# from numba import types -# from numba.experimental import jitclass +from sktime.transformations.panel.dictionary_based import PAAlegacy as PAA +from sktime.utils.warnings import warn __author__ = ["MatthewMiddlehurst"] +# TODO 0.27.0: rename the class SAX to SAXlegacy class SAX(BaseTransformer): """Symbolic Aggregate approXimation (SAX) transformer. @@ -60,6 +56,7 @@ class SAX(BaseTransformer): """ _tags = { + "authors": ["MatthewMiddlehurst"], "univariate-only": True, "fit_is_empty": True, "scitype:transform-input": "Series", @@ -89,6 +86,24 @@ def __init__( self.words = [] super().__init__() + + warn( + "panel.dictionary_based.SAX will be renamed to SAXlegacy in sktime 0.27.0, " + "while sktime.transformations.series.SAX2 will be renamed to SAX. " + "SAX2 will become the primary SAX implementation in sktime, " + "while the current SAX will continue to be available as SAXlegacy. " + "Both estimators are also available under their future name at their " + "current location, and will be available under their deprecated name " + "until 0.28.0. " + "To prepare for the name change, do one of the following: " + "1. replace use of SAX from sktime.transformations.panel.dictionary_based " + "by use of SAX2 from sktime.transformations.series.sax, or " + "2. replace use of SAX from sktime.transformations.panel.dictionary_based " + "by use of SAXlegacy from sktime.transformations.panel.dictionary_based. ", + DeprecationWarning, + obj=self, + ) + self.set_config(**{"output_conversion": "off"}) # todo: looks like this just loops over series instances @@ -219,3 +234,8 @@ def get_test_params(cls, parameter_set="default"): # small word length, window size for testing params = {"word_length": 2, "window_size": 4} return params + + +# TODO 0.27.0: switch the line to SAX = SAXlegacy +# TODO 0.28.0: remove this alias altogether +SAXlegacy = SAX diff --git a/sktime/transformations/panel/dictionary_based/_sfa.py b/sktime/transformations/panel/dictionary_based/_sfa.py index 75dac238571..ffd997d85e4 100644 --- a/sktime/transformations/panel/dictionary_based/_sfa.py +++ b/sktime/transformations/panel/dictionary_based/_sfa.py @@ -101,6 +101,7 @@ class SFA(BaseTransformer): """ _tags = { + "authors": ["MatthewMiddlehurst", "patrickzib"], "univariate-only": True, "scitype:transform-input": "Series", # what is the scitype of X: Series, or Panel diff --git a/sktime/transformations/panel/dictionary_based/_sfa_fast.py b/sktime/transformations/panel/dictionary_based/_sfa_fast.py index 08db7f42989..3d5bf772252 100644 --- a/sktime/transformations/panel/dictionary_based/_sfa_fast.py +++ b/sktime/transformations/panel/dictionary_based/_sfa_fast.py @@ -123,6 +123,7 @@ class SFAFast(BaseTransformer): """ _tags = { + "authors": ["patrickzib"], "univariate-only": True, "scitype:transform-input": "Series", # what is the scitype of X: Series, or Panel diff --git a/sktime/transformations/panel/dictionary_based/tests/test_paa.py b/sktime/transformations/panel/dictionary_based/tests/test_paa.py index 822216aaac4..aa631d9918c 100644 --- a/sktime/transformations/panel/dictionary_based/tests/test_paa.py +++ b/sktime/transformations/panel/dictionary_based/tests/test_paa.py @@ -4,7 +4,7 @@ import pandas as pd import pytest -from sktime.transformations.panel.dictionary_based._paa import PAA +from sktime.transformations.panel.dictionary_based._paa import PAAlegacy as PAA from sktime.utils._testing.panel import _make_nested_from_array diff --git a/sktime/transformations/panel/dwt.py b/sktime/transformations/panel/dwt.py index be0a70cf7e9..a2e020b815a 100644 --- a/sktime/transformations/panel/dwt.py +++ b/sktime/transformations/panel/dwt.py @@ -7,7 +7,7 @@ from sktime.datatypes import convert from sktime.transformations.base import BaseTransformer -__author__ = ["Vincent Nicholson"] +__author__ = ["vnicholson1"] class DWTTransformer(BaseTransformer): @@ -22,6 +22,7 @@ class DWTTransformer(BaseTransformer): """ _tags = { + "authors": "vnicholson1", "scitype:transform-input": "Series", # what is the scitype of X: Series, or Panel "scitype:transform-output": "Series", diff --git a/sktime/transformations/panel/hog1d.py b/sktime/transformations/panel/hog1d.py index 7c39cc0be7d..58b1a217e0d 100644 --- a/sktime/transformations/panel/hog1d.py +++ b/sktime/transformations/panel/hog1d.py @@ -41,6 +41,7 @@ class HOG1DTransformer(BaseTransformer): """ _tags = { + "authors": ["vnicholson1"], "scitype:transform-input": "Series", # what is the scitype of X: Series, or Panel "scitype:transform-output": "Series", diff --git a/sktime/transformations/panel/interpolate.py b/sktime/transformations/panel/interpolate.py index 05631e7f77a..3805c26c81d 100644 --- a/sktime/transformations/panel/interpolate.py +++ b/sktime/transformations/panel/interpolate.py @@ -3,6 +3,7 @@ import pandas as pd from sktime.transformations.base import BaseTransformer +from sktime.utils.pandas import df_map __author__ = ["mloning"] @@ -24,6 +25,7 @@ class TSInterpolator(BaseTransformer): """ _tags = { + "authors": ["mloning"], "scitype:transform-input": "Series", # what is the scitype of X: Series, or Panel "scitype:transform-output": "Series", @@ -84,7 +86,7 @@ def _transform(self, X, y=None): pandas DataFrame : Transformed pandas DataFrame of shape [n_samples, n_features] follows nested_univ format """ - return X.applymap(self._resize_cell) + return df_map(X)(self._resize_cell) @classmethod def get_test_params(cls): @@ -98,5 +100,6 @@ def get_test_params(cls): `MyClass(**params)` or `MyClass(**params[i])` creates a valid test instance. `create_test_instance` uses the first (or only) dictionary in `params`. """ - params = {"length": 10} - return params + params1 = {"length": 10} + params2 = {"length": 5} + return [params1, params2] diff --git a/sktime/transformations/panel/matrix_profile.py b/sktime/transformations/panel/matrix_profile.py index 2b7b623dd28..189b14a4194 100644 --- a/sktime/transformations/panel/matrix_profile.py +++ b/sktime/transformations/panel/matrix_profile.py @@ -206,6 +206,7 @@ class MatrixProfile(BaseTransformer): """ _tags = { + "authors": "Claudia Rincon Sanchez", "univariate-only": True, "fit_is_empty": True, "scitype:transform-input": "Series", diff --git a/sktime/transformations/panel/padder.py b/sktime/transformations/panel/padder.py index b3893cedf23..d32ca69bdf0 100644 --- a/sktime/transformations/panel/padder.py +++ b/sktime/transformations/panel/padder.py @@ -3,6 +3,7 @@ import pandas as pd from sktime.transformations.base import BaseTransformer +from sktime.utils.pandas import df_map __all__ = ["PaddingTransformer"] __author__ = ["abostrom"] @@ -23,6 +24,8 @@ class PaddingTransformer(BaseTransformer): """ _tags = { + "authors": ["abostrom"], + "maintainers": ["abostrom"], "scitype:transform-input": "Series", # what is the scitype of X: Series, or Panel "scitype:transform-output": "Series", @@ -103,7 +106,7 @@ def _transform(self, X, y=None): ) pad = [pd.Series([self._create_pad(series) for series in out]) for out in arr] - Xt = pd.DataFrame(pad).applymap(pd.Series) + Xt = df_map(pd.DataFrame(pad))(pd.Series) return Xt diff --git a/sktime/transformations/panel/pca.py b/sktime/transformations/panel/pca.py index 8c4dbd6e11f..3303f424630 100644 --- a/sktime/transformations/panel/pca.py +++ b/sktime/transformations/panel/pca.py @@ -74,6 +74,8 @@ class PCATransformer(BaseTransformer): """ _tags = { + "authors": ["prockenschaub", "fkiraly"], + "maintainers": ["prockenschaub"], "scitype:transform-input": "Series", # what is the scitype of X: Series, or Panel "scitype:transform-output": "Series", diff --git a/sktime/transformations/panel/random_intervals.py b/sktime/transformations/panel/random_intervals.py index 1e64a64fb55..2c25b333fb8 100644 --- a/sktime/transformations/panel/random_intervals.py +++ b/sktime/transformations/panel/random_intervals.py @@ -46,6 +46,7 @@ class RandomIntervals(BaseTransformer): """ _tags = { + "authors": ["MatthewMiddlehurst"], "scitype:transform-input": "Series", "scitype:transform-output": "Primitives", "scitype:instancewise": True, diff --git a/sktime/transformations/panel/reduce.py b/sktime/transformations/panel/reduce.py index b60ea0d0006..42af4eaf27f 100644 --- a/sktime/transformations/panel/reduce.py +++ b/sktime/transformations/panel/reduce.py @@ -11,6 +11,7 @@ from sktime.datatypes import convert, convert_to from sktime.transformations.base import BaseTransformer +from sktime.utils.pandas import df_map class Tabularizer(BaseTransformer): @@ -24,6 +25,7 @@ class Tabularizer(BaseTransformer): """ _tags = { + "authors": ["mloning", "fkiraly", "kcc-lion"], "fit_is_empty": True, "univariate-only": False, "scitype:transform-input": "Series", @@ -95,6 +97,8 @@ class TimeBinner(BaseTransformer): """ _tags = { + "authors": ["kcc-lion", "fkiraly"], + "maintainers": ["kcc-lion"], "fit_is_empty": True, "univariate-only": False, "scitype:transform-input": "Series", @@ -150,7 +154,7 @@ def _transform(self, X, y=None): transformed version of X """ idx = pd.cut(X.iloc[0, 0].index, bins=self.idx, include_lowest=True) - Xt = X.applymap(lambda x: x.groupby(idx).apply(self._aggfunc)) + Xt = df_map(X)(lambda x: x.groupby(idx).apply(self._aggfunc)) Xt = convert_to(Xt, to_type="numpyflat", as_scitype="Panel") return Xt diff --git a/sktime/transformations/panel/rocket/_minirocket.py b/sktime/transformations/panel/rocket/_minirocket.py index 1254403742c..e482d664030 100644 --- a/sktime/transformations/panel/rocket/_minirocket.py +++ b/sktime/transformations/panel/rocket/_minirocket.py @@ -20,6 +20,14 @@ class MiniRocket(BaseTransformer): MiniRocket is for unviariate time series only. Use class MiniRocketMultivariate for multivariate time series. + This transformer fits one set of paramereters per individual series, + and applies them to series of the same number in the test set. + + To fit and transform at the same time, + without an identification of fit/transform instances, + wrap this transformer in ``FitInTransform``, + from ``sktime.transformations.compose``. + Parameters ---------- num_kernels : int, default=10,000 @@ -57,6 +65,13 @@ class MiniRocket(BaseTransformer): """ _tags = { + # packaging info + # -------------- + "authors": ["angus924"], + "maintainers": ["angus924"], + "python_dependencies": "numba", + # estimator tags + # -------------- "univariate-only": True, "fit_is_empty": False, "scitype:transform-input": "Series", @@ -66,7 +81,6 @@ class MiniRocket(BaseTransformer): "scitype:instancewise": False, # is this an instance-wise transform? "X_inner_mtype": "numpy3D", # which mtypes do _fit/_predict support for X? "y_inner_mtype": "None", # which mtypes do _fit/_predict support for X? - "python_dependencies": "numba", } def __init__( diff --git a/sktime/transformations/panel/rocket/_minirocket_multivariate.py b/sktime/transformations/panel/rocket/_minirocket_multivariate.py index 74569fd3789..a60b579e706 100644 --- a/sktime/transformations/panel/rocket/_minirocket_multivariate.py +++ b/sktime/transformations/panel/rocket/_minirocket_multivariate.py @@ -19,6 +19,15 @@ class MiniRocketMultivariate(BaseTransformer): convolutions with six of one weight, three of the second weight to seed dilations. MiniRocketMultivariate works with univariate and multivariate time series. + This transformer fits one set of paramereters per individual series, + and applies the transform with fitted parameter i to the i-th series in transform. + Vanilla use requires same number of series in fit and transform. + + To fit and transform series at the same time, + without an identification of fit/transform instances, + wrap this transformer in ``FitInTransform``, + from ``sktime.transformations.compose``. + Parameters ---------- num_kernels : int, default=10,000 @@ -56,6 +65,8 @@ class MiniRocketMultivariate(BaseTransformer): """ _tags = { + "authors": ["angus924"], + "maintainers": ["angus924"], "univariate-only": False, "fit_is_empty": False, "scitype:transform-input": "Series", diff --git a/sktime/transformations/panel/rocket/_minirocket_multivariate_variable.py b/sktime/transformations/panel/rocket/_minirocket_multivariate_variable.py index ada0429d24a..059abe2586d 100644 --- a/sktime/transformations/panel/rocket/_minirocket_multivariate_variable.py +++ b/sktime/transformations/panel/rocket/_minirocket_multivariate_variable.py @@ -25,6 +25,15 @@ class MiniRocketMultivariateVariable(BaseTransformer): performance, use the sktime class MiniRocket for univariate input, and MiniRocketMultivariate to equal length multivariate input. + This transformer fits one set of paramereters per individual series, + and applies the transform with fitted parameter i to the i-th series in transform. + Vanilla use requires same number of series in fit and transform. + + To fit and transform series at the same time, + without an identification of fit/transform instances, + wrap this transformer in ``FitInTransform``, + from ``sktime.transformations.compose``. + Parameters ---------- num_kernels : int, default=10,000 @@ -80,6 +89,8 @@ class MiniRocketMultivariateVariable(BaseTransformer): """ _tags = { + "authors": ["angus924", "michaelfeil"], + "maintainers": ["angus924", "michaelfeil"], "univariate-only": False, "fit_is_empty": False, "scitype:transform-input": "Series", diff --git a/sktime/transformations/panel/rocket/_multirocket.py b/sktime/transformations/panel/rocket/_multirocket.py index dbb904c44ee..03b068d6a61 100644 --- a/sktime/transformations/panel/rocket/_multirocket.py +++ b/sktime/transformations/panel/rocket/_multirocket.py @@ -8,6 +8,8 @@ from sktime.datatypes import convert from sktime.transformations.base import BaseTransformer +__author__ = ["ChangWeiTan", "fstinner", "angus924"] + class MultiRocket(BaseTransformer): """Multi RandOm Convolutional KErnel Transform (MultiRocket). @@ -20,6 +22,15 @@ class MultiRocket(BaseTransformer): Positive Values (LSPV). This version is for univariate time series only. Use class MultiRocketMultivariate for multivariate input. + This transformer fits one set of paramereters per individual series, + and applies the transform with fitted parameter i to the i-th series in transform. + Vanilla use requires same number of series in fit and transform. + + To fit and transform series at the same time, + without an identification of fit/transform instances, + wrap this transformer in ``FitInTransform``, + from ``sktime.transformations.compose``. + Parameters ---------- num_kernels : int, default = 6,250 @@ -45,7 +56,6 @@ class MultiRocket(BaseTransformer): parameter (dilations, num_features_per_dilation, biases) for transformation of input X1 = np.diff(X, 1) - See Also -------- MultiRocketMultivariate, MiniRocket, MiniRocketMultivariate, Rocket @@ -72,6 +82,13 @@ class MultiRocket(BaseTransformer): """ _tags = { + # packaging info + # -------------- + "authors": ["ChangWeiTan", "fstinner", "angus924"], + "maintainers": ["ChangWeiTan", "fstinner", "angus924"], + "python_dependencies": "numba", + # estimator type + # -------------- "univariate-only": True, "fit_is_empty": False, "scitype:transform-input": "Series", @@ -81,7 +98,6 @@ class MultiRocket(BaseTransformer): "scitype:instancewise": False, # is this an instance-wise transform? "X_inner_mtype": "numpy3D", # which mtypes do _fit/_predict support for X? "y_inner_mtype": "None", # which mtypes do _fit/_predict support for X? - "python_dependencies": "numba", } def __init__( diff --git a/sktime/transformations/panel/rocket/_multirocket_multivariate.py b/sktime/transformations/panel/rocket/_multirocket_multivariate.py index 7ebe0f7a676..07f6f0516a5 100644 --- a/sktime/transformations/panel/rocket/_multirocket_multivariate.py +++ b/sktime/transformations/panel/rocket/_multirocket_multivariate.py @@ -5,6 +5,8 @@ from sktime.transformations.base import BaseTransformer +__author__ = ["ChangWeiTan", "fstinner", "angus924"] + class MultiRocketMultivariate(BaseTransformer): """Multi RandOm Convolutional KErnel Transform (MultiRocket). @@ -16,6 +18,15 @@ class MultiRocketMultivariate(BaseTransformer): Values (MPV); Mean of Indices of Positive Values (MIPV); and Longest Stretch of Positive Values (LSPV). This version is the multivariate version. + This transformer fits one set of paramereters per individual series, + and applies the transform with fitted parameter i to the i-th series in transform. + Vanilla use requires same number of series in fit and transform. + + To fit and transform series at the same time, + without an identification of fit/transform instances, + wrap this transformer in ``FitInTransform``, + from ``sktime.transformations.compose``. + Parameters ---------- num_kernels : int, default=6,250 @@ -41,7 +52,6 @@ class MultiRocketMultivariate(BaseTransformer): parameter (dilations, num_features_per_dilation, biases) for transformation of input X1 = np.diff(X, 1) - See Also -------- MultiRocketMultivariate, MiniRocket, MiniRocketMultivariate, Rocket @@ -68,6 +78,13 @@ class MultiRocketMultivariate(BaseTransformer): """ _tags = { + # packaging info + # -------------- + "authors": ["ChangWeiTan", "fstinner", "angus924"], + "maintainers": ["ChangWeiTan", "fstinner", "angus924"], + "python_dependencies": "numba", + # estimator type + # -------------- "univariate-only": False, "fit_is_empty": False, "scitype:transform-input": "Series", @@ -77,7 +94,6 @@ class MultiRocketMultivariate(BaseTransformer): "scitype:instancewise": False, # is this an instance-wise transform? "X_inner_mtype": "numpy3D", # which mtypes do _fit/_predict support for X? "y_inner_mtype": "None", # which mtypes do _fit/_predict support for X? - "python_dependencies": "numba", } def __init__( @@ -190,6 +206,9 @@ def _get_parameter(self, X): _quantiles, ) + if self.random_state is not None: + np.random.seed(self.random_state) + _, num_channels, input_length = X.shape num_kernels = 84 diff --git a/sktime/transformations/panel/rocket/_rocket.py b/sktime/transformations/panel/rocket/_rocket.py index 8218b26e458..e478162de26 100755 --- a/sktime/transformations/panel/rocket/_rocket.py +++ b/sktime/transformations/panel/rocket/_rocket.py @@ -18,6 +18,14 @@ class Rocket(BaseTransformer): dilation. It transforms the time series with two features per kernel. The first feature is global max pooling and the second is proportion of positive values. + This transformer fits one set of paramereters per individual series, + and applies the transform with fitted parameter i to the i-th series in transform. + Vanilla use requires same number of series in fit and transform. + + To fit and transform series at the same time, + without an identification of fit/transform instances, + wrap this transformer in ``FitInTransform``, + from ``sktime.transformations.compose``. Parameters ---------- @@ -57,6 +65,13 @@ class Rocket(BaseTransformer): """ _tags = { + # packaging info + # -------------- + "authors": ["angus924"], + "maintainers": ["angus924"], + "python_dependencies": "numba", + # estimator type + # -------------- "univariate-only": False, "fit_is_empty": False, "scitype:transform-input": "Series", @@ -66,7 +81,6 @@ class Rocket(BaseTransformer): "scitype:instancewise": False, # is this an instance-wise transform? "X_inner_mtype": "numpy3D", # which mtypes do _fit/_predict support for X? "y_inner_mtype": "None", # which mtypes do _fit/_predict support for X? - "python_dependencies": "numba", } def __init__(self, num_kernels=10_000, normalise=True, n_jobs=1, random_state=None): diff --git a/sktime/transformations/panel/segment.py b/sktime/transformations/panel/segment.py index ce180363b39..00b71651f32 100644 --- a/sktime/transformations/panel/segment.py +++ b/sktime/transformations/panel/segment.py @@ -35,6 +35,7 @@ class IntervalSegmenter(BaseTransformer): """ _tags = { + "authors": "mloning", "univariate-only": True, "scitype:transform-input": "Series", # what is the scitype of X: Series, or Panel @@ -188,6 +189,7 @@ class RandomIntervalSegmenter(_DelegatedTransformer): """ _tags = { + "authors": "mloning", "X_inner_mtype": ["pd.DataFrame", "pd-multiindex"], # which mtype do _fit/_predict support for X? "y_inner_mtype": "pd_Series_Table", diff --git a/sktime/transformations/panel/shapelet_transform.py b/sktime/transformations/panel/shapelet_transform.py index 91f169cc399..f3e91fd5bf2 100644 --- a/sktime/transformations/panel/shapelet_transform.py +++ b/sktime/transformations/panel/shapelet_transform.py @@ -123,6 +123,8 @@ class ShapeletTransform(BaseTransformer): """ _tags = { + "authors": ["MatthewMiddlehurst", "jasonlines", "dguijo"], + "maintainers": ["dguijo"], "scitype:transform-input": "Series", # what is the scitype of X: Series, or Panel "scitype:transform-output": "Primitives", @@ -1066,6 +1068,8 @@ class RandomShapeletTransform(BaseTransformer): """ _tags = { + "authors": ["MatthewMiddlehurst", "jasonlines", "dguijo"], + "maintainers": ["dguijo"], "fit_is_empty": False, "univariate-only": False, "scitype:transform-input": "Series", diff --git a/sktime/transformations/panel/signature_based/_augmentations.py b/sktime/transformations/panel/signature_based/_augmentations.py index 0a3d2305ea4..8eb88358ef1 100644 --- a/sktime/transformations/panel/signature_based/_augmentations.py +++ b/sktime/transformations/panel/signature_based/_augmentations.py @@ -5,7 +5,7 @@ def _make_augmentation_pipeline(augmentation_list): - """Buids an sklearn pipeline of augmentations from a tuple of strings. + """Build an sklearn pipeline of augmentations from a tuple of strings. Parameters ---------- @@ -43,11 +43,11 @@ def _make_augmentation_pipeline(augmentation_list): if augmentation_list is not None: if isinstance(augmentation_list, str): augmentation_list = (augmentation_list,) - assert all( - [x in list(AUGMENTATIONS.keys()) for x in augmentation_list] - ), "augmentation_list must only contain string elements from {}. Given: {}.".format( - list(AUGMENTATIONS.keys()), augmentation_list - ) + if not [x in list(AUGMENTATIONS.keys()) for x in augmentation_list]: + raise ValueError( + "augmentation_list must only contain string elements from " + f" {list(AUGMENTATIONS.keys())}. Found: {augmentation_list}" + ) # Setup pipeline if augmentation_list is not None: @@ -68,6 +68,12 @@ class _AddTime(BaseTransformer): """ _tags = { + # packaging info + # -------------- + "authors": "jambo6", + "maintainers": "jambo6", + # estimator type + # -------------- "scitype:transform-input": "Series", # what is the scitype of X: Series, or Panel "scitype:transform-output": "Series", diff --git a/sktime/transformations/panel/signature_based/_compute.py b/sktime/transformations/panel/signature_based/_compute.py index c75269b8e3d..9d2ee3fa2ff 100644 --- a/sktime/transformations/panel/signature_based/_compute.py +++ b/sktime/transformations/panel/signature_based/_compute.py @@ -45,6 +45,7 @@ def __init__( sig_tfm=None, sig_depth=None, rescaling=None, + backend=None, ): super().__init__() self.window_name = window_name @@ -54,6 +55,7 @@ def __init__( self.sig_tfm = sig_tfm self.sig_depth = sig_depth self.rescaling = rescaling + self.backend = backend self.window = _window_getter( self.window_name, self.window_depth, self.window_length, self.window_step @@ -62,6 +64,9 @@ def __init__( def _transform(self, X, y=None): import esig + if self.backend == "iisignature": + esig.set_backend("iisignature") + depth = self.sig_depth data = np.swapaxes(X, 1, 2) diff --git a/sktime/transformations/panel/signature_based/_signature_method.py b/sktime/transformations/panel/signature_based/_signature_method.py index 0ef27595f2c..b01e36bff00 100644 --- a/sktime/transformations/panel/signature_based/_signature_method.py +++ b/sktime/transformations/panel/signature_based/_signature_method.py @@ -9,6 +9,8 @@ from sktime.transformations.panel.signature_based._compute import ( _WindowSignatureTransform, ) +from sktime.utils.validation._dependencies import _check_soft_dependencies +from sktime.utils.warnings import warn class SignatureTransformer(BaseTransformer): @@ -19,19 +21,34 @@ class SignatureTransformer(BaseTransformer): Parameters ---------- - augmentation_list: tuple of strings, contains the augmentations to be - applied before application of the signature transform. - window_name: str, The name of the window transform to apply. - window_depth: int, The depth of the dyadic window. (Active only if - `window_name == 'dyadic'`). - window_length: int, The length of the sliding/expanding window. (Active - only if `window_name in ['sliding, 'expanding']`. - window_step: int, The step of the sliding/expanding window. (Active - only if `window_name in ['sliding, 'expanding']`. - rescaling: str or None, The method of signature rescaling. - sig_tfm: str, String to specify the type of signature transform. One of: - ['signature', 'logsignature']). - depth: int, Signature truncation depth. + augmentation_list: list or tuple of strings, possible strings are + ['leadlag', 'ir', 'addtime', 'cumsum', 'basepoint'] + Augmentations to apply to the data before computing the signature. + The order of the augmentations is the order in which they are applied. + default: ('basepoint', 'addtime') + window_name: str, one of ``['global', 'sliding', 'expanding', 'dyadic']`` + default: 'dyadic' + Type of the window to use for the signature transform. + window_depth: int, default=3 + The depth of the dyadic window. + Ignored unless ``window_name`` is ``'dyadic'``. + window_length: None (default) or int + The length of the sliding/expanding window. (Active + Ignored unless ``window_name`` is one of ``['sliding, 'expanding']``. + window_step: None (default) or int + The step of the sliding/expanding window. + Ignored unless ``window_name`` is one of ``['sliding, 'expanding']``. + rescaling: None (default) or str, "pre" or "post", + None: No rescaling is applied. + "pre": rescale the path last signature term should be roughly O(1) + "post": Rescales the output signature by multiplying the depth-d term by d!. + Aim is that every term becomes ~O(1). + sig_tfm: str, one of ``['signature', 'logsignature']``. default: ``'signature'`` + The type of signature transform to use, plain or logarithmic. + depth: int, default=4 + Signature truncation depth. + backend: str, one of: ``'esig'`` (default), or ``'iisignature'``. + The backend to use for signature computation. Attributes ---------- @@ -40,6 +57,14 @@ class SignatureTransformer(BaseTransformer): """ _tags = { + # packaging info + # -------------- + "authors": "jambo6", + "maintainers": "jambo6", + "python_dependencies": "esig", + "python_version": "<3.10", + # estimator type + # -------------- "scitype:transform-input": "Series", # what is the scitype of X: Series, or Panel "scitype:transform-output": "Primitives", @@ -48,8 +73,6 @@ class SignatureTransformer(BaseTransformer): "X_inner_mtype": "numpy3D", # which mtypes do _fit/_predict support for X? "y_inner_mtype": "None", # which mtypes do _fit/_predict support for X?# "fit_is_empty": False, - "python_dependencies": "esig", - "python_version": "<3.10", } def __init__( @@ -62,6 +85,7 @@ def __init__( rescaling=None, sig_tfm="signature", depth=4, + backend="esig", ): self.augmentation_list = augmentation_list self.window_name = window_name @@ -71,8 +95,27 @@ def __init__( self.rescaling = rescaling self.sig_tfm = sig_tfm self.depth = depth + self.backend = backend super().__init__() + + if backend == "esig": + _check_soft_dependencies("esig") + elif backend == "iisignature": + _check_soft_dependencies("iisignature") + warn( + "iisignature backend of SignatureTransformer is experimental " + "and not systematically tested, due to lack of stable installation " + "process for iisignature via pip. Kindly exercise caution, " + "and report any issues on the sktime issue tracker.", + stacklevel=2, + ) + else: + raise ValueError( + "Error in SignatureTransformer, backend " + "must be one of 'esig' or 'iisignature'" + ) + self.setup_feature_pipeline() def setup_feature_pipeline(self): @@ -86,6 +129,7 @@ def setup_feature_pipeline(self): sig_tfm=self.sig_tfm, sig_depth=self.depth, rescaling=self.rescaling, + backend=self.backend, ) # The so-called 'signature method' as defined in the reference paper diff --git a/sktime/transformations/panel/slope.py b/sktime/transformations/panel/slope.py index 1a45d23fea8..756fa925e5e 100644 --- a/sktime/transformations/panel/slope.py +++ b/sktime/transformations/panel/slope.py @@ -27,6 +27,7 @@ class SlopeTransformer(BaseTransformer): """ _tags = { + "authors": ["mloning"], "scitype:transform-input": "Series", # what is the scitype of X: Series, or Panel "scitype:transform-output": "Series", diff --git a/sktime/transformations/panel/summarize/_extract.py b/sktime/transformations/panel/summarize/_extract.py index 801f07311fc..a7222d1dd34 100644 --- a/sktime/transformations/panel/summarize/_extract.py +++ b/sktime/transformations/panel/summarize/_extract.py @@ -10,6 +10,7 @@ from sktime.datatypes import convert_to from sktime.transformations.base import BaseTransformer from sktime.transformations.panel.segment import RandomIntervalSegmenter +from sktime.utils.pandas import df_map class PlateauFinder(BaseTransformer): @@ -30,6 +31,7 @@ class PlateauFinder(BaseTransformer): """ _tags = { + "authors": ["mloning"], "fit_is_empty": True, "univariate-only": True, "scitype:transform-input": "Series", @@ -103,7 +105,7 @@ def _transform(self, X, y=None): Xt["%s_starts" % column_prefix] = pd.Series(self._starts) Xt["%s_lengths" % column_prefix] = pd.Series(self._lengths) - Xt = Xt.applymap(lambda x: pd.Series(x)) + Xt = df_map(Xt)(lambda x: pd.Series(x)) return Xt @@ -140,7 +142,10 @@ def row_wise_get_der(X): def get_der(x): der = [] for i in range(1, len(x) - 1): - der.append(((x[i] - x[i - 1]) + ((x[i + 1] - x[i - 1]) / 2)) / 2) + xi = x.iloc[i] + xim1 = x.iloc[i - 1] + xip1 = x.iloc[i + 1] + der.append(0.5 * ((xi - xim1) + 0.5 * (xip1 - xim1))) return pd.Series([der[0]] + der + [der[-1]]) return [get_der(x) for x in X] diff --git a/sktime/transformations/panel/supervised_intervals.py b/sktime/transformations/panel/supervised_intervals.py index 86dcf78741a..1b0b5c067c6 100644 --- a/sktime/transformations/panel/supervised_intervals.py +++ b/sktime/transformations/panel/supervised_intervals.py @@ -92,6 +92,7 @@ class SupervisedIntervals(BaseTransformer): """ _tags = { + "authors": ["MatthewMiddlehurst"], "scitype:transform-input": "Series", "scitype:transform-output": "Primitives", "scitype:instancewise": False, diff --git a/sktime/transformations/panel/tests/test_compose.py b/sktime/transformations/panel/tests/test_compose.py index a95031ba534..28766ab7dd6 100644 --- a/sktime/transformations/panel/tests/test_compose.py +++ b/sktime/transformations/panel/tests/test_compose.py @@ -1,5 +1,6 @@ """Tests for panel compositors.""" import numpy as np +import pytest from sklearn.ensemble import RandomForestClassifier from sklearn.pipeline import Pipeline from sklearn.preprocessing import FunctionTransformer @@ -7,8 +8,13 @@ from sktime.datasets import load_basic_motions from sktime.transformations.panel.compose import ColumnTransformer from sktime.transformations.panel.reduce import Tabularizer +from sktime.utils.validation._dependencies import _check_soft_dependencies +@pytest.mark.skipif( + not _check_soft_dependencies("sklearn<1.4", severity="none"), + reason="ColumnTransformer requires sklearn<1.4 due to reliance on private methods", +) def test_ColumnTransformer_pipeline(): """Test pipeline with ColumnTransformer.""" X_train, y_train = load_basic_motions(split="train", return_X_y=True) diff --git a/sktime/transformations/panel/tests/test_interpolate.py b/sktime/transformations/panel/tests/test_interpolate.py index 25d9c3fe5ee..bbc4d4b4669 100644 --- a/sktime/transformations/panel/tests/test_interpolate.py +++ b/sktime/transformations/panel/tests/test_interpolate.py @@ -28,14 +28,14 @@ def test_resizing(): # 1) Check that lengths of all time series (all via the axis=1 - for # all dims in first row) are equal. - ts_lens_before = [len(X.iloc[0][i]) for i in range(len(X.iloc[0]))] + ts_lens_before = [len(X.iloc[0].iloc[i]) for i in range(len(X.iloc[0]))] # all lengths are equal to first length in array assert all([length == ts_lens_before[0] for length in ts_lens_before]) # 2) cutting each time series in each cell of X to make lengths different X = cut_X_ts(X) # get lengths to ensure that they are really different - ts_lens_after_cut = [len(X.iloc[0][i]) for i in range(len(X.iloc[0]))] + ts_lens_after_cut = [len(X.iloc[0].iloc[i]) for i in range(len(X.iloc[0]))] assert not all( [length == ts_lens_after_cut[0] for length in ts_lens_after_cut] ) # are different @@ -46,5 +46,5 @@ def test_resizing(): # 4) check that result time series have lengths equal to `target_len # that we set above - ts_lens_after_resize = [len(Xt.iloc[0][i]) for i in range(len(Xt.iloc[0]))] + ts_lens_after_resize = [len(Xt.iloc[0].iloc[i]) for i in range(len(Xt.iloc[0]))] assert all([length == target_len for length in ts_lens_after_resize]) diff --git a/sktime/transformations/panel/truncation.py b/sktime/transformations/panel/truncation.py index de4f6a2d8bb..ec46191e7b4 100644 --- a/sktime/transformations/panel/truncation.py +++ b/sktime/transformations/panel/truncation.py @@ -25,6 +25,8 @@ class TruncationTransformer(BaseTransformer): """ _tags = { + "authors": ["abostrom"], + "maintainers": ["abostrom"], "scitype:transform-input": "Series", # what is the scitype of X: Series, or Panel "scitype:transform-output": "Series", diff --git a/sktime/transformations/panel/tsfresh.py b/sktime/transformations/panel/tsfresh.py index 48ce7661c0c..561205d2e7f 100644 --- a/sktime/transformations/panel/tsfresh.py +++ b/sktime/transformations/panel/tsfresh.py @@ -12,6 +12,8 @@ class _TSFreshFeatureExtractor(BaseTransformer): """Base adapter class for tsfresh transformations.""" _tags = { + "authors": ["AyushmaanSeth", "mloning", "alwinw", "MatthewMiddlehurst"], + "maintainers": ["AyushmaanSeth"], "scitype:transform-input": "Series", # what is the scitype of X: Series, or Panel "scitype:transform-output": "Primitives", @@ -98,6 +100,12 @@ def _get_extraction_params(self): if value is not None: extraction_params[name] = value + # Fixes key mismatch between tsfresh and sktime + # tsfresh uses "profile" while sktime uses "profiling" + # This fix keeps compatibility + if name == "profile": + extraction_params[name] = self.profiling + self.n_jobs = n_jobs # Convert convenience string arguments to tsfresh parameters classes @@ -708,4 +716,9 @@ def get_test_params(cls, parameter_set="default"): "show_warnings": False, "fdr_level": 0.01, } - return params + params2 = { + "default_fc_parameters": "minimal", + "disable_progressbar": True, + "show_warnings": False, + } + return [params, params2] diff --git a/sktime/transformations/series/acf.py b/sktime/transformations/series/acf.py index 1f91e2db6f1..2923be30d6e 100644 --- a/sktime/transformations/series/acf.py +++ b/sktime/transformations/series/acf.py @@ -67,6 +67,13 @@ class AutoCorrelationTransformer(BaseTransformer): """ _tags = { + # packaging info + # -------------- + "authors": "afzal442", + "maintainers": "afzal442", + "python_dependencies": "statsmodels", + # estimator type + # -------------- "scitype:transform-input": "Series", # what is the scitype of X: Series, or Panel "scitype:transform-output": "Series", @@ -76,7 +83,6 @@ class AutoCorrelationTransformer(BaseTransformer): "y_inner_mtype": "None", # which mtypes do _fit/_predict support for y? "univariate-only": True, "fit_is_empty": True, - "python_dependencies": "statsmodels", } def __init__( diff --git a/sktime/transformations/series/adapt.py b/sktime/transformations/series/adapt.py index a654c59165f..1c7f3c15a3f 100644 --- a/sktime/transformations/series/adapt.py +++ b/sktime/transformations/series/adapt.py @@ -101,6 +101,11 @@ class TabularToSeriesAdaptor(BaseTransformer): """ _tags = { + # packaging info + # -------------- + "authors": ["mloning", "fkiraly"], + # estimator type + # -------------- "scitype:transform-input": "Series", # what is the scitype of X: Series, or Panel "scitype:transform-output": "Series", diff --git a/sktime/transformations/series/augmenter.py b/sktime/transformations/series/augmenter.py index 18ae7aadc32..7e53683ad44 100755 --- a/sktime/transformations/series/augmenter.py +++ b/sktime/transformations/series/augmenter.py @@ -19,6 +19,12 @@ class _AugmenterTags: _tags = { + # packaging info + # ---------------- + "authors": ["MrPr3ntice", "MFehsenfeld", "iljamaurer"], + "maintainers": ["MrPr3ntice", "MFehsenfeld", "iljamaurer"], + # estimator type + # -------------- "scitype:transform-input": "Series", "scitype:transform-output": "Series", "scitype:transform-labels": "None", diff --git a/sktime/transformations/series/binning.py b/sktime/transformations/series/binning.py index 34129bd66e3..1db22b992d9 100644 --- a/sktime/transformations/series/binning.py +++ b/sktime/transformations/series/binning.py @@ -54,6 +54,7 @@ class TimeBinAggregate(BaseTransformer): """ _tags = { + "authors": "fkiraly", "fit_is_empty": True, "univariate-only": False, "scitype:transform-input": "Series", diff --git a/sktime/transformations/series/bkfilter.py b/sktime/transformations/series/bkfilter.py index 353d1148ee4..f4c9372aab3 100644 --- a/sktime/transformations/series/bkfilter.py +++ b/sktime/transformations/series/bkfilter.py @@ -63,6 +63,13 @@ class BKFilter(BaseTransformer): """ _tags = { + # packaging info + # -------------- + "authors": ["klam-data", "pyyim", "mgorlin"], + "maintainers": "klam-data", + "python_dependencies": "statsmodels", + # estimator type + # -------------- "scitype:transform-input": "Series", # what is the scitype of X: Series, or Panel "scitype:transform-output": "Series", @@ -83,7 +90,6 @@ class BKFilter(BaseTransformer): # can the transformer handle unequal length time series (if passed Panel)? "handles-missing-data": False, # can estimator handle missing data? "remember_data": False, # whether all data seen is remembered as self._X - "python_dependencies": "statsmodels", } def __init__( diff --git a/sktime/transformations/series/boxcox.py b/sktime/transformations/series/boxcox.py index 4e7ce83c8e6..9889d966c3a 100644 --- a/sktime/transformations/series/boxcox.py +++ b/sktime/transformations/series/boxcox.py @@ -158,6 +158,12 @@ class BoxCoxTransformer(BaseTransformer): """ _tags = { + # packaging info + # -------------- + "authors": ["mloning", "aiwalter", "fkiraly"], + "python_dependencies": "scipy", + # estimator type + # -------------- "scitype:transform-input": "Series", # what is the scitype of X: Series, or Panel "scitype:transform-output": "Series", @@ -169,7 +175,6 @@ class BoxCoxTransformer(BaseTransformer): "fit_is_empty": False, "univariate-only": True, "capability:inverse_transform": True, - "python_dependencies": "scipy", } def __init__( @@ -528,7 +533,8 @@ def _guerrero(x, sp, bounds=None): if sp is None or not is_int(sp) or sp < 2: raise ValueError( - "Guerrero method requires an integer seasonal periodicity (sp) value >= 2." + "In BoxCoxTransformer, method='guerrero' requires an integer seasonal " + f"periodicity (sp) value >= 2, but found sp={sp}" ) x = np.asarray(x) diff --git a/sktime/transformations/series/cffilter.py b/sktime/transformations/series/cffilter.py index 7507d1914e8..628649ae93b 100644 --- a/sktime/transformations/series/cffilter.py +++ b/sktime/transformations/series/cffilter.py @@ -51,6 +51,13 @@ class CFFilter(BaseTransformer): """ _tags = { + # packaging info + # -------------- + "authors": ["ken-maeda"], + "maintainers": ["ken-maeda"], + "python_dependencies": "statsmodels", + # estimator type + # -------------- "scitype:transform-input": "Series", # what is the scitype of X: Series, or Panel "scitype:transform-output": "Series", @@ -71,7 +78,6 @@ class CFFilter(BaseTransformer): # can the transformer handle unequal length time series (if passed Panel)? "handles-missing-data": False, # can estimator handle missing data? "remember_data": False, # whether all data seen is remembered as self._X - "python_dependencies": "statsmodels", } def __init__( diff --git a/sktime/transformations/series/clasp.py b/sktime/transformations/series/clasp.py index f08a9ecdd8e..819ece24a98 100644 --- a/sktime/transformations/series/clasp.py +++ b/sktime/transformations/series/clasp.py @@ -58,6 +58,13 @@ class ClaSPTransformer(BaseTransformer): """ _tags = { + # packaging info + # -------------- + "authors": ["ermshaua", "patrickzib"], + "maintainers": ["ermshaua"], + "python_dependencies": "numba", + # estimator type + # -------------- "scitype:transform-input": "Series", # what is the scitype of X: Series, or Panel "scitype:transform-output": "Series", @@ -67,7 +74,6 @@ class ClaSPTransformer(BaseTransformer): "y_inner_mtype": "None", # which mtypes do _fit/_predict support for y? "univariate-only": True, "fit_is_empty": True, - "python_dependencies": "numba", } def __init__( diff --git a/sktime/transformations/series/clear_sky.py b/sktime/transformations/series/clear_sky.py index 49b36e5d010..6410f47eba3 100644 --- a/sktime/transformations/series/clear_sky.py +++ b/sktime/transformations/series/clear_sky.py @@ -73,6 +73,13 @@ class ClearSky(BaseTransformer): """ _tags = { + # packaging info + # -------------- + "authors": ["ciaran-g"], + "maintainers": ["ciaran-g"], + "python_dependencies": ["statsmodels", "scipy"], + # estimator type + # -------------- "scitype:transform-input": "Series", "scitype:transform-output": "Series", "scitype:transform-labels": "None", @@ -96,8 +103,6 @@ class ClearSky(BaseTransformer): "capability:unequal_length:removes": True, # ? "handles-missing-data": False, "capability:missing_values:removes": True, - "python_version": None, # PEP 440 python version specifier to limit versions - "python_dependencies": ["statsmodels", "scipy"], } def __init__( diff --git a/sktime/transformations/series/cos.py b/sktime/transformations/series/cos.py index c911a5c6cb6..d5fd1bf5abc 100644 --- a/sktime/transformations/series/cos.py +++ b/sktime/transformations/series/cos.py @@ -31,6 +31,12 @@ class CosineTransformer(BaseTransformer): """ _tags = { + # packaging info + # -------------- + "authors": "afzal442", + "maintainers": "afzal442", + # estimator type + # -------------- "scitype:transform-input": "Series", # what is the scitype of X: Series, or Panel "scitype:transform-output": "Series", diff --git a/sktime/transformations/series/date.py b/sktime/transformations/series/date.py index 412df68dfb0..7fab30ae2c6 100644 --- a/sktime/transformations/series/date.py +++ b/sktime/transformations/series/date.py @@ -169,6 +169,13 @@ class DateTimeFeatures(BaseTransformer): """ _tags = { + # packaging info + # -------------- + "authors": ["danbartl", "KishManani", "VyomkeshVyas"], + "maintainers": ["VyomkeshVyas"], + "python_dependencies": "pandas>=1.2.0", # from DateTimeProperties + # estimator type + # -------------- "scitype:transform-input": "Series", # what is the scitype of X: Series, or Panel "scitype:transform-output": "Series", @@ -187,7 +194,6 @@ class DateTimeFeatures(BaseTransformer): "transform-returns-same-time-index": True, "enforce_index_type": [pd.DatetimeIndex, pd.PeriodIndex], "skip-inverse-transform": True, - "python_dependencies": "pandas>=1.2.0", # from DateTimeProperties } def __init__( diff --git a/sktime/transformations/series/detrend/_deseasonalize.py b/sktime/transformations/series/detrend/_deseasonalize.py index b79095a82e5..7211d048153 100644 --- a/sktime/transformations/series/detrend/_deseasonalize.py +++ b/sktime/transformations/series/detrend/_deseasonalize.py @@ -63,6 +63,12 @@ class Deseasonalizer(BaseTransformer): """ _tags = { + # packaging info + # -------------- + "authors": ["mloning", "eyalshafran", "aiwalter"], + "python_dependencies": "statsmodels", + # estimator type + # -------------- "scitype:transform-input": "Series", # what is the scitype of X: Series, or Panel "scitype:transform-output": "Series", @@ -75,7 +81,6 @@ class Deseasonalizer(BaseTransformer): "capability:inverse_transform": True, "transform-returns-same-time-index": True, "univariate-only": True, - "python_dependencies": "statsmodels", } def __init__(self, sp=1, model="additive"): @@ -363,16 +368,25 @@ def _fit(self, X, y=None): class STLTransformer(BaseTransformer): """Remove seasonal components from a time-series using STL. - Interfaces STL from statsmodels as an sktime transformer. + Interfaces ``statsmodels.tsa.seasonal.STL`` as an sktime transformer. + + ``STLTransformer`` can be used to perform deseasonalization or decomposition: + + If ``return_components=False``, it will return the deseasonalized series, i.e., + the trend component from ``statsmodels`` ``STL``. + + If ``return_components=True``, it will transform the series into a decomposition + of component, returning the trend, seasonal, and residual components. + + ``STLTransformer`` performs ``inverse_transform`` by summing any components, + and can be used for pipelining in a ``TransformedTargetForecaster``. - The STLTransformer is a descriptive transformer to remove seasonality - from a series and is based on statsmodels.STL. It returns deseasonalized - data. Components are returned in addition if return_components=True - STLTransformer can not inverse_transform on indices not seen in fit(). - This means that for pipelining, the Deseasonalizer or Detrender must be - used instead of STLTransformer. + Important: for separate forecasts of trend and seasonality, and an + inverse transform that respects seasonality, ensure + that ``return_components=True`` is set, otherwise the inverse will just + return the trend component. - Important note: the returned series has seasonality removed, but not trend. + An alternative for pipeline-style composition is ``STLForecaster``. Parameters ---------- @@ -414,7 +428,7 @@ class STLTransformer(BaseTransformer): the two are linearly interpolated. Higher values reduce estimation time. return_components : bool, default=False - if False, will return only the STL transformed series + if False, will return only the trend component if True, will return the transformed series, as well as three components as variables in the returned multivariate series (DataFrame cols) "transformed" - the transformed series diff --git a/sktime/transformations/series/detrend/_detrend.py b/sktime/transformations/series/detrend/_detrend.py index be410df7eae..69e705dfe2f 100644 --- a/sktime/transformations/series/detrend/_detrend.py +++ b/sktime/transformations/series/detrend/_detrend.py @@ -68,6 +68,12 @@ class Detrender(BaseTransformer): """ _tags = { + # packaging info + # -------------- + "authors": ["mloning", "SveaMeyer13", "KishManani", "fkiraly"], + "maintainers": ["SveaMeyer13", "KishManani"], + # estimator type + # -------------- "scitype:transform-input": "Series", # what is the scitype of X: Series, or Panel "scitype:transform-output": "Series", diff --git a/sktime/transformations/series/detrend/mstl.py b/sktime/transformations/series/detrend/mstl.py index 8d19ad6b9e2..87d8e48d740 100644 --- a/sktime/transformations/series/detrend/mstl.py +++ b/sktime/transformations/series/detrend/mstl.py @@ -99,6 +99,13 @@ class MSTL(BaseTransformer): """ _tags = { + # packaging info + # -------------- + "authors": ["luca-miniati"], + "maintainers": ["luca-miniati"], + "python_dependencies": "statsmodels", + # estimator type + # -------------- "scitype:transform-input": "Series", "scitype:transform-output": "Series", "scitype:instancewise": True, @@ -110,7 +117,6 @@ class MSTL(BaseTransformer): "capability:inverse_transform:exact": False, "skip-inverse-transform": False, "fit_is_empty": False, - "python_dependencies": "statsmodels", } def __init__( diff --git a/sktime/transformations/series/difference.py b/sktime/transformations/series/difference.py index 47ba582956d..a3f76227343 100644 --- a/sktime/transformations/series/difference.py +++ b/sktime/transformations/series/difference.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 -u # copyright: sktime developers, BSD-3-Clause License (see LICENSE file) """Class to iteratively apply differences to a time series.""" -__author__ = ["RNKuhns", "fkiraly"] +__author__ = ["RNKuhns", "fkiraly", "benheid"] __all__ = ["Differencer"] from typing import Union @@ -232,6 +232,11 @@ class Differencer(BaseTransformer): """ _tags = { + # packaging info + # -------------- + "authors": ["RNKuhns", "fkiraly", "benheid"], + # estimator type + # -------------- "scitype:transform-input": "Series", # what is the scitype of X: Series, or Panel "scitype:transform-output": "Series", diff --git a/sktime/transformations/series/dilation_mapping.py b/sktime/transformations/series/dilation_mapping.py new file mode 100644 index 00000000000..967db827b00 --- /dev/null +++ b/sktime/transformations/series/dilation_mapping.py @@ -0,0 +1,133 @@ +"""DilationMapping transformer. + +A transformer for applying dilation mapping to time series data. +""" + +__author__ = ["fspinna"] +__all__ = ["DilationMappingTransformer"] + +import pandas as pd + +from sktime.transformations.base import BaseTransformer + + +class DilationMappingTransformer(BaseTransformer): + r"""Dilation mapping transformer. + + A transformer for applying an index grid dilation mapping to time series data, + in the terminology of [1]_. + + This transformation is motivated by kernel dilation, it + reorders the timesteps of a time series to simulate the effect of dilation. + For instance, in a pipeline, it enables a dilation-like effect for downstream + models that do not inherently support such a feature. + + Mathematically, the mapping operates on sequences :math:`x_1, \dots, x_k`. + The dilation with factor :math:`d` is defined as the sequence + :math:`x_1, x_{1+d}, x_{1+2d}, \dots, x_2, x_{2+d}, x_{2+2d}, \dots, x_d, x_{2d}, \dots`, + where the subsequences with grid spacing :math:`d` are maximal. + + The resulting sequence is of equal length to the input sequence. + + This transformer reorders the values, and resets the sequence index + to a ``RangeIndex``, if the mtype is ``pandas`` based. + + Parameters + ---------- + dilation : int, default=2 + The dilation factor. Determines the spacing between original data points in the + transformed series. Must be an integer greater than 0. A dilation of 1 means no + change, while higher values increase the spacing. + + References + ---------- + .. [1] Patrick SchΓ€fer and Ulf Leser, + "WEASEL 2.0--A Random Dilated Dictionary Transform for Fast, + Accurate and Memory Constrained Time Series Classification", 2023, + arXiv preprint arXiv:2301.10194. + + Example + ---------- + >>> from sktime.transformations.series.dilation_mapping import \ + ... DilationMappingTransformer + >>> from sktime.datasets import load_airline + >>> y = load_airline() + >>> y_transform = DilationMappingTransformer(dilation=2).fit_transform(y) + """ # noqa: E501 + + _tags = { + # packaging info + # -------------- + "authors": ["fspinna"], + "maintainers": ["fspinna"], + # estimator type + # -------------- + "scitype:transform-input": "Series", + "scitype:transform-output": "Series", + "scitype:instancewise": True, + "scitype:transform-labels": "None", + "X_inner_mtype": "pd.Series", + "y_inner_mtype": "None", + "univariate-only": True, + "requires_y": False, + "fit_is_empty": True, + "capability:inverse_transform": False, + "capability:unequal_length": True, + "handles-missing-data": True, + } + + def __init__(self, dilation=2): + self.dilation = dilation + + super().__init__() + + if dilation < 1: + raise ValueError("Dilation must be greater than 0") + + def _transform(self, X, y=None): + """Transform X and return a transformed version. + + private _transform containing core logic, called from transform + + Parameters + ---------- + X : Series, Panel, or Hierarchical data, of mtype X_inner_mtype + if X_inner_mtype is list, _transform must support all types in it + Data to be transformed + y : Series, Panel, or Hierarchical data, of mtype y_inner_mtype, default=None + Additional data, e.g., labels for transformation + + Returns + ------- + # X_transformed : Series of mtype pd.Series + # transformed version of X + """ + return self._dilate_series(X, self.dilation) + + def _dilate_series(self, x, d): + x_dilations = [x[i::d] for i in range(0, d)] + x_dilated = pd.concat(x_dilations, axis=0) + x_dilated.name = x.name + return x_dilated.reset_index(drop=True) + + @classmethod + def get_test_params(cls, parameter_set="default"): + """Return testing parameter settings for the estimator. + + Parameters + ---------- + parameter_set : str, default="default" + Name of the set of test parameters to return, for use in tests. If no + special parameters are defined for a value, will return `"default"` set. + There are currently no reserved values for transformers. + + Returns + ------- + params : dict or list of dict, default = {} + Parameters to create testing instances of the class + Each dict are parameters to construct an "interesting" test instance, i.e., + `MyClass(**params)` or `MyClass(**params[i])` creates a valid test instance. + `create_test_instance` uses the first (or only) dictionary in `params` + """ + params = [{"dilation": 2}] + return params diff --git a/sktime/transformations/series/dobin.py b/sktime/transformations/series/dobin.py index d55e9c202fa..39d974a1ce4 100644 --- a/sktime/transformations/series/dobin.py +++ b/sktime/transformations/series/dobin.py @@ -78,10 +78,15 @@ class DOBIN(BaseTransformer): """ _tags = { + # packaging info + # -------------- + "authors": "KatieBuc", + "python_dependencies": "scipy", + # estimator type + # -------------- "X_inner_mtype": "pd.DataFrame", "fit_is_empty": False, "skip-inverse-transform": True, - "python_dependencies": "scipy", } def __init__( diff --git a/sktime/transformations/series/dropna.py b/sktime/transformations/series/dropna.py index 2f9fa76cc81..2521988230e 100644 --- a/sktime/transformations/series/dropna.py +++ b/sktime/transformations/series/dropna.py @@ -43,6 +43,8 @@ class DropNA(BaseTransformer): """ _tags = { + "authors": ["hliebert"], + "maintainers": ["hliebert"], "univariate-only": False, "scitype:transform-input": "Series", "scitype:transform-output": "Series", diff --git a/sktime/transformations/series/exponent.py b/sktime/transformations/series/exponent.py index 5fd8d29392a..d607d0e07dc 100644 --- a/sktime/transformations/series/exponent.py +++ b/sktime/transformations/series/exponent.py @@ -70,6 +70,7 @@ class ExponentTransformer(BaseTransformer): """ _tags = { + "authors": ["RNKuhns"], "scitype:transform-input": "Series", # what is the scitype of X: Series, or Panel "scitype:transform-output": "Series", diff --git a/sktime/transformations/series/feature_selection.py b/sktime/transformations/series/feature_selection.py index 54b173fb4da..2a9f955d836 100644 --- a/sktime/transformations/series/feature_selection.py +++ b/sktime/transformations/series/feature_selection.py @@ -30,7 +30,7 @@ class FeatureSelection(BaseTransformer): Requires parameter n_columns. * "random": Randomly select n_columns features. Requires parameter n_columns. * "columns": Select features by given names. - * "none": Remove all columns by setting Z to None. + * "none": Remove all columns, transform returns None. * "all": Select all given features. regressor : sklearn-like regressor, optional, default=None. Used as meta-model for the method "feature-importances". The given @@ -70,6 +70,7 @@ class FeatureSelection(BaseTransformer): """ _tags = { + "authors": ["aiwalter"], "scitype:transform-input": "Series", # what is the scitype of X: Series, or Panel "scitype:transform-output": "Series", diff --git a/sktime/transformations/series/filter.py b/sktime/transformations/series/filter.py index 4a5362b9650..3117be08a86 100644 --- a/sktime/transformations/series/filter.py +++ b/sktime/transformations/series/filter.py @@ -35,6 +35,12 @@ class Filter(BaseTransformer): # default tag values for "Series-to-Series" _tags = { + # packaging info + # -------------- + "authors": ["sveameyer13"], + "python_dependencies": "mne", + # estimator type + # -------------- "scitype:transform-input": "Series", # what is the scitype of X: Series, or Panel "scitype:transform-output": "Series", @@ -43,7 +49,6 @@ class Filter(BaseTransformer): "X_inner_mtype": ["np.ndarray", "numpy3D"], "y_inner_mtype": "None", # which mtypes do _fit/_predict support for X? "fit_is_empty": True, # is fit empty and can be skipped? Yes = True - "python_dependencies": "mne", } def __init__( diff --git a/sktime/transformations/series/fourier.py b/sktime/transformations/series/fourier.py index b1b3478c12a..13f02ee1c8f 100644 --- a/sktime/transformations/series/fourier.py +++ b/sktime/transformations/series/fourier.py @@ -23,27 +23,40 @@ class FourierFeatures(BaseTransformer): - sin_sp_k = :math:`sin(\frac{2 \pi k t}{sp})` - cos_sp_k = :math:`cos(\frac{2 \pi k t}{sp})` - Where :math:`t` is the number of time steps elapsed from the beginning of the time - series. + Where :math:`t` is the elapsed time since the beginning of the seasonal period and + :math:`sp` the total time of the seasonal period. - The output of the transform is a pandas DataFrame that includes the fourier terms as + The transformed output is a pandas DataFrame that includes the fourier terms as additional columns with the naming convention stated above (sin_sp_k and cos_sp_k). - For instance for sp_list = [12, 3] and fourier_terms_list = [2, 1] the transformed - series will have the additional columns: - "cos_12_1", "sin_12_1", "cos_12_2", "sin_12_2", "cos_3_1", "sin_3_1" + The numbers of Fourier terms :math:`K` in the fourier_terms_list + determines the number of Fourier terms that will be used for each seasonal period, + i.e., Fourier terms :math:`k = 1\dots K` (integers), cos and sine, will be generated + for the seasonality :math:`sp` at the same list index. + For example, consider sp_list = [12, "Y"] and fourier_terms_list = [2, 1]. + This says that we compute 2 (2 cos, 2 sine) Fourier terms for + seasonality 12 periods, and 1 Fourier term (1 cos and 1 sine) + for seasonality 1 year. + The transformed series will then have columns with the following names: + "cos_12_1", "sin_12_1", "cos_12_2", "sin_12_2", "cos_Y_1", "sin_Y_1" The implementation is based on the fourier function from the R forecast package [3]_ Parameters ---------- - sp_list : List[float] - list of seasonal periods + sp_list : List[float and/or str] + List of seasonal periods. Can be defined with the following options: + * float : Periodicity defined as number of timesteps since the beginning of the + data seen in `fit`. + * string : Periodicity defined as a column name in X that contains the + :math:`t/sp` values + * string : Periodicity defined as a pandas period alias: + https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#period-aliases fourier_terms_list : List[int] - list of number of fourier terms (K) for each seasonal period. - Each K matches to the sp (seasonal period) of the sp_list. - For example, if sp_list = [7, 365] and fourier_terms_list = [3, 9], the seasonal - frequency of 7 will have 3 fourier terms and the seasonal frequency of 365 - will have 9 fourier terms. + List of number of fourier terms (:math:`K`) per corresponding (:math:`sp`); each + :math:`K` matches to one :math:`sp` of the sp_list. For example, if sp_list = + [7, "Y"] and fourier_terms_list = [3, 9], the seasonality of 7 timesteps will + have 3 sin_sp_k and 3 cos_sp_k fourier terms and the yearly seasonality "Y" will + have 9 sin_sp_k and 9 cos_sp_k fourier terms. freq : str, optional, default = None Only used when X has a pd.DatetimeIndex without a specified frequency. Specifies the frequency of the index of your data. The string should @@ -66,11 +79,16 @@ class FourierFeatures(BaseTransformer): >>> from sktime.transformations.series.fourier import FourierFeatures >>> from sktime.datasets import load_airline >>> y = load_airline() - >>> transformer = FourierFeatures(sp_list=[12], fourier_terms_list=[4]) + >>> transformer = FourierFeatures(sp_list=[12, "Y"], fourier_terms_list=[4, 1]) >>> y_hat = transformer.fit_transform(y) """ _tags = { + # packaging info + # -------------- + "authors": ["ltsaprounis", "blazingbhavneek"], + # estimator type + # -------------- "scitype:transform-input": "Series", # what is the scitype of X: Series, or Panel "scitype:transform-output": "Series", @@ -103,7 +121,6 @@ class FourierFeatures(BaseTransformer): # todo: rename to capability:missing_values "capability:missing_values:removes": False, # is transform result always guaranteed to contain no missing values? - "python_version": None, # PEP 440 python version specifier to limit versions } def __init__( @@ -124,11 +141,16 @@ def __init__( "to the length of fourier_terms_list." ) - if np.any(np.array(self.sp_list) / np.array(self.fourier_terms_list) < 1): - raise ValueError( - "In FourierFeatures the number of each element of fourier_terms_list" - "needs to be lower from the corresponding element of the sp_list" - ) + for i in range(len(self.sp_list)): + if ( + not isinstance(sp_list[i], str) + and sp_list[i] / fourier_terms_list[i] < 1 + ): + raise ValueError( + "In FourierFeatures the number of each element of " + "fourier_terms_list needs to be lower from the corresponding " + "element of the sp_list" + ) super().__init__() @@ -144,11 +166,6 @@ def _fit(self, X, y=None): Data to fit transform to y : Series or Panel of mtype y_inner_mtype, default=None Additional data, e.g., labels for transformation - freq : str, optional, default = None - Only used when X has a pd.DatetimeIndex without a specified frequency. - Specifies the frequency of the index of your data. The string should - match a pandas offset alias: - https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases Returns ------- @@ -160,17 +177,21 @@ def _fit(self, X, y=None): coefficient_list = [] for i, sp in enumerate(self.sp_list): for k in range(1, self.fourier_terms_list[i] + 1): - coef = k / sp - if coef not in coefficient_list: - coefficient_list.append(coef) + if not isinstance(sp, str): # periodicity sp relative to start + coef = k / sp + if coef not in coefficient_list: + coefficient_list.append(coef) + self.sp_k_pairs_list_.append((sp, k)) + else: + warnings.warn( + f"The terms sin_{sp}_{k} and cos_{sp}_{k} from " + "FourierFeatures will be skipped because the resulting " + "coefficient already exists from other seasonal period, " + "fourier term pairs.", + stacklevel=2, + ) + else: # periodicity sp from offset string or X column self.sp_k_pairs_list_.append((sp, k)) - else: - warnings.warn( - f"The terms sin_{sp}_{k} and cos_{sp}_{k} from FourierFeatures " - "will be skipped because the resulting coefficient already " - "exists from other seasonal period, fourier term pairs.", - stacklevel=2, - ) time_index = X.index @@ -178,10 +199,10 @@ def _fit(self, X, y=None): # Chooses first non None value self.freq_ = time_index.freq or self.freq or pd.infer_freq(time_index) if self.freq_ is None: - ValueError("X has no known frequency and none is supplied") + raise ValueError("X has no known frequency and none is supplied") if self.freq_ == time_index.freq and self.freq_ != self.freq: warnings.warn( - f"Using frequency from index: {time_index.freq}, which" + f"Using frequency from index: {time_index.freq}, which " f"does not match the frequency given:{self.freq}.", stacklevel=2, ) @@ -211,10 +232,12 @@ def _transform(self, X, y=None): transformed version of X """ X_transformed = pd.DataFrame(index=X.index) - time_index = X.index + X_df = pd.DataFrame(X) - if isinstance(time_index, pd.DatetimeIndex): - time_index = time_index.to_period(self.freq_) + if isinstance(X.index, pd.DatetimeIndex): + time_index = X.index.to_period(self.freq_) + else: + time_index = X.index # get the integer form of the PeriodIndex int_index = time_index.astype("int64") - self.min_t_ @@ -223,14 +246,76 @@ def _transform(self, X, y=None): sp = sp_k[0] k = sp_k[1] - X_transformed[f"sin_{sp}_{k}"] = np.sin(int_index * 2 * k * np.pi / sp) - X_transformed[f"cos_{sp}_{k}"] = np.cos(int_index * 2 * k * np.pi / sp) + if not isinstance(sp, str): # periodicity sp relative to start + X_transformed[f"sin_{sp}_{k}"] = np.sin(int_index * 2 * k * np.pi / sp) + X_transformed[f"cos_{sp}_{k}"] = np.cos(int_index * 2 * k * np.pi / sp) + + elif sp in X_df.columns: # periodicity sp from X column + frac_index = X_df[sp].values + X_transformed[f"sin_{sp}_{k}"] = np.sin(frac_index * 2 * k * np.pi) + X_transformed[f"cos_{sp}_{k}"] = np.cos(frac_index * 2 * k * np.pi) + + else: # periodicity sp from offset string + if isinstance(X.index, pd.PeriodIndex): + datetime_index = X.index.to_timestamp() + else: + datetime_index = X.index + + frac_index = self._offset_frac_since_prev_offset( + datetime_index=datetime_index, + period_str=sp, + ) + X_transformed[f"sin_{sp}_{k}"] = np.sin(frac_index * 2 * k * np.pi) + X_transformed[f"cos_{sp}_{k}"] = np.cos(frac_index * 2 * k * np.pi) if self.keep_original_columns: X_transformed = pd.concat([X, X_transformed], axis=1, copy=True) return X_transformed + def _offset_frac_since_prev_offset(self, datetime_index, period_str): + """Get time passed as fraction of the current period. + + Parameters + ---------- + datetime_index : pandas DatetimeIndex + period_str : pandas period str + Cannot contain digits + + Returns + ------- + numpy array containing the time passed between [previous offset, next offset) + as fraction in the interval [0, 1) for every datetime in datetimes + """ + + def _get_frac(datetime, offset_boundaries): + i = np.searchsorted(offset_boundaries, datetime, side="right") + prev = offset_boundaries[i - 1] + next = offset_boundaries[i] + period_timedelta = next - prev + since_prev_timedelta = datetime - prev + return since_prev_timedelta / period_timedelta + + offset = pd.tseries.frequencies.to_offset(period_str) + offset_boundaries = pd.date_range( + start=np.amin(datetime_index) - offset, + end=np.amax(datetime_index) + offset, + freq=period_str, + tz=datetime_index.tz, + ) + + # date_range created with offsets <= 1day have boundaries on the first + # moment of the new period, but date_range created with offsets > 1day + # have boundaries on the last day of the period rather than the desired + # first day of new period. workaround: shift by 1 day + offset_td = pd.to_timedelta(offset, errors="coerce") + if not offset_td <= pd.Timedelta(days=1): + offset_boundaries = offset_boundaries + pd.Timedelta(days=1) + + fracs = [_get_frac(dt, offset_boundaries) for dt in datetime_index] + + return np.array(fracs) + @classmethod def get_test_params(cls, parameter_set="default"): """Return testing parameter settings for the estimator. @@ -253,6 +338,8 @@ def get_test_params(cls, parameter_set="default"): params = [ {"sp_list": [12], "fourier_terms_list": [4]}, {"sp_list": [12, 6.2], "fourier_terms_list": [3, 4]}, + {"sp_list": ["Y"], "fourier_terms_list": [4]}, + {"sp_list": ["Y", "Q"], "fourier_terms_list": [3, 4]}, ] return params diff --git a/sktime/transformations/series/func_transform.py b/sktime/transformations/series/func_transform.py index 0d853141d9a..dd692d5e30a 100644 --- a/sktime/transformations/series/func_transform.py +++ b/sktime/transformations/series/func_transform.py @@ -18,12 +18,12 @@ class FunctionTransformer(BaseTransformer): r"""Constructs a transformer from an arbitrary callable. A FunctionTransformer forwards its y (and optionally X) arguments to a - user-defined function or function object and returns the result of this + user-defined function (or callable object) and returns the result of this function. This is useful for stateless transformations such as taking the log of frequencies, doing custom scaling, etc. - Note: If a lambda is used as the function, then the resulting - transformer will not be pickleable. + Note: If a lambda function is used as the ``func``, then the + resulting transformer will not be pickleable. Parameters ---------- @@ -74,6 +74,7 @@ class FunctionTransformer(BaseTransformer): """ _tags = { + "authors": ["BoukePostma"], "scitype:transform-input": "Series", # what is the scitype of X: Series, or Panel "scitype:transform-output": "Series", diff --git a/sktime/transformations/series/hidalgo.py b/sktime/transformations/series/hidalgo.py index 4e96a027954..9cfc0da8e00 100644 --- a/sktime/transformations/series/hidalgo.py +++ b/sktime/transformations/series/hidalgo.py @@ -84,6 +84,7 @@ class Hidalgo(BaseTransformer): """ _tags = { + "authors": ["KatieBuc"], "scitype:transform-input": "Series", # what is the scitype of X: Series, or Panel "scitype:transform-output": "Series", diff --git a/sktime/transformations/series/holiday/_holidayfeats.py b/sktime/transformations/series/holiday/_holidayfeats.py index 10d478f511d..385bfe67d6e 100644 --- a/sktime/transformations/series/holiday/_holidayfeats.py +++ b/sktime/transformations/series/holiday/_holidayfeats.py @@ -88,6 +88,13 @@ class HolidayFeatures(BaseTransformer): _required_parameters = ["calendar"] _tags = { + # packaging info + # -------------- + "authors": ["mloning", "VyomkeshVyas"], + "maintainers": "VyomkeshVyas", + "python_dependencies": ["holidays"], + # estimator type + # -------------- "scitype:transform-input": "Series", "scitype:transform-output": "Series", "scitype:transform-labels": "None", @@ -102,7 +109,6 @@ class HolidayFeatures(BaseTransformer): "enforce_index_type": [pd.DatetimeIndex], "transform-returns-same-time-index": True, "skip-inverse-transform": True, - "python_dependencies": ["holidays"], } def __init__( diff --git a/sktime/transformations/series/holiday/country_holidays.py b/sktime/transformations/series/holiday/country_holidays.py index 35d11a73604..b0b6d0913fe 100644 --- a/sktime/transformations/series/holiday/country_holidays.py +++ b/sktime/transformations/series/holiday/country_holidays.py @@ -67,6 +67,14 @@ class CountryHolidaysTransformer(BaseTransformer): """ _tags = { + # packaging info + # -------------- + "authors": "yarnabrina", + "maintainers": "yarnabrina", + "python_version": ">=3.8", + "python_dependencies": ["holidays"], + # estimator type + # -------------- "scitype:transform-input": "Series", "scitype:transform-output": "Series", "scitype:instancewise": True, @@ -79,8 +87,6 @@ class CountryHolidaysTransformer(BaseTransformer): "capability:inverse_transform": False, "capability:unequal_length": True, "handles-missing-data": True, - "python_version": ">=3.8", - "python_dependencies": ["holidays"], } def __init__( diff --git a/sktime/transformations/series/holiday/financial_holidays.py b/sktime/transformations/series/holiday/financial_holidays.py index e037c6a0ca7..42a0281ef52 100644 --- a/sktime/transformations/series/holiday/financial_holidays.py +++ b/sktime/transformations/series/holiday/financial_holidays.py @@ -67,6 +67,14 @@ class FinancialHolidaysTransformer(BaseTransformer): """ _tags = { + # packaging info + # -------------- + "authors": "yarnabrina", + "maintainers": "yarnabrina", + "python_version": ">=3.8", + "python_dependencies": ["holidays"], + # estimator type + # -------------- "scitype:transform-input": "Series", "scitype:transform-output": "Series", "scitype:instancewise": True, @@ -79,8 +87,6 @@ class FinancialHolidaysTransformer(BaseTransformer): "capability:inverse_transform": False, "capability:unequal_length": True, "handles-missing-data": True, - "python_version": ">=3.8", - "python_dependencies": ["holidays"], } def __init__(self, market, years=None, expand=True, observed=True, name=None): diff --git a/sktime/transformations/series/hpfilter.py b/sktime/transformations/series/hpfilter.py index 3ced37e68cc..04918c5a8fc 100644 --- a/sktime/transformations/series/hpfilter.py +++ b/sktime/transformations/series/hpfilter.py @@ -53,6 +53,8 @@ class HPFilter(BaseTransformer): """ _tags = { + "authors": ["ken_maeda"], + "maintainers": ["ken_maeda"], "scitype:transform-input": "Series", # what is the scitype of X: Series, or Panel "scitype:transform-output": "Series", diff --git a/sktime/transformations/series/impute.py b/sktime/transformations/series/impute.py index 40e4d6fa608..efbc4bae536 100644 --- a/sktime/transformations/series/impute.py +++ b/sktime/transformations/series/impute.py @@ -26,41 +26,54 @@ class Imputer(BaseTransformer): Method to fill the missing values. * "drift" : drift/trend values by sktime.PolynomialTrendForecaster(degree=1) - first, X in transform() is filled with ffill then bfill - then PolynomialTrendForecaster(degree=1) is fitted to filled X, and - predict values are queried at indices which had missing values + first, X in transform() is filled with ffill then bfill + then PolynomialTrendForecaster(degree=1) is fitted to filled X, and + predict values are queried at indices which had missing values * "linear" : linear interpolation, uses pd.Series.interpolate() WARNING: This method can not extrapolate, so it is fitted always on the data given to transform(). * "nearest" : use nearest value, uses pd.Series.interpolate() * "constant" : same constant value (given in arg value) for all NaN - * "mean" : pd.Series.mean() of *fit* data - * "median" : pd.Series.median() of *fit* data - * "backfill" to "bfill" : adapted from pd.Series.fillna() - * "pad" or "ffill" : adapted from pd.Series.fillna() + * "mean" : pd.Series.mean() of data seen in ``fit`` + to use data in transform, wrap this estimator in ``FitInTransform`` + * "median" : pd.Series.median() of data seen in ``fit`` + to use data in transform, wrap this estimator in ``FitInTransform`` + * "backfill" to "bfill" : applies ``pd.Series.bfill`` to all data + * "pad" or "ffill" : applies ``pd.Series.ffill`` to all data * "random" : random values between pd.Series.min() and .max() of *fit* data - if pd.Series dtype is int, sample is uniform discrete - if pd.Series dtype is float, sample is uniform continuous - * "forecaster" : use an sktime Forecaster, given in param forecaster. - First, X in *fit* is filled with ffill then bfill - then forecaster is fitted to filled X, and *predict* values are queried - at indices of X data in *transform* which had missing values - For the following methods, the train data is used to fit them: - "drift", "mean", "median", "random". For all other methods, the - transform data is sufficient to compute the impute values. - - missing_values : int/float/str, default=None - The placeholder for the missing values. All occurrences of - missing_values will be imputed, in addition to np.nan. - If None, then only np.nan values are imputed. + if pd.Series dtype is int, sample is uniform discrete + if pd.Series dtype is float, sample is uniform continuous + * "forecaster" : use an sktime forecaster, given in param ``forecaster``. + First, X seed in ``fit`` is filled with ``ffill`` then ``bfill`` + then forecaster is fitted to filled X, and ``predict`` values are queried + at indices of X data in ``transform`` which had missing values. + ``forecaster`` is always applied by variable and instance. + + The following methods, fit non-trivially to the data seen in ``fit``: + "drift", "mean", "median", "random". All other methods + do not depend on values seen in ``fit``. + + missing_values : str, int, float, regex, list, or None, default=None + Value to consider as `np.nan`` and impute, passed to ``DataFrame.replace`` + If str, int, float, all entries equal to ``missing_values`` will be imputed, + in addition to ``np.nan.`` + If regex, all entrie matching regex will be imputed, in addition to ``np.nan.`` + If list, must be list of str, int, float, or regex. + Values matching any list element by above rules will be imputed, + in addition to ``np.nan``. + If None, then only ``np.nan`` values are imputed. + value : int/float, default=None Value to use to fill missing values when method="constant". + Only used if ``method="constant"``, otherwise ignored. + forecaster : Any Forecaster based on sktime.BaseForecaster, default=None Use a given Forecaster to impute by insample predictions when - method="forecaster". Before fitting, missing data is imputed with - method="ffill" or "bfill" as heuristic. in case of multivariate X, - the forecaster is applied separate to each column like a - ColumnEnsembleForecaster. + ``method="forecaster"``. Before fitting, missing data is imputed with + ``method="ffill"`` or ``"bfill"`` as heuristic. In case of multivariate X, + a clone of ``forecaster`` is applied per column. + Only used if ``method="forecaster"``, otherwise ignored. + random_state : int/float/str, optional Value to set random.seed() if method="random", default None @@ -79,6 +92,7 @@ class Imputer(BaseTransformer): """ _tags = { + "authors": ["aiwalter"], "scitype:transform-input": "Series", # what is the scitype of X: Series, or Panel "scitype:transform-output": "Series", @@ -220,13 +234,13 @@ def _transform(self, X, y=None): X_grouped = X.groupby(level=list(range(index.nlevels - 1))) if self.method in ["backfill", "bfill"]: - X = X_grouped.fillna(method="bfill") + X = X_grouped.bfill() # fill trailing NAs of panel instances with reverse method - return X.fillna(method="ffill") + return X.ffill() elif self.method in ["pad", "ffill"]: - X = X_grouped.fillna(method="ffill") + X = X_grouped.ffill() # fill leading NAs of panel instances with reverse method - return X.fillna(method="bfill") + return X.bfill() elif self.method == "mean": return X_grouped.fillna(value=self._mean) elif self.method == "median": @@ -234,8 +248,10 @@ def _transform(self, X, y=None): else: raise AssertionError("Code should not be reached") else: - if self.method in ["backfill", "bfill", "pad", "ffill"]: - X = X.fillna(method=self.method) + if self.method in ["backfill", "bfill"]: + X = X.bfill() + elif self.method in ["pad", "ffill"]: + X = X.ffill() elif self.method == "drift": X = self._impute_with_forecaster(X, y) elif self.method == "forecaster": @@ -251,7 +267,7 @@ def _transform(self, X, y=None): # fill first/last elements of series, # as some methods (e.g. "linear") can't impute those - X = X.fillna(method="ffill").fillna(method="backfill") + X = X.ffill().bfill() return X @@ -272,25 +288,15 @@ def _check_method(self): "forecaster", ]: raise ValueError(f"Given method {method} is not an allowed method.") - if ( - self.value is not None - and method != "constant" - or method == "constant" - and self.value is None - ): + if method == "constant" and self.value is None: raise ValueError( - """Imputing with a value can only be - used if method="constant" and if parameter "value" is not None""" + """Imputing with method=\"constant\" can only be used if parameter + value" is not None""" ) - elif ( - self.forecaster is not None - and method != "forecaster" - or method == "forecaster" - and self.forecaster is None - ): + elif method == "forecaster" and self.forecaster is None: raise ValueError( - """Imputing with a forecaster can only be used if - method=\"forecaster\" and if arg forecaster is not None""" + """Imputing with method=\"forecaster\" can only be + used if param forecaster is not None""" ) else: pass @@ -344,10 +350,8 @@ def _impute_with_forecaster(self, X, y): # fill NaN before fitting with ffill and backfill (heuristic) self._forecaster.fit( - y=self._X[col].fillna(method="ffill").fillna(method="backfill"), - X=self._y[col].fillna(method="ffill").fillna(method="backfill") - if self._y is not None - else None, + y=self._X[col].ffill().bfill(), + X=self._y[col].ffill().bfill() if self._y is not None else None, fh=fh, ) diff --git a/sktime/transformations/series/kalman_filter.py b/sktime/transformations/series/kalman_filter.py index 360e5069eb7..843a7be7ac2 100644 --- a/sktime/transformations/series/kalman_filter.py +++ b/sktime/transformations/series/kalman_filter.py @@ -271,6 +271,8 @@ class BaseKalmanFilter: https://www.stat.pitt.edu/stoffer/dss_files/em.pdf """ + _tags = {"authors": ["NoaBenAmi", "lielleravid"], "maintainers": ["NoaBenAmi"]} + def __init__( self, state_dim, @@ -533,6 +535,14 @@ class KalmanFilterTransformerPK(BaseKalmanFilter, BaseTransformer): """ _tags = { + # packaging info + # -------------- + "authors": ["NoaBenAmi", "lielleravid"], + "maintainers": ["NoaBenAmi"], + "python_dependencies": "pykalman-bardo", + "python_dependencies_alias": {"pykalman-bardo": "pykalman"}, + # estimator type + # -------------- "X_inner_mtype": "np.ndarray", # which mtypes do _fit/_predict support for X? "requires_y": False, # does y need to be passed in fit? "fit_is_empty": False, # is fit empty and can be skipped? Yes = True @@ -542,8 +552,6 @@ class KalmanFilterTransformerPK(BaseKalmanFilter, BaseTransformer): "capability:missing_values:removes": False, # is transform result always guaranteed to contain no missing values? "scitype:instancewise": True, # is this an instance-wise transform? - "python_dependencies": "pykalman-bardo", - "python_dependencies_alias": {"pykalman-bardo": "pykalman"}, } def __init__( @@ -998,6 +1006,13 @@ class KalmanFilterTransformerFP(BaseKalmanFilter, BaseTransformer): """ _tags = { + # packaging info + # -------------- + "authors": ["NoaBenAmi", "lielleravid"], + "maintainers": ["NoaBenAmi"], + "python_dependencies": "filterpy", + # estimator type + # -------------- "scitype:transform-labels": "Series", # what is the scitype of y: None (not needed), Primitives, Series, Panel "X_inner_mtype": "np.ndarray", # which mtypes do _fit/_predict support for X? @@ -1010,7 +1025,6 @@ class KalmanFilterTransformerFP(BaseKalmanFilter, BaseTransformer): "capability:missing_values:removes": False, # is transform result always guaranteed to contain no missing values? "scitype:instancewise": True, # is this an instance-wise transform? - "python_dependencies": "filterpy", } def __init__( diff --git a/sktime/transformations/series/kinematic.py b/sktime/transformations/series/kinematic.py index 07acc721f56..36fb3eeb2f0 100644 --- a/sktime/transformations/series/kinematic.py +++ b/sktime/transformations/series/kinematic.py @@ -65,6 +65,7 @@ class KinematicFeatures(BaseTransformer): """ _tags = { + "authors": ["fkiraly"], "scitype:transform-input": "Series", "scitype:transform-output": "Series", "scitype:instancewise": True, diff --git a/sktime/transformations/series/lag.py b/sktime/transformations/series/lag.py index 3e359e8e468..5af0b25a527 100644 --- a/sktime/transformations/series/lag.py +++ b/sktime/transformations/series/lag.py @@ -116,6 +116,7 @@ class Lag(BaseTransformer): """ _tags = { + "authors": ["fkiraly"], "scitype:transform-input": "Series", # what is the scitype of X: Series, or Panel "scitype:transform-output": "Series", diff --git a/sktime/transformations/series/matrix_profile.py b/sktime/transformations/series/matrix_profile.py index 9ca3bba8753..ed6b037b048 100644 --- a/sktime/transformations/series/matrix_profile.py +++ b/sktime/transformations/series/matrix_profile.py @@ -39,6 +39,12 @@ class MatrixProfileTransformer(BaseTransformer): """ _tags = { + # packaging info + # -------------- + "authors": ["mloning"], + "python_dependencies": "stumpy", + # estimator type + # -------------- "scitype:transform-input": "Series", # what is the scitype of X: Series, or Panel "scitype:transform-output": "Series", @@ -49,7 +55,6 @@ class MatrixProfileTransformer(BaseTransformer): "y_inner_mtype": "None", # which mtypes do _fit/_predict support for y?, "univariate-only": True, "fit_is_empty": True, # for unit test cases - "python_dependencies": "stumpy", } def __init__(self, window_length=3): diff --git a/sktime/transformations/series/outlier_detection.py b/sktime/transformations/series/outlier_detection.py index 26344ed1fd6..68c13b0bc2a 100644 --- a/sktime/transformations/series/outlier_detection.py +++ b/sktime/transformations/series/outlier_detection.py @@ -53,6 +53,7 @@ class HampelFilter(BaseTransformer): """ _tags = { + "authors": ["aiwalter"], "scitype:transform-input": "Series", # what is the scitype of X: Series, or Panel "scitype:transform-output": "Series", @@ -171,8 +172,8 @@ def get_test_params(cls, parameter_set="default"): def _hampel_filter(Z, cv, n_sigma, half_window_length, k): for i in cv.split(Z): cv_window = i[0] - cv_median = np.nanmedian(Z[cv_window]) - cv_sigma = k * np.nanmedian(np.abs(Z[cv_window] - cv_median)) + cv_median = np.nanmedian(Z.iloc[cv_window]) + cv_sigma = k * np.nanmedian(np.abs(Z.iloc[cv_window] - cv_median)) is_start_window = cv_window[-1] == cv.window_length - 1 is_end_window = cv_window[-1] == len(Z) - 1 diff --git a/sktime/transformations/series/paa.py b/sktime/transformations/series/paa.py new file mode 100644 index 00000000000..229d16ea876 --- /dev/null +++ b/sktime/transformations/series/paa.py @@ -0,0 +1,173 @@ +"""Piecewise Aggregate Approximation Transformer.""" + +__author__ = ["steenrotsman"] + +import numpy as np + +from sktime.transformations.base import BaseTransformer +from sktime.utils.warnings import warn + + +# TODO 0.27.0: rename the class PAA2 to PAA +class PAA2(BaseTransformer): + """Piecewise Aggregate Approximation Transformer (PAA). + + PAA [1]_ is a dimensionality reduction technique that divides a time series + into frames and takes their mean. This implementation offers two variants: + 1) the original, which takes the desired number of frames and can set the + frame size to a fraction to support cases where the time series cannot be + divided into the frames equally. + 2) a variant that takes the desired frame size and can decrease the frame + size of the last frame to support cases where the time series is not + evenly divisible into frames. + + Parameters + ---------- + frames : int, optional (default=8, greater equal 1 if frame_size=0) + length of transformed time series. Ignored if `frame_size` is set. + frame_size : int, optional (default=0, greater equal 0) + length of the frames over which the mean is taken. Overrides `frames` if > 0. + + References + ---------- + .. [1] Keogh, E., Chakrabarti, K., Pazzani, M., and Mehrotra, S. + Dimensionality Reduction for Fast Similarity Search + in Large Time Series Databases. + Knowledge and Information Systems 3, 263β286 (2001). + https://doi.org/10.1007/PL00011669 + + Examples + -------- + >>> from numpy import arange + >>> from sktime.transformations.series.paa import PAA2 + + >>> X = arange(10) + >>> paa = PAA2(frames=3) + >>> paa.fit_transform(X) # doctest: +SKIP + array([1.2, 4.5, 7.8]) + >>> paa = PAA2(frame_size=3) # doctest: +SKIP + array([1, 4, 7, 9]) + """ + + _tags = { + "authors": ["steenrotsman"], + "maintainers": ["steenrotsman"], + "scitype:transform-input": "Series", + "scitype:transform-output": "Series", + "scitype:instancewise": True, + "scitype:transform-labels": "None", + "X_inner_mtype": "np.ndarray", + "y_inner_mtype": "None", + "univariate-only": True, + "requires_y": False, + "fit_is_empty": True, + "capability:inverse_transform": False, + "handles-missing-data": False, + } + + def __init__(self, frames=8, frame_size=0): + self.frames = frames + self.frame_size = frame_size + + super().__init__() + + warn( + "In sktime 0.27.0, PAA2 will become the primary PAA implementation in " + "sktime, and will be renamed to PAA. " + "PAA2 is available under both its current and future name at its " + "current location, imports under the deprecated name PAA2 will be possible" + "until 0.28.0. " + "To prepare for the name change, replace imports of PAA2 from " + "sktime.transformations.series.paa by imports of PAA from the same " + "module.", + DeprecationWarning, + obj=self, + ) + + self._check_params() + + def _transform(self, X, y=None): + """Transform X and return a transformed version. + + private _transform containing core logic, called from transform + + Parameters + ---------- + X : Series of mtype np.ndarray + data to be transformed + y : None + unused + + Returns + ------- + X_transformed : Series of mtype np.ndarray + transformed version of X + """ + if self.frame_size: + return self._transform_frame_size(X) + return self._transform_frames(X) + + def _transform_frames(self, X): + """Original PAA definition. + + First performs validity check and handles trivial cases. If self.frames evenly + divides X, calculation is easy. Else, values in X are weighed into the frames. + Last case corrects and adapts https://vigne.sh/posts/piecewise-aggregate-approx/ + """ + if self.frames > X.shape[0]: + raise ValueError( + "Series length cannot be shorter than the desired number of frames." + ) + elif not X.shape[0] % self.frames: + return X.reshape(self.frames, -1).mean(axis=1).T + + indices = np.arange(X.shape[0] * self.frames) // self.frames + return X[indices.reshape(self.frames, -1)].sum(axis=1) / X.shape[0] + + def _transform_frame_size(self, X): + if self.frame_size > X.shape[0]: + raise ValueError( + "Series length cannot be shorter than the desired frame size." + ) + elif last_frame_length := (X.shape[0] % self.frame_size): + last_frame_mean = np.mean(X[-last_frame_length:]) + last_frame_fill = [last_frame_mean] * (self.frame_size - last_frame_length) + X = np.append(X, last_frame_fill) + + return np.mean(X.reshape(-1, self.frame_size), axis=1).T + + @classmethod + def get_test_params(cls, parameter_set="default"): + """Return testing parameter settings for the estimator. + + Parameters + ---------- + parameter_set : str, default="default" + Name of the set of test parameters to return, for use in tests. If no + special parameters are defined for a value, will return `"default"` set. + There are currently no reserved values for transformers. + + Returns + ------- + params : dict or list of dict, default = {} + Parameters to create testing instances of the class + Each dict are parameters to construct an "interesting" test instance, i.e., + `MyClass(**params)` or `MyClass(**params[i])` creates a valid test instance. + `create_test_instance` uses the first (or only) dictionary in `params` + """ + params = {"frames": 4} + return params + + def _check_params(self): + for attribute in ["frames", "frame_size"]: + if not isinstance(getattr(self, attribute), int): + t = type(getattr(self, attribute)).__name__ + raise TypeError(f"{attribute} must be of type int. Found {t}.") + + if self.frames < 1 and not self.frame_size: + raise ValueError("frames must be at least 1.") + + +# TODO 0.27.0: switch to PAA2 = PAA +# TODO 0.28.0: remove the alias line altogether +PAA = PAA2 diff --git a/sktime/transformations/series/peak.py b/sktime/transformations/series/peak.py index c79850dd2ae..81cc91dbbc0 100644 --- a/sktime/transformations/series/peak.py +++ b/sktime/transformations/series/peak.py @@ -132,11 +132,11 @@ class PeakTimeFeature(BaseTransformer): Examples -------- - >>> from sktime.transformations.series.peak import PeakTimeFeature - >>> from sktime.datasets import load_solar - >>> y = load_solar() - >>> y = y.tz_localize(None) - >>> y = y.asfreq("H") + >>> from sktime.transformations.series.peak import PeakTimeFeature # doctest: +SKIP + >>> from sktime.datasets import # doctest: +SKIP + >>> y = load_solar() # doctest: +SKIP + >>> y = y.tz_localize(None) # doctest: +SKIP + >>> y = y.asfreq("H") # doctest: +SKIP Example 1: one interval for peak hour and working hour. (based on one start/end interval) @@ -146,8 +146,8 @@ class PeakTimeFeature(BaseTransformer): >>> transformer = PeakTimeFeature(ts_freq="H", ... peak_hour_start=[6], peak_hour_end=[9], ... working_hour_start=[8], working_hour_end=[16] - ... ) - >>> y_hat_peak = transformer.fit_transform(y) + ... ) # doctest: +SKIP + >>> y_hat_peak = transformer.fit_transform(y) # doctest: +SKIP Example 2: two intervals for peak hour and working hour. (based on two start/end intervals) @@ -157,8 +157,8 @@ class PeakTimeFeature(BaseTransformer): >>> transformer = PeakTimeFeature(ts_freq="H", ... peak_hour_start=[6, 16], peak_hour_end=[9, 20], ... working_hour_start=[8, 15], working_hour_end=[12, 19] - ... ) - >>> y_hat_peak = transformer.fit_transform(y) + ... ) # doctest: +SKIP + >>> y_hat_peak = transformer.fit_transform(y) # doctest: +SKIP Example 3: We may have peak for different seasonality Here is an example for peak hour, peak day, peak week, peak month for @@ -171,11 +171,18 @@ class PeakTimeFeature(BaseTransformer): ... peak_day_start=[1, 2], peak_day_end=[2, 3], ... peak_week_start=[35, 45], peak_week_end=[40, 52], ... peak_month_start=[1, 7], peak_month_end=[6, 12] - ... ) - >>> y_hat_peak = transformer.fit_transform(y) + ... ) # doctest: +SKIP + >>> y_hat_peak = transformer.fit_transform(y) # doctest: +SKIP """ _tags = { + # packaging info + # -------------- + "authors": ["ali-parizad"], + "maintainers": ["ali-parizad"], + "python_dependencies": "pandas>=1.2.0", # from DateTimeProperties + # estimator type + # -------------- "scitype:transform-input": "Series", # what is the scitype of X: Series, or Panel "scitype:transform-output": "Series", @@ -194,7 +201,6 @@ class PeakTimeFeature(BaseTransformer): "transform-returns-same-time-index": True, "enforce_index_type": [pd.DatetimeIndex, pd.PeriodIndex], "skip-inverse-transform": True, - "python_dependencies": "pandas>=1.2.0", # from DateTimeProperties } def __init__( diff --git a/sktime/transformations/series/sax.py b/sktime/transformations/series/sax.py new file mode 100644 index 00000000000..e6414b0b04c --- /dev/null +++ b/sktime/transformations/series/sax.py @@ -0,0 +1,163 @@ +"""Symbolic Aggregate Approximation Transformer.""" +__author__ = ["steenrotsman"] +import numpy as np +from scipy.stats import norm, zscore + +from sktime.transformations.base import BaseTransformer +from sktime.transformations.series.paa import PAA +from sktime.utils.warnings import warn + + +# TODO 0.27.0: rename the class SAX2 to SAX +class SAX2(BaseTransformer): + """Symbolic Aggregate approXimation Transformer (SAX). + + SAX [2]_ is a dimensionality reduction technique that z-normalises a time + series, applies Piecewise Aggregate Approximation (PAA) [1]_, and bins the + mean of each PAA frame to a discrete value, resulting in a SAX word. + + This implementation offers two variants: + 1) the original, which takes the desired number of frames and can set the + frame size to a fraction to support cases where the time series cannot be + divided into the frames equally. + 2) a variant that takes the desired frame size and can decrease the frame + size of the last frame to support cases where the time series is not + evenly divisible into frames. + + Parameters + ---------- + word_size : int, optional (default=8, greater equal 1 if frame_size=0) + length of transformed time series. Ignored if `frame_size` is set. + alphabet_size : int, optional (default=5, greater equal 2) + number of discrete values transformed time series is binned to. + frame_size : int, optional (default=0, greater equal 0) + length of the frames over which the mean is taken. Overrides `frames` if > 0. + + References + ---------- + .. [1] Keogh, E., Chakrabarti, K., Pazzani, M., and Mehrotra, S. + Dimensionality Reduction for Fast Similarity Search + in Large Time Series Databases. + Knowledge and Information Systems 3, 263β286 (2001). + https://doi.org/10.1007/PL00011669 + .. [2] Lin, J., Keogh, E., Wei, L., and Lonardi, S. + Experiencing SAX: A Novel Symbolic Representation of Time Series. + Data Mining and Knowledge Discovery 15, 107β144 (2007). + https://doi.org/10.1007/s10618-007-0064-z + + Examples + -------- + >>> from numpy import arange + >>> from sktime.transformations.series.sax import SAX2 + + >>> X = arange(10) + >>> sax = SAX2(word_size=3, alphabet_size=5) + >>> sax.fit_transform(X) # doctest: +SKIP + array([0, 2, 4]) + >>> sax = SAX2(frame_size=2, alphabet_size=5) # doctest: +SKIP + array([0, 1, 2, 3, 4]) + """ + + _tags = { + "authors": ["steenrotsman"], + "maintainers": ["steenrotsman"], + "scitype:transform-input": "Series", + "scitype:transform-output": "Series", + "scitype:instancewise": True, + "scitype:transform-labels": "None", + "X_inner_mtype": "np.ndarray", + "y_inner_mtype": "None", + "univariate-only": True, + "requires_y": False, + "fit_is_empty": True, + "capability:inverse_transform": False, + "handles-missing-data": False, + } + + def __init__(self, word_size=8, alphabet_size=5, frame_size=0): + self.word_size = word_size + self.alphabet_size = alphabet_size + self.frame_size = frame_size + + super().__init__() + + warn( + "In sktime 0.27.0, SAX2 will become the primary SAX implementation in " + "sktime, and will be renamed to SAX. " + "SAX2 is available under both its current and future name at its " + "current location, imports under the deprecated name SAX2 will be possible" + "until 0.28.0. " + "To prepare for the name change, replace imports of SAX2 from " + "sktime.transformations.series.sax by imports of SAX from the same " + "module.", + DeprecationWarning, + obj=self, + ) + + self._check_params() + + def _transform(self, X, y=None): + """Transform X and return a transformed version. + + private _transform containing core logic, called from transform + + Parameters + ---------- + X : Series of mtype np.ndarray + data to be transformed + y : None + unused + + Returns + ------- + X_transformed : Series of mtype np.ndarray + transformed version of X + """ + X_transformed = zscore(X) + paa = PAA(self.word_size, self.frame_size) + X_transformed = paa.fit_transform(X_transformed) + X_transformed = np.digitize(X_transformed, self._get_breakpoints()) + return X_transformed + + def _get_breakpoints(self): + return norm.ppf(np.arange(1, self.alphabet_size) / self.alphabet_size, loc=0) + + @classmethod + def get_test_params(cls, parameter_set="default"): + """Return testing parameter settings for the estimator. + + Parameters + ---------- + parameter_set : str, default="default" + Name of the set of test parameters to return, for use in tests. If no + special parameters are defined for a value, will return `"default"` set. + There are currently no reserved values for transformers. + + Returns + ------- + params : dict or list of dict, default = {} + Parameters to create testing instances of the class + Each dict are parameters to construct an "interesting" test instance, i.e., + `MyClass(**params)` or `MyClass(**params[i])` creates a valid test instance. + `create_test_instance` uses the first (or only) dictionary in `params` + """ + params = {"word_size": 4, "alphabet_size": 5} + return params + + def _check_params(self): + for attribute in ["word_size", "alphabet_size", "frame_size"]: + if not isinstance(getattr(self, attribute), int): + t = type(getattr(self, attribute)).__name__ + raise TypeError(f"{attribute} must be of type int. Found {t}.") + + if self.word_size < 1 and not self.frame_size: + raise ValueError("word_size must be at least 1.") + if self.alphabet_size < 2: + raise ValueError("alphabet_size must be at least 2.") + if self.frame_size < 0: + raise ValueError("frame_size must be at least 0.") + + +# TODO 0.27.0: switch to SAX2 = SAX +# TODO 0.28.0: remove the alias line altogether +SAX = SAX2 diff --git a/sktime/transformations/series/scaledasinh.py b/sktime/transformations/series/scaledasinh.py index 8df1b26532d..b459ad0d7e5 100644 --- a/sktime/transformations/series/scaledasinh.py +++ b/sktime/transformations/series/scaledasinh.py @@ -1,7 +1,7 @@ # copyright: sktime developers, BSD-3-Clause License (see LICENSE file) """Implements the Hyperbolic Sine transformation and its inverse.""" -__author__ = ["Ali Parizad"] +__author__ = ["ali-parizad"] __all__ = ["ScaledAsinhTransformer"] @@ -16,7 +16,7 @@ class ScaledAsinhTransformer(BaseTransformer): Known as variance stabilizing transformation, Combined with an sktime.forecasting.compose.TransformedTargetForecaster, - can be usefull in time series that exhibit spikes [1]_, [2]_ + can be useful in time series that exhibit spikes [1]_, [2]_ Parameters ---------- @@ -82,6 +82,12 @@ class ScaledAsinhTransformer(BaseTransformer): """ _tags = { + # packaging info + # -------------- + "authors": ["ali-parizad"], + "maintainers": ["ali-parizad"], + # estimator type + # -------------- "scitype:transform-input": "Series", # what is the scitype of X: Series, or Panel "scitype:transform-output": "Series", diff --git a/sktime/transformations/series/scaledlogit.py b/sktime/transformations/series/scaledlogit.py index 741931c9b01..e87b9ff6cbe 100644 --- a/sktime/transformations/series/scaledlogit.py +++ b/sktime/transformations/series/scaledlogit.py @@ -90,6 +90,11 @@ class ScaledLogitTransformer(BaseTransformer): """ _tags = { + # packaging info + # -------------- + "authors": ["ltsaprounis"], + # estimator type + # -------------- "scitype:transform-input": "Series", # what is the scitype of X: Series, or Panel "scitype:transform-output": "Series", diff --git a/sktime/transformations/series/subset.py b/sktime/transformations/series/subset.py index d82b6a0eeb0..53ab86a3751 100644 --- a/sktime/transformations/series/subset.py +++ b/sktime/transformations/series/subset.py @@ -36,6 +36,7 @@ class IndexSubset(BaseTransformer): """ _tags = { + "authors": ["fkiraly"], "scitype:transform-input": "Series", # what is the scitype of X: Series, or Panel "scitype:transform-output": "Series", diff --git a/sktime/transformations/series/summarize.py b/sktime/transformations/series/summarize.py index 977f8920ef3..5d4836b134a 100644 --- a/sktime/transformations/series/summarize.py +++ b/sktime/transformations/series/summarize.py @@ -11,6 +11,7 @@ from sktime.split import ExpandingWindowSplitter, SlidingWindowSplitter from sktime.transformations.base import BaseTransformer from sktime.utils.multiindex import flatten_multiindex +from sktime.utils.warnings import warn class WindowSummarizer(BaseTransformer): @@ -189,6 +190,12 @@ class WindowSummarizer(BaseTransformer): """ _tags = { + # packaging info + # -------------- + "authors": ["danbartl", "grzegorzrut", "ltsaprounis"], + "maintainers": ["danbartl"], + # estimator type + # -------------- "scitype:transform-input": "Series", "scitype:transform-output": "Series", "scitype:instancewise": True, @@ -455,7 +462,7 @@ class description for in-depth explanation. else: feat = getattr( Z.shift(lag) - .fillna(method="bfill") + .bfill() .rolling(window=window_length, min_periods=window_length), summarizer, )() @@ -474,7 +481,7 @@ class description for in-depth explanation. feat = Z.apply( lambda x: getattr( x.shift(lag) - .fillna(method="bfill") + .bfill() .rolling(window=window_length, min_periods=window_length), summarizer, )() @@ -483,7 +490,7 @@ class description for in-depth explanation. if bfill is False: feat = Z.shift(lag) else: - feat = Z.shift(lag).fillna(method="bfill") + feat = Z.shift(lag).bfill() if isinstance(Z, pd.core.groupby.generic.SeriesGroupBy) and callable( summarizer ): @@ -498,7 +505,7 @@ class description for in-depth explanation. ) feat = pd.DataFrame(feat) if bfill is True: - feat = feat.fillna(method="bfill") + feat = feat.bfill() if callable(summarizer): name = summarizer.__name__ @@ -645,6 +652,11 @@ class SummaryTransformer(BaseTransformer): """ _tags = { + # packaging info + # -------------- + "authors": ["RNKuhns", "fkiraly"], + # estimator type + # -------------- "scitype:transform-input": "Series", # what is the scitype of X: Series, or Panel "scitype:transform-output": "Primitives", @@ -763,6 +775,7 @@ def get_test_params(cls, parameter_set="default"): return [params1, params2, params3, params4] +# TODO 0.27.0: remove remember_data from docstring and __init__ class SplitterSummarizer(BaseTransformer): """Create summary values of a time series' splits. @@ -780,21 +793,46 @@ class SplitterSummarizer(BaseTransformer): Parameters ---------- transformer : `sktime` transformer inheriting from `BaseTransformer` - series-to-primitives transformer used to convert series to primitives. + series-to-primitives transformer used to convert series to primitives. splitter : `sktime` splitter inheriting from `BaseSplitter`, optional (default=None) - splitter used to divide the series. - If None, it takes `ExpandingWindowSplitter` with `start_with_window=False` - and otherwise default parameters. + splitter used to divide the series. + If None, it takes `ExpandingWindowSplitter` with `start_with_window=False` + and otherwise default parameters. index : str, optional (default="last") - Determines the indexing approach for the resulting series. - If "last", the latest index of the split is used. - If anything else, the row's number becomes the index. - - remember_data : bool, optional (default=True) - if True, memorizes data seen in ``fit``, ``update``, uses it in ``transform`` - if False, only uses data seen in ``transform`` for splits and summaries. + Determines the indexing approach for the resulting series. + If "last", the latest index of the split is used. + If anything else, the row's number becomes the index. + + fit_on : str, optional (default="transform_train") + What data to fit ``transformer`` on, for the ``i``-th row + of the resulting series. + + * "all_train" : transform the ``i``-th train split obtained from + ``splitter.split_series``, called on + all data seen in ``fit`` and ``update`` calls, + plus all data seen in ``transform``. + * "all_test" : transform the ``i``-th test split obtained from + ``splitter.split_series``, called on + all data seen in ``fit`` and ``update`` calls, + plus all data seen in ``transform``. + * "transform_train" : transform the ``i``-th train split obtained from + ``splitter.split_series``, called on the data seen in ``transform``. + * "transform_test" : transform the ``i``-th test split obtained from + ``splitter.split_series``, called on the data seen in ``transform``. + + transform_on : str, optional (default="transform_train") + What data to transform with ``transformer``, for the ``i``-th row + of the resulting series. + Values and meaning same as for ``fit_on``. + + remember_data : bool, optional (default=False) + deprecated, will be removed in 0.27.0. + If set, overrides ``fit_on`` and ``transform_on``: + + * True: ``fit_on="all_train"``, ``transform_on="all_train"`` + * False: ``fit_on="transform_train"``, ``transform_on="transform_train"`` Methods ------- @@ -819,6 +857,11 @@ class SplitterSummarizer(BaseTransformer): """ _tags = { + # packaging info + # -------------- + "authors": ["BensHamza", "fkiraly"], + # estimator type + # -------------- "scitype:transform-input": "Series", # what is the scitype of X: Series, or Panel "scitype:transform-output": "Series", @@ -830,11 +873,21 @@ class SplitterSummarizer(BaseTransformer): "fit_is_empty": True, } - def __init__(self, transformer, splitter=None, index="last", remember_data=False): + def __init__( + self, + transformer, + splitter=None, + index="last", + remember_data=None, + fit_on="transform_train", + transform_on="transform_train", + ): self.transformer = transformer self.index = index self.splitter = splitter self.remember_data = remember_data + self.fit_on = fit_on + self.transform_on = transform_on if splitter is None: self._splitter = SlidingWindowSplitter(start_with_window=False) @@ -851,12 +904,34 @@ def __init__(self, transformer, splitter=None, index="last", remember_data=False if not hasattr(self._splitter, "split_series"): raise ValueError( f"Error in {self.__class__.__name__}, splitter parameter, if passed, " - "should be an BaseSplitter descendant with a seplit_series method" + "should be an BaseSplitter descendant with a split_series method" ) - if remember_data: + # TODO 0.27.0: remove remember_data and related logic + # remove next lie + need_to_remember_data = remember_data is not None and remember_data + # replace next two lines by + # need_to_remember_data = fit_on.startswith("all") + # or transform_on.startswith("all") + need_to_remember_data = need_to_remember_data or fit_on.startswith("all") + need_to_remember_data = need_to_remember_data or transform_on.startswith("all") + + if need_to_remember_data: self.set_tags(**{"remember_data": True, "fit_is_empty": False}) + # TODO 0.27.0: remove remember_data and related logic + if remember_data is not None: + warn( + "remember_data is deprecated and will be removed in 0.27.0. " + "Use fit_on and transform_on instead. " + "Replace remember_data=True with fit_on='all_train' and " + "transform_on='all_train'. Replace remember_data=False with " + "fit_on='transform_train' and transform_on='transform_train'.", + DeprecationWarning, + obj=self, + stacklevel=2, + ) + def _transform(self, X, y=None): """Transform X and return a transformed version. @@ -874,16 +949,38 @@ def _transform(self, X, y=None): Xt : pd.DataFrame The transformed Data """ - if self.remember_data: - X = X.combine_first(self._X) + fit_on = self.fit_on + transform_on = self.transform_on + + if self.remember_data is not None: + if self.remember_data: + fit_on = "all_train" + transform_on = "all_train" + else: + fit_on = "transform_train" + transform_on = "transform_train" + + X_dict = {"transform": X} + + if fit_on.startswith("all") or transform_on.startswith("all"): + X_all = X.combine_first(self._X) + X_dict["all"] = X_all + + fit_on_data = fit_on.split("_")[0] + transform_on_data = transform_on.split("_")[0] + fit_on_ix = int(fit_on.split("_")[1] == "test") + transform_on_ix = int(transform_on.split("_")[1] == "test") transformed_series = [] - splits = self._splitter.split_series(X) + splits_fit = self._splitter.split_series(X_dict[fit_on_data]) + splits_transform = self._splitter.split_series(X_dict[transform_on_data]) - for split in splits: + for split_fit, split_transform in zip(splits_fit, splits_transform): tf = self.transformer.clone() - transformed_split = tf.fit_transform(split[0]) - transformed_split.index = [split[0].index[-1]] + X_fit = split_fit[fit_on_ix] + X_transform = split_transform[transform_on_ix] + transformed_split = tf.fit(X_fit).transform(X_transform) + transformed_split.index = [X_transform.index[-1]] transformed_series.append(transformed_split) Xt = pd.concat(transformed_series) @@ -927,7 +1024,8 @@ def get_test_params(cls, parameter_set="default"): params3 = { "transformer": SummaryTransformer(), "splitter": SlidingWindowSplitter(window_length=3, step_length=2), - "remember_data": True, + "fit_on": "all_train", + "transform_on": "all_train", } params4 = { @@ -936,6 +1034,8 @@ def get_test_params(cls, parameter_set="default"): window_length=3, step_length=2, fh=1, start_with_window=True ), "index": "last", + "fit_on": "all_test", + "transform_on": "transform_test", } return [params1, params2, params3, params4] diff --git a/sktime/transformations/series/tests/test_imputer.py b/sktime/transformations/series/tests/test_imputer.py index d0275454e02..05b958d105b 100644 --- a/sktime/transformations/series/tests/test_imputer.py +++ b/sktime/transformations/series/tests/test_imputer.py @@ -31,6 +31,8 @@ z.iloc[-1] = np.nan +@pytest.mark.parametrize("forecaster", [None, NaiveForecaster()]) +@pytest.mark.parametrize("value", [None, 1]) @pytest.mark.parametrize("Z", [y, X, z]) @pytest.mark.parametrize( "method", @@ -47,10 +49,10 @@ "forecaster", ], ) -def test_imputer(method, Z): +def test_imputer(method, Z, value, forecaster): """Test univariate and multivariate Imputer with all methods.""" - forecaster = NaiveForecaster() if method == "forecaster" else None - value = 3 if method == "constant" else None + forecaster = NaiveForecaster() if method == "forecaster" else forecaster + value = 1 if method == "constant" else value t = Imputer(method=method, forecaster=forecaster, value=value) y_hat = t.fit_transform(Z) assert not y_hat.isnull().to_numpy().any() diff --git a/sktime/transformations/series/tests/test_paa.py b/sktime/transformations/series/tests/test_paa.py new file mode 100644 index 00000000000..4acf1b13d68 --- /dev/null +++ b/sktime/transformations/series/tests/test_paa.py @@ -0,0 +1,44 @@ +"""PAA transformer test code.""" +import numpy as np +import pytest + +from sktime.transformations.series.paa import PAA2 + + +# Check that exception is raised for bad frames values. +# input types - string, float, negative int, negative float, empty dict. +# correct input is an integer of 1 or more, but less than or equal to series length. +@pytest.mark.parametrize("bad_frames", ["str", 1.2, -1.2, -1, {}, 0, 11]) +def test_bad_input_args(bad_frames): + """Test that exception is raised for bad num levels.""" + X = np.arange(10).T + + if not isinstance(bad_frames, int): + for attribute in ["frames", "frame_size"]: + with pytest.raises(TypeError): + PAA2(**{attribute: bad_frames}).fit(X).transform(X) + else: + with pytest.raises(ValueError): + PAA2(bad_frames).fit(X).transform(X) + + +@pytest.mark.parametrize( + "frames,frame_size,expected", + [ + (1, 0, [4.5]), + (2, 0, [2, 7]), + (3, 0, [1.2, 4.5, 7.8]), + (10, 0, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]), + (0, 1, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]), + (0, 2, [0.5, 2.5, 4.5, 6.5, 8.5]), + (0, 3, [1, 4, 7, 9]), + (0, 10, [4.5]), + ], +) +def test_output_of_transformer(frames, frame_size, expected): + """Test that the transformer has changed the data correctly.""" + X = np.arange(10).T + paa = PAA2(frames, frame_size) + res = paa.fit_transform(X) + want = np.array(expected, dtype=np.float64).T + np.testing.assert_array_equal(res, want) diff --git a/sktime/transformations/series/tests/test_sax.py b/sktime/transformations/series/tests/test_sax.py new file mode 100644 index 00000000000..ca122c932ff --- /dev/null +++ b/sktime/transformations/series/tests/test_sax.py @@ -0,0 +1,48 @@ +"""SAX transformer test code.""" +import numpy as np +import pytest + +from sktime.transformations.series.sax import SAX2 + + +# Check that exception is raised for bad size values. +# input types - string, float, negative float, empty dict. +# correct input is an integer, see below for value restrictions. +@pytest.mark.parametrize("bad_size", ["str", 1.2, -1.2, {}]) +def test_bad_input_types(bad_size): + """Test that exception is raised for bad sizes.""" + for attribute in ["word_size", "alphabet_size", "frame_size"]: + with pytest.raises(TypeError): + SAX2(**{attribute: bad_size}) + + +@pytest.mark.parametrize( + "attribute,bad_size", + [ + ("word_size", 0), + ("alphabet_size", 1), + ("frame_size", -1), + ], +) +def test_bad_input_values(attribute, bad_size): + """Test that word_size is at least 1 (if frame_size is not set).""" + with pytest.raises(ValueError): + SAX2(**{attribute: bad_size}) + + +@pytest.mark.parametrize( + "word_size,alphabet_size,frame_size,expected", + [ + (2, 5, 0, [0, 4]), + (3, 5, 0, [0, 2, 4]), + (0, 5, 2, [0, 1, 2, 3, 4]), + (0, 5, 3, [0, 2, 4, 4]), + ], +) +def test_output_of_transformer(word_size, alphabet_size, frame_size, expected): + """Test that the transformer has changed the data correctly.""" + X = np.arange(10).T + sax = SAX2(word_size, alphabet_size, frame_size) + res = sax.fit_transform(X) + want = np.array(expected, dtype=np.float64).T + np.testing.assert_array_equal(res, want) diff --git a/sktime/transformations/series/theta.py b/sktime/transformations/series/theta.py index 38243d91b1a..f980ff6243f 100644 --- a/sktime/transformations/series/theta.py +++ b/sktime/transformations/series/theta.py @@ -60,6 +60,11 @@ class ThetaLinesTransformer(BaseTransformer): """ _tags = { + # packaging info + # -------------- + "authors": ["GuzalBulatova", "mloning"], + # estimator type + # -------------- "scitype:transform-input": "Series", # what is the scitype of X: Series, or Panel "scitype:transform-output": "Series", diff --git a/sktime/transformations/series/time_since.py b/sktime/transformations/series/time_since.py index 21b85c1c645..d7732cdfaf2 100644 --- a/sktime/transformations/series/time_since.py +++ b/sktime/transformations/series/time_since.py @@ -73,6 +73,12 @@ class TimeSince(BaseTransformer): """ _tags = { + # packaging info + # -------------- + "authors": ["KishManani"], + "maintainers": ["KishManani"], + # estimator type + # -------------- # what is the scitype of X: Series, or Panel "scitype:transform-input": "Series", # what scitype is returned: Primitives, Series, Panel diff --git a/sktime/transformations/series/vmd/_vmd.py b/sktime/transformations/series/vmd/_vmd.py index 155670a7531..3dcc9744c3f 100644 --- a/sktime/transformations/series/vmd/_vmd.py +++ b/sktime/transformations/series/vmd/_vmd.py @@ -97,6 +97,12 @@ class VmdTransformer(BaseTransformer): """ _tags = { + # packaging info + # -------------- + "authors": ["DaneLyttinen", "vcarvo"], + "maintainers": ["DaneLyttinen", "vcarvo"], + # estimator type + # -------------- "scitype:transform-input": "Series", "scitype:transform-output": "Series", "scitype:instancewise": True, diff --git a/sktime/transformations/tests/test_all_transformers.py b/sktime/transformations/tests/test_all_transformers.py index 493fb63d33f..b6a9699ac62 100644 --- a/sktime/transformations/tests/test_all_transformers.py +++ b/sktime/transformations/tests/test_all_transformers.py @@ -78,6 +78,8 @@ def _expected_trafo_output_scitype(self, X_scitype, trafo_input, trafo_output): return "Panel" if X_scitype in ["Panel", "Hierarchical"]: return "Hierarchical" + if trafo_input == "Panel" and trafo_output == "Series": + return "Series" def test_fit_transform_output(self, estimator_instance, scenario): """Test that transform output is of expected scitype.""" diff --git a/sktime/transformations/tests/test_base.py b/sktime/transformations/tests/test_base.py index 4e09393af9f..be3b7f85b4a 100644 --- a/sktime/transformations/tests/test_base.py +++ b/sktime/transformations/tests/test_base.py @@ -35,11 +35,15 @@ TransformerFitTransformSeriesUnivariate, ) from sktime.utils._testing.series import _make_series +from sktime.utils.parallel import _get_parallel_test_fixtures from sktime.utils.validation._dependencies import _check_soft_dependencies # other scenarios that might be needed later in development: # TransformerFitTransformPanelUnivariateWithClassY, +# list of parallelization backends to test +BACKENDS = _get_parallel_test_fixtures("config") + def inner_X_scitypes(est): """Return list of scitypes supported by class est, as list of str.""" @@ -183,7 +187,7 @@ def test_panel_in_panel_out_supported(): # todo: possibly, add mtype check, use metadata return -@pytest.mark.parametrize("backend", [None, "joblib", "loky", "threading"]) +@pytest.mark.parametrize("backend", BACKENDS) def test_panel_in_panel_out_not_supported_but_series(backend): """Test that fit/transform runs and returns the correct output type. @@ -199,7 +203,7 @@ def test_panel_in_panel_out_not_supported_but_series(backend): # one example for a transformer which supports Series internally but not Panel cls = BoxCoxTransformer est = cls.create_test_instance() - est.set_config(**{"backend:parallel": backend}) + est.set_config(**backend.copy()) # ensure cls is a good example, if this fails, choose another example # (if this changes, it may be due to implementing more scitypes) # (then this is not a failure of cls, but we need to choose another example) @@ -253,7 +257,7 @@ def test_series_in_primitives_out_supported_fit_in_transform(): assert len(Xt) == 1 -@pytest.mark.parametrize("backend", [None, "joblib", "loky", "threading"]) +@pytest.mark.parametrize("backend", BACKENDS) def test_panel_in_primitives_out_not_supported_fit_in_transform(backend): """Test that fit/transform runs and returns the correct output type. @@ -269,7 +273,7 @@ def test_panel_in_primitives_out_not_supported_fit_in_transform(backend): # one example for a transformer which supports Series internally but not Panel cls = SummaryTransformer est = cls.create_test_instance() - est.set_config(**{"backend:parallel": backend}) + est.set_config(**backend.copy()) # ensure cls is a good example, if this fails, choose another example # (if this changes, it may be due to implementing more scitypes) # (then this is not a failure of cls, but we need to choose another example) @@ -370,7 +374,7 @@ def test_panel_in_primitives_out_supported_with_y_in_fit_but_not_transform(): assert len(Xt) == 7 -@pytest.mark.parametrize("backend", [None, "joblib", "loky", "threading"]) +@pytest.mark.parametrize("backend", BACKENDS) def test_hierarchical_in_hierarchical_out_not_supported_but_series(backend): """Test that fit/transform runs and returns the correct output type. @@ -386,7 +390,7 @@ def test_hierarchical_in_hierarchical_out_not_supported_but_series(backend): # one example for a transformer which supports Series internally cls = BoxCoxTransformer est = cls.create_test_instance() - est.set_config(**{"backend:parallel": backend}) + est.set_config(**backend.copy()) # ensure cls is a good example, if this fails, choose another example # (if this changes, it may be due to implementing more scitypes) # (then this is not a failure of cls, but we need to choose another example) @@ -444,7 +448,7 @@ def test_hierarchical_in_hierarchical_out_not_supported_but_series_fit_in_transf assert len(Xt) == 2 * 4 * 12 -@pytest.mark.parametrize("backend", [None, "joblib", "loky", "threading"]) +@pytest.mark.parametrize("backend", BACKENDS) def test_vectorization_multivariate_no_row_vectorization(backend): """Test that multivariate vectorization of univariate transformers works. @@ -463,7 +467,7 @@ def test_vectorization_multivariate_no_row_vectorization(backend): # one example for a transformer which supports Series internally cls = BoxCoxTransformer est = cls.create_test_instance() - est.set_config(**{"backend:parallel": backend}) + est.set_config(**backend.copy()) # ensure cls is a good example, if this fails, choose another example # (if this changes, it may be due to implementing multivariate functionality) # (then this is not a failure of cls, but we need to choose another example) @@ -485,7 +489,7 @@ def test_vectorization_multivariate_no_row_vectorization(backend): assert len(Xt.columns) == len(scenario.args["fit"]["X"].columns) -@pytest.mark.parametrize("backend", [None, "joblib", "loky", "threading"]) +@pytest.mark.parametrize("backend", BACKENDS) def test_vectorization_multivariate_and_hierarchical(backend): """Test that fit/transform runs and returns the correct output type. @@ -504,7 +508,7 @@ def test_vectorization_multivariate_and_hierarchical(backend): # one example for a transformer which supports Series internally cls = BoxCoxTransformer est = cls.create_test_instance() - est.set_config(**{"backend:parallel": backend}) + est.set_config(**backend.copy()) # ensure cls is a good example, if this fails, choose another example # (if this changes, it may be due to implementing more scitypes) # (then this is not a failure of cls, but we need to choose another example) @@ -528,7 +532,7 @@ def test_vectorization_multivariate_and_hierarchical(backend): assert len(Xt.columns) == len(scenario.args["fit"]["X"].columns) -@pytest.mark.parametrize("backend", [None, "joblib", "loky", "threading"]) +@pytest.mark.parametrize("backend", BACKENDS) def test_vectorization_multivariate_no_row_vectorization_empty_fit(backend): """Test that multivariate vectorization of univariate transformers works. @@ -547,7 +551,7 @@ def test_vectorization_multivariate_no_row_vectorization_empty_fit(backend): # one example for a transformer which supports Series internally cls = BoxCoxTransformer est = FitInTransform(cls.create_test_instance()) - est.set_config(**{"backend:parallel": backend}) + est.set_config(**backend.copy()) # ensure cls is a good example, if this fails, choose another example # (if this changes, it may be due to implementing multivariate functionality) # (then this is not a failure of cls, but we need to choose another example) @@ -569,7 +573,7 @@ def test_vectorization_multivariate_no_row_vectorization_empty_fit(backend): assert len(Xt.columns) == len(scenario.args["fit"]["X"].columns) -@pytest.mark.parametrize("backend", [None, "joblib", "loky", "threading"]) +@pytest.mark.parametrize("backend", BACKENDS) def test_vectorization_multivariate_and_hierarchical_empty_fit(backend): """Test that fit/transform runs and returns the correct output type. @@ -588,7 +592,7 @@ def test_vectorization_multivariate_and_hierarchical_empty_fit(backend): # one example for a transformer which supports Series internally cls = BoxCoxTransformer est = FitInTransform(cls.create_test_instance()) - est.set_config(**{"backend:parallel": backend}) + est.set_config(**backend.copy()) # ensure cls is a good example, if this fails, choose another example # (if this changes, it may be due to implementing more scitypes) # (then this is not a failure of cls, but we need to choose another example) diff --git a/sktime/transformations/tests/test_compose.py b/sktime/transformations/tests/test_compose.py index 92cb635b5a5..0b64a605be9 100644 --- a/sktime/transformations/tests/test_compose.py +++ b/sktime/transformations/tests/test_compose.py @@ -5,10 +5,12 @@ __all__ = [] import pandas as pd +import pytest from sklearn.preprocessing import StandardScaler from sktime.datasets import load_airline from sktime.datatypes import get_examples +from sktime.transformations.bootstrap import STLBootstrapTransformer from sktime.transformations.compose import ( FeatureUnion, InvertTransform, @@ -22,8 +24,9 @@ from sktime.transformations.series.subset import ColumnSelect from sktime.transformations.series.summarize import SummaryTransformer from sktime.transformations.series.theta import ThetaLinesTransformer -from sktime.utils._testing.deep_equals import deep_equals from sktime.utils._testing.estimator_checks import _assert_array_almost_equal +from sktime.utils.deep_equals import deep_equals +from sktime.utils.validation._dependencies import _check_estimator_deps def test_dunder_mul(): @@ -83,8 +86,28 @@ def test_dunder_add(): _assert_array_almost_equal(t123r.fit_transform(X), t123.fit_transform(X)) +def test_add_sklearn_autoadapt(): + """Test the add dunder method, with sklearn coercion.""" + X = pd.DataFrame({"a": [1, 2], "b": [3, 4]}) + + t1 = ExponentTransformer(power=2) + t2 = StandardScaler() + t3 = ExponentTransformer(power=3) + + t123 = t1 + t2 + t3 + t123r = t1 + (t2 + t3) + t123l = (t1 + t2) + t3 + + assert isinstance(t123, FeatureUnion) + assert isinstance(t123r, FeatureUnion) + assert isinstance(t123l, FeatureUnion) + + _assert_array_almost_equal(t123.fit_transform(X), t123l.fit_transform(X)) + _assert_array_almost_equal(t123r.fit_transform(X), t123l.fit_transform(X)) + + def test_mul_sklearn_autoadapt(): - """Test auto-adapter for sklearn in mul.""" + """Test the mul dunder method, with sklearn coercion.""" X = pd.DataFrame({"a": [1, 2], "b": [3, 4]}) t1 = ExponentTransformer(power=2) @@ -262,3 +285,26 @@ def test_dunder_neg(): assert isinstance(tp.get_params()["transformer"], ExponentTransformer) _assert_array_almost_equal(tp.fit_transform(X), X) + + +@pytest.mark.skipif( + not _check_estimator_deps(STLBootstrapTransformer, severity="none"), + reason="skip test if required soft dependency for statsmodels not available", +) +def test_input_output_series_panel_chain(): + """Test that series-to-panel can be chained with series-to-series trafos. + + Failure case of #5624. + """ + from sktime.datasets import load_airline + from sktime.transformations.series.impute import Imputer + + X = load_airline() + bootstrap_trafo = STLBootstrapTransformer(4, sp=4) * Imputer(method="nearest") + + assert bootstrap_trafo.get_tags()["scitype:transform-input"] == "Series" + assert bootstrap_trafo.get_tags()["scitype:transform-output"] == "Panel" + + Xt = bootstrap_trafo.fit_transform(X) + assert isinstance(Xt, pd.DataFrame) + assert isinstance(Xt.index, pd.MultiIndex) diff --git a/sktime/utils/_maint/_show_versions.py b/sktime/utils/_maint/_show_versions.py index d45caa54679..cc2c621ced6 100644 --- a/sktime/utils/_maint/_show_versions.py +++ b/sktime/utils/_maint/_show_versions.py @@ -75,7 +75,7 @@ def _get_deps_info(deps=None): deps = ["sktime"] def get_version(module): - return module.__version__ + return getattr(module, "__version__", None) deps_info = {} @@ -85,10 +85,11 @@ def get_version(module): mod = sys.modules[modname] else: mod = importlib.import_module(modname) - ver = get_version(mod) - deps_info[modname] = ver except ImportError: deps_info[modname] = None + else: + ver = get_version(mod) + deps_info[modname] = ver return deps_info diff --git a/sktime/utils/_maint/tests/test_show_versions.py b/sktime/utils/_maint/tests/test_show_versions.py index 0953f97a345..25c7a06b639 100644 --- a/sktime/utils/_maint/tests/test_show_versions.py +++ b/sktime/utils/_maint/tests/test_show_versions.py @@ -1,4 +1,6 @@ """Tests for the show_versions utility.""" +import pathlib +import uuid from sktime.utils._maint._show_versions import ( DEFAULT_DEPS_TO_SHOW, @@ -31,3 +33,15 @@ def test_deps_info(): assert _check_soft_dependencies(f"{key}=={deps_info_default[key]}") deps_single_key = _get_deps_info([key]) assert set(deps_single_key.keys()) == {key} + + +def test_deps_info_deps_missing_package_present_directory(): + """Test that _get_deps_info does not fail if a dependency is missing.""" + dummy_package_name = uuid.uuid4().hex + + dummy_folder_path = pathlib.Path(dummy_package_name) + dummy_folder_path.mkdir() + + assert _get_deps_info([dummy_package_name]) == {dummy_package_name: None} + + dummy_folder_path.rmdir() diff --git a/sktime/utils/_testing/deep_equals.py b/sktime/utils/_testing/deep_equals.py index 84494ad5987..f3f81adbc8e 100644 --- a/sktime/utils/_testing/deep_equals.py +++ b/sktime/utils/_testing/deep_equals.py @@ -16,9 +16,86 @@ import pandas as pd +# todo 0.27.0: check whether scikit-base>=0.6.1 lower bound is 0.6.1 or higher +# if yes, remove this legacy function and use the new one from sktime.utils.deep_equals def deep_equals(x, y, return_msg=False): """Test two objects for equality in value. + Correct if x/y are one of the following valid types: + types compatible with != comparison + pd.Series, pd.DataFrame, np.ndarray + lists, tuples, or dicts of a valid type (recursive) + delayed types that result in the above when calling .compute(), e.g., dask df + + Important note: + this function will return "not equal" if types of x,y are different + for instant, bool and numpy.bool are *not* considered equal + + Parameters + ---------- + x : object + y : object + return_msg : bool, optional, default=False + whether to return informative message about what is not equal + + Returns + ------- + is_equal: bool - True if x and y are equal in value + x and y do not need to be equal in reference + msg : str, only returned if return_msg = True + indication of what is the reason for not being equal + concatenation of the following strings: + .type - type is not equal + .len - length is not equal + .value - value is not equal + .keys - if dict, keys of dict are not equal + if class/object, names of attributes and methods are not equal + .dtype - dtype of pandas or numpy object is not equal + .index - index of pandas object is not equal + .series_equals, .df_equals, .index_equals - .equals of pd returns False + [i] - if tuple/list: i-th element not equal + [key] - if dict: value at key is not equal + [colname] - if pandas.DataFrame: column with name colname is not equal + != - call to generic != returns False + """ + from sktime.utils.validation._dependencies import _check_soft_dependencies + from sktime.utils.warnings import warn + + deprec_explain = ( + "The legacy deep_equals from sktime.utils._testing.deep_equals is " + "deprecated and should be replaced by the new deep_equals," + " from sktime.utils.deep_equals, which requires scikit-base>=0.6.1. " + ) + + removal_schedule = ( + "The legacy deep_equals is not scheduled for removal yet, this " + "warning will change to specify a removal date when it is scheduled." + ) + + if _check_soft_dependencies( + "scikit-base>=0.6.1", + package_import_alias={"scikit-base": "skbase"}, + ): + env_msg = ( + "As you have scikit-base>=0.6.1, please update your imports to use the " + "new deep_equals utility. " + ) + else: + env_msg = ( + "As you have scikit-base<0.6.1, please consider updating your environment " + "to scikit-base>=0.6.1, and update your imports to use the " + "new deep_equals utility. " + ) + + msg = deprec_explain + env_msg + removal_schedule + warn(msg, FutureWarning, stacklevel=2) + + return deep_equals_legacy(x, y, return_msg=return_msg) + + +def deep_equals_legacy(x, y, return_msg=False): + """Test two objects for equality in value. + Correct if x/y are one of the following valid types: types compatible with != comparison pd.Series, pd.DataFrame, np.ndarray @@ -98,11 +175,67 @@ def ret(is_equal, msg="", string_arguments: list = None): else: return ret(x.equals(y), ".series_equals, x = {} != y = {}", [x, y]) elif isinstance(x, pd.DataFrame): + # check column names for equality if not x.columns.equals(y.columns): return ret( False, f".columns, x.columns = {x.columns} != y.columns = {y.columns}" ) - # if columns are equal and at least one is object, recurse over Series + # check dtypes for equality + if not x.dtypes.equals(y.dtypes): + return ret( + False, f".dtypes, x.dtypes = {x.dtypes} != y.dtypes = {y.dtypes}" + ) + # check index for equality + # we are not recursing due to ambiguity in integer index types + # which may differ from pandas version to pandas version + # and would upset the type check, e.g., RangeIndex(2) vs Index([0, 1]) + xix = x.index + yix = y.index + if hasattr(xix, "dtype") and hasattr(xix, "dtype"): + if not xix.dtype == yix.dtype: + return ret( + False, + ".index.dtype, x.index.dtype = {} != y.index.dtype = {}", + [xix.dtype, yix.dtype], + ) + if hasattr(xix, "dtypes") and hasattr(yix, "dtypes"): + if not x.dtypes.equals(y.dtypes): + return ret( + False, + ".index.dtypes, x.dtypes = {} != y.index.dtypes = {}", + [xix.dtypes, yix.dtypes], + ) + ix_eq = xix.equals(yix) + if not ix_eq: + if not len(xix) == len(yix): + return ret( + False, + ".index.len, x.index.len = {} != y.index.len = {}", + [len(xix), len(yix)], + ) + if hasattr(xix, "name") and hasattr(yix, "name"): + if not xix.name == yix.name: + return ret( + False, + ".index.name, x.index.name = {} != y.index.name = {}", + [xix.name, yix.name], + ) + if hasattr(xix, "names") and hasattr(yix, "names"): + if not len(xix.names) == len(yix.names): + return ret( + False, + ".index.names, x.index.names = {} != y.index.name = {}", + [xix.names, yix.names], + ) + if not np.all(xix.names == yix.names): + return ret( + False, + ".index.names, x.index.names = {} != y.index.name = {}", + [xix.names, yix.names], + ) + elts_eq = np.all(xix == yix) + return ret(elts_eq, ".index.equals, x = {} != y = {}", [xix, yix]) + # if columns, dtypes are equal and at least one is object, recurse over Series if sum(x.dtypes == "object") > 0: for c in x.columns: is_equal, msg = deep_equals(x[c], y[c], return_msg=True) @@ -112,11 +245,22 @@ def ret(is_equal, msg="", string_arguments: list = None): else: return ret(x.equals(y), ".df_equals, x = {} != y = {}", [x, y]) elif isinstance(x, pd.Index): + if hasattr(x, "dtype") and hasattr(y, "dtype"): + if not x.dtype == y.dtype: + return ret(False, f".dtype, x.dtype = {x.dtype} != y.dtype = {y.dtype}") + if hasattr(x, "dtypes") and hasattr(y, "dtypes"): + if not x.dtypes.equals(y.dtypes): + return ret( + False, f".dtypes, x.dtypes = {x.dtypes} != y.dtypes = {y.dtypes}" + ) return ret(x.equals(y), ".index_equals, x = {} != y = {}", [x, y]) elif isinstance(x, np.ndarray): if x.dtype != y.dtype: return ret(False, f".dtype, x.dtype = {x.dtype} != y.dtype = {y.dtype}") - return ret(np.array_equal(x, y, equal_nan=True), ".values") + if x.dtype in ["object", "str"]: + return ret(np.array_equal(x, y), ".values") + else: + return ret(np.array_equal(x, y, equal_nan=True), ".values") # recursion through lists, tuples and dicts elif isinstance(x, (list, tuple)): return ret(*_tuple_equals(x, y, return_msg=True)) diff --git a/sktime/utils/_testing/estimator_checks.py b/sktime/utils/_testing/estimator_checks.py index a275d8d644e..ff56eb21fd1 100644 --- a/sktime/utils/_testing/estimator_checks.py +++ b/sktime/utils/_testing/estimator_checks.py @@ -20,6 +20,7 @@ from sktime.datatypes._panel._check import is_nested_dataframe from sktime.dists_kernels import BasePairwiseTransformer, BasePairwiseTransformerPanel from sktime.forecasting.base import BaseForecaster +from sktime.registry import scitype from sktime.regression.base import BaseRegressor from sktime.tests._config import VALID_ESTIMATOR_TYPES from sktime.transformations.base import BaseTransformer @@ -216,6 +217,11 @@ def get_tag(est, tag_name, tag_value_default=None): return True return get_tag(est, "capability:pred_int", False) # skip transform for forecasters that have it - pipelines - if method == "transform" and isinstance(est, BaseForecaster): + if method == "transform" and scitype(est) in ( + "classifier", + "forecaster", + ): + return False + if method == "predict" and scitype(est) == "transformer": return False return True diff --git a/sktime/utils/_testing/scenarios_classification.py b/sktime/utils/_testing/scenarios_classification.py index ae12056bc14..dedff1ed032 100644 --- a/sktime/utils/_testing/scenarios_classification.py +++ b/sktime/utils/_testing/scenarios_classification.py @@ -14,9 +14,7 @@ from inspect import isclass from sktime.base import BaseObject -from sktime.classification.base import BaseClassifier -from sktime.classification.early_classification import BaseEarlyClassifier -from sktime.regression.base import BaseRegressor +from sktime.registry import scitype from sktime.utils._testing.hierarchical import _make_hierarchical from sktime.utils._testing.panel import _make_classification_y, _make_panel_X from sktime.utils._testing.scenarios import TestScenario @@ -71,12 +69,14 @@ def get_tag(obj, tag_name): else: return obj.get_tag(tag_name) - regr_or_classf = (BaseClassifier, BaseEarlyClassifier, BaseRegressor) - # applicable only if obj inherits from BaseClassifier, BaseEarlyClassifier or # BaseRegressor. currently we test both classifiers and regressors using these # scenarios - if not isinstance(obj, regr_or_classf) and not issubclass(obj, regr_or_classf): + if scitype(obj) not in ( + "classifier", + "early_classifier", + "regressor", + ): return False # if X is multivariate, applicable only if can handle multivariate diff --git a/sktime/utils/_testing/scenarios_forecasting.py b/sktime/utils/_testing/scenarios_forecasting.py index 2dc06aecd25..9281153fde7 100644 --- a/sktime/utils/_testing/scenarios_forecasting.py +++ b/sktime/utils/_testing/scenarios_forecasting.py @@ -18,7 +18,7 @@ from sktime.base import BaseObject from sktime.datatypes import mtype_to_scitype -from sktime.forecasting.base import BaseForecaster +from sktime.registry import scitype from sktime.utils._testing.hierarchical import _make_hierarchical from sktime.utils._testing.panel import _make_panel_X from sktime.utils._testing.scenarios import TestScenario @@ -49,7 +49,7 @@ def get_tag(obj, tag_name): return obj.get_tag(tag_name) # applicable only if obj inherits from BaseForecaster - if not isinstance(obj, BaseForecaster) and not issubclass(obj, BaseForecaster): + if scitype(obj) != "forecaster": return False # applicable only if number of variables in y complies with scitype:y diff --git a/sktime/utils/_testing/tests/test_panel.py b/sktime/utils/_testing/tests/test_panel.py index c791eaa571d..788c7f057cf 100644 --- a/sktime/utils/_testing/tests/test_panel.py +++ b/sktime/utils/_testing/tests/test_panel.py @@ -58,7 +58,9 @@ def test_make_panel(n_instances, n_columns, n_timepoints, return_mtype): return_mtype=return_mtype, ) - valid, _, metadata = check_is_mtype(X, mtype=return_mtype, return_metadata=True) + valid, _, metadata = check_is_mtype( + X, mtype=return_mtype, return_metadata=True, msg_return_dict="list" + ) msg = f"_make_panel_X generated data does not comply with mtype {return_mtype}" assert valid, msg assert metadata["n_instances"] == n_instances diff --git a/sktime/utils/deep_equals/__init__.py b/sktime/utils/deep_equals/__init__.py new file mode 100644 index 00000000000..cdbd0571aff --- /dev/null +++ b/sktime/utils/deep_equals/__init__.py @@ -0,0 +1,19 @@ +# copyright: sktime developers, BSD-3-Clause License (see LICENSE file) +"""Module for nested equality checking.""" +from sktime.utils.validation._dependencies import _check_soft_dependencies + +# todo 0.27.0: check whether scikit-base>=0.6.1 lower bound is 0.6.1 or higher +# if yes, remove legacy handling and only use the new deep_equals +if _check_soft_dependencies( + "scikit-base<0.6.1", + package_import_alias={"scikit-base": "skbase"}, + severity="none", +): + from sktime.utils._testing.deep_equals import deep_equals + +else: + from sktime.utils.deep_equals._deep_equals import deep_equals + +__all__ = [ + "deep_equals", +] diff --git a/sktime/utils/deep_equals/_deep_equals.py b/sktime/utils/deep_equals/_deep_equals.py new file mode 100644 index 00000000000..1d7d3397767 --- /dev/null +++ b/sktime/utils/deep_equals/_deep_equals.py @@ -0,0 +1,187 @@ +"""Testing utility to compare equality in value for nested objects. + +Objects compared can have one of the following valid types: + types compatible with != comparison + pd.Series, pd.DataFrame, np.ndarray + lists, tuples, or dicts of a valid type (recursive) + polars.DataFrame, polars.LazyFrame +""" +from skbase.utils.deep_equals._common import _make_ret +from skbase.utils.deep_equals._deep_equals import deep_equals as _deep_equals + +__author__ = ["fkiraly"] +__all__ = ["deep_equals"] + + +def deep_equals(x, y, return_msg=False, plugins=None): + """Test two objects for equality in value. + + Correct if x/y are one of the following valid types: + types compatible with != comparison + pd.Series, pd.DataFrame, np.ndarray + lists, tuples, or dicts of a valid type (recursive) + + Important note: + this function will return "not equal" if types of x,y are different + for instant, bool and numpy.bool are *not* considered equal + + Parameters + ---------- + x : object + y : object + return_msg : bool, optional, default=False + whether to return informative message about what is not equal + plugins : list, optional, default=None + optional additional deep_equals plugins to use + will be appended to the default plugins from ``skbase`` ``deep_equals_custom`` + see ``skbase`` ``deep_equals_custom`` for details of signature of plugins + + Returns + ------- + is_equal: bool - True if x and y are equal in value + x and y do not need to be equal in reference + msg : str, only returned if return_msg = True + indication of what is the reason for not being equal + concatenation of the following strings: + .type - type is not equal + .len - length is not equal + .value - value is not equal + .keys - if dict, keys of dict are not equal + if class/object, names of attributes and methods are not equal + .dtype - dtype of pandas or numpy object is not equal + .index - index of pandas object is not equal + .series_equals, .df_equals, .index_equals - .equals of pd returns False + [i] - if tuple/list: i-th element not equal + [key] - if dict: value at key is not equal + [colname] - if pandas.DataFrame: column with name colname is not equal + != - call to generic != returns False + """ + # call deep_equals_custom with default plugins + plugins_default = [ + _csr_matrix_equals_plugin, + _dask_dataframe_equals_plugin, + _fh_equals_plugin, + ] + + if plugins is not None: + plugins_inner = plugins_default + plugins + else: + plugins_inner = plugins_default + + res = _deep_equals(x, y, return_msg=return_msg, plugins=plugins_inner) + return res + + +def _fh_equals_plugin(x, y, return_msg=False, deep_equals=None): + """Test two forecasting horizons for equality. + + Correct if both x and y are ForecastingHorizon. + + Parameters + ---------- + x: ForcastingHorizon + y: ForcastingHorizon + return_msg : bool, optional, default=False + whether to return informative message about what is not equal + + Returns + ------- + is_equal: bool - True if x and y are equal in value + x and y do not need to be equal in reference + msg : str, only returned if return_msg = True + indication of what is the reason for not being equal + concatenation of the following strings: + .is_relative - x is absolute and y is relative, or vice versa + .values - values of x and y are not equal + """ + if type(x).__name__ != "ForecastingHorizon": + return None + + ret = _make_ret(return_msg) + + if x.is_relative != y.is_relative: + return ret(False, ".is_relative") + + # recurse through values of x, y + is_equal, msg = deep_equals(x._values, y._values, return_msg=True) + if not is_equal: + return ret(False, ".values" + msg) + + return ret(True, "") + + +def _csr_matrix_equals_plugin(x, y, return_msg=False, deep_equals=None): + """Test two scipy csr_matrix for equality. + + Correct if both x and y are csr_matrix. + + Parameters + ---------- + x: csr_matrix + y: csr_matrix + return_msg : bool, optional, default=False + whether to return informative message about what is not equal + + Returns + ------- + is_equal: bool - True if x and y are equal in value + x and y do not need to be equal in reference + msg : str, only returned if return_msg = True + indication of what is the reason for not being equal + if unequal, returns string + returns None if this function does not apply, i.e., x is not dask + """ + if type(x).__name__ != "csr_matrix": # isinstance(x, csr_matrix): + return None + + import numpy as np + + ret = _make_ret(return_msg) + + # csr-matrix must not be compared using np.any(x!=y) + if not np.allclose(x.A, y.A): + return ret(False, " !=, {} != {}", [x, y]) + + return ret(True, "") + + +def _dask_dataframe_equals_plugin(x, y, return_msg=False, deep_equals=None): + """Test two dask dataframes for equality. + + Correct if both x and y are dask.dataframe. + + Parameters + ---------- + x: dask.dataframe + y: dask.dataframe + return_msg : bool, optional, default=False + whether to return informative message about what is not equal + + Returns + ------- + is_equal: bool - True if x and y are equal in value + x and y do not need to be equal in reference + msg : str, only returned if return_msg = True + indication of what is the reason for not being equal + if unequal, returns string + returns None if this function does not apply, i.e., x is not dask.dataframe + """ + if not hasattr(x, "compute"): + return None + + from sktime.utils.validation._dependencies import _check_soft_dependencies + + dask_available = _check_soft_dependencies("dask", severity="none") + + if not dask_available: + return None + + import dask + + if not isinstance(x, dask.dataframe.DataFrame): + return None + + x = x.compute() + y = y.compute() + + return deep_equals(x, y, return_msg=return_msg) diff --git a/sktime/utils/deep_equals/tests/__init__.py b/sktime/utils/deep_equals/tests/__init__.py new file mode 100644 index 00000000000..b55ccc21978 --- /dev/null +++ b/sktime/utils/deep_equals/tests/__init__.py @@ -0,0 +1 @@ +"""Tests for deep_equals utility.""" diff --git a/sktime/utils/_testing/tests/test_deep_equals.py b/sktime/utils/deep_equals/tests/test_deep_equals.py similarity index 85% rename from sktime/utils/_testing/tests/test_deep_equals.py rename to sktime/utils/deep_equals/tests/test_deep_equals.py index 9d16330eb17..7b1b849f88f 100644 --- a/sktime/utils/_testing/tests/test_deep_equals.py +++ b/sktime/utils/deep_equals/tests/test_deep_equals.py @@ -4,9 +4,10 @@ import numpy as np import pandas as pd import pytest +from scipy.sparse import csr_matrix from sktime.forecasting.base import ForecastingHorizon -from sktime.utils._testing.deep_equals import deep_equals +from sktime.utils.deep_equals._deep_equals import deep_equals # examples used for comparison below EXAMPLES = [ @@ -26,6 +27,11 @@ ForecastingHorizon([1, 2, 3], is_relative=False), {"foo": [42], "bar": pd.Series([1, 2])}, {"bar": [42], "foo": pd.Series([1, 2])}, + csr_matrix((3, 4), dtype=np.int8), + pd.Index([1, 2, 3]), + pd.Index([2, 3, 4]), + np.array([0.1, 1], dtype="object"), + np.array([0.2, 1], dtype="object"), ] diff --git a/sktime/utils/estimator_checks.py b/sktime/utils/estimator_checks.py index 46319ee11cd..1d05c65dccc 100644 --- a/sktime/utils/estimator_checks.py +++ b/sktime/utils/estimator_checks.py @@ -3,8 +3,6 @@ __author__ = ["fkiraly"] __all__ = ["check_estimator"] -from inspect import isclass - from sktime.utils.validation._dependencies import _check_soft_dependencies @@ -108,70 +106,14 @@ def check_estimator( ) _check_soft_dependencies("pytest", msg=msg) - from sktime.alignment.tests.test_all_aligners import TestAllAligners - from sktime.base import BaseEstimator - from sktime.classification.early_classification.tests.test_all_early_classifiers import ( # noqa E501 - TestAllEarlyClassifiers, - ) - from sktime.classification.tests.test_all_classifiers import TestAllClassifiers - from sktime.dists_kernels.tests.test_all_dist_kernels import ( - TestAllPairwiseTransformers, - TestAllPanelTransformers, - ) - from sktime.forecasting.tests.test_all_forecasters import TestAllForecasters - from sktime.param_est.tests.test_all_param_est import TestAllParamFitters - from sktime.proba.tests.test_all_distrs import TestAllDistributions - from sktime.registry import scitype - from sktime.regression.tests.test_all_regressors import TestAllRegressors - from sktime.tests.test_all_estimators import TestAllEstimators, TestAllObjects - from sktime.transformations.tests.test_all_transformers import TestAllTransformers - - testclass_dict = dict() - testclass_dict["aligner"] = TestAllAligners - testclass_dict["classifier"] = TestAllClassifiers - testclass_dict["distribution"] = TestAllDistributions - testclass_dict["early_classifier"] = TestAllEarlyClassifiers - testclass_dict["forecaster"] = TestAllForecasters - testclass_dict["param_est"] = TestAllParamFitters - testclass_dict["regressor"] = TestAllRegressors - testclass_dict["transformer"] = TestAllTransformers - testclass_dict["transformer-pairwise"] = TestAllPairwiseTransformers - testclass_dict["transformer-pairwise-panel"] = TestAllPanelTransformers - - results = TestAllObjects().run_tests( - estimator=estimator, - raise_exceptions=raise_exceptions, - tests_to_run=tests_to_run, - fixtures_to_run=fixtures_to_run, - tests_to_exclude=tests_to_exclude, - fixtures_to_exclude=fixtures_to_exclude, - ) - - def is_estimator(obj): - """Return whether obj is an estimator class or estimator object.""" - if isclass(obj): - return issubclass(obj, BaseEstimator) - else: - return isinstance(obj, BaseEstimator) + from sktime.tests.test_class_register import get_test_classes_for_obj - if is_estimator(estimator): - results_estimator = TestAllEstimators().run_tests( - estimator=estimator, - raise_exceptions=raise_exceptions, - tests_to_run=tests_to_run, - fixtures_to_run=fixtures_to_run, - tests_to_exclude=tests_to_exclude, - fixtures_to_exclude=fixtures_to_exclude, - ) - results.update(results_estimator) + test_clss_for_est = get_test_classes_for_obj(estimator) - try: - scitype_of_estimator = scitype(estimator) - except Exception: - scitype_of_estimator = "" + results = {} - if scitype_of_estimator in testclass_dict.keys(): - results_scitype = testclass_dict[scitype_of_estimator]().run_tests( + for test_cls in test_clss_for_est: + test_cls_results = test_cls().run_tests( estimator=estimator, raise_exceptions=raise_exceptions, tests_to_run=tests_to_run, @@ -179,7 +121,7 @@ def is_estimator(obj): tests_to_exclude=tests_to_exclude, fixtures_to_exclude=fixtures_to_exclude, ) - results.update(results_scitype) + results.update(test_cls_results) failed_tests = [key for key in results.keys() if results[key] != "PASSED"] if len(failed_tests) > 0: diff --git a/sktime/utils/estimators/tests/test_forecasters.py b/sktime/utils/estimators/tests/test_forecasters.py index 686c481f6a7..2e2419ce2ad 100644 --- a/sktime/utils/estimators/tests/test_forecasters.py +++ b/sktime/utils/estimators/tests/test_forecasters.py @@ -8,7 +8,6 @@ from sktime.datasets import load_airline from sktime.forecasting.base import ForecastingHorizon -from sktime.utils._testing.deep_equals import deep_equals from sktime.utils.estimators import MockUnivariateForecasterLogger y_series = load_airline().iloc[:-5] @@ -39,6 +38,8 @@ def test_mock_univariate_forecaster_log(y, X_train, X_pred, fh): - All the private methods that have logging enabled are in the log - the correct inner mtypes are preserved, according to the forecaster tags """ + from sktime.utils.deep_equals import deep_equals + forecaster = MockUnivariateForecasterLogger() forecaster.fit(y, X_train, fh) forecaster.predict(fh, X_pred) diff --git a/sktime/utils/git_diff.py b/sktime/utils/git_diff.py index a88a7e9ac6a..fb0b29a70fb 100644 --- a/sktime/utils/git_diff.py +++ b/sktime/utils/git_diff.py @@ -78,3 +78,83 @@ class to get module string from, e.g., NaiveForecaster """ module_str = get_module_from_class(cls) return is_module_changed(module_str) + + +def get_changed_lines(file_path, only_indented=True): + """Get changed or added lines from a file. + + Compares the current branch to the origin-main branch. + + Parameters + ---------- + file_path : str + path to file to get changed lines from + only_indented : bool, default=True + if True, only indented lines are returned, otherwise all lines are returned; + more precisely, only changed/added lines starting with a space are returned + + Returns + ------- + list of str : changed or added lines on current branch + """ + cmd = f"git diff remotes/origin/main -- {file_path}" + + try: + # Run 'git diff' command to get the changes in the specified file + result = subprocess.check_output(cmd, shell=True, text=True) + + # if only indented lines are requested, add space to start_chars + start_chars = "+" + if only_indented: + start_chars += " " + + # Extract the changed or new lines and return as a list of strings + changed_lines = [ + line.strip() for line in result.split("\n") if line.startswith(start_chars) + ] + # remove first character ('+') from each line + changed_lines = [line[1:] for line in changed_lines] + + return changed_lines + + except subprocess.CalledProcessError: + return [] + + +def get_packages_with_changed_specs(): + """Get packages with changed or added specs. + + Returns + ------- + list of str : names of packages with changed or added specs + """ + from packaging.requirements import Requirement + + changed_lines = get_changed_lines("pyproject.toml") + + packages = [] + for line in changed_lines: + if line.find("'") > line.find('"') and line.find('"') != -1: + sep = '"' + elif line.find("'") == -1: + sep = '"' + else: + sep = "'" + + splits = line.split(sep) + if len(splits) < 2: + continue + + req = line.split(sep)[1] + + # deal with ; python_version >= "3.7" in requirements + if ";" in req: + req = req.split(";")[0] + + pkg = Requirement(req).name + packages.append(pkg) + + # make unique + packages = list(set(packages)) + + return packages diff --git a/sktime/utils/index.py b/sktime/utils/index.py new file mode 100644 index 00000000000..c9c4cde9f68 --- /dev/null +++ b/sktime/utils/index.py @@ -0,0 +1,20 @@ +"""Utility functions for working with indices.""" + +import numpy as np + + +def random_ss_ix(ix, size, replace=True): + """Randomly uniformly sample indices from a list of indices. + + Parameters + ---------- + ix : pd.Index or subsettable iterable via getitem + list of indices to sample from + size : int + number of indices to sample + replace : bool, default=True + whether to sample with replacement + """ + a = range(len(ix)) + ixs = ix[np.random.choice(a, size=size, replace=replace)] + return ixs diff --git a/sktime/utils/mlflow_sktime.py b/sktime/utils/mlflow_sktime.py index 7c720bcdf52..4124bda414b 100644 --- a/sktime/utils/mlflow_sktime.py +++ b/sktime/utils/mlflow_sktime.py @@ -33,7 +33,7 @@ `pyfunc.predict()` will return output from sktime `predict()` method. """ -__author__ = ["benjaminbluhm"] +__author__ = ["benjaminbluhm", "achieveordie"] __all__ = [ "get_default_pip_requirements", "get_default_conda_env", @@ -44,17 +44,17 @@ import logging import os -import pickle import pandas as pd import yaml import sktime from sktime import utils +from sktime.base._serialize import load from sktime.utils.multiindex import flatten_multiindex -from sktime.utils.validation._dependencies import _check_soft_dependencies +from sktime.utils.validation._dependencies import _check_mlflow_dependencies -if _check_soft_dependencies("mlflow", severity="warning"): +if _check_mlflow_dependencies(severity="warning"): from mlflow import pyfunc FLAVOR_NAME = "mlflow_sktime" @@ -93,7 +93,7 @@ def get_default_pip_requirements(include_cloudpickle=False): Calls to :func:`save_model()` and :func:`log_model()` produce a pip environment that, at a minimum, contains these requirements. """ - _check_soft_dependencies("mlflow", severity="error") + _check_mlflow_dependencies(severity="error") from mlflow.utils.requirements_utils import _get_pinned_requirement pip_deps = [_get_pinned_requirement("sktime")] @@ -111,7 +111,7 @@ def get_default_conda_env(include_cloudpickle=False): The default Conda environment for MLflow Models produced by calls to :func:`save_model()` and :func:`log_model()` """ - _check_soft_dependencies("mlflow", severity="error") + _check_mlflow_dependencies(severity="error") from mlflow.utils.environment import _mlflow_conda_env return _mlflow_conda_env( @@ -211,7 +211,7 @@ def save_model( >>> loaded_model = mlflow_sktime.load_model(model_uri=model_path) # doctest: +SKIP >>> loaded_model.predict(fh=[1, 2, 3]) # doctest: +SKIP """ # noqa: E501 - _check_soft_dependencies("mlflow", severity="error") + _check_mlflow_dependencies(severity="error") from mlflow.exceptions import MlflowException from mlflow.models import Model from mlflow.models.model import MLMODEL_FILE_NAME @@ -258,7 +258,7 @@ def save_model( if input_example is not None: _save_example(mlflow_model, input_example, path) - model_data_subpath = "model.pkl" + model_data_subpath = "model" model_data_path = os.path.join(path, model_data_subpath) _save_model( sktime_model, model_data_path, serialization_format=serialization_format @@ -420,7 +420,7 @@ def log_model( ... sktime_model=forecaster, ... artifact_path=artifact_path) # doctest: +SKIP """ # noqa: E501 - _check_soft_dependencies("mlflow", severity="error") + _check_mlflow_dependencies(severity="error") from mlflow.models import Model if await_registration_for is None: @@ -493,7 +493,7 @@ def load_model(model_uri, dst_path=None): ... path=model_path) >>> loaded_model = mlflow_sktime.load_model(model_uri=model_path) # doctest: +SKIP """ # noqa: E501 - _check_soft_dependencies("mlflow", severity="error") + _check_mlflow_dependencies(severity="error") from mlflow.tracking.artifact_utils import _download_artifact_from_uri from mlflow.utils.model_utils import ( _add_code_from_conf_to_system_path, @@ -519,30 +519,20 @@ def load_model(model_uri, dst_path=None): def _save_model(model, path, serialization_format): - _check_soft_dependencies("mlflow", severity="error") + _check_mlflow_dependencies(severity="error") from mlflow.exceptions import MlflowException from mlflow.protos.databricks_pb2 import INTERNAL_ERROR - with open(path, "wb") as out: - if serialization_format == SERIALIZATION_FORMAT_PICKLE: - pickle.dump(model, out) - elif serialization_format == SERIALIZATION_FORMAT_CLOUDPICKLE: - _check_soft_dependencies("cloudpickle", severity="error") - import cloudpickle - - cloudpickle.dump(model, out) - else: - raise MlflowException( - message="Unrecognized serialization format: " - "{serialization_format}".format( - serialization_format=serialization_format - ), - error_code=INTERNAL_ERROR, - ) + if serialization_format not in SUPPORTED_SERIALIZATION_FORMATS: + raise MlflowException( + message="Unrecognized serialization format: " f"{serialization_format}.", + error_code=INTERNAL_ERROR, + ) + model.save(path=path, serialization_format=serialization_format) def _load_model(path, serialization_format): - _check_soft_dependencies("mlflow", severity="error") + _check_mlflow_dependencies(severity="error") from mlflow.exceptions import MlflowException from mlflow.protos.databricks_pb2 import INVALID_PARAMETER_VALUE @@ -559,14 +549,7 @@ def _load_model(path, serialization_format): error_code=INVALID_PARAMETER_VALUE, ) - with open(path, "rb") as pickled_model: - if serialization_format == SERIALIZATION_FORMAT_PICKLE: - return pickle.load(pickled_model) - elif serialization_format == SERIALIZATION_FORMAT_CLOUDPICKLE: - _check_soft_dependencies("cloudpickle", severity="error") - import cloudpickle - - return cloudpickle.load(pickled_model) + return load(path) def _load_pyfunc(path): @@ -585,7 +568,7 @@ def _load_pyfunc(path): ---------- .. [1] https://www.mlflow.org/docs/latest/python_api/mlflow.pyfunc.html#mlflow.pyfunc.load_model """ # noqa: E501 - _check_soft_dependencies("mlflow", severity="error") + _check_mlflow_dependencies(severity="error") from mlflow.exceptions import MlflowException from mlflow.utils.model_utils import _get_flavor_configuration @@ -621,7 +604,7 @@ def _load_pyfunc(path): class _SktimeModelWrapper: def __init__(self, sktime_model): - _check_soft_dependencies("mlflow", severity="error") + _check_mlflow_dependencies(severity="error") self.sktime_model = sktime_model def predict(self, X): diff --git a/sktime/utils/pandas.py b/sktime/utils/pandas.py new file mode 100644 index 00000000000..b0419e67bd2 --- /dev/null +++ b/sktime/utils/pandas.py @@ -0,0 +1,26 @@ +#!/usr/bin/env python3 -u +"""Utilities for pandas adapbation.""" + +__author__ = ["fkiraly"] + + +def df_map(x): + """Access map or applymap, of DataFrame. + + In pandas 2.1.0, applymap was deprecated in favor of the newly introduced map. + To ensure compatibility with older versions, we use map if available, + otherwise applymap. + + Parameters + ---------- + x : assumed pd.DataFrame + + Returns + ------- + x.map, if available, otherwise x.applymap + Note: returns method itself, not result of method call + """ + if hasattr(x, "map"): + return x.map + else: + return x.applymap diff --git a/sktime/utils/parallel.py b/sktime/utils/parallel.py index ab839145cac..5e099248523 100644 --- a/sktime/utils/parallel.py +++ b/sktime/utils/parallel.py @@ -38,6 +38,7 @@ def parallelize(fun, iter, meta=None, backend=None, backend_params=None): - "None": executes loop sequentally, simple list comprehension - "loky", "multiprocessing" and "threading": uses ``joblib`` ``Parallel`` loops + - "joblib": custom and 3rd party ``joblib`` backends, e.g., ``spark`` - "dask": uses ``dask``, requires ``dask`` package in environment - "dask_lazy": same as ``"dask"``, but returns delayed object instead of list @@ -46,9 +47,16 @@ def parallelize(fun, iter, meta=None, backend=None, backend_params=None): Valid keys depend on the value of ``backend``: - "None": no additional parameters, ``backend_params`` is ignored - - "loky", "multiprocessing" and "threading": + - "loky", "multiprocessing" and "threading": default ``joblib`` backends any valid keys for ``joblib.Parallel`` can be passed here, e.g., ``n_jobs``, - with the exception of ``backend`` which is directly controlled by ``backend`` + with the exception of ``backend`` which is directly controlled by ``backend``. + If ``n_jobs`` is not passed, it will default to ``-1``, other parameters + will default to ``joblib`` defaults. + - "joblib": custom and 3rd party ``joblib`` backends, e.g., ``spark``. + any valid keys for ``joblib.Parallel`` can be passed here, e.g., ``n_jobs``, + ``backend`` must be passed as a key of ``backend_params`` in this case. + If ``n_jobs`` is not passed, it will default to ``-1``, other parameters + will default to ``joblib`` defaults. - "dask": any valid keys for ``dask.compute`` can be passed, e.g., ``scheduler`` """ if meta is None: @@ -72,6 +80,7 @@ def parallelize(fun, iter, meta=None, backend=None, backend_params=None): "loky": "joblib", "multiprocessing": "joblib", "threading": "joblib", + "joblib": "joblib", "dask": "dask", "dask_lazy": "dask", } @@ -92,7 +101,29 @@ def _parallelize_joblib(fun, iter, meta, backend, backend_params): from joblib import Parallel, delayed par_params = backend_params.copy() - par_params["backend"] = backend + if "backend" not in par_params: + # if user selects custom joblib backend but does not specify backend explicitly, + # raise a ValueError + if backend == "joblib": + raise ValueError( + '"joblib" was selected as first layer parallelization backend, ' + "but no backend string was " + 'passed in the backend parameters dict, e.g., "spark". ' + "Please specify a backend to joblib as a key-value pair " + "in the backend_params arg or the backend:parallel:params config " + 'when using "joblib". ' + 'For clarity, "joblib" should only be used for two-layer ' + "backend dispatch, where the first layer is joblib, " + "and the second layer is a custom backend of joblib, e.g., spark. " + "For first-party joblib backends, please use the backend string " + 'of sktime directly, e.g., by specifying "multiprocessing" or "loky".' + ) + # in all other cases, we ensure the backend parameter is one of + # "loky", "multiprocessing" or "threading", as passed via backend + else: + par_params["backend"] = backend + elif backend != "joblib": + par_params["backend"] = backend if "n_jobs" not in par_params: par_params["n_jobs"] = -1 @@ -118,11 +149,31 @@ def _parallelize_dask(fun, iter, meta, backend, backend_params): para_dict["dask"] = _parallelize_dask -def _get_parallel_test_fixtures(): +def _get_parallel_test_fixtures(naming="estimator"): """Return fixtures for parallelization tests. Returns a list of parameter fixtures, where each fixture is a dict with keys "backend" and "backend_params". + + Parameters + ---------- + naming : str, optional + naming convention for the parameters, one of + + "estimator": for use in estimator constructors, + ``backend`` and ``backend_params`` + "config": for use in ``set_config``, + ``backend:parallel`` and ``backend:parallel:params`` + + Returns + ------- + fixtures : list of dict + list of backend parameter fixtures + keys depend on ``naming`` parameter, see above + either ``backend`` and ``backend_params`` (``naming="estimator"``), + or ``backend:parallel`` and ``backend:parallel:params`` (``naming="config"``) + values are backend strings and backend parameter dicts + only backends that are available in the environment are included """ from sktime.utils.validation._dependencies import _check_soft_dependencies diff --git a/sktime/utils/plotting.py b/sktime/utils/plotting.py index 64e2b49fa8f..ef4af2ce468 100644 --- a/sktime/utils/plotting.py +++ b/sktime/utils/plotting.py @@ -2,8 +2,8 @@ # copyright: sktime developers, BSD-3-Clause License (see LICENSE file) """Common timeseries plotting functionality.""" -__all__ = ["plot_series", "plot_correlations", "plot_windows"] -__author__ = ["mloning", "RNKuhns", "Dbhasin1", "chillerobscuro"] +__all__ = ["plot_series", "plot_correlations", "plot_windows", "plot_calibration"] +__author__ = ["mloning", "RNKuhns", "Dbhasin1", "chillerobscuro", "benheid"] import math from warnings import simplefilter, warn @@ -176,20 +176,23 @@ def format_fn(tick_val, tick_pos): ax.legend() if pred_interval is not None: check_interval_df(pred_interval, series[-1].index) - ax = plot_interval(ax, pred_interval) + ax = plot_interval(ax, pred_interval, index) if _ax_kwarg_is_none: return fig, ax else: return ax -def plot_interval(ax, interval_df): +def plot_interval(ax, interval_df, ix=None): cov = interval_df.columns.levels[1][0] var_name = interval_df.columns.levels[0][0] + x_ix = np.argwhere(ix.isin(interval_df.index)).ravel() + x_ix = np.array(x_ix) + ax.fill_between( - ax.get_lines()[-1].get_xdata(), - interval_df[var_name][cov]["lower"].astype("float64"), - interval_df[var_name][cov]["upper"].astype("float64"), + x_ix, + interval_df[var_name][cov]["lower"].astype("float64").to_numpy(), + interval_df[var_name][cov]["upper"].astype("float64").to_numpy(), alpha=0.2, color=ax.get_lines()[-1].get_c(), label=f"{int(cov * 100)}% prediction interval", @@ -446,7 +449,7 @@ def plot_windows(cv, y, title="", ax=None): # create figure if no ax provided for plotting if _ax_kwarg_is_none: - fig, ax = plt.subplots(1, figsize=plt.figaspect(0.25)) + fig, ax = plt.subplots(figsize=plt.figaspect(0.3)) train_windows, test_windows = _get_windows(cv, y) @@ -460,8 +463,6 @@ def get_y(length, split): train_color, test_color = sns.color_palette("colorblind")[:2] - fig, ax = plt.subplots(figsize=plt.figaspect(0.3)) - for i in range(n_splits): train = train_windows[i] test = test_windows[i] @@ -501,3 +502,66 @@ def get_y(length, split): return fig, ax else: return ax + + +def plot_calibration(y_true, y_pred, ax=None): + """Plot the calibration of a probabilistic forecast. + + Calculates internally the calibration of the quantile forecast and + visualise it. + + x-axis: interval from 0 to 1 + y-axis: interval from 0 to 1 + plot elements: the calibration fo the forecast (blue) and the ideal + calibration (orange) + + Parameters + ---------- + y_true : pd.Series, single columned pd.DataFrame, or single columned np.array. + The actual values of the forecast + y_pred : pd.DataFrame + The quantile forecast. + ax : matplotlib.axes.Axes, optional (default=None) + Axes on which to plot. If None, axes will be created and returned. + + Returns + ------- + fig : matplotlib.figure.Figure, returned only if ax is None + matplotlib figure object + ax : matplotlib.axes.Axes + matplotlib axes object with the figure + """ + import matplotlib.pyplot as plt + + series = convert_to(y_true, "pd.Series", "Series") + + _ax_kwarg_is_none = True if ax is None else False + + if _ax_kwarg_is_none: + fig, ax = plt.subplots(1, figsize=plt.figaspect(0.25)) + + result = [0] + ideal_calibration = [0] + + for col in y_pred.columns: + if isinstance(col, tuple): + q = col[1] + else: + q = col + pred_q = convert_to(y_pred[[col]], "pd.Series", "Series") + result.append(sum(series.values < pred_q.values) / len(pred_q.values)) + ideal_calibration.append(q) + result.append(1) + ideal_calibration.append(1) + + df = pd.DataFrame( + {"Forecast's Calibration": result, "Ideal Calibration": ideal_calibration}, + index=ideal_calibration, + ) + + df.plot(ax=ax) + + if _ax_kwarg_is_none: + return fig, ax + else: + return ax diff --git a/sktime/utils/sklearn/__init__.py b/sktime/utils/sklearn/__init__.py new file mode 100644 index 00000000000..5b1546d3361 --- /dev/null +++ b/sktime/utils/sklearn/__init__.py @@ -0,0 +1,21 @@ +"""Sklearn related utility functionality.""" + +from sktime.utils.sklearn._adapt_df import prep_skl_df +from sktime.utils.sklearn._scitype import ( + is_sklearn_classifier, + is_sklearn_clusterer, + is_sklearn_estimator, + is_sklearn_regressor, + is_sklearn_transformer, + sklearn_scitype, +) + +__all__ = [ + "prep_skl_df", + "is_sklearn_estimator", + "is_sklearn_transformer", + "is_sklearn_classifier", + "is_sklearn_regressor", + "is_sklearn_clusterer", + "sklearn_scitype", +] diff --git a/sktime/utils/sklearn/_adapt_df.py b/sktime/utils/sklearn/_adapt_df.py new file mode 100644 index 00000000000..300764c1b67 --- /dev/null +++ b/sktime/utils/sklearn/_adapt_df.py @@ -0,0 +1,31 @@ +"""Utility functions for adapting to sklearn.""" +# copyright: sktime developers, BSD-3-Clause License (see LICENSE file) + +import numpy as np + + +def prep_skl_df(df, copy_df=False): + """Make df compatible with sklearn input expectations. + + Changes: + turns column index into a list of strings + + Parameters + ---------- + df : pd.DataFrame + list of indices to sample from + copy_df : bool, default=False + whether to mutate df or return a copy + if False, index of df is mutated + if True, original df is not mutated. If index is not a list of strings, + a copy is made and the copy is mutated. Otherwise, the original df is returned. + """ + cols = df.columns + str_cols = cols.astype(str) + + if not np.all(str_cols == cols): + if copy_df: + df = df.copy() + df.columns = str_cols + + return df diff --git a/sktime/utils/sklearn.py b/sktime/utils/sklearn/_scitype.py similarity index 100% rename from sktime/utils/sklearn.py rename to sktime/utils/sklearn/_scitype.py diff --git a/sktime/utils/sklearn/tests/__init__.py b/sktime/utils/sklearn/tests/__init__.py new file mode 100644 index 00000000000..1d9b760fee6 --- /dev/null +++ b/sktime/utils/sklearn/tests/__init__.py @@ -0,0 +1 @@ +"""Tests for sklearn related utility functionality.""" diff --git a/sktime/utils/sklearn/tests/test_sklearn_df_adapt.py b/sktime/utils/sklearn/tests/test_sklearn_df_adapt.py new file mode 100644 index 00000000000..57b36ebcdfc --- /dev/null +++ b/sktime/utils/sklearn/tests/test_sklearn_df_adapt.py @@ -0,0 +1,33 @@ +"""Tests for sklearn dataframe coercion.""" + +__author__ = ["fkiraly"] + +import numpy as np +import pandas as pd +import pytest + +from sktime.utils.sklearn._adapt_df import prep_skl_df + + +@pytest.mark.parametrize("copy_df", [True, False]) +def test_prep_skl_df_coercion(copy_df): + """Test that prep_skl_df behaves correctly on the coercion case.""" + mixed_example = pd.DataFrame({0: [1, 2, 3], "b": [1, 2, 3]}) + + res = prep_skl_df(mixed_example, copy_df=copy_df) + + assert np.all(res.columns == ["0", "b"]) + + if not copy_df: + assert res is mixed_example + + +@pytest.mark.parametrize("copy_df", [True, False]) +def test_prep_skl_df_non_coercion(copy_df): + """Test that prep_skl_df behaves correctly on the non-coercion case.""" + mixed_example = pd.DataFrame({"a": [1, 2, 3], "b": [1, 2, 3]}) + + res = prep_skl_df(mixed_example, copy_df=copy_df) + + assert np.all(res.columns == ["a", "b"]) + assert res is mixed_example diff --git a/sktime/utils/tests/test_sklearn_typing.py b/sktime/utils/sklearn/tests/test_sklearn_typing.py similarity index 100% rename from sktime/utils/tests/test_sklearn_typing.py rename to sktime/utils/sklearn/tests/test_sklearn_typing.py diff --git a/sktime/utils/tests/test_mlflow_sktime_model_export.py b/sktime/utils/tests/test_mlflow_sktime_model_export.py index 6143ae5eac0..f46fcb2bb45 100644 --- a/sktime/utils/tests/test_mlflow_sktime_model_export.py +++ b/sktime/utils/tests/test_mlflow_sktime_model_export.py @@ -1,6 +1,6 @@ """Tests for mlflow-sktime custom model flavor.""" -__author__ = ["benjaminbluhm"] +__author__ = ["benjaminbluhm", "achieveordie"] import os import sys @@ -11,10 +11,11 @@ import pandas as pd import pytest -from sktime.datasets import load_airline, load_longley +from sktime.datasets import load_airline, load_arrow_head, load_longley from sktime.forecasting.arima import AutoARIMA from sktime.forecasting.naive import NaiveForecaster from sktime.split import temporal_train_test_split +from sktime.tests.test_switch import run_test_for_class from sktime.utils.multiindex import flatten_multiindex from sktime.utils.validation._dependencies import _check_soft_dependencies @@ -41,7 +42,12 @@ def mock_s3_bucket(): ------- string with name of mock S3 bucket """ - with moto.mock_s3(): + if moto.__version__ < "5.0.0": + mock_method = moto.mock_s3 + else: + mock_method = moto.mock_aws + + with mock_method(): bucket_name = "mock-bucket" my_config = Config(region_name="us-east-1") s3_client = boto3.client("s3", config=my_config) @@ -77,6 +83,18 @@ def test_data_longley(): return y_train, y_test, X_train, X_test +@pytest.fixture(scope="module") +def test_data_arrow_head(): + """Create sample data for univariate classification.""" + X_train, y_train = load_arrow_head(split="TRAIN") + X_test, y_test = load_arrow_head(split="TEST") + return y_train.astype(int), y_test.astype(int), X_train, X_test + + +@pytest.mark.skipif( + not run_test_for_class(AutoARIMA), + reason="Skip AutoARIMA to test mlflow functionalities since soft deps are missing.", +) @pytest.fixture(scope="module") def auto_arima_model(test_data_airline): """Create instance of fitted auto arima model.""" @@ -85,6 +103,22 @@ def auto_arima_model(test_data_airline): ) +@pytest.mark.skipif( + not _check_soft_dependencies("tensorflow", severity="none"), + reason="skip test if required soft dependency is not available.", +) +@pytest.fixture(scope="module") +def cnn_model(test_data_arrow_head): + """Create an instance of fitted ResNet Classifier model.""" + from sktime.classification.deep_learning.cnn import CNNClassifier + + y_train, _, X_train, _ = test_data_arrow_head + + return CNNClassifier(n_epochs=1, n_conv_layers=1, kernel_size=3).fit( + X_train, y_train + ) + + @pytest.fixture(scope="module") def naive_forecaster_model_with_regressor(test_data_longley): """Create instance of fitted naive forecaster model.""" @@ -94,8 +128,11 @@ def naive_forecaster_model_with_regressor(test_data_longley): @pytest.mark.skipif( - not _check_soft_dependencies("mlflow", severity="none"), - reason="skip test if required soft dependency not available", + not ( + _check_soft_dependencies("mlflow", severity="none") + and run_test_for_class(AutoARIMA) + ), + reason="Skip AutoARIMA to test mlflow functionalities since soft deps are missing.", ) @pytest.mark.parametrize("serialization_format", ["pickle", "cloudpickle"]) def test_auto_arima_model_save_and_load( @@ -117,8 +154,11 @@ def test_auto_arima_model_save_and_load( @pytest.mark.skipif( - not _check_soft_dependencies("mlflow", severity="none"), - reason="skip test if required soft dependency not available", + not ( + _check_soft_dependencies("mlflow", severity="none") + and run_test_for_class(AutoARIMA) + ), + reason="Skip AutoARIMA to test mlflow functionalities since soft deps are missing.", ) @pytest.mark.parametrize("serialization_format", ["pickle", "cloudpickle"]) def test_auto_arima_model_pyfunc_output( @@ -169,8 +209,36 @@ def test_auto_arima_model_pyfunc_output( @pytest.mark.skipif( - not _check_soft_dependencies("mlflow", severity="none"), - reason="skip test if required soft dependency not available", + not _check_soft_dependencies(["mlflow", "tensorflow"], severity="none"), + reason="skip mlflow tests on CNN if required soft dependency not available", +) +@pytest.mark.parametrize("serialization_format", ["pickle", "cloudpickle"]) +def test_cnn_model_save_and_load( + cnn_model, test_data_arrow_head, model_path, serialization_format +): + """Test saving and loading of DL sktime estimator.""" + from sktime.utils import mlflow_sktime + + mlflow_sktime.save_model( + sktime_model=cnn_model, + path=model_path, + serialization_format=serialization_format, + ) + loaded_model = mlflow_sktime.load_model(model_uri=model_path) + + _, _, _, X_test = test_data_arrow_head + + np.testing.assert_array_almost_equal( + cnn_model.predict(X_test), loaded_model.predict(X_test) + ) + + +@pytest.mark.skipif( + not ( + _check_soft_dependencies("mlflow", severity="none") + and run_test_for_class(AutoARIMA) + ), + reason="Skip AutoARIMA to test mlflow functionalities since soft deps are missing.", ) def test_auto_arima_model_pyfunc_with_params_output(auto_arima_model, model_path): """Test auto arima prediction of loaded pyfunc model with parameters.""" @@ -223,8 +291,11 @@ def test_auto_arima_model_pyfunc_with_params_output(auto_arima_model, model_path @pytest.mark.skipif( - not _check_soft_dependencies("mlflow", severity="none"), - reason="skip test if required soft dependency not available", + not ( + _check_soft_dependencies("mlflow", severity="none") + and run_test_for_class(AutoARIMA) + ), + reason="Skip AutoARIMA to test mlflow functionalities since soft deps are missing.", ) def test_auto_arima_model_pyfunc_without_params_output(auto_arima_model, model_path): """Test auto arima prediction of loaded pyfunc model without parameters.""" @@ -277,8 +348,11 @@ def test_auto_arima_model_pyfunc_without_params_output(auto_arima_model, model_p @pytest.mark.skipif( - not _check_soft_dependencies("mlflow", severity="none"), - reason="skip test if required soft dependency not available", + not ( + _check_soft_dependencies("mlflow", severity="none") + and run_test_for_class(AutoARIMA) + ), + reason="Skip AutoARIMA to test mlflow functionalities since soft deps are missing.", ) def test_auto_arima_model_pyfunc_without_conf_output(auto_arima_model, model_path): """Test auto arima prediction of loaded pyfunc model without config.""" @@ -355,8 +429,11 @@ def test_naive_forecaster_model_with_regressor_pyfunc_output( @pytest.mark.skipif( - not _check_soft_dependencies("mlflow", severity="none"), - reason="skip test if required soft dependency not available", + not ( + _check_soft_dependencies("mlflow", severity="none") + and run_test_for_class(AutoARIMA) + ), + reason="Skip AutoARIMA to test mlflow functionalities since soft deps are missing.", ) @pytest.mark.parametrize("use_signature", [True, False]) @pytest.mark.parametrize("use_example", [True, False]) @@ -390,8 +467,11 @@ def test_signature_and_examples_saved_correctly( @pytest.mark.skipif( - not _check_soft_dependencies("mlflow", severity="none"), - reason="skip test if required soft dependency not available", + not ( + _check_soft_dependencies("mlflow", severity="none") + and run_test_for_class(AutoARIMA) + ), + reason="Skip AutoARIMA to test mlflow functionalities since soft deps are missing.", ) @pytest.mark.parametrize("use_signature", [True, False]) def test_predict_var_signature_saved_correctly( @@ -413,8 +493,11 @@ def test_predict_var_signature_saved_correctly( @pytest.mark.skipif( - not _check_soft_dependencies("mlflow", severity="none"), - reason="skip test if required soft dependency not available", + not ( + _check_soft_dependencies("mlflow", severity="none") + and run_test_for_class(AutoARIMA) + ), + reason="Skip AutoARIMA to test mlflow functionalities since soft deps are missing.", ) @pytest.mark.parametrize("use_signature", [True, False]) @pytest.mark.parametrize("use_example", [True, False]) @@ -461,8 +544,11 @@ def test_signature_and_example_for_pyfunc_predict( @pytest.mark.skipif( - not _check_soft_dependencies("mlflow", severity="none"), - reason="skip test if required soft dependency not available", + not ( + _check_soft_dependencies("mlflow", severity="none") + and run_test_for_class(AutoARIMA) + ), + reason="Skip AutoARIMA to test mlflow functionalities since soft deps are missing.", ) def test_load_from_remote_uri_succeeds(auto_arima_model, model_path, mock_s3_bucket): """Test loading native sktime model from mock S3 bucket.""" @@ -487,8 +573,11 @@ def test_load_from_remote_uri_succeeds(auto_arima_model, model_path, mock_s3_buc @pytest.mark.skipif( - not _check_soft_dependencies("mlflow", severity="none"), - reason="skip test if required soft dependency not available", + not ( + _check_soft_dependencies("mlflow", severity="none") + and run_test_for_class(AutoARIMA) + ), + reason="Skip AutoARIMA to test mlflow functionalities since soft deps are missing.", ) @pytest.mark.parametrize("should_start_run", [True, False]) @pytest.mark.parametrize("serialization_format", ["pickle", "cloudpickle"]) @@ -531,8 +620,11 @@ def test_log_model(auto_arima_model, tmp_path, should_start_run, serialization_f @pytest.mark.xfail(reason="known failure to be debugged, see #4904") @pytest.mark.skipif( - not _check_soft_dependencies("mlflow", severity="none"), - reason="skip test if required soft dependency not available", + not ( + _check_soft_dependencies("mlflow", severity="none") + and run_test_for_class(AutoARIMA) + ), + reason="Skip AutoARIMA to test mlflow functionalities since soft deps are missing.", ) def test_log_model_calls_register_model(auto_arima_model, tmp_path): """Test log model calls register model.""" @@ -562,8 +654,11 @@ def test_log_model_calls_register_model(auto_arima_model, tmp_path): @pytest.mark.skipif( - not _check_soft_dependencies("mlflow", severity="none"), - reason="skip test if required soft dependency not available", + not ( + _check_soft_dependencies("mlflow", severity="none") + and run_test_for_class(AutoARIMA) + ), + reason="Skip AutoARIMA to test mlflow functionalities since soft deps are missing.", ) def test_log_model_no_registered_model_name(auto_arima_model, tmp_path): """Test log model calls register model without registered model name.""" @@ -586,8 +681,11 @@ def test_log_model_no_registered_model_name(auto_arima_model, tmp_path): @pytest.mark.skipif( - not _check_soft_dependencies("mlflow", severity="none"), - reason="skip test if required soft dependency not available", + not ( + _check_soft_dependencies("mlflow", severity="none") + and run_test_for_class(AutoARIMA) + ), + reason="Skip AutoARIMA to test mlflow functionalities since soft deps are missing.", ) def test_pyfunc_raises_invalid_attribute_type(auto_arima_model, model_path): """Test pyfunc raises exception with invalid attribute type.""" @@ -607,8 +705,11 @@ def test_pyfunc_raises_invalid_attribute_type(auto_arima_model, model_path): @pytest.mark.skipif( - not _check_soft_dependencies("mlflow", severity="none"), - reason="skip test if required soft dependency not available", + not ( + _check_soft_dependencies("mlflow", severity="none") + and run_test_for_class(AutoARIMA) + ), + reason="Skip AutoARIMA to test mlflow functionalities since soft deps are missing.", ) def test_pyfunc_raises_invalid_dict_key(auto_arima_model, model_path): """Test pyfunc raises exception with invalid dict key.""" @@ -628,8 +729,11 @@ def test_pyfunc_raises_invalid_dict_key(auto_arima_model, model_path): @pytest.mark.skipif( - not _check_soft_dependencies("mlflow", severity="none"), - reason="skip test if required soft dependency not available", + not ( + _check_soft_dependencies("mlflow", severity="none") + and run_test_for_class(AutoARIMA) + ), + reason="Skip AutoARIMA to test mlflow functionalities since soft deps are missing.", ) def test_pyfunc_raises_invalid_dict_value_type(auto_arima_model, model_path): """Test pyfunc raises exception with invalid dict value type.""" @@ -648,8 +752,11 @@ def test_pyfunc_raises_invalid_dict_value_type(auto_arima_model, model_path): @pytest.mark.skipif( - not _check_soft_dependencies("mlflow", severity="none"), - reason="skip test if required soft dependency not available", + not ( + _check_soft_dependencies("mlflow", severity="none") + and run_test_for_class(AutoARIMA) + ), + reason="Skip AutoARIMA to test mlflow functionalities since soft deps are missing.", ) def test_pyfunc_raises_invalid_dict_value(auto_arima_model, model_path): """Test pyfunc raises exception with invalid dict value.""" @@ -669,8 +776,11 @@ def test_pyfunc_raises_invalid_dict_value(auto_arima_model, model_path): @pytest.mark.skipif( - not _check_soft_dependencies("mlflow", severity="none"), - reason="skip test if required soft dependency not available", + not ( + _check_soft_dependencies("mlflow", severity="none") + and run_test_for_class(AutoARIMA) + ), + reason="Skip AutoARIMA to test mlflow functionalities since soft deps are missing.", ) def test_pyfunc_predict_proba_raises_invalid_attribute_type( auto_arima_model, model_path @@ -693,8 +803,11 @@ def test_pyfunc_predict_proba_raises_invalid_attribute_type( @pytest.mark.skipif( - not _check_soft_dependencies("mlflow", severity="none"), - reason="skip test if required soft dependency not available", + not ( + _check_soft_dependencies("mlflow", severity="none") + and run_test_for_class(AutoARIMA) + ), + reason="Skip AutoARIMA to test mlflow functionalities since soft deps are missing.", ) def test_pyfunc_predict_proba_raises_invalid_dict_value(auto_arima_model, model_path): """Test pyfunc predict_proba raises exception with invalid dict value.""" diff --git a/sktime/utils/tests/test_plotting.py b/sktime/utils/tests/test_plotting.py index ebc62f14100..832827ae78e 100644 --- a/sktime/utils/tests/test_plotting.py +++ b/sktime/utils/tests/test_plotting.py @@ -244,6 +244,28 @@ def test_plot_series_uniform_treatment_of_int64_range_index_types(): plt.close() +@pytest.mark.skipif( + not run_test_for_class(plot_series), + reason="run test only if softdeps are present and incrementally (if requested)", +) +@pytest.mark.skipif( + not _check_soft_dependencies("matplotlib", severity="none"), + reason="skip test if required soft dependency for matplotlib not available", +) +def test_plot_series_interval(): + """Test prediction interval plotting functionality in plot_series.""" + from sktime.forecasting.base import ForecastingHorizon + from sktime.forecasting.naive import NaiveForecaster + + model = NaiveForecaster() + y = load_airline() + model.fit(y[:-3]) + fh = ForecastingHorizon(y[-3:].index, is_relative=False) + pred = model.predict(fh) + interval = model.predict_interval(fh) + plot_series(y[:-3], y[-3:], pred, pred_interval=interval) + + # Generically test whether plots only accepting univariate input run @pytest.mark.skipif( not run_test_for_class(univariate_plots), diff --git a/sktime/utils/tests/test_sampling.py b/sktime/utils/tests/test_sampling.py index eda0b6ee925..40d5bfe3b67 100644 --- a/sktime/utils/tests/test_sampling.py +++ b/sktime/utils/tests/test_sampling.py @@ -5,7 +5,6 @@ from sktime.datasets import load_unit_test from sktime.datatypes import check_is_scitype -from sktime.utils._testing.deep_equals import deep_equals from sktime.utils.sampling import random_partition, stratified_resample NK_FIXTURES = [(10, 3), (15, 5), (19, 6), (3, 1), (1, 2)] @@ -40,6 +39,8 @@ def test_partition(n, k): @pytest.mark.parametrize("n, k", NK_FIXTURES) def test_seed(n, k, seed): """Test that seed is deterministic.""" + from sktime.utils.deep_equals import deep_equals + part = random_partition(n, k, seed) part2 = random_partition(n, k, seed) diff --git a/sktime/utils/validation/_dependencies.py b/sktime/utils/validation/_dependencies.py index b927273be96..ce974049106 100644 --- a/sktime/utils/validation/_dependencies.py +++ b/sktime/utils/validation/_dependencies.py @@ -40,7 +40,7 @@ def _check_soft_dependencies( should be provided if import name differs from package name severity : str, "error" (default), "warning", "none" behaviour for raising errors or warnings - "error" - raises a `ModuleNotFoundException` if one of packages is not installed + "error" - raises a `ModuleNotFoundError` if one of packages is not installed "warning" - raises a warning if one of packages is not installed function returns False if one of packages is not installed, otherwise True "none" - does not raise exception or warning @@ -216,7 +216,7 @@ def _check_dl_dependencies(msg=None, severity="error"): error message to be returned in the `ModuleNotFoundError`, overrides default severity : str, "error" (default), "warning", "none" behaviour for raising errors or warnings - "error" - raises a ModuleNotFoundException if one of packages is not installed + "error" - raises a ModuleNotFoundError if one of packages is not installed "warning" - raises a warning if one of packages is not installed function returns False if one of packages is not installed, otherwise True "none" - does not raise exception or warning @@ -254,6 +254,49 @@ def _check_dl_dependencies(msg=None, severity="error"): ) +def _check_mlflow_dependencies( + msg=None, severity="error", suppress_import_stdout=False +): + """Check if `mlflow` and its dependencies are installed. + + Parameters + ---------- + msg: str, optional, default= default message (msg below) + error message to be returned when `ModuleNotFoundError` is raised. + severity: str, either of "error", "warning" or "none" + behaviour for raising errors or warnings + "error" - raises a `ModuleNotFound` if mlflow-related packages are not found. + "warning" - raises a warning message if any mlflow-related package is not + installed also returns False. In case all packages are present, + returns True. + "none" - does not raise any exception or warning and simply returns True + if all packages are installed otherwise return False. + + Raise + ----- + ModuleNotFoundError + User Friendly error with a suggested action to install mlflow dependencies + + Returns + ------- + boolean - whether all mlflow-related packages are installed. + """ + if not isinstance(msg, str): + msg = ( + "`mlflow` is an extra dependency and is not included " + "in the base sktime installation. " + "Please run `pip install mlflow` " + "or `pip install sktime[mlflow]` to install the package." + ) + + return _check_soft_dependencies( + "mlflow", + msg=msg, + severity=severity, + suppress_import_stdout=suppress_import_stdout, + ) + + def _check_python_version(obj, package=None, msg=None, severity="error"): """Check if system python version is compatible with requirements of obj. @@ -348,7 +391,7 @@ def _check_estimator_deps(obj, msg=None, severity="error"): error message to be returned in the `ModuleNotFoundError`, overrides default severity : str, "error" (default), "warning", or "none" behaviour for raising errors or warnings - "error" - raises a ModuleNotFoundException if environment is incompatible + "error" - raises a `ModuleNotFoundError` if environment is incompatible "warning" - raises a warning if environment is incompatible function returns False if environment is incompatible, otherwise True "none" - does not raise exception or warning diff --git a/sktime/utils/validation/forecasting.py b/sktime/utils/validation/forecasting.py index f446a5c5abe..21fdedd9666 100644 --- a/sktime/utils/validation/forecasting.py +++ b/sktime/utils/validation/forecasting.py @@ -511,7 +511,9 @@ def check_interval_df(interval_df, index_to_match): """ from sktime.datatypes import check_is_mtype - checked = check_is_mtype(interval_df, "pred_interval", return_metadata=True) + checked = check_is_mtype( + interval_df, "pred_interval", return_metadata=True, msg_return_dict="list" + ) if not checked[0]: raise ValueError(checked[1]) df_idx = interval_df.index