diff --git a/.all-contributorsrc b/.all-contributorsrc index d5b5d8d9c5..7476a2db82 100644 --- a/.all-contributorsrc +++ b/.all-contributorsrc @@ -2437,6 +2437,43 @@ "contributions": [ "doc" ] + }, + { + "login": "IRKnyazev", + "name": "Ivan Knyazev", + "avatar_url": "https://avatars.githubusercontent.com/u/105492484?v=4", + "profile": "https://github.com/IRKnyazev", + "contributions": [ + "doc" + ] + }, + { + "login": "Cyril-Meyer", + "name": "Cyril Meyer", + "avatar_url": "https://avatars.githubusercontent.com/u/69190238?v=4", + "profile": "https://github.com/Cyril-Meyer", + "contributions": [ + "test" + ] + }, + { + "login": "Moonzyyy", + "name": "Daniele Carli", + "avatar_url": "https://avatars.githubusercontent.com/u/47296443?v=4", + "profile": "https://github.com/Moonzyyy", + "contributions": [ + "doc" + ] + }, + { + "login": "adm-unl", + "name": "Adam Unal", + "avatar_url": "https://avatars.githubusercontent.com/u/143117979?v=4", + "profile": "https://github.com/adm-unl", + "contributions": [ + "doc" + ] } - ] + ], + "commitType": "docs" } diff --git a/.github/workflows/pr_precommit.yml b/.github/workflows/pr_precommit.yml index d9d60bdd03..a81fc60e14 100644 --- a/.github/workflows/pr_precommit.yml +++ b/.github/workflows/pr_precommit.yml @@ -17,8 +17,19 @@ jobs: runs-on: ubuntu-20.04 steps: + - name: Create app token + uses: actions/create-github-app-token@v1 + id: app-token + with: + app-id: ${{ vars.PR_APP_ID }} + private-key: ${{ secrets.PR_APP_KEY }} + - name: Checkout uses: actions/checkout@v4 + with: + repository: ${{ github.event.pull_request.head.repo.full_name }} + ref: ${{ github.head_ref }} + token: ${{ steps.app-token.outputs.token }} - name: Setup Python 3.10 uses: actions/setup-python@v5 @@ -45,22 +56,6 @@ jobs: extra_args: --files ${{ steps.changed-files.outputs.all_changed_files }} # push fixes if pre-commit fails and PR is eligible - - if: ${{ failure() && github.event_name == 'pull_request_target' && !github.event.pull_request.draft && 
!contains(github.event.pull_request.labels.*.name, 'stop pre-commit fixes') }} - name: Create app token - uses: actions/create-github-app-token@v1 - id: app-token - with: - app-id: ${{ vars.PR_APP_ID }} - private-key: ${{ secrets.PR_APP_KEY }} - - - if: ${{ failure() && github.event_name == 'pull_request_target' && !github.event.pull_request.draft && !contains(github.event.pull_request.labels.*.name, 'stop pre-commit fixes') }} - name: Checkout - uses: actions/checkout@v4 - with: - repository: ${{ github.event.pull_request.head.repo.full_name }} - ref: ${{ github.head_ref }} - token: ${{ steps.app-token.outputs.token }} - - if: ${{ failure() && github.event_name == 'pull_request_target' && !github.event.pull_request.draft && !contains(github.event.pull_request.labels.*.name, 'stop pre-commit fixes') }} name: Push pre-commit fixes uses: stefanzweifel/git-auto-commit-action@v5 diff --git a/.github/workflows/scorecard.yml b/.github/workflows/scorecard.yml index afeee5ffdd..e0f95c2b04 100644 --- a/.github/workflows/scorecard.yml +++ b/.github/workflows/scorecard.yml @@ -27,7 +27,7 @@ jobs: persist-credentials: false - name: Run analysis - uses: ossf/scorecard-action@v2.3.3 + uses: ossf/scorecard-action@v2.4.0 with: results_file: results.sarif results_format: sarif diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 2dec990167..e5991439bd 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -29,13 +29,13 @@ repos: args: [ "--create", "--python-folders", "aeon" ] - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.5.2 + rev: v0.5.5 hooks: - id: ruff args: [ "--fix"] - repo: https://github.com/asottile/pyupgrade - rev: v3.16.0 + rev: v3.17.0 hooks: - id: pyupgrade args: [ "--py38-plus" ] diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md index 0a2b990a23..21372faf43 100644 --- a/CONTRIBUTORS.md +++ b/CONTRIBUTORS.md @@ -1,7 +1,7 @@ # Contributors -[![All 
Contributors](https://img.shields.io/badge/all_contributors-236-orange.svg)](#contributors) +[![All Contributors](https://img.shields.io/badge/all_contributors-240-orange.svg)](#contributors) This project follows the [all-contributors](https://github.com/all-contributors/all-contributors) specification. Contributions of any kind welcome! @@ -20,10 +20,11 @@ Thanks goes to these wonderful people ([emoji key](https://allcontributors.org/d Aaron Margolese-Malin
Aaron Margolese-Malin

πŸ› Aaron Smith
Aaron Smith

πŸ’» Abhishek Pathak
Abhishek Pathak

πŸ› + Adam Unal
Adam Unal

πŸ“– Afzal Ansari
Afzal Ansari

πŸ’» πŸ“– - Ahmed Bilal
Ahmed Bilal

πŸ“– + Ahmed Bilal
Ahmed Bilal

πŸ“– AidenRushbrooke
AidenRushbrooke

πŸ’» ⚠️ Akshat Nayak
Akshat Nayak

πŸ’» Akshat Rampuria
Akshat Rampuria

πŸ“– @@ -31,9 +32,9 @@ Thanks goes to these wonderful people ([emoji key](https://allcontributors.org/d Alex Hawkins-Hooker
Alex Hawkins-Hooker

πŸ’» Alexandra Amidon
Alexandra Amidon

πŸ“ πŸ“– πŸ€” Ali Ismail-Fawaz
Ali Ismail-Fawaz

πŸ’» πŸ› πŸ“– ⚠️ 🚧 - Ali Teeney
Ali Teeney

πŸ’» + Ali Teeney
Ali Teeney

πŸ’» Ali Yazdizadeh
Ali Yazdizadeh

πŸ“– Alwin
Alwin

πŸ“– πŸ’» 🚧 An Hoang
An Hoang

πŸ› πŸ’» @@ -41,9 +42,9 @@ Thanks goes to these wonderful people ([emoji key](https://allcontributors.org/d AndrΓ© Guarnier De Mitri
André Guarnier De Mitri

πŸ’» Angus Dempster
Angus Dempster

πŸ’» ⚠️ βœ… Antoine Guillaume
Antoine Guillaume

πŸ’» πŸ“– - Antoni Baum
Antoni Baum

πŸ’» + Antoni Baum
Antoni Baum

πŸ’» Aparna Sakshi
Aparna Sakshi

πŸ’» Arelo Tanoh
Arelo Tanoh

πŸ“– Arepalli Yashwanth Reddy
Arepalli Yashwanth Reddy

πŸ’» πŸ› πŸ“– @@ -51,9 +52,9 @@ Thanks goes to these wonderful people ([emoji key](https://allcontributors.org/d Arnav
Arnav

πŸ’» Ayushmaan Seth
Ayushmaan Seth

πŸ’» πŸ‘€ ⚠️ πŸ“– πŸ“‹ βœ… BANDASAITEJAREDDY
BANDASAITEJAREDDY

πŸ’» πŸ“– - Badr-Eddine Marani
Badr-Eddine Marani

πŸ’» + Badr-Eddine Marani
Badr-Eddine Marani

πŸ’» Benedikt Heidrich
Benedikt Heidrich

πŸ’» Benjamin Bluhm
Benjamin Bluhm

πŸ’» πŸ“– πŸ’‘ Bhaskar Dhariyal
Bhaskar Dhariyal

πŸ’» ⚠️ @@ -61,9 +62,9 @@ Thanks goes to these wonderful people ([emoji key](https://allcontributors.org/d Bohan Zhang
Bohan Zhang

πŸ’» Bouke Postma
Bouke Postma

πŸ’» πŸ› πŸ€” Brian Murphy
Brian Murphy

πŸ“– - Carlos Borrajo
Carlos Borrajo

πŸ’» πŸ“– + Carlos Borrajo
Carlos Borrajo

💻 📖 Carlos Ramos Carreño
Carlos Ramos Carreño

πŸ“– Chang Wei Tan
Chang Wei Tan

πŸ’» Cheuk Ting Ho
Cheuk Ting Ho

πŸ’» @@ -71,239 +72,242 @@ Thanks goes to these wonderful people ([emoji key](https://allcontributors.org/d Christopher Dahlin
Christopher Dahlin

πŸ’» Christopher Lo
Christopher Lo

πŸ’» πŸ€” Ciaran Gilbert
Ciaran Gilbert

πŸ› πŸ’» πŸ“– ⚠️ πŸ€” - ClaudiaSanches
ClaudiaSanches

πŸ’» ⚠️ + ClaudiaSanches
ClaudiaSanches

πŸ’» ⚠️ Corvin Paul
Corvin Paul

πŸ“– + Cyril Meyer
Cyril Meyer

⚠️ Daniel Burkhardt Cerigo
Daniel Burkhardt Cerigo

💻 Daniel Martín Martínez
Daniel Martín Martínez

πŸ“– πŸ› + Daniele Carli
Daniele Carli

πŸ“– Dave Hirschfeld
Dave Hirschfeld

πŸš‡ David Buchaca Prats
David Buchaca Prats

πŸ’» + + David Guijo-Rubio
David Guijo-Rubio

πŸ’» πŸ€” Divya Tiwari
Divya Tiwari

πŸ’» πŸ”£ Dmitriy Valetov
Dmitriy Valetov

πŸ’» βœ… - - Doug Ollerenshaw
Doug Ollerenshaw

πŸ“– Drishti Bhasin
Drishti Bhasin

πŸ’» Dylan Sherry
Dylan Sherry

πŸš‡ Emilia Rose
Emilia Rose

πŸ’» ⚠️ Er Jie Yong
Er Jie Yong

πŸ› πŸ’» + + Evan Miller
Evan Miller

βœ… Eyal Shafran
Eyal Shafran

πŸ’» Federico Garza
Federico Garza

πŸ’» πŸ’‘ - - Felix Claessen
Felix Claessen

πŸ’» πŸ“– ⚠️ πŸ› Florian Stinner
Florian Stinner

πŸ’» ⚠️ Franz Kiraly
Franz Kiraly

πŸ› πŸ’Ό πŸ’» πŸ“– 🎨 πŸ“‹ πŸ’‘ πŸ’΅ πŸ” πŸ€” 🚧 πŸ§‘β€πŸ« πŸ“† πŸ’¬ πŸ‘€ πŸ“’ ⚠️ βœ… πŸ“Ή Freddy A Boulton
Freddy A Boulton

πŸš‡ ⚠️ Futuer
Futuer

πŸ“– + + Gabriel Riegner
Gabriel Riegner

πŸ“– Galina Chernikova
Galina Chernikova

πŸ’» George Oastler
George Oastler

πŸ’» ⚠️ πŸ“¦ πŸ’‘ πŸ“– - - Gilberto Barbosa
Gilberto Barbosa

πŸ’» Grace Gao
Grace Gao

πŸ’» πŸ› Guilherme Arcencio
Guilherme Arcencio

πŸ’» ⚠️ Guzal Bulatova
Guzal Bulatova

πŸ› πŸ’» πŸ“‹ πŸ§‘β€πŸ« πŸ“† πŸ‘€ ⚠️ HYang1996
HYang1996

πŸ’» ⚠️ πŸ“– βœ… + + Harshitha Sudhakar
Harshitha Sudhakar

πŸ“– πŸ’» Hedeer El Showk
Hedeer El Showk

πŸ› πŸ“– πŸ’» Huayi Wei
Huayi Wei

βœ… - - Ifeanyi30
Ifeanyi30

πŸ’» Ilja Maurer
Ilja Maurer

πŸ’» Ilyas Moutawwakil
Ilyas Moutawwakil

πŸ’» πŸ“– Ireoluwatomiwa
Ireoluwatomiwa

πŸ“– Ishan Nangia
Ishan Nangia

πŸ€” + + + Ivan Knyazev
Ivan Knyazev

πŸ“– Jack Russon
Jack Russon

πŸ’» James Large
James Large

πŸ’» πŸ“– ⚠️ πŸš‡ 🚧 James Morrill
James Morrill

πŸ’» - - Jasmine Liaw
Jasmine Liaw

πŸ’» Jason Lines
Jason Lines

πŸ’» πŸ’Ό πŸ“– 🎨 πŸ“‹ πŸ” πŸ€” πŸ“† πŸ’¬ πŸ‘€ πŸ“’ πŸ’‘ Jason Mok
Jason Mok

πŸ“– Jason Pong
Jason Pong

πŸ’» ⚠️ + + Jaume Mateu
Jaume Mateu

πŸ’» JonathanBechtel
JonathanBechtel

πŸ’» πŸ€” ⚠️ Joren Hammudoglu
Joren Hammudoglu

πŸš‡ Juan Orduz
Juan Orduz

βœ… πŸ“– - - Julian Cooper
Julian Cooper

πŸ’» πŸ€” Juliana
Juliana

πŸ’» Justin Shenk
Justin Shenk

πŸ“– Kai Lion
Kai Lion

πŸ’» ⚠️ πŸ“– + + Kavin Anand
Kavin Anand

πŸ“– Kejsi Take
Kejsi Take

πŸ’» Kevin Lam
Kevin Lam

πŸ’» πŸ’‘ ⚠️ Kirstie Whitaker
Kirstie Whitaker

πŸ€” πŸ” - - Kishan Manani
Kishan Manani

πŸ’» πŸ“– ⚠️ πŸ› πŸ€” Krum Arnaudov
Krum Arnaudov

πŸ› πŸ’» Kutay Koralturk
Kutay Koralturk

πŸ’» πŸ› Leonidas Tsaprounis
Leonidas Tsaprounis

πŸ’» πŸ› πŸ§‘β€πŸ« πŸ‘€ + + Lielle Ravid
Lielle Ravid

πŸ’» πŸ“– Logan Duffy
Logan Duffy

πŸ’» πŸ“– ⚠️ πŸ› πŸ€” Lorena Pantano
Lorena Pantano

πŸ€” Lorenzo Toniazzi
Lorenzo Toniazzi

πŸ’» - - Lovkush
Lovkush

πŸ’» ⚠️ πŸ€” πŸ§‘β€πŸ« πŸ“† Luca Bennett
Luca Bennett

πŸ’» πŸ“– ⚠️ Luis Ventura
Luis Ventura

πŸ’» Luis Zugasti
Luis Zugasti

πŸ“– + + Lukasz Mentel
Lukasz Mentel

πŸ’» πŸ“– πŸš‡ ⚠️ πŸ› 🚧 πŸ§‘β€πŸ« Marcelo Trylesinski
Marcelo Trylesinski

πŸ“– Marco Gorelli
Marco Gorelli

πŸš‡ Margaret Gorlin
Margaret Gorlin

πŸ’» πŸ’‘ ⚠️ - - Mariam Jabara
Mariam Jabara

πŸ’» Marielle
Marielle

📖 💻 🤔 Markus Löning
Markus Löning

πŸ’» ⚠️ 🚧 πŸ“¦ πŸ‘€ πŸš‡ πŸ’‘ πŸ› βœ… πŸ’Ό πŸ“– 🎨 πŸ“‹ πŸ” πŸ€” πŸ“† πŸ’¬ πŸ“’ πŸ§‘β€πŸ« πŸ“Ή Martin Walter
Martin Walter

πŸ’» πŸ› πŸ“† πŸ” πŸ§‘β€πŸ« πŸ€” 🎨 πŸ‘€ πŸ“– πŸ“’ + + Martina G. Vilas
Martina G. Vilas

πŸ‘€ πŸ€” Matthew Middlehurst
Matthew Middlehurst

πŸ› πŸ’» πŸ”£ πŸ“– 🎨 πŸ’‘ πŸ€” πŸš‡ 🚧 πŸ§‘β€πŸ« πŸ“£ πŸ’¬ πŸ”¬ πŸ‘€ ⚠️ βœ… πŸ“’ Max Patzelt
Max Patzelt

πŸ’» Miao Cai
Miao Cai

πŸ› πŸ’» - - Michael F. Mbouopda
Michael F. Mbouopda

πŸ’» πŸ› πŸ“– Michael Feil
Michael Feil

πŸ’» ⚠️ πŸ€” Michal Chromcak
Michal Chromcak

πŸ’» πŸ“– ⚠️ βœ… Mirae Parker
Mirae Parker

πŸ’» ⚠️ + + Mohammed Saif Kazamel
Mohammed Saif Kazamel

πŸ› Morad :)
Morad :)

πŸ’» ⚠️ πŸ“– Multivin12
Multivin12

💻 ⚠️ Márcio A. Freitas Jr
Márcio A. Freitas Jr

πŸ“– - - Niek van der Laan
Niek van der Laan

πŸ’» Nikhil Gupta
Nikhil Gupta

πŸ’» πŸ› πŸ“– Nikola Shahpazov
Nikola Shahpazov

πŸ“– Nilesh Kumar
Nilesh Kumar

πŸ’» + + Nima Nooshiri
Nima Nooshiri

πŸ“– Ninnart Fuengfusin
Ninnart Fuengfusin

πŸ’» Noa Ben Ami
Noa Ben Ami

πŸ’» ⚠️ πŸ“– Oleksandr Shchur
Oleksandr Shchur

πŸ› πŸ’» - - Oleksii Kachaiev
Oleksii Kachaiev

πŸ’» ⚠️ Oliver Matthews
Oliver Matthews

πŸ’» Patrick Rockenschaub
Patrick Rockenschaub

💻 🎨 🤔 ⚠️ Patrick Schäfer
Patrick Schäfer

πŸ’» βœ… + + Paul
Paul

πŸ“– Paul Rabich
Paul Rabich

πŸ’» Paul Yim
Paul Yim

πŸ’» πŸ’‘ ⚠️ Philipp Kortmann
Philipp Kortmann

πŸ’» πŸ“– - - Piyush Gade
Piyush Gade

πŸ’» πŸ‘€ Pulkit Verma
Pulkit Verma

πŸ“– Quaterion
Quaterion

πŸ› Rafael AyllΓ³n-GavilΓ‘n
Rafael Ayllón-Gavilán

πŸ’» + + Rakshitha Godahewa
Rakshitha Godahewa

πŸ’» πŸ“– RavenRudi
RavenRudi

πŸ’» Raya Chakravarty
Raya Chakravarty

πŸ“– Rick van Hattem
Rick van Hattem

πŸš‡ - - Rishabh Bali
Rishabh Bali

πŸ’» Rishav Kumar Sinha
Rishav Kumar Sinha

πŸ“– Rishi Kumar Ray
Rishi Kumar Ray

πŸš‡ Riya Elizabeth John
Riya Elizabeth John

πŸ’» ⚠️ πŸ“– + + Ronnie Llamado
Ronnie Llamado

πŸ“– Ryan Kuhns
Ryan Kuhns

πŸ’» πŸ“– βœ… πŸ’‘ πŸ€” πŸ‘€ ⚠️ Sagar Mishra
Sagar Mishra

⚠️ Sajaysurya Ganesh
Sajaysurya Ganesh

πŸ’» πŸ“– 🎨 πŸ’‘ πŸ€” ⚠️ βœ… - - Saransh Chopra
Saransh Chopra

πŸ“– πŸš‡ Satya Prakash Pattnaik
Satya Prakash Pattnaik

πŸ“– Saurabh Dasgupta
Saurabh Dasgupta

πŸ’» Sebastiaan Koel
Sebastiaan Koel

πŸ’» πŸ“– + + Sebastian Hagn
Sebastian Hagn

πŸ“– Sebastian Schmidl
Sebastian Schmidl

πŸ› πŸ’» πŸ“– πŸ”¬ ⚠️ πŸ‘€ πŸ”£ Shivansh Subramanian
Shivansh Subramanian

πŸ“– πŸ’» Solomon Botchway
Solomon Botchway

🚧 - - Stanislav Khrapov
Stanislav Khrapov

πŸ’» Stijn Rotman
Stijn Rotman

πŸ’» Svea Marie Meyer
Svea Marie Meyer

πŸ“– πŸ’» Sylvain Combettes
Sylvain Combettes

πŸ’» πŸ› + + TNTran92
TNTran92

πŸ’» Taiwo Owoseni
Taiwo Owoseni

πŸ’» Thach Le Nguyen
Thach Le Nguyen

πŸ’» ⚠️ TheMathcompay Widget Factory Team
TheMathcompay Widget Factory Team

πŸ“– - - Thomas Buckley-Houston
Thomas Buckley-Houston

πŸ› Tom Xu
Tom Xu

πŸ’» πŸ“– Tomasz Chodakowski
Tomasz Chodakowski

πŸ’» πŸ“– πŸ› Tony Bagnall
Tony Bagnall

πŸ’» πŸ’Ό πŸ“– 🎨 πŸ“‹ πŸ” πŸ€” πŸ“† πŸ’¬ πŸ‘€ πŸ“’ πŸ”£ + + Tvisha Vedant
Tvisha Vedant

πŸ’» Utkarsh Kumar
Utkarsh Kumar

πŸ’» πŸ“– Utsav Kumar Tiwari
Utsav Kumar Tiwari

πŸ’» πŸ“– Vedant
Vedant

πŸ“– - - Viktor Dremov
Viktor Dremov

πŸ’» ViktorKaz
ViktorKaz

πŸ’» πŸ“– 🎨 Vyomkesh Vyas
Vyomkesh Vyas

πŸ’» πŸ“– πŸ’‘ ⚠️ Wayne Adams
Wayne Adams

πŸ“– + + William Templier
William Templier

πŸ“– William Zeng
William Zeng

πŸ› William Zheng
William Zheng

πŸ’» ⚠️ Yair Beer
Yair Beer

πŸ’» - - Yash Lamba
Yash Lamba

πŸ’» Yi-Xuan Xu
Yi-Xuan Xu

πŸ’» ⚠️ 🚧 πŸ“– Ziyao Wei
Ziyao Wei

πŸ’» aa25desh
aa25desh

πŸ’» πŸ› + + abandus
abandus

πŸ€” πŸ’» adoherty21
adoherty21

πŸ› bethrice44
bethrice44

πŸ› πŸ’» πŸ‘€ ⚠️ big-o
big-o

πŸ’» ⚠️ 🎨 πŸ€” πŸ‘€ βœ… πŸ§‘β€πŸ« - - bobbys
bobbys

πŸ’» brett koonce
brett koonce

πŸ“– btrtts
btrtts

πŸ“– chizzi25
chizzi25

πŸ“ + + chrisholder
chrisholder

πŸ’» ⚠️ πŸ“– 🎨 πŸ’‘ πŸ› danbartl
danbartl

πŸ› πŸ’» πŸ‘€ πŸ“’ ⚠️ βœ… πŸ“Ή hamzahiqb
hamzahiqb

πŸš‡ hiqbal2
hiqbal2

πŸ“– - - jesellier
jesellier

πŸ’» jschemm
jschemm

πŸ’» julu98
julu98

πŸ› kkoziara
kkoziara

πŸ’» πŸ› + + matteogales
matteogales

πŸ’» 🎨 πŸ€” nileenagp
nileenagp

πŸ’» oleskiewicz
oleskiewicz

πŸ’» πŸ“– ⚠️ pabworks
pabworks

πŸ’» ⚠️ - - patiently pending world peace
patiently pending world peace

πŸ’» raishubham1
raishubham1

πŸ“– simone-pignotti
simone-pignotti

πŸ’» πŸ› sophijka
sophijka

πŸ“– 🚧 + + sri1419
sri1419

πŸ’» tensorflow-as-tf
tensorflow-as-tf

πŸ’» vNtzYy
vNtzYy

πŸ› vedazeren
vedazeren

πŸ’» ⚠️ - - vincent-nich12
vincent-nich12

πŸ’» vollmersj
vollmersj

πŸ“– xiaobenbenecho
xiaobenbenecho

💻 diff --git a/aeon/classification/deep_learning/__init__.py b/aeon/classification/deep_learning/__init__.py index 26ff063855..ba36690cfe 100644 --- a/aeon/classification/deep_learning/__init__.py +++ b/aeon/classification/deep_learning/__init__.py @@ -3,6 +3,7 @@ __all__ = [ "BaseDeepClassifier", "CNNClassifier", + "TimeCNNClassifier", "EncoderClassifier", "FCNClassifier", "InceptionTimeClassifier", @@ -13,7 +14,7 @@ "LITETimeClassifier", "IndividualLITEClassifier", ] -from aeon.classification.deep_learning._cnn import CNNClassifier +from aeon.classification.deep_learning._cnn import CNNClassifier, TimeCNNClassifier from aeon.classification.deep_learning._encoder import EncoderClassifier from aeon.classification.deep_learning._fcn import FCNClassifier from aeon.classification.deep_learning._inception_time import ( diff --git a/aeon/classification/deep_learning/_cnn.py b/aeon/classification/deep_learning/_cnn.py index 15fa496f3a..6f8a13a5ae 100644 --- a/aeon/classification/deep_learning/_cnn.py +++ b/aeon/classification/deep_learning/_cnn.py @@ -1,19 +1,27 @@ -"""Time Convolutional Neural Network (CNN) for classification.""" +"""Time Convolutional Neural Network (CNN) classifier.""" -__maintainer__ = [] -__all__ = ["CNNClassifier"] +__maintainer__ = ["hadifawaz1999"] +__all__ = ["CNNClassifier", "TimeCNNClassifier"] import gc import os import time from copy import deepcopy +from deprecated.sphinx import deprecated from sklearn.utils import check_random_state from aeon.classification.deep_learning.base import BaseDeepClassifier -from aeon.networks import CNNNetwork +from aeon.networks import CNNNetwork, TimeCNNNetwork +# TODO: remove v0.12.0 +@deprecated( + version="0.10.0", + reason="CNNClassifier has been renamed to TimeCNNClassifier " + "and will be removed in 0.12.0.", + category=FutureWarning, +) class CNNClassifier(BaseDeepClassifier): """ Time Convolutional Neural Network (CNN). 
@@ -76,12 +84,17 @@ class CNNClassifier(BaseDeepClassifier): save_last_model : bool, default = False Whether to save the last model, last epoch trained, using the base class method save_last_model_to_file. + save_init_model : bool, default = False + Whether to save the initialization of the model. best_file_name : str, default = "best_model" The name of the file of the best model, if save_best_model is set to False, this parameter is discarded. last_file_name : str, default = "last_model" The name of the file of the last model, if save_last_model is set to False, this parameter is discarded. + init_file_name : str, default = "init_model" + The name of the file of the init model, if save_init_model is set to False, + this parameter is discarded. Notes ----- @@ -120,8 +133,10 @@ def __init__( file_path="./", save_best_model=False, save_last_model=False, + save_init_model=False, best_file_name="best_model", last_file_name="last_model", + init_file_name="init_model", verbose=False, loss="mean_squared_error", metrics=None, @@ -144,7 +159,9 @@ def __init__( self.file_path = file_path self.save_best_model = save_best_model self.save_last_model = save_last_model + self.save_init_model = save_init_model self.best_file_name = best_file_name + self.init_file_name = init_file_name self.verbose = verbose self.loss = loss self.metrics = metrics @@ -243,6 +260,329 @@ def _fit(self, X, y): self.input_shape = X.shape[1:] self.training_model_ = self.build_model(self.input_shape, self.n_classes_) + if self.save_init_model: + self.training_model_.save(self.file_path + self.init_file_name + ".keras") + + if self.verbose: + self.training_model_.summary() + + self.file_name_ = ( + self.best_file_name if self.save_best_model else str(time.time_ns()) + ) + + if self.callbacks is None: + self.callbacks_ = [ + tf.keras.callbacks.ModelCheckpoint( + filepath=self.file_path + self.file_name_ + ".keras", + monitor="loss", + save_best_only=True, + ), + ] + else: + self.callbacks_ = 
self._get_model_checkpoint_callback( + callbacks=self.callbacks, + file_path=self.file_path, + file_name=self.file_name_, + ) + + self.history = self.training_model_.fit( + X, + y_onehot, + batch_size=self.batch_size, + epochs=self.n_epochs, + verbose=self.verbose, + callbacks=self.callbacks_, + ) + + try: + self.model_ = tf.keras.models.load_model( + self.file_path + self.file_name_ + ".keras", compile=False + ) + if not self.save_best_model: + os.remove(self.file_path + self.file_name_ + ".keras") + except FileNotFoundError: + self.model_ = deepcopy(self.training_model_) + + if self.save_last_model: + self.save_last_model_to_file(file_path=self.file_path) + + gc.collect() + return self + + @classmethod + def get_test_params(cls, parameter_set="default"): + """Return testing parameter settings for the estimator. + + Parameters + ---------- + parameter_set : str, default = "default" + Name of the set of test parameters to return, for use in tests. If no + special parameters are defined for a value, will return "default" set. + For classifiers, a "default" set of parameters should be provided for + general testing, and a "results_comparison" set for comparing against + previously recorded results if the general set does not produce suitable + probabilities to compare against. + + Returns + ------- + params : dict or list of dict, default = {} + Parameters to create testing instances of the class. + Each dict are parameters to construct an "interesting" test instance, i.e., + `MyClass(**params)` or `MyClass(**params[i])` creates a valid test instance. + `create_test_instance` uses the first (or only) dictionary in `params`. + """ + param1 = { + "n_epochs": 10, + "batch_size": 4, + "avg_pool_size": 4, + } + + test_params = [param1] + + return test_params + + +class TimeCNNClassifier(BaseDeepClassifier): + """ + Time Convolutional Neural Network (CNN). + + Adapted from the implementation used in [1]_. 
+ + Parameters + ---------- + n_layers : int, default = 2 + The number of convolution layers in the network. + kernel_size : int or list of int, default = 7 + Kernel size of convolution layers, if not a list, the same kernel size + is used for all layer, len(list) should be n_layers. + n_filters : int or list of int, default = [6, 12] + Number of filters for each convolution layer, if not a list, the same n_filters + is used in all layers. + avg_pool_size : int or list of int, default = 3 + The size of the average pooling layer, if not a list, the same + max pooling size is used for all convolution layer. + activation : str or list of str, default = "sigmoid" + Keras activation function used in the model for each layer, if not a list, + the same activation is used for all layers. + padding : str or list of str, default = 'valid' + The method of padding in convolution layers, if not a list, the same padding + used for all convolution layers. + strides : int or list of int, default = 1 + The strides of kernels in the convolution and max pooling layers, if not a + list, the same strides are used for all layers. + dilation_rate : int or list of int, default = 1 + The dilation rate of the convolution layers, if not a list, the same dilation + rate is used all over the network. + use_bias : bool or list of bool, default = True + Condition on whether to use bias values for convolution layers, + if not a list, the same condition is used for all layers. + random_state : int, RandomState instance or None, default=None + If `int`, random_state is the seed used by the random number generator; + If `RandomState` instance, random_state is the random number generator; + If `None`, the random number generator is the `RandomState` instance used + by `np.random`. + Seeded random number generation can only be guaranteed on CPU processing, + GPU processing will be non-deterministic. + n_epochs : int, default = 2000 + The number of epochs to train the model. 
+ batch_size : int, default = 16 + The number of samples per gradient update. + verbose : boolean, default = False + Whether to output extra information. + loss : string, default = "mean_squared_error" + Fit parameter for the keras model. + optimizer : keras.optimizer, default = keras.optimizers.Adam() + metrics : list of strings, default = ["accuracy"] + callbacks : keras.callbacks, default = model_checkpoint + To save best model on training loss. + file_path : file_path for the best model + Only used if checkpoint is used as callback. + save_best_model : bool, default = False + Whether to save the best model, if the modelcheckpoint callback is used by + default, this condition, if True, will prevent the automatic deletion of the + best saved model from file and the user can choose the file name. + save_last_model : bool, default = False + Whether to save the last model, last epoch trained, using the base class method + save_last_model_to_file. + save_init_model : bool, default = False + Whether to save the initialization of the model. + best_file_name : str, default = "best_model" + The name of the file of the best model, if save_best_model is set to False, + this parameter is discarded. + last_file_name : str, default = "last_model" + The name of the file of the last model, if save_last_model is set to False, + this parameter is discarded. + init_file_name : str, default = "init_model" + The name of the file of the init model, if save_init_model is set to False, + this parameter is discarded. + + Notes + ----- + Adapted from the implementation from Fawaz et. al + https://github.com/hfawaz/dl-4-tsc/blob/master/classifiers/cnn.py + + References + ---------- + .. [1] Zhao et. al, Convolutional neural networks for time series classification, + Journal of Systems Engineering and Electronics, 28(1):2017. 
+ + Examples + -------- + >>> from aeon.classification.deep_learning import TimeCNNClassifier + >>> from aeon.datasets import load_unit_test + >>> X_train, y_train = load_unit_test(split="train") + >>> X_test, y_test = load_unit_test(split="test") + >>> cnn = TimeCNNClassifier(n_epochs=20, batch_size=4) # doctest: +SKIP + >>> cnn.fit(X_train, y_train) # doctest: +SKIP + TimeCNNClassifier(...) + """ + + def __init__( + self, + n_layers=2, + kernel_size=7, + n_filters=None, + avg_pool_size=3, + activation="sigmoid", + padding="valid", + strides=1, + dilation_rate=1, + n_epochs=2000, + batch_size=16, + callbacks=None, + file_path="./", + save_best_model=False, + save_last_model=False, + save_init_model=False, + best_file_name="best_model", + last_file_name="last_model", + init_file_name="init_model", + verbose=False, + loss="mean_squared_error", + metrics=None, + random_state=None, + use_bias=True, + optimizer=None, + ): + self.n_layers = n_layers + self.kernel_size = kernel_size + self.n_filters = n_filters + self.padding = padding + self.strides = strides + self.dilation_rate = dilation_rate + self.avg_pool_size = avg_pool_size + self.activation = activation + self.use_bias = use_bias + + self.n_epochs = n_epochs + self.callbacks = callbacks + self.file_path = file_path + self.save_best_model = save_best_model + self.save_last_model = save_last_model + self.save_init_model = save_init_model + self.best_file_name = best_file_name + self.init_file_name = init_file_name + self.verbose = verbose + self.loss = loss + self.metrics = metrics + self.optimizer = optimizer + + self.history = None + + super().__init__( + batch_size=batch_size, + random_state=random_state, + last_file_name=last_file_name, + ) + + self._network = TimeCNNNetwork( + n_layers=self.n_layers, + kernel_size=self.kernel_size, + n_filters=self.n_filters, + avg_pool_size=self.avg_pool_size, + activation=self.activation, + padding=self.padding, + strides=self.strides, + dilation_rate=self.dilation_rate, + 
use_bias=self.use_bias, + ) + + def build_model(self, input_shape, n_classes, **kwargs): + """Construct a compiled, un-trained, keras model that is ready for training. + + In aeon, time series are stored in numpy arrays of shape (d, m), where d + is the number of dimensions, m is the series length. Keras/tensorflow assume + data is in shape (m, d). This method also assumes (m, d). Transpose should + happen in fit. + + Parameters + ---------- + input_shape : tuple + The shape of the data fed into the input layer, should be (m, d) + n_classes : int + The number of classes, which becomes the size of the output layer + + Returns + ------- + output : a compiled Keras Model + """ + import numpy as np + import tensorflow as tf + + if self.metrics is None: + metrics = ["accuracy"] + else: + metrics = self.metrics + + rng = check_random_state(self.random_state) + self.random_state_ = rng.randint(0, np.iinfo(np.int32).max) + tf.keras.utils.set_random_seed(self.random_state_) + input_layer, output_layer = self._network.build_network(input_shape, **kwargs) + + output_layer = tf.keras.layers.Dense( + units=n_classes, activation=self.activation, use_bias=self.use_bias + )(output_layer) + + self.optimizer_ = ( + tf.keras.optimizers.Adam() if self.optimizer is None else self.optimizer + ) + + model = tf.keras.models.Model(inputs=input_layer, outputs=output_layer) + model.compile( + loss=self.loss, + optimizer=self.optimizer_, + metrics=metrics, + ) + + return model + + def _fit(self, X, y): + """Fit the classifier on the training set (X, y). + + Parameters + ---------- + X : np.ndarray + The training input samples of shape (n_cases, n_channels, n_timepoints) + y : np.ndarray + The training data class labels of shape (n_cases,). + + + Returns + ------- + self : object + """ + import tensorflow as tf + + y_onehot = self.convert_y_to_keras(y) + # Transpose to conform to Keras input style. 
+ X = X.transpose(0, 2, 1) + + self.input_shape = X.shape[1:] + self.training_model_ = self.build_model(self.input_shape, self.n_classes_) + + if self.save_init_model: + self.training_model_.save(self.file_path + self.init_file_name + ".keras") + if self.verbose: self.training_model_.summary() diff --git a/aeon/classification/deep_learning/_encoder.py b/aeon/classification/deep_learning/_encoder.py index 7f839456f2..2765c4cbbe 100644 --- a/aeon/classification/deep_learning/_encoder.py +++ b/aeon/classification/deep_learning/_encoder.py @@ -52,6 +52,8 @@ class EncoderClassifier(BaseDeepClassifier): Whether or not to save the last model, last epoch trained, using the base class method save_last_model_to_file. + save_init_model : bool, default = False + Whether to save the initialization of the model. best_file_name : str, default = "best_model" The name of the file of the best model, if save_best_model is set to False, this parameter @@ -60,6 +62,9 @@ class EncoderClassifier(BaseDeepClassifier): The name of the file of the last model, if save_last_model is set to False, this parameter is discarded. + init_file_name : str, default = "init_model" + The name of the file of the init model, if save_init_model is set to False, + this parameter is discarded. 
random_state : int, RandomState instance or None, default=None If `int`, random_state is the seed used by the random number generator; If `RandomState` instance, random_state is the random number generator; @@ -102,8 +107,10 @@ def __init__( file_path="./", save_best_model=False, save_last_model=False, + save_init_model=False, best_file_name="best_model", last_file_name="last_model", + init_file_name="init_model", verbose=False, loss="categorical_crossentropy", metrics=None, @@ -124,7 +131,9 @@ def __init__( self.file_path = file_path self.save_best_model = save_best_model self.save_last_model = save_last_model + self.save_init_model = save_init_model self.best_file_name = best_file_name + self.init_file_name = init_file_name self.n_epochs = n_epochs self.verbose = verbose self.loss = loss @@ -225,6 +234,9 @@ def _fit(self, X, y): self.input_shape = X.shape[1:] self.training_model_ = self.build_model(self.input_shape, self.n_classes_) + if self.save_init_model: + self.training_model_.save(self.file_path + self.init_file_name + ".keras") + if self.verbose: self.training_model_.summary() diff --git a/aeon/classification/deep_learning/_fcn.py b/aeon/classification/deep_learning/_fcn.py index e77457646b..ea14036b87 100644 --- a/aeon/classification/deep_learning/_fcn.py +++ b/aeon/classification/deep_learning/_fcn.py @@ -1,6 +1,6 @@ -"""Fully Convolutional Network (FCN) for classification.""" +"""Fully Convolutional Network (FCN) classifier.""" -__maintainer__ = [] +__maintainer__ = ["hadifawaz1999"] __all__ = ["FCNClassifier"] import gc @@ -69,6 +69,8 @@ class FCNClassifier(BaseDeepClassifier): Whether or not to save the last model, last epoch trained, using the base class method save_last_model_to_file. + save_init_model : bool, default = False + Whether to save the initialization of the model. 
best_file_name : str, default = "best_model" The name of the file of the best model, if save_best_model is set to False, this parameter @@ -77,6 +79,10 @@ class FCNClassifier(BaseDeepClassifier): The name of the file of the last model, if save_last_model is set to False, this parameter is discarded. + init_file_name : str, default = "init_model" + The name of the file of the init model, if + save_init_model is set to False, + this parameter is discarded. callbacks : keras.callbacks, default = None Notes @@ -112,8 +118,10 @@ def __init__( file_path="./", save_best_model=False, save_last_model=False, + save_init_model=False, best_file_name="best_model", last_file_name="last_model", + init_file_name="init_model", n_epochs=2000, batch_size=16, use_mini_batch_size=False, @@ -145,7 +153,9 @@ def __init__( self.file_path = file_path self.save_best_model = save_best_model self.save_last_model = save_last_model + self.save_init_model = save_init_model self.best_file_name = best_file_name + self.init_file_name = init_file_name self.history = None @@ -238,6 +248,9 @@ def _fit(self, X, y): self.input_shape = X.shape[1:] self.training_model_ = self.build_model(self.input_shape, self.n_classes_) + if self.save_init_model: + self.training_model_.save(self.file_path + self.init_file_name + ".keras") + if self.verbose: self.training_model_.summary() diff --git a/aeon/classification/deep_learning/_inception_time.py b/aeon/classification/deep_learning/_inception_time.py index d5cc3cec56..6b377d565a 100644 --- a/aeon/classification/deep_learning/_inception_time.py +++ b/aeon/classification/deep_learning/_inception_time.py @@ -1,6 +1,6 @@ -"""InceptionTime classifier.""" +"""InceptionTime and Inception classifiers.""" -__maintainer__ = [] +__maintainer__ = ["hadifawaz1999"] __all__ = ["InceptionTimeClassifier"] import gc @@ -103,6 +103,8 @@ class InceptionTimeClassifier(BaseClassifier): Whether or not to save the last model, last epoch trained, using the base class method 
save_last_model_to_file + save_init_model : bool, default = False + Whether to save the initialization of the model. best_file_name : str, default = "best_model" The name of the file of the best model, if save_best_model is set to False, this parameter @@ -111,6 +113,9 @@ class InceptionTimeClassifier(BaseClassifier): The name of the file of the last model, if save_last_model is set to False, this parameter is discarded + init_file_name : str, default = "init_model" + The name of the file of the init model, if save_init_model is set to False, + this parameter is discarded. random_state : int, RandomState instance or None, default=None If `int`, random_state is the seed used by the random number generator; If `RandomState` instance, random_state is the random number generator; @@ -181,8 +186,10 @@ def __init__( file_path="./", save_last_model=False, save_best_model=False, + save_init_model=False, best_file_name="best_model", last_file_name="last_model", + init_file_name="init_model", batch_size=64, use_mini_batch_size=False, n_epochs=1500, @@ -218,8 +225,10 @@ def __init__( self.save_last_model = save_last_model self.save_best_model = save_best_model + self.save_init_model = save_init_model self.best_file_name = best_file_name self.last_file_name = last_file_name + self.init_file_name = init_file_name self.callbacks = callbacks self.random_state = random_state @@ -229,7 +238,7 @@ def __init__( self.metrics = metrics self.optimizer = optimizer - self.classifers_ = [] + self.classifiers_ = [] super().__init__() @@ -247,7 +256,7 @@ def _fit(self, X, y): ------- self : object """ - self.classifers_ = [] + self.classifiers_ = [] rng = check_random_state(self.random_state) for n in range(0, self.n_classifiers): @@ -269,8 +278,10 @@ def _fit(self, X, y): file_path=self.file_path, save_best_model=self.save_best_model, save_last_model=self.save_last_model, + save_init_model=self.save_init_model, best_file_name=self.best_file_name + str(n), last_file_name=self.last_file_name 
+ str(n), + init_file_name=self.init_file_name + str(n), batch_size=self.batch_size, use_mini_batch_size=self.use_mini_batch_size, n_epochs=self.n_epochs, @@ -282,7 +293,7 @@ def _fit(self, X, y): verbose=self.verbose, ) cls.fit(X, y) - self.classifers_.append(cls) + self.classifiers_.append(cls) gc.collect() return self @@ -323,7 +334,7 @@ def _predict_proba(self, X) -> np.ndarray: """ probs = np.zeros((X.shape[0], self.n_classes_)) - for cls in self.classifers_: + for cls in self.classifiers_: probs += cls._predict_proba(X) probs = probs / self.n_classifiers @@ -437,14 +448,19 @@ class IndividualInceptionClassifier(BaseDeepClassifier): Whether or not to save the last model, last epoch trained, using the base class method save_last_model_to_file + save_init_model : bool, default = False + Whether to save the initialization of the model. best_file_name : str, default = "best_model" The name of the file of the best model, if save_best_model is set to False, this parameter - is discarded + is discarded. last_file_name : str, default = "last_model" The name of the file of the last model, if save_last_model is set to False, this parameter - is discarded + is discarded. + init_file_name : str, default = "init_model" + The name of the file of the init model, if save_init_model is set to False, + this parameter is discarded. 
random_state : int, RandomState instance or None, default=None If `int`, random_state is the seed used by the random number generator; If `RandomState` instance, random_state is the random number generator; @@ -504,8 +520,10 @@ def __init__( file_path="./", save_best_model=False, save_last_model=False, + save_init_model=False, best_file_name="best_model", last_file_name="last_model", + init_file_name="init_model", batch_size=64, use_mini_batch_size=False, n_epochs=1500, @@ -538,7 +556,9 @@ def __init__( self.save_best_model = save_best_model self.save_last_model = save_last_model + self.save_init_model = save_init_model self.best_file_name = best_file_name + self.init_file_name = init_file_name self.callbacks = callbacks self.verbose = verbose @@ -652,6 +672,9 @@ def _fit(self, X, y): mini_batch_size = self.batch_size self.training_model_ = self.build_model(self.input_shape, self.n_classes_) + if self.save_init_model: + self.training_model_.save(self.file_path + self.init_file_name + ".keras") + if self.verbose: self.training_model_.summary() diff --git a/aeon/classification/deep_learning/_lite_time.py b/aeon/classification/deep_learning/_lite_time.py index ef2479a949..f16e136d71 100644 --- a/aeon/classification/deep_learning/_lite_time.py +++ b/aeon/classification/deep_learning/_lite_time.py @@ -1,6 +1,6 @@ -"""LITETime classifier.""" +"""LITETime and LITE classifiers.""" -__maintainer__ = [] +__maintainer__ = ["hadifawaz1999"] __all__ = ["LITETimeClassifier"] import gc @@ -60,6 +60,8 @@ class LITETimeClassifier(BaseClassifier): Whether or not to save the last model, last epoch trained, using the base class method save_last_model_to_file + save_init_model : bool, default = False + Whether to save the initialization of the model. 
best_file_name : str, default = "best_model" The name of the file of the best model, if save_best_model is set to False, this parameter @@ -68,6 +70,9 @@ class LITETimeClassifier(BaseClassifier): The name of the file of the last model, if save_last_model is set to False, this parameter is discarded + init_file_name : str, default = "init_model" + The name of the file of the init model, if save_init_model is set to False, + this parameter is discarded. random_state : int, RandomState instance or None, default=None If `int`, random_state is the seed used by the random number generator; If `RandomState` instance, random_state is the random number generator; @@ -120,8 +125,10 @@ def __init__( file_path="./", save_last_model=False, save_best_model=False, + save_init_model=False, best_file_name="best_model", last_file_name="last_model", + init_file_name="init_model", batch_size=64, use_mini_batch_size=False, n_epochs=1500, @@ -146,8 +153,10 @@ def __init__( self.save_last_model = save_last_model self.save_best_model = save_best_model + self.save_init_model = save_init_model self.best_file_name = best_file_name self.last_file_name = last_file_name + self.init_file_name = init_file_name self.callbacks = callbacks self.random_state = random_state @@ -157,7 +166,7 @@ def __init__( self.metrics = metrics self.optimizer = optimizer - self.classifers_ = [] + self.classifiers_ = [] super().__init__() @@ -175,7 +184,7 @@ def _fit(self, X, y): ------- self : object """ - self.classifers_ = [] + self.classifiers_ = [] rng = check_random_state(self.random_state) for n in range(0, self.n_classifiers): @@ -185,8 +194,10 @@ def _fit(self, X, y): file_path=self.file_path, save_best_model=self.save_best_model, save_last_model=self.save_last_model, + save_init_model=self.save_init_model, best_file_name=self.best_file_name + str(n), last_file_name=self.last_file_name + str(n), + init_file_name=self.init_file_name + str(n), batch_size=self.batch_size, 
use_mini_batch_size=self.use_mini_batch_size, n_epochs=self.n_epochs, @@ -198,7 +209,7 @@ def _fit(self, X, y): verbose=self.verbose, ) cls.fit(X, y) - self.classifers_.append(cls) + self.classifiers_.append(cls) gc.collect() return self @@ -239,7 +250,7 @@ def _predict_proba(self, X) -> np.ndarray: """ probs = np.zeros((X.shape[0], self.n_classes_)) - for cls in self.classifers_: + for cls in self.classifiers_: probs += cls._predict_proba(X) probs = probs / self.n_classifiers @@ -318,6 +329,8 @@ class IndividualLITEClassifier(BaseDeepClassifier): Whether or not to save the last model, last epoch trained, using the base class method save_last_model_to_file + save_init_model : bool, default = False + Whether to save the initialization of the model. best_file_name : str, default = "best_model" The name of the file of the best model, if save_best_model is set to False, this parameter @@ -326,6 +339,9 @@ class IndividualLITEClassifier(BaseDeepClassifier): The name of the file of the last model, if save_last_model is set to False, this parameter is discarded + init_file_name : str, default = "init_model" + The name of the file of the init model, if save_init_model is set to False, + this parameter is discarded. 
random_state : int, RandomState instance or None, default=None If `int`, random_state is the seed used by the random number generator; If `RandomState` instance, random_state is the random number generator; @@ -369,8 +385,10 @@ def __init__( file_path="./", save_best_model=False, save_last_model=False, + save_init_model=False, best_file_name="best_model", last_file_name="last_model", + init_file_name="init_model", batch_size=64, use_mini_batch_size=False, n_epochs=1500, @@ -393,7 +411,9 @@ def __init__( self.save_best_model = save_best_model self.save_last_model = save_last_model + self.save_init_model = save_init_model self.best_file_name = best_file_name + self.init_file_name = init_file_name self.callbacks = callbacks self.verbose = verbose @@ -495,6 +515,9 @@ def _fit(self, X, y): mini_batch_size = self.batch_size self.training_model_ = self.build_model(self.input_shape, self.n_classes_) + if self.save_init_model: + self.training_model_.save(self.file_path + self.init_file_name + ".keras") + if self.verbose: self.training_model_.summary() diff --git a/aeon/classification/deep_learning/_mlp.py b/aeon/classification/deep_learning/_mlp.py index 1624aa9904..48eb8f711e 100644 --- a/aeon/classification/deep_learning/_mlp.py +++ b/aeon/classification/deep_learning/_mlp.py @@ -1,6 +1,6 @@ -"""Multi Layer Perceptron Network (MLP) for classification.""" +"""Multi Layer Perceptron Network (MLP) classifier.""" -__maintainer__ = [] +__maintainer__ = ["hadifawaz1999"] __all__ = ["MLPClassifier"] import gc @@ -51,6 +51,8 @@ class MLPClassifier(BaseDeepClassifier): Whether or not to save the last model, last epoch trained, using the base class method save_last_model_to_file + save_init_model : bool, default = False + Whether to save the initialization of the model. 
best_file_name : str, default = "best_model" The name of the file of the best model, if save_best_model is set to False, this parameter @@ -59,6 +61,9 @@ class MLPClassifier(BaseDeepClassifier): The name of the file of the last model, if save_last_model is set to False, this parameter is discarded + init_file_name : str, default = "init_model" + The name of the file of the init model, if save_init_model is set to False, + this parameter is discarded. optimizer : keras.optimizer, default=keras.optimizers.Adadelta(), metrics : list of strings, default=["accuracy"], activation : string or a tf callable, default="sigmoid" @@ -101,8 +106,10 @@ def __init__( file_path="./", save_best_model=False, save_last_model=False, + save_init_model=False, best_file_name="best_model", last_file_name="last_model", + init_file_name="init_model", random_state=None, activation="sigmoid", use_bias=True, @@ -119,7 +126,9 @@ def __init__( self.file_path = file_path self.save_best_model = save_best_model self.save_last_model = save_last_model + self.save_init_model = save_init_model self.best_file_name = best_file_name + self.init_file_name = init_file_name self.optimizer = optimizer self.history = None @@ -204,6 +213,9 @@ def _fit(self, X, y): self.input_shape = X.shape[1:] self.training_model_ = self.build_model(self.input_shape, self.n_classes_) + if self.save_init_model: + self.training_model_.save(self.file_path + self.init_file_name + ".keras") + if self.verbose: self.training_model_.summary() diff --git a/aeon/classification/deep_learning/_resnet.py b/aeon/classification/deep_learning/_resnet.py index ccd6d11f0d..963faec26b 100644 --- a/aeon/classification/deep_learning/_resnet.py +++ b/aeon/classification/deep_learning/_resnet.py @@ -1,6 +1,6 @@ -"""Residual Network (ResNet) for classification.""" +"""Residual Network (ResNet) classifier.""" -__maintainer__ = [] +__maintainer__ = ["hadifawaz1999"] __all__ = ["ResNetClassifier"] import gc @@ -75,12 +75,17 @@ class 
ResNetClassifier(BaseDeepClassifier): save_last_model : bool, default = False Whether or not to save the last model, last epoch trained, using the base class method save_last_model_to_file. + save_init_model : bool, default = False + Whether to save the initialization of the model. best_file_name : str, default = "best_model" The name of the file of the best model, if save_best_model is set to False, this parameter is discarded. last_file_name : str, default = "last_model" The name of the file of the last model, if save_last_model is set to False, this parameter is discarded. + init_file_name : str, default = "init_model" + The name of the file of the init model, if save_init_model is set to False, + this parameter is discarded. verbose : boolean, default = False whether to output extra information loss : string, default = "mean_squared_error" @@ -131,8 +136,10 @@ def __init__( file_path="./", save_best_model=False, save_last_model=False, + save_init_model=False, best_file_name="best_model", last_file_name="last_model", + init_file_name="init_model", optimizer=None, ): self.n_residual_blocks = n_residual_blocks @@ -153,7 +160,9 @@ def __init__( self.file_path = file_path self.save_best_model = save_best_model self.save_last_model = save_last_model + self.save_init_model = save_init_model self.best_file_name = best_file_name + self.init_file_name = init_file_name self.optimizer = optimizer self.history = None @@ -250,6 +259,9 @@ def _fit(self, X, y): self.input_shape = X.shape[1:] self.training_model_ = self.build_model(self.input_shape, self.n_classes_) + if self.save_init_model: + self.training_model_.save(self.file_path + self.init_file_name + ".keras") + if self.verbose: self.training_model_.summary() diff --git a/aeon/classification/deep_learning/_tapnet.py b/aeon/classification/deep_learning/_tapnet.py index 4bfe51e4c2..2ad048827b 100644 --- a/aeon/classification/deep_learning/_tapnet.py +++ b/aeon/classification/deep_learning/_tapnet.py @@ -1,6 +1,6 @@ 
-"""Time Convolutional Neural Network (CNN) for classification.""" +"""Time series Attentional Prototype Network (TapNet) Classifier.""" -__maintainer__ = [] +__maintainer__ = ["hadifawaz1999"] __all__ = [ "TapNetClassifier", ] diff --git a/aeon/classification/deep_learning/base.py b/aeon/classification/deep_learning/base.py index 45974298a0..837945eead 100644 --- a/aeon/classification/deep_learning/base.py +++ b/aeon/classification/deep_learning/base.py @@ -5,7 +5,7 @@ because we can generalise tags, _predict and _predict_proba """ -__maintainer__ = [] +__maintainer__ = ["hadifawaz1999"] __all__ = ["BaseDeepClassifier"] from abc import ABC, abstractmethod diff --git a/aeon/classification/deep_learning/tests/test_deep_classifier_base.py b/aeon/classification/deep_learning/tests/test_deep_classifier_base.py index 203c256eef..e0c4157c7a 100644 --- a/aeon/classification/deep_learning/tests/test_deep_classifier_base.py +++ b/aeon/classification/deep_learning/tests/test_deep_classifier_base.py @@ -10,7 +10,7 @@ from aeon.testing.data_generation import make_example_2d_numpy_collection from aeon.utils.validation._dependencies import _check_soft_dependencies -__maintainer__ = [] +__maintainer__ = ["hadifawaz1999"] class _DummyDeepClassifier(BaseDeepClassifier): diff --git a/aeon/classification/deep_learning/tests/test_random_state_deep_learning.py b/aeon/classification/deep_learning/tests/test_random_state_deep_learning.py index 4647de8b00..b426169cce 100644 --- a/aeon/classification/deep_learning/tests/test_random_state_deep_learning.py +++ b/aeon/classification/deep_learning/tests/test_random_state_deep_learning.py @@ -12,35 +12,37 @@ __maintainer__ = ["hadifawaz1999"] +_deep_cls_classes = [ + member[1] for member in inspect.getmembers(deep_learning, inspect.isclass) +] + + @pytest.mark.skipif( not _check_soft_dependencies(["tensorflow"], severity="none"), reason="skip test if required soft dependency not available", ) -def test_random_state_deep_learning_cls(): 
+@pytest.mark.parametrize("deep_cls", _deep_cls_classes) +def test_random_state_deep_learning_cls(deep_cls): """Test Deep Classifier seeding.""" - random_state = 42 - - X, y = make_example_3d_numpy(random_state=random_state) - - deep_cls_classes = [ - member[1] for member in inspect.getmembers(deep_learning, inspect.isclass) - ] - - for i in range(len(deep_cls_classes)): - if ( - "BaseDeepClassifier" in str(deep_cls_classes[i]) - or "InceptionTimeClassifier" in str(deep_cls_classes[i]) - or "LITETimeClassifier" in str(deep_cls_classes[i]) - or "TapNetClassifier" in str(deep_cls_classes[i]) - ): - continue - - deep_cls1 = deep_cls_classes[i](random_state=random_state, n_epochs=4) + if not ( + deep_cls.__name__ + in [ + "BaseDeepClassifier", + "InceptionTimeClassifier", + "LITETimeClassifier", + "TapNetClassifier", + ] + ): + random_state = 42 + + X, y = make_example_3d_numpy(random_state=random_state) + + deep_cls1 = deep_cls(random_state=random_state, n_epochs=4) deep_cls1.fit(X, y) layers1 = deep_cls1.training_model_.layers[1:] - deep_cls2 = deep_cls_classes[i](random_state=random_state, n_epochs=4) + deep_cls2 = deep_cls(random_state=random_state, n_epochs=4) deep_cls2.fit(X, y) layers2 = deep_cls2.training_model_.layers[1:] diff --git a/aeon/classification/deep_learning/tests/test_saving_loading_deep_learning_cls.py b/aeon/classification/deep_learning/tests/test_saving_loading_deep_learning_cls.py new file mode 100644 index 0000000000..d90393a369 --- /dev/null +++ b/aeon/classification/deep_learning/tests/test_saving_loading_deep_learning_cls.py @@ -0,0 +1,83 @@ +"""Unit tests for classifiers deep learners save/load functionalities.""" + +import inspect +import os +import tempfile +import time + +import numpy as np +import pytest + +from aeon.classification import deep_learning +from aeon.testing.data_generation import make_example_3d_numpy +from aeon.utils.validation._dependencies import _check_soft_dependencies + +__maintainer__ = ["hadifawaz1999"] + + 
+_deep_cls_classes = [ + member[1] for member in inspect.getmembers(deep_learning, inspect.isclass) +] + + +@pytest.mark.skipif( + not _check_soft_dependencies(["tensorflow"], severity="none"), + reason="skip test if required soft dependency not available", +) +@pytest.mark.parametrize("deep_cls", _deep_cls_classes) +def test_saving_loading_deep_learning_cls(deep_cls): + """Test Deep Classifier saving.""" + with tempfile.TemporaryDirectory() as tmp: + if not ( + deep_cls.__name__ + in [ + "BaseDeepClassifier", + "InceptionTimeClassifier", + "LITETimeClassifier", + "TapNetClassifier", + ] + ): + if tmp[-1] != "/": + tmp = tmp + "/" + curr_time = str(time.time_ns()) + last_file_name = curr_time + "last" + best_file_name = curr_time + "best" + init_file_name = curr_time + "init" + + X, y = make_example_3d_numpy() + + deep_cls_train = deep_cls( + n_epochs=2, + save_best_model=True, + save_last_model=True, + save_init_model=True, + best_file_name=best_file_name, + last_file_name=last_file_name, + init_file_name=init_file_name, + file_path=tmp, + ) + deep_cls_train.fit(X, y) + + deep_cls_best = deep_cls() + deep_cls_best.load_model( + model_path=os.path.join(tmp, best_file_name + ".keras"), + classes=np.unique(y), + ) + ypred_best = deep_cls_best.predict(X) + assert len(ypred_best) == len(y) + + deep_cls_last = deep_cls() + deep_cls_last.load_model( + model_path=os.path.join(tmp, last_file_name + ".keras"), + classes=np.unique(y), + ) + ypred_last = deep_cls_last.predict(X) + assert len(ypred_last) == len(y) + + deep_cls_init = deep_cls() + deep_cls_init.load_model( + model_path=os.path.join(tmp, init_file_name + ".keras"), + classes=np.unique(y), + ) + ypred_init = deep_cls_init.predict(X) + assert len(ypred_init) == len(y) diff --git a/aeon/classification/dictionary_based/__init__.py b/aeon/classification/dictionary_based/__init__.py index 832eae10a5..29f5970edc 100644 --- a/aeon/classification/dictionary_based/__init__.py +++ 
b/aeon/classification/dictionary_based/__init__.py @@ -10,10 +10,12 @@ "WEASEL_V2", "MUSE", "REDCOMETS", + "MrSQMClassifier", ] from aeon.classification.dictionary_based._boss import BOSSEnsemble, IndividualBOSS from aeon.classification.dictionary_based._cboss import ContractableBOSS +from aeon.classification.dictionary_based._mrsqm import MrSQMClassifier from aeon.classification.dictionary_based._muse import MUSE from aeon.classification.dictionary_based._redcomets import REDCOMETS from aeon.classification.dictionary_based._tde import ( diff --git a/aeon/classification/shapelet_based/_mrsqm.py b/aeon/classification/dictionary_based/_mrsqm.py similarity index 92% rename from aeon/classification/shapelet_based/_mrsqm.py rename to aeon/classification/dictionary_based/_mrsqm.py index a3b1d4cc57..6c9304a144 100644 --- a/aeon/classification/shapelet_based/_mrsqm.py +++ b/aeon/classification/dictionary_based/_mrsqm.py @@ -1,13 +1,12 @@ """Multiple Representations Sequence Miner (MrSQM) Classifier.""" -__maintainer__ = [] +__maintainer__ = ["TonyBagnall"] __all__ = ["MrSQMClassifier"] from typing import List, Union import numpy as np import pandas as pd -from deprecated.sphinx import deprecated from aeon.classification import BaseClassifier @@ -31,13 +30,6 @@ def _from_numpy3d_to_nested_dataframe(X): return df -# TODO: Move in v0.11.0 -@deprecated( - version="0.10.0", - reason="MrSQMClassifier will be moved to the dictionary_based package in version " - "0.11.0 at the request of the author.", - category=FutureWarning, -) class MrSQMClassifier(BaseClassifier): """ Multiple Representations Sequence Miner (MrSQM) classifier. @@ -46,7 +38,7 @@ class MrSQMClassifier(BaseClassifier): MrSQM is not included in all extras as it requires gcc and fftw (http://www.fftw.org/index.html) to be installed for Windows and some Linux OS. 
- Overview: MrSQM is an efficient time series classifier utilising symbolic + Overview: MrSQM is a time series classifier utilising symbolic representations of time series. MrSQM implements four different feature selection strategies (R,S,RS,SR) that can quickly select subsequences from multiple symbolic representations of time series data. @@ -92,7 +84,7 @@ class MrSQMClassifier(BaseClassifier): Examples -------- - >>> from aeon.classification.shapelet_based import MrSQMClassifier + >>> from aeon.classification.dictionary_based import MrSQMClassifier >>> from aeon.testing.data_generation import make_example_3d_numpy >>> X, y = make_example_3d_numpy(random_state=0) >>> clf = MrSQMClassifier(random_state=0) # doctest: +SKIP @@ -102,7 +94,7 @@ class MrSQMClassifier(BaseClassifier): """ _tags = { - "X_inner_type": "numpy3D", # we don't like this, but it's the only input! + "X_inner_type": "numpy3D", "algorithm_type": "shapelet", "cant-pickle": True, "python_dependencies": "mrsqm", diff --git a/aeon/classification/shapelet_based/__init__.py b/aeon/classification/shapelet_based/__init__.py index 28702fe52f..3b76ddddec 100644 --- a/aeon/classification/shapelet_based/__init__.py +++ b/aeon/classification/shapelet_based/__init__.py @@ -1,7 +1,6 @@ """Shapelet based time series classifiers.""" __all__ = [ - "MrSQMClassifier", "ShapeletTransformClassifier", "RDSTClassifier", "SASTClassifier", @@ -10,7 +9,6 @@ ] from aeon.classification.shapelet_based._ls import LearningShapeletClassifier -from aeon.classification.shapelet_based._mrsqm import MrSQMClassifier from aeon.classification.shapelet_based._rdst import RDSTClassifier from aeon.classification.shapelet_based._rsast import RSASTClassifier from aeon.classification.shapelet_based._sast import SASTClassifier diff --git a/aeon/clustering/_k_means.py b/aeon/clustering/_k_means.py index c7fadd2f8e..8c46a5df9c 100644 --- a/aeon/clustering/_k_means.py +++ b/aeon/clustering/_k_means.py @@ -326,6 +326,9 @@ def _check_params(self, 
X: np.ndarray) -> None: # Invalid distance passed for ba so default to dba self._average_params["distance"] = "dtw" + if "random_state" not in self._average_params: + self._average_params["random_state"] = self._random_state + self._averaging_method = _resolve_average_callable(self.averaging_method) if self.n_clusters > X.shape[0]: diff --git a/aeon/clustering/deep_learning/_ae_fcn.py b/aeon/clustering/deep_learning/_ae_fcn.py index 322a614a01..a9f33751ce 100644 --- a/aeon/clustering/deep_learning/_ae_fcn.py +++ b/aeon/clustering/deep_learning/_ae_fcn.py @@ -1,6 +1,6 @@ """Deep Learning Auto-Encoder using FCN Network.""" -__maintainer__ = [] +__maintainer__ = ["hadifawaz1999"] __all__ = ["AEFCNClusterer"] import gc diff --git a/aeon/clustering/deep_learning/_ae_resnet.py b/aeon/clustering/deep_learning/_ae_resnet.py index 19ab56549c..2d1ccf13e0 100644 --- a/aeon/clustering/deep_learning/_ae_resnet.py +++ b/aeon/clustering/deep_learning/_ae_resnet.py @@ -1,6 +1,6 @@ """Residual Network (ResNet) for clustering.""" -__maintainer__ = [] +__maintainer__ = ["hadifawaz1999"] __all__ = ["AEResNetClusterer"] import gc diff --git a/aeon/clustering/deep_learning/tests/test_deep_clusterer_base.py b/aeon/clustering/deep_learning/tests/test_deep_clusterer_base.py index cc8e952e85..421aa69a85 100644 --- a/aeon/clustering/deep_learning/tests/test_deep_clusterer_base.py +++ b/aeon/clustering/deep_learning/tests/test_deep_clusterer_base.py @@ -9,7 +9,7 @@ from aeon.testing.mock_estimators import MockDeepClusterer from aeon.utils.validation._dependencies import _check_soft_dependencies -__maintainer__ = [] +__maintainer__ = ["hadifawaz1999"] @pytest.mark.skipif( diff --git a/aeon/datasets/_data_loaders.py b/aeon/datasets/_data_loaders.py index 4f16e02eea..19f83ff52b 100644 --- a/aeon/datasets/_data_loaders.py +++ b/aeon/datasets/_data_loaders.py @@ -962,7 +962,7 @@ def load_forecasting(name, extract_path=None, return_metadata=False): extract_path : optional (default = None) Path of 
the location for the data file. If none, data is written to os.path.dirname(__file__)/data/ - return_metadata : boolean, default = True + return_metadata : boolean, default = False If True, returns a tuple (data, metadata) Returns diff --git a/aeon/forecasting/compose/_reduce.py b/aeon/forecasting/compose/_reduce.py index 05335e5217..8a6a1551cc 100644 --- a/aeon/forecasting/compose/_reduce.py +++ b/aeon/forecasting/compose/_reduce.py @@ -27,7 +27,7 @@ from aeon.forecasting.base._fh import _index_range from aeon.regression.base import BaseRegressor from aeon.transformations._legacy.compose import FeatureUnion -from aeon.transformations.summarize import WindowSummarizer +from aeon.transformations._legacy.summarize import WindowSummarizer from aeon.utils.datetime import _shift from aeon.utils.index_functions import get_time_index from aeon.utils.sklearn import is_sklearn_regressor @@ -1839,7 +1839,7 @@ def _predict(self, X=None, fh=None): def _fit_shifted(self, y, X=None, fh=None): """Fit to training data.""" - from aeon.transformations.lag import Lag, ReducerTransform + from aeon.transformations._legacy.lag import Lag, ReducerTransform impute_method = self.impute_method lags = self._lags @@ -1918,7 +1918,7 @@ def _predict_shifted(self, fh=None, X=None): def _fit_concurrent(self, y, X=None, fh=None): """Fit to training data.""" - from aeon.transformations.lag import Lag, ReducerTransform + from aeon.transformations._legacy.lag import Lag, ReducerTransform impute_method = self.impute_method @@ -1983,7 +1983,7 @@ def _fit_concurrent(self, y, X=None, fh=None): def _predict_concurrent(self, X=None, fh=None): """Fit to training data.""" - from aeon.transformations.lag import Lag + from aeon.transformations._legacy.lag import Lag if X is not None and self._X is not None: X_pool = X.combine_first(self._X) @@ -2178,8 +2178,8 @@ def _fit(self, y, X=None, fh=None): ------- self : reference to self """ - from aeon.transformations._legacy.impute import Imputer - from 
aeon.transformations.lag import Lag + from aeon.transformations._legacy.lag import Lag + from aeon.transformations.impute import Imputer impute_method = self.impute_method @@ -2265,8 +2265,8 @@ def _predict(self, X=None, fh=None): def _predict_out_of_sample(self, X_pool, fh): """Recursive reducer: predict out of sample (ahead of cutoff).""" # very similar to _predict_concurrent of DirectReductionForecaster - refactor? - from aeon.transformations._legacy.impute import Imputer - from aeon.transformations.lag import Lag + from aeon.transformations._legacy.lag import Lag + from aeon.transformations.impute import Imputer fh_idx = self._get_expected_pred_idx(fh=fh) y_cols = self._y.columns @@ -2332,8 +2332,8 @@ def _predict_out_of_sample(self, X_pool, fh): def _predict_in_sample(self, X_pool, fh): """Recursive reducer: predict out of sample (in past of of cutoff).""" - from aeon.transformations._legacy.impute import Imputer - from aeon.transformations.lag import Lag + from aeon.transformations._legacy.lag import Lag + from aeon.transformations.impute import Imputer fh_idx = self._get_expected_pred_idx(fh=fh) y_cols = self._y.columns diff --git a/aeon/forecasting/compose/tests/test_pipeline.py b/aeon/forecasting/compose/tests/test_pipeline.py index 82e75b36ce..d0bc059a4c 100644 --- a/aeon/forecasting/compose/tests/test_pipeline.py +++ b/aeon/forecasting/compose/tests/test_pipeline.py @@ -19,10 +19,10 @@ from aeon.testing.mock_estimators import MockForecaster, MockTransformer from aeon.testing.utils.estimator_checks import _assert_array_almost_equal from aeon.transformations._legacy.adapt import TabularToSeriesAdaptor -from aeon.transformations._legacy.impute import Imputer +from aeon.transformations._legacy.outlier_detection import HampelFilter from aeon.transformations.detrend import Detrender from aeon.transformations.hierarchical.aggregate import Aggregator -from aeon.transformations.outlier_detection import HampelFilter +from aeon.transformations.impute import Imputer 
from aeon.utils.index_functions import get_window from aeon.utils.validation._dependencies import _check_soft_dependencies diff --git a/aeon/forecasting/compose/tests/test_reduce_global.py b/aeon/forecasting/compose/tests/test_reduce_global.py deleted file mode 100644 index 042f6a9cfb..0000000000 --- a/aeon/forecasting/compose/tests/test_reduce_global.py +++ /dev/null @@ -1,306 +0,0 @@ -"""Test extraction of features across (shifted) windows.""" - -__maintainer__ = [] - -from aeon.utils.validation._dependencies import _check_soft_dependencies - -# HistGradientBoostingRegressor requires experimental flag in old sklearn versions -if _check_soft_dependencies("sklearn<1.0", severity="none"): - from sklearn.experimental import enable_hist_gradient_boosting # noqa - -import numpy as np -import pandas as pd -import pytest -from sklearn.ensemble import RandomForestRegressor -from sklearn.linear_model import LinearRegression -from sklearn.pipeline import make_pipeline - -from aeon.datasets import load_airline -from aeon.forecasting.base import ForecastingHorizon -from aeon.forecasting.compose import make_reduction -from aeon.forecasting.model_selection import temporal_train_test_split -from aeon.performance_metrics.forecasting import mean_absolute_percentage_error -from aeon.testing.data_generation import _make_hierarchical -from aeon.testing.data_generation._legacy import get_examples -from aeon.transformations.summarize import WindowSummarizer - -# Load data that will be the basis of tests -y = load_airline() -y_multi = get_examples("pd-multiindex")[0] - -# y Train will be univariate data set -y_train, y_test = temporal_train_test_split(y) - -# y_int = y.copy() -# y_int.index = [i for i in range(len(y_int))] - -# y_train_int, y_test_int = temporal_train_test_split(y_int) - -# Create traina nd test panel sample data -mi = pd.MultiIndex.from_product([[0], y_train.index], names=["instances", "timepoints"]) -y_group1 = pd.DataFrame(y_train.values, index=mi, columns=["y"]) - 
-mi = pd.MultiIndex.from_product([[1], y_train.index], names=["instances", "timepoints"]) -y_group2 = pd.DataFrame(y_train.values, index=mi, columns=["y"]) - -y_train_grp = pd.concat([y_group1, y_group2]) - -mi = pd.MultiIndex.from_product([[0], y_test.index], names=["instances", "timepoints"]) -y_group1 = pd.DataFrame(y_test.values, index=mi, columns=["y"]) - -mi = pd.MultiIndex.from_product([[1], y_test.index], names=["instances", "timepoints"]) -y_group2 = pd.DataFrame(y_test.values, index=mi, columns=["y"]) - -y_test_grp = pd.concat([y_group1, y_group2]) - -# Get hierachical data -y_train_hier = get_examples("pd_multiindex_hier")[0] - -# Create unbalanced hierachical data -X = y_train_hier.reset_index().copy() -X = X[~((X["bar"] == 2) & (X["foo"] == "b"))] -X = X[["foo", "bar"]].drop_duplicates() - -time_names = y_train.index.names[-1] -timeframe = y_train.index.to_frame() - -X2 = X.merge(timeframe, how="cross") - -freq_inferred = y_train.index.freq - -x_names = X.columns -if not isinstance(x_names, list): - x_names = x_names.to_list() - -y_train_reset = y_train.reset_index() - -X3 = X2.merge(y_train_reset, on="Period") - -freq_inferred = y_train.index.freq - -y_train_hier_unequal = X3.groupby(x_names, as_index=True).apply( - lambda df: df.drop(x_names, axis=1).set_index(time_names).asfreq(freq_inferred) -) - -# Create integer index data -y_numeric = y_train.copy() -y_numeric.index = pd.to_numeric(y_numeric.index) - - -# Get different WindowSummarizer functions -kwargs = WindowSummarizer.get_test_params()[0] -kwargs_alternames = WindowSummarizer.get_test_params()[1] -kwargs_variant = WindowSummarizer.get_test_params()[2] - - -def check_eval(test_input, expected): - """Test which columns are returned for different arguments. - - For a detailed description what these arguments do, - and how theyinteract see docstring of DateTimeFeatures. 
- """ - if test_input is not None: - assert len(test_input) == len(expected) - assert all([a == b for a, b in zip(test_input, expected)]) - else: - assert expected is None - - -@pytest.mark.parametrize( - "y, index_names", - [ - ( - y_train_grp, - ["instances", "timepoints"], - ), - ( - y_train, - [None], - ), - ( - y_numeric, - [None], - ), - ( - y_train_hier_unequal, - ["foo", "bar", "Period"], - ), - ], -) -def test_recursive_reduction(y, index_names): - """Test index columns match input.""" - regressor = make_pipeline( - RandomForestRegressor(random_state=1), - ) - - forecaster2 = make_reduction( - regressor, - scitype="tabular-regressor", - transformers=[WindowSummarizer(**kwargs, n_jobs=1)], - window_length=None, - strategy="recursive", - pooling="global", - ) - - forecaster2.fit(y, fh=[1, 2]) - y_pred = forecaster2.predict(fh=[1, 2, 12]) - check_eval(y_pred.index.names, index_names) - - -@pytest.mark.parametrize( - "y, index_names", - [ - ( - y_train_grp, - ["instances", "timepoints"], - ), - ( - y_train, - [None], - ), - ( - y_numeric, - [None], - ), - ( - y_train_hier_unequal, - ["foo", "bar", "Period"], - ), - ], -) -def test_direct_reduction(y, index_names): - """Test index columns match input.""" - regressor = make_pipeline( - RandomForestRegressor(random_state=1), - ) - - forecaster2 = make_reduction( - regressor, - transformers=[WindowSummarizer(**kwargs, n_jobs=1)], - window_length=None, - strategy="recursive", - pooling="global", - ) - - forecaster2.fit(y, fh=[1, 2]) - y_pred = forecaster2.predict(fh=[1, 2, 12]) - check_eval(y_pred.index.names, index_names) - - -@pytest.mark.parametrize( - "y, index_names", - [ - ( - y_train_grp, - ["instances", "timepoints"], - ), - ( - y_train, - [None], - ), - ( - y_numeric, - [None], - ), - ( - y_train_hier_unequal, - ["foo", "bar", "Period"], - ), - ], -) -def test_list_reduction(y, index_names): - """Test index columns match input.""" - regressor = make_pipeline( - RandomForestRegressor(random_state=1), - ) - 
- forecaster2 = make_reduction( - regressor, - transformers=[WindowSummarizer(**kwargs), WindowSummarizer(**kwargs_variant)], - window_length=None, - strategy="recursive", - pooling="global", - ) - - forecaster2.fit(y, fh=[1, 2, 12]) - y_pred = forecaster2.predict(fh=[1, 2, 12]) - check_eval(y_pred.index.names, index_names) - - -def test_equality_transfo_nontranso(): - """Test that recursive reducers return same results for global / local forecasts.""" - y = load_airline()[:36] - y_train, y_test = temporal_train_test_split(y, test_size=12) - fh = ForecastingHorizon(y_test.index, is_relative=False) - - lag_vec = list(range(12, 0, -1)) - kwargs = { - "lag_feature": { - "lag": lag_vec, - } - } - - regressor = RandomForestRegressor(random_state=42) - - forecaster = make_reduction(regressor, window_length=12, strategy="recursive") - forecaster.fit(y_train) - y_pred = forecaster.predict(fh) - recursive_without = mean_absolute_percentage_error(y_test, y_pred, symmetric=False) - forecaster = make_reduction( - regressor, - window_length=None, - strategy="recursive", - transformers=[WindowSummarizer(**kwargs, n_jobs=1)], - pooling="global", - ) - - forecaster.fit(y_train) - y_pred = forecaster.predict(fh) - recursive_global = mean_absolute_percentage_error(y_test, y_pred, symmetric=False) - np.testing.assert_almost_equal(recursive_without, recursive_global) - - -def test_nofreq_pass(): - """Test that recursive reducers return same results with / without freq given.""" - regressor = make_pipeline( - LinearRegression(), - ) - - kwargs = { - "lag_feature": { - "lag": [1], - } - } - - forecaster_global = make_reduction( - regressor, - scitype="tabular-regressor", - transformers=[WindowSummarizer(**kwargs, n_jobs=1, truncate="bfill")], - window_length=None, - strategy="recursive", - pooling="global", - ) - - forecaster_global_freq = make_reduction( - regressor, - scitype="tabular-regressor", - transformers=[WindowSummarizer(**kwargs, n_jobs=1, truncate="bfill")], - 
window_length=None, - strategy="recursive", - pooling="global", - ) - - y = _make_hierarchical( - hierarchy_levels=(100,), min_timepoints=1000, max_timepoints=1000 - ) - - y_no_freq = y.reset_index().set_index(["h0", "time"]) - - forecaster_global.fit(y) - forecaster_global_freq.fit(y_no_freq) - - y_pred_global = forecaster_global.predict(fh=[1, 2]) - y_pred_nofreq = forecaster_global_freq.predict(fh=[1, 2]) - np.testing.assert_almost_equal( - y_pred_global["c0"].values, y_pred_nofreq["c0"].values - ) diff --git a/aeon/networks/__init__.py b/aeon/networks/__init__.py index b7ec3ca75a..df56152dea 100644 --- a/aeon/networks/__init__.py +++ b/aeon/networks/__init__.py @@ -4,6 +4,7 @@ "BaseDeepNetwork", "BaseDeepLearningNetwork", "CNNNetwork", + "TimeCNNNetwork", "EncoderNetwork", "FCNNetwork", "InceptionNetwork", @@ -18,7 +19,7 @@ from aeon.networks._ae_bgru import AEBiGRUNetwork from aeon.networks._ae_fcn import AEFCNNetwork from aeon.networks._ae_resnet import AEResNetNetwork -from aeon.networks._cnn import CNNNetwork +from aeon.networks._cnn import CNNNetwork, TimeCNNNetwork from aeon.networks._encoder import EncoderNetwork from aeon.networks._fcn import FCNNetwork from aeon.networks._inception import InceptionNetwork diff --git a/aeon/networks/_ae_bgru.py b/aeon/networks/_ae_bgru.py index 7fc2616f5f..5e2e78a71e 100644 --- a/aeon/networks/_ae_bgru.py +++ b/aeon/networks/_ae_bgru.py @@ -1,4 +1,6 @@ -"""Implement Auto-Encoder based on Bidirectional GRUs.""" +"""Auto-Encoder using Bidirectional GRU Network (AEBiGRUNetwork).""" + +__maintainer__ = ["aadya940", "hadifawaz1999"] from aeon.networks.base import BaseDeepLearningNetwork diff --git a/aeon/networks/_ae_fcn.py b/aeon/networks/_ae_fcn.py index 37bbdf1aa5..1cef7d5c31 100644 --- a/aeon/networks/_ae_fcn.py +++ b/aeon/networks/_ae_fcn.py @@ -1,6 +1,6 @@ """Auto-Encoder using Fully Convolutional Network (FCN).""" -__maintainer__ = [] +__maintainer__ = ["hadifawaz1999"] import numpy as np diff --git 
a/aeon/networks/_ae_resnet.py b/aeon/networks/_ae_resnet.py index 3b817efe8e..4578dd8092 100644 --- a/aeon/networks/_ae_resnet.py +++ b/aeon/networks/_ae_resnet.py @@ -1,4 +1,7 @@ -"""Residual Network (ResNet) (minus the final output layer).""" +"""Auto-Encoder using Residual Network (AEResNetNetwork).""" + +__maintainer__ = ["hadifawaz1999"] + import numpy as np diff --git a/aeon/networks/_cnn.py b/aeon/networks/_cnn.py index cb40b8f373..aafb9875c4 100644 --- a/aeon/networks/_cnn.py +++ b/aeon/networks/_cnn.py @@ -1,10 +1,19 @@ -"""Time Convolutional Neural Network (CNN) (minus the final output layer).""" +"""Time Convolutional Neural Network (TimeCNNNetwork).""" -__maintainer__ = [] +__maintainer__ = ["hadifawaz1999"] + +from deprecated.sphinx import deprecated from aeon.networks.base import BaseDeepLearningNetwork +# TODO: remove v0.12.0 +@deprecated( + version="0.10.0", + reason="CNNNetwork has been renamed to TimeCNNNetwork " + "and will be removed in 0.12.0.", + category=FutureWarning, +) class CNNNetwork(BaseDeepLearningNetwork): """Establish the network structure for a CNN. @@ -167,3 +176,167 @@ def build_network(self, input_shape, **kwargs): flatten_layer = tf.keras.layers.Flatten()(conv) return input_layer, flatten_layer + + +class TimeCNNNetwork(BaseDeepLearningNetwork): + """Establish the network structure for a CNN. + + Adapted from the implementation used in [1]_. + + Parameters + ---------- + n_layers : int, default = 2 + The number of convolution layers in the network. + kernel_size : int or list of int, default = 7 + Kernel size of convolution layers, if not a list, the same kernel size is + used for all layers, len(list) should be n_layers. + n_filters : int or list of int, default = [6, 12] + Number of filters for each convolution layer, if not a list, the same + `n_filters` is used in all layers.
+ avg_pool_size : int or list of int, default = 3 + The size of the average pooling layer, if not a list, the same average pooling + size is used for all convolution layers. + activation : str or list of str, default = "sigmoid" + Keras activation function used in the model for each layer, if not a list, + the same activation is used for all layers. + padding : str or list of str, default = "valid" + The method of padding in convolution layers, if not a list, the same padding + is used for all convolution layers. + strides : int or list of int, default = 1 + The strides of kernels in the convolution layers, if not a list, + the same strides are used for all layers. + dilation_rate : int or list of int, default = 1 + The dilation rate of the convolution layers, if not a list, the same dilation + rate is used all over the network. + use_bias : bool or list of bool, default = True + Condition on whether or not to use bias values for convolution layers, if not + a list, the same condition is used for all layers. + + Notes + ----- + Adapted from source code + https://github.com/hfawaz/dl-4-tsc/blob/master/classifiers/cnn.py + + References + ---------- + .. [1] Zhao et al. Convolutional neural networks for time series classification, + Journal of Systems Engineering and Electronics 28(1), 162--169, 2017 + """ + + def __init__( + self, + n_layers=2, + kernel_size=7, + n_filters=None, + avg_pool_size=3, + activation="sigmoid", + padding="valid", + strides=1, + dilation_rate=1, + use_bias=True, + ): + self.n_layers = n_layers + self.n_filters = n_filters + self.kernel_size = kernel_size + self.avg_pool_size = avg_pool_size + self.activation = activation + self.padding = padding + self.strides = strides + self.dilation_rate = dilation_rate + self.use_bias = use_bias + + super().__init__() + + def build_network(self, input_shape, **kwargs): + """ + Construct a network and return its input and output layers.
+ + Parameters + ---------- + input_shape : tuple + The shape of the data fed into the input layer. + + Returns + ------- + input_layer : a keras layer + output_layer : a keras layer + """ + import tensorflow as tf + + self._n_filters_ = [6, 12] if self.n_filters is None else self.n_filters + + if isinstance(self.kernel_size, list): + assert len(self.kernel_size) == self.n_layers + self._kernel_size = self.kernel_size + else: + self._kernel_size = [self.kernel_size] * self.n_layers + + if isinstance(self._n_filters_, list): + assert len(self._n_filters_) == self.n_layers + self._n_filters = self._n_filters_ + else: + self._n_filters = [self._n_filters_] * self.n_layers + + if isinstance(self.avg_pool_size, list): + assert len(self.avg_pool_size) == self.n_layers + self._avg_pool_size = self.avg_pool_size + else: + self._avg_pool_size = [self.avg_pool_size] * self.n_layers + + if isinstance(self.activation, list): + assert len(self.activation) == self.n_layers + self._activation = self.activation + else: + self._activation = [self.activation] * self.n_layers + + if isinstance(self.padding, list): + assert len(self.padding) == self.n_layers + self._padding = self.padding + else: + self._padding = [self.padding] * self.n_layers + + if isinstance(self.strides, list): + assert len(self.strides) == self.n_layers + self._strides = self.strides + else: + self._strides = [self.strides] * self.n_layers + + if isinstance(self.dilation_rate, list): + assert len(self.dilation_rate) == self.n_layers + self._dilation_rate = self.dilation_rate + else: + self._dilation_rate = [self.dilation_rate] * self.n_layers + + if isinstance(self.use_bias, list): + assert len(self.use_bias) == self.n_layers + self._use_bias = self.use_bias + else: + self._use_bias = [self.use_bias] * self.n_layers + + input_layer = tf.keras.layers.Input(input_shape) + + if input_shape[0] < 60: + self._padding = ["same"] * self.n_layers + + x = input_layer + + for i in range(self.n_layers): + conv = 
tf.keras.layers.Conv1D( + filters=self._n_filters[i], + kernel_size=self._kernel_size[i], + strides=self._strides[i], + padding=self._padding[i], + dilation_rate=self._dilation_rate[i], + activation=self._activation[i], + use_bias=self._use_bias[i], + )(x) + + conv = tf.keras.layers.AveragePooling1D(pool_size=self._avg_pool_size[i])( + conv + ) + + x = conv + + flatten_layer = tf.keras.layers.Flatten()(conv) + + return input_layer, flatten_layer diff --git a/aeon/networks/_encoder.py b/aeon/networks/_encoder.py index cc600aa932..e2423fc03c 100644 --- a/aeon/networks/_encoder.py +++ b/aeon/networks/_encoder.py @@ -1,4 +1,4 @@ -"""Encoder Classifier.""" +"""Encoder Network (EncoderNetwork).""" __maintainer__ = ["hadifawaz1999"] @@ -46,7 +46,7 @@ class EncoderNetwork(BaseDeepLearningNetwork): """ _config = { - "python_dependencies": ["tensorflow", "tensorflow-addons"], + "python_dependencies": ["tensorflow"], "python_version": "<3.12", "structure": "encoder", } diff --git a/aeon/networks/_fcn.py b/aeon/networks/_fcn.py index 9b6c10de96..fa4e3f763a 100644 --- a/aeon/networks/_fcn.py +++ b/aeon/networks/_fcn.py @@ -1,6 +1,7 @@ -"""Fully Convolutional Network (FCN) (minus the final output layer).""" +"""Fully Convolutional Network (FCNNetwork).""" + +__maintainer__ = ["hadifawaz1999"] -__maintainer__ = [] from aeon.networks.base import BaseDeepLearningNetwork diff --git a/aeon/networks/_inception.py b/aeon/networks/_inception.py index 1b209dd8dc..4c8abaa449 100644 --- a/aeon/networks/_inception.py +++ b/aeon/networks/_inception.py @@ -1,6 +1,6 @@ -"""Inception Network.""" +"""Inception Network (InceptionNetwork).""" -__maintainer__ = [] +__maintainer__ = ["hadifawaz1999"] from aeon.networks.base import BaseDeepLearningNetwork diff --git a/aeon/networks/_lite.py b/aeon/networks/_lite.py index 64a5961ab4..df19fba0d0 100644 --- a/aeon/networks/_lite.py +++ b/aeon/networks/_lite.py @@ -1,6 +1,7 @@ -"""LITE Network.""" +"""LITE Network (LITENetwork).""" + +__maintainer__ = 
["hadifawaz1999"] -__maintainer__ = [] from aeon.networks.base import BaseDeepLearningNetwork diff --git a/aeon/networks/_mlp.py b/aeon/networks/_mlp.py index 537fadce0b..84b1570ff7 100644 --- a/aeon/networks/_mlp.py +++ b/aeon/networks/_mlp.py @@ -1,6 +1,7 @@ -"""Multi Layer Perceptron (MLP) (minus the final output layer).""" +"""Multi Layer Perceptron Network (MLPNetwork).""" + +__maintainer__ = ["hadifawaz1999"] -__maintainer__ = [] from aeon.networks.base import BaseDeepLearningNetwork diff --git a/aeon/networks/_resnet.py b/aeon/networks/_resnet.py index 91aaf5b154..d1ab38883a 100644 --- a/aeon/networks/_resnet.py +++ b/aeon/networks/_resnet.py @@ -1,6 +1,7 @@ -"""Residual Network (ResNet) (minus the final output layer).""" +"""Residual Network (ResNetNetwork).""" + +__maintainer__ = ["hadifawaz1999"] -__maintainer__ = [] from aeon.networks.base import BaseDeepLearningNetwork diff --git a/aeon/networks/_tapnet.py b/aeon/networks/_tapnet.py index 4e06d942c3..720925f2d2 100644 --- a/aeon/networks/_tapnet.py +++ b/aeon/networks/_tapnet.py @@ -1,6 +1,6 @@ -"""Time Convolutional Neural Network (CNN) (minus the final output layer).""" +"""Time series Attentional Prototype Network (TapNetNetwork).""" -__maintainer__ = [] +__maintainer__ = ["hadifawaz1999"] import math diff --git a/aeon/networks/tests/test_all_networks.py b/aeon/networks/tests/test_all_networks.py index 37ce19bf07..106a5b8b4f 100644 --- a/aeon/networks/tests/test_all_networks.py +++ b/aeon/networks/tests/test_all_networks.py @@ -35,7 +35,7 @@ def test_all_networks_functionality(network): """Test the functionality of all networks.""" input_shape = (100, 2) - if not (network.__name__ in ["BaseDeepLearningNetwork", "EncoderNetwork"]): + if not (network.__name__ in ["BaseDeepLearningNetwork"]): if _check_soft_dependencies( network._config["python_dependencies"], severity="none" ) and _check_python_version(network._config["python_version"], severity="none"): @@ -62,3 +62,75 @@ def 
test_all_networks_functionality(network): ) else: pytest.skip(f"{network.__name__} not to be tested since its a base class.") + + +@pytest.mark.parametrize("network", _networks) +def test_all_networks_params(network): + """Test the functionality of all networks.""" + input_shape = (100, 2) + + if network.__name__ in ["BaseDeepLearningNetwork", "EncoderNetwork"]: + pytest.skip(f"{network.__name__} not to be tested since its a base class.") + + if network._config["structure"] == "auto-encoder": + pytest.skip( + f"{network.__name__} not to be tested (AE networks have their own tests)." + ) + + if not ( + _check_soft_dependencies( + network._config["python_dependencies"], severity="none" + ) + and _check_python_version(network._config["python_version"], severity="none") + ): + pytest.skip( + f"{network.__name__} dependencies not satisfied or invalid \ + Python version." + ) + + # check with default parameters + my_network = network() + my_network.build_network(input_shape=input_shape) + + # check with list parameters + params = dict() + for attrname in [ + "kernel_size", + "n_filters", + "avg_pool_size", + "activation", + "padding", + "strides", + "dilation_rate", + "use_bias", + ]: + + # Exceptions to fix + if ( + attrname in ["kernel_size", "padding"] + and network.__name__ == "TapNetNetwork" + ): + continue + # LITENetwork does not seem to work with list args + if network.__name__ == "LITENetwork": + continue + + # Here we use 'None' string as default to differentiate with None values + attr = getattr(my_network, attrname, "None") + if attr != "None": + if attr is None: + attr = 3 + elif isinstance(attr, list): + attr = attr[0] + else: + if network.__name__ in ["ResNetNetwork"]: + attr = [attr] * my_network.n_conv_per_residual_block + elif network.__name__ in ["InceptionNetwork"]: + attr = [attr] * my_network.depth + else: + attr = [attr] * my_network.n_layers + params[attrname] = attr + + if params: + my_network = network(**params) + 
my_network.build_network(input_shape=input_shape) diff --git a/aeon/networks/tests/test_cnn.py b/aeon/networks/tests/test_cnn.py new file mode 100644 index 0000000000..c859397b34 --- /dev/null +++ b/aeon/networks/tests/test_cnn.py @@ -0,0 +1,22 @@ +"""Tests for the CNN Model.""" + +import pytest + +from aeon.networks import TimeCNNNetwork +from aeon.utils.validation._dependencies import _check_soft_dependencies + +__maintainer__ = [] + + +@pytest.mark.skipif( + not _check_soft_dependencies(["tensorflow"], severity="none"), + reason="Tensorflow soft dependency unavailable.", +) +def test_cnn_input_shape_padding(): + """Test of CNN network with input_shape < 60.""" + input_shape = (40, 2) + network = TimeCNNNetwork() + input_layer, output_layer = network.build_network(input_shape=input_shape) + + assert input_layer is not None + assert output_layer is not None diff --git a/aeon/networks/tests/test_inception.py b/aeon/networks/tests/test_inception.py new file mode 100644 index 0000000000..afd046883f --- /dev/null +++ b/aeon/networks/tests/test_inception.py @@ -0,0 +1,74 @@ +"""Tests for the Inception Model.""" + +import pytest + +from aeon.networks import InceptionNetwork +from aeon.utils.validation._dependencies import _check_soft_dependencies + +__maintainer__ = [] + + +@pytest.mark.skipif( + not _check_soft_dependencies(["tensorflow"], severity="none"), + reason="Tensorflow soft dependency unavailable.", +) +def test_inceptionnetwork_bottleneck(): + """Test of Inception network without bottleneck.""" + input_shape = (100, 2) + inception = InceptionNetwork(use_bottleneck=False) + input_layer, output_layer = inception.build_network(input_shape=input_shape) + + assert input_layer is not None + assert output_layer is not None + + +@pytest.mark.skipif( + not _check_soft_dependencies(["tensorflow"], severity="none"), + reason="Tensorflow soft dependency unavailable.", +) +def test_inceptionnetwork_max_pooling(): + """Test of Inception network without max pooling.""" + 
input_shape = (100, 2) + inception = InceptionNetwork(use_max_pooling=False) + input_layer, output_layer = inception.build_network(input_shape=input_shape) + + assert input_layer is not None + assert output_layer is not None + + +@pytest.mark.skipif( + not _check_soft_dependencies(["tensorflow"], severity="none"), + reason="Tensorflow soft dependency unavailable.", +) +def test_inceptionnetwork_custom_filters(): + """Test of Inception network with custom filters.""" + input_shape = (100, 2) + inception = InceptionNetwork(use_custom_filters=True) + input_layer, output_layer = inception.build_network(input_shape=input_shape) + + assert input_layer is not None + assert output_layer is not None + + +@pytest.mark.skipif( + not _check_soft_dependencies(["tensorflow"], severity="none"), + reason="Tensorflow soft dependency unavailable.", +) +def test_inceptionnetwork_list_parameters(): + """Test of Inception network with list parameters not in test_all_networks.""" + input_shape = (100, 2) + depth = 6 + n_conv_per_layer = [3] * depth + use_max_pooling = [True] * depth + max_pool_size = [3] * depth + + inception = InceptionNetwork( + depth=depth, + n_conv_per_layer=n_conv_per_layer, + use_max_pooling=use_max_pooling, + max_pool_size=max_pool_size, + ) + input_layer, output_layer = inception.build_network(input_shape=input_shape) + + assert input_layer is not None + assert output_layer is not None diff --git a/aeon/regression/deep_learning/__init__.py b/aeon/regression/deep_learning/__init__.py index 13077e96e8..eb69f1e0f0 100644 --- a/aeon/regression/deep_learning/__init__.py +++ b/aeon/regression/deep_learning/__init__.py @@ -1,7 +1,9 @@ """Deep learning based regressors.""" __all__ = [ + "BaseDeepRegressor", "CNNRegressor", + "TimeCNNRegressor", "FCNRegressor", "InceptionTimeRegressor", "IndividualInceptionRegressor", @@ -13,7 +15,7 @@ "MLPRegressor", ] -from aeon.regression.deep_learning._cnn import CNNRegressor +from aeon.regression.deep_learning._cnn import 
CNNRegressor, TimeCNNRegressor from aeon.regression.deep_learning._encoder import EncoderRegressor from aeon.regression.deep_learning._fcn import FCNRegressor from aeon.regression.deep_learning._inception_time import ( @@ -27,3 +29,4 @@ from aeon.regression.deep_learning._mlp import MLPRegressor from aeon.regression.deep_learning._resnet import ResNetRegressor from aeon.regression.deep_learning._tapnet import TapNetRegressor +from aeon.regression.deep_learning.base import BaseDeepRegressor diff --git a/aeon/regression/deep_learning/_cnn.py b/aeon/regression/deep_learning/_cnn.py index c5854b2438..988e823c63 100644 --- a/aeon/regression/deep_learning/_cnn.py +++ b/aeon/regression/deep_learning/_cnn.py @@ -1,19 +1,27 @@ -"""Time Convolutional Neural Network (CNN) for regression.""" +"""Time Convolutional Neural Network (TimeCNN) regressor.""" -__maintainer__ = [] -__all__ = ["CNNRegressor"] +__maintainer__ = ["hadifawaz1999"] +__all__ = ["CNNRegressor", "TimeCNNRegressor"] import gc import os import time from copy import deepcopy +from deprecated.sphinx import deprecated from sklearn.utils import check_random_state -from aeon.networks import CNNNetwork +from aeon.networks import CNNNetwork, TimeCNNNetwork from aeon.regression.deep_learning.base import BaseDeepRegressor +# TODO: remove v0.12.0 +@deprecated( + version="0.10.0", + reason="CNNRegressor has been renamed to TimeCNNRegressor " + "and will be removed in 0.12.0.", + category=FutureWarning, +) class CNNRegressor(BaseDeepRegressor): """Time Series Convolutional Neural Network (CNN). @@ -86,6 +94,8 @@ class CNNRegressor(BaseDeepRegressor): Whether or not to save the last model, last epoch trained, using the base class method save_last_model_to_file + save_init_model : bool, default = False + Whether to save the initialization of the model.
best_file_name : str, default = "best_model" The name of the file of the best model, if save_best_model is set to False, this parameter @@ -94,6 +104,9 @@ class CNNRegressor(BaseDeepRegressor): The name of the file of the last model, if save_last_model is set to False, this parameter is discarded + init_file_name : str, default = "init_model" + The name of the file of the init model, if save_init_model is set to False, + this parameter is discarded. Notes ----- @@ -133,8 +146,10 @@ def __init__( file_path="./", save_best_model=False, save_last_model=False, + save_init_model=False, best_file_name="best_model", last_file_name="last_model", + init_file_name="init_model", verbose=False, loss="mse", output_activation="linear", @@ -151,7 +166,9 @@ def __init__( self.file_path = file_path self.save_best_model = save_best_model self.save_last_model = save_last_model + self.save_init_model = save_init_model self.best_file_name = best_file_name + self.init_file_name = init_file_name self.strides = strides self.dilation_rate = dilation_rate self.callbacks = callbacks @@ -253,6 +270,337 @@ def _fit(self, X, y): self.input_shape = X.shape[1:] self.training_model_ = self.build_model(self.input_shape) + if self.save_init_model: + self.training_model_.save(self.file_path + self.init_file_name + ".keras") + + if self.verbose: + self.training_model_.summary() + + self.file_name_ = ( + self.best_file_name if self.save_best_model else str(time.time_ns()) + ) + + if self.callbacks is None: + self.callbacks_ = [ + tf.keras.callbacks.ModelCheckpoint( + filepath=self.file_path + self.file_name_ + ".keras", + monitor="loss", + save_best_only=True, + ), + ] + else: + self.callbacks_ = self._get_model_checkpoint_callback( + callbacks=self.callbacks, + file_path=self.file_path, + file_name=self.file_name_, + ) + + self.history = self.training_model_.fit( + X, + y, + batch_size=self.batch_size, + epochs=self.n_epochs, + verbose=self.verbose, + callbacks=self.callbacks_, + ) + + try: + 
self.model_ = tf.keras.models.load_model( + self.file_path + self.file_name_ + ".keras", compile=False + ) + if not self.save_best_model: + os.remove(self.file_path + self.file_name_ + ".keras") + except FileNotFoundError: + self.model_ = deepcopy(self.training_model_) + + if self.save_last_model: + self.save_last_model_to_file(file_path=self.file_path) + + gc.collect() + return self + + @classmethod + def get_test_params(cls, parameter_set="default"): + """Return testing parameter settings for the estimator. + + Parameters + ---------- + parameter_set : str, default="default" + Name of the set of test parameters to return, for use in tests. If no + special parameters are defined for a value, will return `"default"` set. + For regressors, a "default" set of parameters should be provided for + general testing, and a "results_comparison" set for comparing against + previously recorded results if the general set does not produce suitable + probabilities to compare against. + + Returns + ------- + params : dict or list of dict, default={} + Parameters to create testing instances of the class. + Each dict are parameters to construct an "interesting" test instance, i.e., + `MyClass(**params)` or `MyClass(**params[i])` creates a valid test instance. + `create_test_instance` uses the first (or only) dictionary in `params`. + """ + param = { + "n_epochs": 10, + "batch_size": 4, + "avg_pool_size": 4, + } + + return [param] + + +class TimeCNNRegressor(BaseDeepRegressor): + """Time Series Convolutional Neural Network (CNN). + + Adapted from the implementation used in [1]_. 
+ + Parameters + ---------- + n_layers : int, default = 2, + the number of convolution layers in the network + kernel_size : int or list of int, default = 7, + kernel size of convolution layers, if not a list, the same kernel size + is used for all layer, len(list) should be n_layers + n_filters : int or list of int, default = [6, 12], + number of filters for each convolution layer, if not a list, the same n_filters + is used in all layers. + avg_pool_size : int or list of int, default = 3, + the size of the average pooling layer, if not a list, the same + max pooling size is used + for all convolution layer + output_activation : str, default = "linear", + the output activation for the regressor + activation : str or list of str, default = "sigmoid", + keras activation function used in the model for each layer, + if not a list, the same + activation is used for all layers + padding : str or list of str, default = 'valid', + the method of padding in convolution layers, if not a list, + the same padding used + for all convolution layers + strides : int or list of int, default = 1, + the strides of kernels in the convolution and max pooling layers, + if not a list, the same strides are used for all layers + dilation_rate : int or list of int, default = 1, + the dilation rate of the convolution layers, if not a list, + the same dilation rate is used all over the network + use_bias : bool or list of bool, default = True, + condition on whether or not to use bias values for convolution layers, + if not a list, the same condition is used for all layers + random_state : int, RandomState instance or None, default=None + If `int`, random_state is the seed used by the random number generator; + If `RandomState` instance, random_state is the random number generator; + If `None`, the random number generator is the `RandomState` instance used + by `np.random`. + Seeded random number generation can only be guaranteed on CPU processing, + GPU processing will be non-deterministic. 
+ n_epochs : int, default = 2000 + the number of epochs to train the model + batch_size : int, default = 16 + the number of samples per gradient update. + verbose : boolean, default = False + whether to output extra information + loss : string, default="mse" + fit parameter for the keras model + optimizer : keras.optimizer, default=keras.optimizers.Adam(), + metrics : str or list of str, default="mean_squared_error" + The evaluation metrics to use during training. If + a single string metric is provided, it will be + used as the only metric. If a list of metrics is + provided, all will be used for evaluation. + callbacks : keras.callbacks, default=model_checkpoint to save best + model on training loss + file_path : file_path for the best model (if checkpoint is used as callback) + save_best_model : bool, default = False + Whether or not to save the best model, if the + modelcheckpoint callback is used by default, + this condition, if True, will prevent the + automatic deletion of the best saved model from + file and the user can choose the file name + save_last_model : bool, default = False + Whether or not to save the last model, last + epoch trained, using the base class method + save_last_model_to_file + save_init_model : bool, default = False + Whether to save the initialization of the model. + best_file_name : str, default = "best_model" + The name of the file of the best model, if + save_best_model is set to False, this parameter + is discarded + last_file_name : str, default = "last_model" + The name of the file of the last model, if + save_last_model is set to False, this parameter + is discarded + init_file_name : str, default = "init_model" + The name of the file of the init model, if save_init_model is set to False, + this parameter is discarded. + + Notes + ----- + Adapted from the implementation from Fawaz et al. + https://github.com/hfawaz/dl-4-tsc/blob/master/classifiers/cnn.py + + References + ---------- + .. [1] Zhao et.
al, Convolutional neural networks for time series classification, + Journal of Systems Engineering and Electronics, 28(1):2017. + + Examples + -------- + >>> from aeon.regression.deep_learning import TimeCNNRegressor + >>> from aeon.testing.data_generation import make_example_3d_numpy + >>> X, y = make_example_3d_numpy(n_cases=10, n_channels=1, n_timepoints=12, + ... return_y=True, regression_target=True, + ... random_state=0) + >>> rgs = TimeCNNRegressor(n_epochs=20, batch_size=4) # doctest: +SKIP + >>> rgs.fit(X, y) # doctest: +SKIP + TimeCNNRegressor(...) + """ + + def __init__( + self, + n_layers=2, + kernel_size=7, + n_filters=None, + avg_pool_size=3, + activation="sigmoid", + padding="valid", + strides=1, + dilation_rate=1, + n_epochs=2000, + batch_size=16, + callbacks=None, + file_path="./", + save_best_model=False, + save_last_model=False, + save_init_model=False, + best_file_name="best_model", + last_file_name="last_model", + init_file_name="init_model", + verbose=False, + loss="mse", + output_activation="linear", + metrics="mean_squared_error", + random_state=None, + use_bias=True, + optimizer=None, + ): + self.n_layers = n_layers + self.avg_pool_size = avg_pool_size + self.padding = padding + self.n_filters = n_filters + self.kernel_size = kernel_size + self.file_path = file_path + self.save_best_model = save_best_model + self.save_last_model = save_last_model + self.save_init_model = save_init_model + self.best_file_name = best_file_name + self.init_file_name = init_file_name + self.strides = strides + self.dilation_rate = dilation_rate + self.callbacks = callbacks + self.n_epochs = n_epochs + self.verbose = verbose + self.loss = loss + self.output_activation = output_activation + self.metrics = metrics + self.random_state = random_state + self.activation = activation + self.use_bias = use_bias + self.optimizer = optimizer + + self.history = None + + super().__init__( + batch_size=batch_size, + last_file_name=last_file_name, + ) + + self._network =
TimeCNNNetwork( + n_layers=self.n_layers, + kernel_size=self.kernel_size, + n_filters=self.n_filters, + avg_pool_size=self.avg_pool_size, + activation=self.activation, + padding=self.padding, + strides=self.strides, + dilation_rate=self.dilation_rate, + use_bias=self.use_bias, + ) + + def build_model(self, input_shape, **kwargs): + """Construct a compiled, un-trained, keras model that is ready for training. + + In aeon, time series are stored in numpy arrays of shape (d,m), where d + is the number of dimensions, m is the series length. Keras/tensorflow assume + data is in shape (m,d). This method also assumes (m,d). Transpose should + happen in fit. + + Parameters + ---------- + input_shape : tuple + The shape of the data fed into the input layer, should be (m,d) + + Returns + ------- + output : a compiled Keras Model + """ + import numpy as np + import tensorflow as tf + from tensorflow import keras + + rng = check_random_state(self.random_state) + self.random_state_ = rng.randint(0, np.iinfo(np.int32).max) + tf.keras.utils.set_random_seed(self.random_state_) + input_layer, output_layer = self._network.build_network(input_shape, **kwargs) + + output_layer = keras.layers.Dense(units=1, activation=self.output_activation)( + output_layer + ) + + self.optimizer_ = ( + keras.optimizers.Adam() if self.optimizer is None else self.optimizer + ) + + model = keras.models.Model(inputs=input_layer, outputs=output_layer) + + model.compile( + loss=self.loss, + optimizer=self.optimizer_, + metrics=self._metrics, + ) + return model + + def _fit(self, X, y): + """Fit the regressor on the training set (X, y). + + Parameters + ---------- + X : np.ndarray + The training input samples of shape (n_cases, n_channels, n_timepoints). + y : np.ndarray + The training data target values of shape (n_cases,). + + Returns + ------- + self : object + """ + import tensorflow as tf + + # Transpose to conform to Keras input style. 
+ X = X.transpose(0, 2, 1) + + if isinstance(self.metrics, str): + self._metrics = [self.metrics] + else: + self._metrics = self.metrics + self.input_shape = X.shape[1:] + self.training_model_ = self.build_model(self.input_shape) + + if self.save_init_model: + self.training_model_.save(self.file_path + self.init_file_name + ".keras") + if self.verbose: self.training_model_.summary() diff --git a/aeon/regression/deep_learning/_encoder.py b/aeon/regression/deep_learning/_encoder.py index be65733d0f..4b73047c05 100644 --- a/aeon/regression/deep_learning/_encoder.py +++ b/aeon/regression/deep_learning/_encoder.py @@ -1,6 +1,6 @@ """Encoder Regressor.""" -__author__ = ["AnonymousCodes911"] +__author__ = ["AnonymousCodes911", "hadifawaz1999"] __all__ = ["EncoderRegressor"] import gc @@ -54,6 +54,8 @@ class EncoderRegressor(BaseDeepRegressor): Whether or not to save the last model, last epoch trained, using the base class method save_last_model_to_file. + save_init_model : bool, default = False + Whether to save the initialization of the model. best_file_name : str, default = "best_model" The name of the file of the best model, if save_best_model is set to False, this parameter @@ -62,6 +64,9 @@ class EncoderRegressor(BaseDeepRegressor): The name of the file of the last model, if save_last_model is set to False, this parameter is discarded. + init_file_name : str, default = "init_model" + The name of the file of the init model, if save_init_model is set to False, + this parameter is discarded. 
n_epochs: The number of times the entire training dataset will be passed forward and backward @@ -121,8 +126,10 @@ def __init__( file_path="./", save_best_model=False, save_last_model=False, + save_init_model=False, best_file_name="best_model", last_file_name="last_model", + init_file_name="init_model", verbose=False, loss="mean_squared_error", metrics="mean_squared_error", @@ -144,7 +151,9 @@ def __init__( self.file_path = file_path self.save_best_model = save_best_model self.save_last_model = save_last_model + self.save_init_model = save_init_model self.best_file_name = best_file_name + self.init_file_name = init_file_name self.n_epochs = n_epochs self.verbose = verbose self.loss = loss @@ -239,6 +248,9 @@ def _fit(self, X, y): self.input_shape = X.shape[1:] self.training_model_ = self.build_model(self.input_shape) + if self.save_init_model: + self.training_model_.save(self.file_path + self.init_file_name + ".keras") + if self.verbose: self.training_model_.summary() diff --git a/aeon/regression/deep_learning/_fcn.py b/aeon/regression/deep_learning/_fcn.py index 7c136cae34..374feb5b93 100644 --- a/aeon/regression/deep_learning/_fcn.py +++ b/aeon/regression/deep_learning/_fcn.py @@ -1,6 +1,6 @@ -"""Fully Convolutional Network (FCN) for regression.""" +"""Fully Convolutional Network (FCN) regressor.""" -__maintainer__ = [] +__maintainer__ = ["hadifawaz1999"] __all__ = ["FCNRegressor"] import gc @@ -75,6 +75,8 @@ class FCNRegressor(BaseDeepRegressor): Whether or not to save the last model, last epoch trained, using the base class method save_last_model_to_file + save_init_model : bool, default = False + Whether to save the initialization of the model. 
best_file_name : str, default = "best_model" The name of the file of the best model, if save_best_model is set to False, this parameter @@ -83,6 +85,9 @@ class FCNRegressor(BaseDeepRegressor): The name of the file of the last model, if save_last_model is set to False, this parameter is discarded + init_file_name : str, default = "init_model" + The name of the file of the init model, if save_init_model is set to False, + this parameter is discarded. callbacks : keras.callbacks, default = None Notes @@ -119,8 +124,10 @@ def __init__( file_path="./", save_best_model=False, save_last_model=False, + save_init_model=False, best_file_name="best_model", last_file_name="last_model", + init_file_name="init_model", n_epochs=2000, batch_size=16, use_mini_batch_size=False, @@ -153,7 +160,9 @@ def __init__( self.file_path = file_path self.save_best_model = save_best_model self.save_last_model = save_last_model + self.save_init_model = save_init_model self.best_file_name = best_file_name + self.init_file_name = init_file_name self.history = None @@ -239,6 +248,9 @@ def _fit(self, X, y): self.input_shape = X.shape[1:] self.training_model_ = self.build_model(self.input_shape) + if self.save_init_model: + self.training_model_.save(self.file_path + self.init_file_name + ".keras") + if self.verbose: self.training_model_.summary() diff --git a/aeon/regression/deep_learning/_inception_time.py b/aeon/regression/deep_learning/_inception_time.py index dc14749d01..a5bd459545 100644 --- a/aeon/regression/deep_learning/_inception_time.py +++ b/aeon/regression/deep_learning/_inception_time.py @@ -1,6 +1,6 @@ -"""InceptionTime regressor.""" +"""InceptionTime and Inception regressors.""" -__maintainer__ = [] +__maintainer__ = ["hadifawaz1999"] __all__ = ["InceptionTimeRegressor"] import gc @@ -107,6 +107,8 @@ class InceptionTimeRegressor(BaseRegressor): Whether or not to save the last model, last epoch trained, using the base class method save_last_model_to_file + save_init_model : bool, default 
= False + Whether to save the initialization of the model. best_file_name : str, default = "best_model" The name of the file of the best model, if save_best_model is set to False, this parameter @@ -115,6 +117,9 @@ class InceptionTimeRegressor(BaseRegressor): The name of the file of the last model, if save_last_model is set to False, this parameter is discarded + init_file_name : str, default = "init_model" + The name of the file of the init model, if save_init_model is set to False, + this parameter is discarded. random_state : int, RandomState instance or None, default=None If `int`, random_state is the seed used by the random number generator; If `RandomState` instance, random_state is the random number generator; @@ -187,8 +192,10 @@ def __init__( file_path="./", save_last_model=False, save_best_model=False, + save_init_model=False, best_file_name="best_model", last_file_name="last_model", + init_file_name="init_model", batch_size=64, use_mini_batch_size=False, n_epochs=1500, @@ -223,8 +230,10 @@ def __init__( self.save_last_model = save_last_model self.save_best_model = save_best_model + self.save_init_model = save_init_model self.best_file_name = best_file_name self.last_file_name = last_file_name + self.init_file_name = init_file_name self.callbacks = callbacks self.random_state = random_state @@ -275,8 +284,10 @@ def _fit(self, X, y): file_path=self.file_path, save_best_model=self.save_best_model, save_last_model=self.save_last_model, + save_init_model=self.save_init_model, best_file_name=self.best_file_name + str(n), last_file_name=self.last_file_name + str(n), + init_file_name=self.init_file_name + str(n), batch_size=self.batch_size, use_mini_batch_size=self.use_mini_batch_size, n_epochs=self.n_epochs, @@ -424,6 +435,8 @@ class IndividualInceptionRegressor(BaseDeepRegressor): Whether or not to save the last model, last epoch trained, using the base class method save_last_model_to_file + save_init_model : bool, default = False + Whether to save the 
initialization of the model. best_file_name : str, default = "best_model" The name of the file of the best model, if save_best_model is set to False, this parameter @@ -432,6 +445,9 @@ class IndividualInceptionRegressor(BaseDeepRegressor): The name of the file of the last model, if save_last_model is set to False, this parameter is discarded + init_file_name : str, default = "init_model" + The name of the file of the init model, if save_init_model is set to False, + this parameter is discarded. random_state : int, RandomState instance or None, default=None If `int`, random_state is the seed used by the random number generator; If `RandomState` instance, random_state is the random number generator; @@ -492,8 +508,10 @@ def __init__( file_path="./", save_best_model=False, save_last_model=False, + save_init_model=False, best_file_name="best_model", last_file_name="last_model", + init_file_name="init_model", batch_size=64, use_mini_batch_size=False, n_epochs=1500, @@ -526,7 +544,9 @@ def __init__( self.save_best_model = save_best_model self.save_last_model = save_last_model + self.save_init_model = save_init_model self.best_file_name = best_file_name + self.init_file_name = init_file_name self.callbacks = callbacks self.random_state = random_state @@ -626,6 +646,9 @@ def _fit(self, X, y): mini_batch_size = self.batch_size self.training_model_ = self.build_model(self.input_shape_) + if self.save_init_model: + self.training_model_.save(self.file_path + self.init_file_name + ".keras") + if self.verbose: self.training_model_.summary() diff --git a/aeon/regression/deep_learning/_lite_time.py b/aeon/regression/deep_learning/_lite_time.py index 8b2e32f200..5a2079df94 100644 --- a/aeon/regression/deep_learning/_lite_time.py +++ b/aeon/regression/deep_learning/_lite_time.py @@ -1,6 +1,6 @@ -"""LITETime Regressor.""" +"""LITETime and LITE regressors.""" -__author__ = ["aadya940"] +__author__ = ["aadya940", "hadifawaz1999"] __all__ = ["IndividualLITERegressor", 
"LITETimeRegressor"] import gc @@ -61,6 +61,8 @@ class LITETimeRegressor(BaseRegressor): Whether or not to save the last model, last epoch trained, using the base class method save_last_model_to_file + save_init_model : bool, default = False + Whether to save the initialization of the model. best_file_name : str, default = "best_model" The name of the file of the best model, if save_best_model is set to False, this parameter @@ -69,6 +71,9 @@ class LITETimeRegressor(BaseRegressor): The name of the file of the last model, if save_last_model is set to False, this parameter is discarded + init_file_name : str, default = "init_model" + The name of the file of the init model, if save_init_model is set to False, + this parameter is discarded. random_state : int, RandomState instance or None, default=None If `int`, random_state is the seed used by the random number generator; If `RandomState` instance, random_state is the random number generator; @@ -122,8 +127,10 @@ def __init__( file_path="./", save_last_model=False, save_best_model=False, + save_init_model=False, best_file_name="best_model", last_file_name="last_model", + init_file_name="init_model", batch_size=64, use_mini_batch_size=False, n_epochs=1500, @@ -149,8 +156,10 @@ def __init__( self.save_last_model = save_last_model self.save_best_model = save_best_model + self.save_init_model = save_init_model self.best_file_name = best_file_name self.last_file_name = last_file_name + self.init_file_name = init_file_name self.callbacks = callbacks self.random_state = random_state @@ -188,8 +197,10 @@ def _fit(self, X, y): file_path=self.file_path, save_best_model=self.save_best_model, save_last_model=self.save_last_model, + save_init_model=self.save_init_model, best_file_name=self.best_file_name + str(n), last_file_name=self.last_file_name + str(n), + init_file_name=self.init_file_name + str(n), batch_size=self.batch_size, use_mini_batch_size=self.use_mini_batch_size, n_epochs=self.n_epochs, @@ -302,6 +313,8 @@ class 
IndividualLITERegressor(BaseDeepRegressor): Whether or not to save the last model, last epoch trained, using the base class method save_last_model_to_file + save_init_model : bool, default = False + Whether to save the initialization of the model. best_file_name : str, default = "best_model" The name of the file of the best model, if save_best_model is set to False, this parameter @@ -310,6 +323,9 @@ class IndividualLITERegressor(BaseDeepRegressor): The name of the file of the last model, if save_last_model is set to False, this parameter is discarded + init_file_name : str, default = "init_model" + The name of the file of the init model, if save_init_model is set to False, + this parameter is discarded. random_state : int, RandomState instance or None, default=None If `int`, random_state is the seed used by the random number generator; If `RandomState` instance, random_state is the random number generator; @@ -354,8 +370,10 @@ def __init__( file_path="./", save_best_model=False, save_last_model=False, + save_init_model=False, best_file_name="best_model", last_file_name="last_model", + init_file_name="init_model", batch_size=64, use_mini_batch_size=False, n_epochs=1500, @@ -379,7 +397,9 @@ def __init__( self.save_best_model = save_best_model self.save_last_model = save_last_model + self.save_init_model = save_init_model self.best_file_name = best_file_name + self.init_file_name = init_file_name self.callbacks = callbacks self.random_state = random_state @@ -476,6 +496,9 @@ def _fit(self, X, y): mini_batch_size = self.batch_size self.training_model_ = self.build_model(self.input_shape) + if self.save_init_model: + self.training_model_.save(self.file_path + self.init_file_name + ".keras") + if self.verbose: self.training_model_.summary() diff --git a/aeon/regression/deep_learning/_mlp.py b/aeon/regression/deep_learning/_mlp.py index a8883f3f12..cb9907fe7c 100644 --- a/aeon/regression/deep_learning/_mlp.py +++ b/aeon/regression/deep_learning/_mlp.py @@ -1,6 +1,6 @@ 
-"""Multi Layer Perceptron Network (MLP) for Regression.""" +"""Multi Layer Perceptron Network (MLP) regressor.""" -__author__ = ["Aadya-Chinubhai"] +__author__ = ["Aadya-Chinubhai", "hadifawaz1999"] __all__ = ["MLPRegressor"] import gc @@ -47,6 +47,8 @@ class MLPRegressor(BaseDeepRegressor): Whether or not to save the last model, last epoch trained, using the base class method save_last_model_to_file + save_init_model : bool, default = False + Whether to save the initialization of the model. best_file_name : str, default = "best_model" The name of the file of the best model, if save_best_model is set to False, this parameter @@ -55,6 +57,9 @@ class MLPRegressor(BaseDeepRegressor): The name of the file of the last model, if save_last_model is set to False, this parameter is discarded + init_file_name : str, default = "init_model" + The name of the file of the init model, if save_init_model is set to False, + this parameter is discarded. random_state : int, RandomState instance or None, default=None If `int`, random_state is the seed used by the random number generator; If `RandomState` instance, random_state is the random number generator; @@ -100,8 +105,10 @@ def __init__( file_path="./", save_best_model=False, save_last_model=False, + save_init_model=False, best_file_name="best_model", last_file_name="last_model", + init_file_name="init_model", random_state=None, activation="relu", output_activation="linear", @@ -118,7 +125,9 @@ def __init__( self.file_path = file_path self.save_best_model = save_best_model self.save_last_model = save_last_model + self.save_init_model = save_init_model self.best_file_name = best_file_name + self.init_file_name = init_file_name self.optimizer = optimizer self.random_state = random_state self.output_activation = output_activation @@ -202,6 +211,9 @@ def _fit(self, X, y): self.training_model_ = self.build_model(self.input_shape) + if self.save_init_model: + self.training_model_.save(self.file_path + self.init_file_name + ".keras") + 
if self.verbose: self.training_model_.summary() diff --git a/aeon/regression/deep_learning/_resnet.py b/aeon/regression/deep_learning/_resnet.py index b9a411891c..48f2d3c5f8 100644 --- a/aeon/regression/deep_learning/_resnet.py +++ b/aeon/regression/deep_learning/_resnet.py @@ -1,6 +1,6 @@ -"""Residual Network (ResNet) for regression.""" +"""Residual Network (ResNet) regressor.""" -__maintainer__ = [] +__maintainer__ = ["hadifawaz1999"] __all__ = ["ResNetRegressor"] import gc @@ -82,6 +82,8 @@ class ResNetRegressor(BaseDeepRegressor): Whether or not to save the last model, last epoch trained, using the base class method save_last_model_to_file + save_init_model : bool, default = False + Whether to save the initialization of the model. best_file_name : str, default = "best_model" The name of the file of the best model, if save_best_model is set to False, this parameter @@ -90,6 +92,9 @@ class ResNetRegressor(BaseDeepRegressor): The name of the file of the last model, if save_last_model is set to False, this parameter is discarded + init_file_name : str, default = "init_model" + The name of the file of the init model, if save_init_model is set to False, + this parameter is discarded. 
verbose : boolean, default = False whether to output extra information loss : string, default="mean_squared_error" @@ -147,8 +152,10 @@ def __init__( file_path="./", save_best_model=False, save_last_model=False, + save_init_model=False, best_file_name="best_model", last_file_name="last_model", + init_file_name="init_model", optimizer=None, ): self.n_residual_blocks = n_residual_blocks @@ -171,7 +178,9 @@ def __init__( self.file_path = file_path self.save_best_model = save_best_model self.save_last_model = save_last_model + self.save_init_model = save_init_model self.best_file_name = best_file_name + self.init_file_name = init_file_name self.optimizer = optimizer self.history = None @@ -261,6 +270,9 @@ def _fit(self, X, y): self.input_shape = X.shape[1:] self.training_model_ = self.build_model(self.input_shape) + if self.save_init_model: + self.training_model_.save(self.file_path + self.init_file_name + ".keras") + if self.verbose: self.training_model_.summary() diff --git a/aeon/regression/deep_learning/_tapnet.py b/aeon/regression/deep_learning/_tapnet.py index 380d2ee533..4a03f02c66 100644 --- a/aeon/regression/deep_learning/_tapnet.py +++ b/aeon/regression/deep_learning/_tapnet.py @@ -1,6 +1,6 @@ -"""Time Convolutional Neural Network (CNN) for classification.""" +"""Time series Attentional Prototype Network (TapNet) regressor.""" -__maintainer__ = [] +__maintainer__ = ["hadifawaz1999"] __all__ = [ "TapNetRegressor", ] diff --git a/aeon/regression/deep_learning/tests/test_deep_regressor_base.py b/aeon/regression/deep_learning/tests/test_deep_regressor_base.py index 000b39173d..9dad88f0b2 100644 --- a/aeon/regression/deep_learning/tests/test_deep_regressor_base.py +++ b/aeon/regression/deep_learning/tests/test_deep_regressor_base.py @@ -10,7 +10,7 @@ from aeon.testing.data_generation import make_example_2d_numpy_collection from aeon.utils.validation._dependencies import _check_soft_dependencies -__maintainer__ = [] +__maintainer__ = ["hadifawaz1999"] class 
_DummyDeepRegressor(BaseDeepRegressor): diff --git a/aeon/regression/deep_learning/tests/test_saving_loading_deep_learning_cls.py b/aeon/regression/deep_learning/tests/test_saving_loading_deep_learning_cls.py new file mode 100644 index 0000000000..736d99baf3 --- /dev/null +++ b/aeon/regression/deep_learning/tests/test_saving_loading_deep_learning_cls.py @@ -0,0 +1,79 @@ +"""Unit tests for regressors deep learners save/load functionalities.""" + +import inspect +import os +import tempfile +import time + +import pytest + +from aeon.regression import deep_learning +from aeon.testing.data_generation import make_example_3d_numpy +from aeon.utils.validation._dependencies import _check_soft_dependencies + +__maintainer__ = ["hadifawaz1999"] + + +_deep_rgs_classes = [ + member[1] for member in inspect.getmembers(deep_learning, inspect.isclass) +] + + +@pytest.mark.skipif( + not _check_soft_dependencies(["tensorflow"], severity="none"), + reason="skip test if required soft dependency not available", +) +@pytest.mark.parametrize("deep_rgs", _deep_rgs_classes) +def test_saving_loading_deep_learning_rgs(deep_rgs): + """Test Deep Regressor saving.""" + with tempfile.TemporaryDirectory() as tmp: + if not ( + deep_rgs.__name__ + in [ + "BaseDeepRegressor", + "InceptionTimeRegressor", + "LITETimeRegressor", + "TapNetRegressor", + ] + ): + if tmp[-1] != "/": + tmp = tmp + "/" + curr_time = str(time.time_ns()) + last_file_name = curr_time + "last" + best_file_name = curr_time + "best" + init_file_name = curr_time + "init" + + X, y = make_example_3d_numpy() + + deep_rgs_train = deep_rgs( + n_epochs=2, + save_best_model=True, + save_last_model=True, + save_init_model=True, + best_file_name=best_file_name, + last_file_name=last_file_name, + init_file_name=init_file_name, + file_path=tmp, + ) + deep_rgs_train.fit(X, y) + + deep_rgs_best = deep_rgs() + deep_rgs_best.load_model( + model_path=os.path.join(tmp, best_file_name + ".keras"), + ) + ypred_best = deep_rgs_best.predict(X) + assert 
len(ypred_best) == len(y) + + deep_rgs_last = deep_rgs() + deep_rgs_last.load_model( + model_path=os.path.join(tmp, last_file_name + ".keras"), + ) + ypred_last = deep_rgs_last.predict(X) + assert len(ypred_last) == len(y) + + deep_rgs_init = deep_rgs() + deep_rgs_init.load_model( + model_path=os.path.join(tmp, init_file_name + ".keras"), + ) + ypred_init = deep_rgs_init.predict(X) + assert len(ypred_init) == len(y) diff --git a/aeon/testing/expected_results/classifier_results_reproduction.py b/aeon/testing/expected_results/classifier_results_reproduction.py index 06c5231d2e..441a496442 100644 --- a/aeon/testing/expected_results/classifier_results_reproduction.py +++ b/aeon/testing/expected_results/classifier_results_reproduction.py @@ -23,6 +23,7 @@ WEASEL_V2, BOSSEnsemble, ContractableBOSS, + MrSQMClassifier, TemporalDictionaryEnsemble, ) from aeon.classification.distance_based import ( @@ -56,7 +57,6 @@ from aeon.classification.ordinal_classification import OrdinalTDE from aeon.classification.shapelet_based import ( LearningShapeletClassifier, - MrSQMClassifier, SASTClassifier, ShapeletTransformClassifier, ) diff --git a/aeon/testing/expected_results/regressor_results_reproduction.py b/aeon/testing/expected_results/regressor_results_reproduction.py index d60feb2067..5c47340ef7 100644 --- a/aeon/testing/expected_results/regressor_results_reproduction.py +++ b/aeon/testing/expected_results/regressor_results_reproduction.py @@ -7,7 +7,7 @@ from aeon.regression.convolution_based import ( HydraRegressor, MultiRocketHydraRegressor, - RocketRegressor + RocketRegressor, ) from aeon.regression.distance_based import KNeighborsTimeSeriesRegressor from aeon.regression.feature_based import ( @@ -25,9 +25,8 @@ RandomIntervalSpectralEnsembleRegressor, TimeSeriesForestRegressor, ) -from aeon.regression.shapelet_based import ( - RDSTRegressor, -) +from aeon.regression.shapelet_based import RDSTRegressor + def _reproduce_regression_covid_3month(estimator): X_train, y_train = 
load_covid_3month(split="train") diff --git a/aeon/transformations/lag.py b/aeon/transformations/_legacy/lag.py similarity index 98% rename from aeon/transformations/lag.py rename to aeon/transformations/_legacy/lag.py index 93d14666fe..ff3ef76d0b 100644 --- a/aeon/transformations/lag.py +++ b/aeon/transformations/_legacy/lag.py @@ -6,7 +6,6 @@ import numpy as np import pandas as pd -from deprecated.sphinx import deprecated from pandas.api.types import is_integer_dtype from aeon.transformations.base import BaseTransformer @@ -24,12 +23,6 @@ def _coerce_to_int(obj): return obj -# TODO: remove in v0.11.0 -@deprecated( - version="0.10.0", - reason="Lag will be removed in version 0.11.0.", - category=FutureWarning, -) class Lag(BaseTransformer): """Lagging transformer. Lags time series by one or multiple lags. @@ -298,12 +291,6 @@ def get_test_params(cls, parameter_set="default"): return [params1, params2, params3] -# TODO: remove in v0.11.0 -@deprecated( - version="0.10.0", - reason="ReducerTransform will be removed in version 0.11.0.", - category=FutureWarning, -) class ReducerTransform(BaseTransformer): """Transformer for forecasting reduction. Prepares tabular X/y via lag and trafos. 
diff --git a/aeon/transformations/outlier_detection.py b/aeon/transformations/_legacy/outlier_detection.py similarity index 93% rename from aeon/transformations/outlier_detection.py rename to aeon/transformations/_legacy/outlier_detection.py index 81c8e8b40a..26b9826aa0 100644 --- a/aeon/transformations/outlier_detection.py +++ b/aeon/transformations/_legacy/outlier_detection.py @@ -7,18 +7,11 @@ import numpy as np import pandas as pd -from deprecated.sphinx import deprecated from aeon.forecasting.model_selection import SlidingWindowSplitter from aeon.transformations.base import BaseTransformer -# TODO: remove in v0.11.0 -@deprecated( - version="0.10.0", - reason="HampelFilter will be removed in version 0.11.0.", - category=FutureWarning, -) class HampelFilter(BaseTransformer): """Use HampelFilter to detect outliers based on a sliding window. @@ -46,14 +39,6 @@ class HampelFilter(BaseTransformer): ---------- .. [1] Hampel F. R., "The influence curve and its role in robust estimation", Journal of the American Statistical Association, 69, 382–393, 1974 - - Examples - -------- - >>> from aeon.transformations.outlier_detection import HampelFilter - >>> from aeon.datasets import load_airline - >>> y = load_airline() - >>> transformer = HampelFilter(window_length=10) - >>> y_hat = transformer.fit_transform(y) """ _tags = { diff --git a/aeon/transformations/_legacy/subset.py b/aeon/transformations/_legacy/subset.py index 48facac3b5..e8dea367d3 100644 --- a/aeon/transformations/_legacy/subset.py +++ b/aeon/transformations/_legacy/subset.py @@ -8,106 +8,6 @@ from aeon.transformations.base import BaseTransformer -class _IndexSubset(BaseTransformer): - r"""Index subsetting transformer. - - In transform, subsets `X` to the indices in `y.index`. - If `y` is None, returns `X` without subsetting. - numpy-based `X` are interpreted as having a RangeIndex starting at n, - where n is the number of numpy rows seen so far through `fit` and `update`. 
- Non-pandas types are interpreted as having index as after conversion to the - `"pd.DataFrame"` aeon type. - - Parameters - ---------- - index_treatment : str, optional, one of "keep" (default) or "remove" - determines which indices are kept in `Xt = transform(X, y)` - "keep" = all indices in y also appear in Xt. If not present in X, NA is filled. - "remove" = only indices that appear in both X and y are present in Xt. - """ - - _tags = { - "input_data_type": "Series", - # what is the abstract type of X: Series, or Panel - "output_data_type": "Series", - # what abstract type is returned: Primitives, Series, Panel - "instancewise": True, # is this an instance-wise transform? - "X_inner_type": ["pd.DataFrame", "pd.Series"], - "y_inner_type": ["pd.DataFrame", "pd.Series"], - "transform-returns-same-time-index": False, - "fit_is_empty": False, - "capability:multivariate": True, - "capability:inverse_transform": False, - "remember_data": True, # remember all data seen as _X - } - - def __init__(self, index_treatment="keep"): - self.index_treatment = index_treatment - super().__init__() - - def _transform(self, X, y=None): - """Transform X and return a transformed version. 
- - private _transform containing the core logic, called from transform - - Parameters - ---------- - X : pd.DataFrame or pd.Series - Data to be transformed - y : pd.DataFrame or pd.Series - Additional data, e.g., labels for transformation - - Returns - ------- - Xt : pd.DataFrame or pd.Series, same type as X - transformed version of X - """ - if y is None: - return X - - X = self._X - - index_treatment = self.index_treatment - ind_X_and_y = X.index.intersection(y.index) - - if index_treatment == "remove": - Xt = X.loc[ind_X_and_y] - elif index_treatment == "keep": - Xt = X.loc[ind_X_and_y] - y_idx_frame = type(X)(index=y.index) - Xt = Xt.combine_first(y_idx_frame) - else: - raise ValueError( - f'index_treatment must be one of "remove", "keep", but found' - f' "{index_treatment}"' - ) - return Xt - - @classmethod - def get_test_params(cls, parameter_set="default"): - """Return testing parameter settings for the estimator. - - Parameters - ---------- - parameter_set : str, default="default" - Name of the set of test parameters to return, for use in tests. If no - special parameters are defined for a value, will return `"default"` set. - There are currently no reserved values for transformers. - - Returns - ------- - params : dict or list of dict, default = {} - Parameters to create testing instances of the class - Each dict are parameters to construct an "interesting" test instance, i.e., - `MyClass(**params)` or `MyClass(**params[i])` creates a valid test instance. - `create_test_instance` uses the first (or only) dictionary in `params` - """ - params1 = {"index_treatment": "remove"} - params2 = {"index_treatment": "keep"} - - return [params1, params2] - - class _ColumnSelect(BaseTransformer): r"""Column selection transformer. 
diff --git a/aeon/transformations/summarize.py b/aeon/transformations/_legacy/summarize.py similarity index 91% rename from aeon/transformations/summarize.py rename to aeon/transformations/_legacy/summarize.py index edfbc28b37..3386cb9f68 100644 --- a/aeon/transformations/summarize.py +++ b/aeon/transformations/_legacy/summarize.py @@ -5,19 +5,12 @@ import numpy as np import pandas as pd -from deprecated.sphinx import deprecated from joblib import Parallel, delayed from aeon.transformations.base import BaseTransformer from aeon.utils.multiindex import flatten_multiindex -# TODO: remove in v0.11.0 -@deprecated( - version="0.10.0", - reason="WindowSummarizer will be removed in version 0.11.0.", - category=FutureWarning, -) class WindowSummarizer(BaseTransformer): """ Transformer for extracting time series features. @@ -132,55 +125,6 @@ class WindowSummarizer(BaseTransformer): Contains all transformed columns as well as non-transformed columns. The raw inputs to transformed columns will be dropped. self: reference to self - - Examples - -------- - >>> import pandas as pd - >>> from aeon.transformations.summarize import WindowSummarizer - >>> from aeon.datasets import load_airline, load_longley - >>> from aeon.forecasting.naive import NaiveForecaster - >>> from aeon.forecasting.base import ForecastingHorizon - >>> from aeon.forecasting.compose import ForecastingPipeline - >>> from aeon.forecasting.model_selection import temporal_train_test_split - >>> y = load_airline() - >>> kwargs = { - ... "lag_feature": { - ... "lag": [1], - ... "mean": [[1, 3], [3, 6]], - ... "std": [[1, 4]], - ... } - ... 
} - >>> transformer = WindowSummarizer(**kwargs) - >>> y_transformed = transformer.fit_transform(y) - - Example with transforming multiple columns of exogeneous features - >>> y, X = load_longley() - >>> y_train, y_test, X_train, X_test = temporal_train_test_split(y, X) - >>> fh = ForecastingHorizon(X_test.index, is_relative=False) - >>> # Example transforming only X - >>> pipe = ForecastingPipeline( - ... steps=[ - ... ("a", WindowSummarizer(n_jobs=1, target_cols=["POP", "GNPDEFL"])), - ... ("b", WindowSummarizer(n_jobs=1, target_cols=["GNP"], **kwargs)), - ... ("forecaster", NaiveForecaster(strategy="drift")), - ... ] - ... ) - >>> pipe_return = pipe.fit(y_train, X_train) - >>> y_pred1 = pipe_return.predict(fh=fh, X=X_test) - - Example with transforming multiple columns of exogeneous features - as well as the y column - >>> Z_train = pd.concat([X_train, y_train], axis=1) - >>> Z_test = pd.concat([X_test, y_test], axis=1) - >>> pipe = ForecastingPipeline( - ... steps=[ - ... ("a", WindowSummarizer(n_jobs=1, target_cols=["POP", "TOTEMP"])), - ... ("b", WindowSummarizer(**kwargs, n_jobs=1, target_cols=["GNP"])), - ... ("forecaster", NaiveForecaster(strategy="drift")), - ... ] - ... ) - >>> pipe_return = pipe.fit(y_train, Z_train) - >>> y_pred2 = pipe_return.predict(fh=fh, X=Z_test) """ _tags = { @@ -594,12 +538,6 @@ def _check_quantiles(quantiles): return quantiles -# TODO: remove in v0.11.0 -@deprecated( - version="0.10.0", - reason="SummaryTransformer will be removed in version 0.11.0.", - category=FutureWarning, -) class SummaryTransformer(BaseTransformer): """Calculate summary value of a time series. @@ -633,14 +571,6 @@ class SummaryTransformer(BaseTransformer): ----- This provides a wrapper around pandas DataFrame and Series agg and quantile methods. 
- - Examples - -------- - >>> from aeon.transformations.summarize import SummaryTransformer - >>> from aeon.datasets import load_airline - >>> y = load_airline() - >>> transformer = SummaryTransformer() - >>> y_mean = transformer.fit_transform(y) """ _tags = { @@ -742,12 +672,6 @@ def get_test_params(cls, parameter_set="default"): return [params1, params2, params3] -# TODO: remove in v0.11.0 -@deprecated( - version="0.10.0", - reason="PlateauFinder will be removed in version 0.11.0.", - category=FutureWarning, -) class PlateauFinder(BaseTransformer): """ Plateau finder transformer. @@ -833,12 +757,6 @@ def _transform(self, X, y=None): return Xt -# TODO: remove in v0.11.0 -@deprecated( - version="0.10.0", - reason="FittedParamExtractor will be removed in version 0.11.0.", - category=FutureWarning, -) class FittedParamExtractor(BaseTransformer): """Fitted parameter extractor. diff --git a/aeon/transformations/collection/convolution_based/tests/test_all_rockets.py b/aeon/transformations/collection/convolution_based/tests/test_all_rockets.py index 30dceecdad..b9439a1eaa 100644 --- a/aeon/transformations/collection/convolution_based/tests/test_all_rockets.py +++ b/aeon/transformations/collection/convolution_based/tests/test_all_rockets.py @@ -9,6 +9,7 @@ MultiRocket, Rocket, ) +from aeon.transformations.collection.convolution_based._minirocket import _PPV # Data used to test correctness of transform uni_test_data = np.array( @@ -213,3 +214,11 @@ def test_expected_basic_motions(): np.testing.assert_allclose( X2[:5, :5], expected_basic_motions["MiniRocket"], rtol=1e-4 ) + + +def test_ppv(): + """Test uncovered PPV function.""" + a = np.float32(10.0) + b = np.float32(-5.0) + assert _PPV(a, b) == 1 + assert _PPV(b, a) == 0 diff --git a/aeon/transformations/collection/dictionary_based/_sfa.py b/aeon/transformations/collection/dictionary_based/_sfa.py index 9bf638e3d5..df3f51604a 100644 --- a/aeon/transformations/collection/dictionary_based/_sfa.py +++ 
b/aeon/transformations/collection/dictionary_based/_sfa.py @@ -46,7 +46,7 @@ class SFA(BaseCollectionTransformer): Parameters ---------- word_length: int, default = 8 - length of word to shorten window to (using PAA) + length of word to shorten window to (using DFT) alphabet_size: int, default = 4 number of values to discretise each value to diff --git a/aeon/transformations/collection/dictionary_based/_sfa_fast.py b/aeon/transformations/collection/dictionary_based/_sfa_fast.py index 91d0e326e9..044217158e 100644 --- a/aeon/transformations/collection/dictionary_based/_sfa_fast.py +++ b/aeon/transformations/collection/dictionary_based/_sfa_fast.py @@ -59,7 +59,7 @@ class SFAFast(BaseCollectionTransformer): Parameters ---------- word_length : int, default = 8 - Length of word to shorten window to (using PAA). + Length of word to shorten window to (using DFT). alphabet_size : int, default = 4 Number of values to discretise each value to. window_size : int, default = 12 diff --git a/aeon/transformations/scaledlogit.py b/aeon/transformations/scaledlogit.py deleted file mode 100644 index 3a1f7f1a7d..0000000000 --- a/aeon/transformations/scaledlogit.py +++ /dev/null @@ -1,214 +0,0 @@ -"""Implements the scaled logit transformation.""" - -__maintainer__ = [] -__all__ = ["ScaledLogitTransformer"] - -from copy import deepcopy -from warnings import warn - -import numpy as np -from deprecated.sphinx import deprecated - -from aeon.transformations.base import BaseTransformer - - -# TODO: remove in v0.11.0 -@deprecated( - version="0.10.0", - reason="ScaledLogitTransformer will be removed in version 0.11.0.", - category=FutureWarning, -) -class ScaledLogitTransformer(BaseTransformer): - r"""Scaled logit transform or Log transform. - - If both lower_bound and upper_bound are not None, a scaled logit transform is - applied to the data. 
Otherwise, the transform applied is a log transform variation - that ensures the resulting values from the inverse transform are bounded - accordingly. The transform is applied to all scalar elements of the input array - individually. - - Combined with an aeon.forecasting.compose.TransformedTargetForecaster, it ensures - that the forecast stays between the specified bounds (lower_bound, upper_bound). - - Default is lower_bound = upper_bound = None, i.e., the identity transform. - - The logarithm transform is obtained for lower_bound = 0, upper_bound = None. - - Parameters - ---------- - lower_bound : float, optional, default=None - lower bound of inverse transform function - upper_bound : float, optional, default=None - upper bound of inverse transform function - - See Also - -------- - aeon.transformations.boxcox._LogTransformer : - Transformer input data using natural log. Can help normalize data and - compress variance of the series. - aeon.transformations.boxcox._BoxCoxTransformer : - Applies Box-Cox power transformation. Can help normalize data and - compress variance of the series. - aeon.transformations.exponent.ExponentTransformer : - Transform input data by raising it to an exponent. Can help compress - variance of series if a fractional exponent is supplied. - aeon.transformations.exponent.SqrtTransformer : - Transform input data by taking its square root. Can help compress - variance of input series. - - Notes - ----- - | The scaled logit transform is applied if both upper_bound and lower_bound are - | not None: - | :math:`log(\frac{x - a}{b - x})`, where a is the lower and b is the upper bound. - - | If upper_bound is None and lower_bound is not None the transform applied is - | a log transform of the form: - | :math:`log(x - a)` - - | If lower_bound is None and upper_bound is not None the transform applied is - | a log transform of the form: - | :math:`- log(b - x)` - - References - ---------- - .. 
[1] Hyndsight - Forecasting within limits: - https://robjhyndman.com/hyndsight/forecasting-within-limits/ - .. [2] Hyndman, R.J., & Athanasopoulos, G. (2021) Forecasting: principles and - practice, 3rd edition, OTexts: Melbourne, Australia. OTexts.com/fpp3. - Accessed on January 24th 2022. - - Examples - -------- - >>> import numpy as np - >>> from aeon.datasets import load_airline - >>> from aeon.transformations.scaledlogit import ScaledLogitTransformer - >>> from aeon.forecasting.trend import PolynomialTrendForecaster - >>> from aeon.forecasting.compose import TransformedTargetForecaster - >>> y = load_airline() - >>> fcaster = TransformedTargetForecaster([ - ... ("scaled_logit", ScaledLogitTransformer(0, 650)), - ... ("poly", PolynomialTrendForecaster(degree=2)) - ... ]) - >>> fcaster.fit(y) - TransformedTargetForecaster(...) - >>> y_pred = fcaster.predict(fh = np.arange(32)) - """ - - _tags = { - "input_data_type": "Series", - # what is the abstract type of X: Series, or Panel - "output_data_type": "Series", - # what abstract type is returned: Primitives, Series, Panel - "instancewise": True, # is this an instance-wise transform? - "X_inner_type": "np.ndarray", - "y_inner_type": "None", - "transform-returns-same-time-index": True, - "fit_is_empty": True, - "capability:multivariate": True, - "capability:inverse_transform": True, - "skip-inverse-transform": False, - } - - def __init__(self, lower_bound=None, upper_bound=None): - self.lower_bound = lower_bound - self.upper_bound = upper_bound - - super().__init__() - - def _transform(self, X, y=None): - """Transform X and return a transformed version. 
- - private _transform containing core logic, called from transform - - Parameters - ---------- - X : 2D np.ndarray - Data to be transformed - y : data structure of type y_inner_type, default=None - Ignored argument for interface compatibility - - Returns - ------- - transformed version of X - """ - if self.upper_bound is not None and np.any(X >= self.upper_bound): - warn( - "X in ScaledLogitTransformer should not have values " - "greater than upper_bound", - RuntimeWarning, - ) - - if self.lower_bound is not None and np.any(X <= self.lower_bound): - warn( - "X in ScaledLogitTransformer should not have values " - "lower than lower_bound", - RuntimeWarning, - ) - - if self.upper_bound and self.lower_bound: - X_transformed = np.log((X - self.lower_bound) / (self.upper_bound - X)) - elif self.upper_bound is not None: - X_transformed = -np.log(self.upper_bound - X) - elif self.lower_bound is not None: - X_transformed = np.log(X - self.lower_bound) - else: - X_transformed = deepcopy(X) - - return X_transformed - - def _inverse_transform(self, X, y=None): - """Inverse transform, inverse operation to transform. - - private _inverse_transform containing core logic, called from inverse_transform - - Parameters - ---------- - X : 2D np.ndarray - Data to be inverse transformed - y : data of y_inner_type, default=None - Ignored argument for interface compatibility - - Returns - ------- - inverse transformed version of X - """ - if self.upper_bound and self.lower_bound: - X_inv_transformed = (self.upper_bound * np.exp(X) + self.lower_bound) / ( - np.exp(X) + 1 - ) - elif self.upper_bound is not None: - X_inv_transformed = self.upper_bound - np.exp(-X) - elif self.lower_bound is not None: - X_inv_transformed = np.exp(X) + self.lower_bound - else: - X_inv_transformed = deepcopy(X) - - return X_inv_transformed - - @classmethod - def get_test_params(cls, parameter_set="default"): - """Return testing parameter settings for the estimator. 
- - Parameters - ---------- - parameter_set : str, default="default" - Name of the set of test parameters to return, for use in tests. If no - special parameters are defined for a value, will return `"default"` set. - - - Returns - ------- - params : dict or list of dict, default = {} - Parameters to create testing instances of the class - Each dict are parameters to construct an "interesting" test instance, i.e., - `MyClass(**params)` or `MyClass(**params[i])` creates a valid test instance. - `create_test_instance` uses the first (or only) dictionary in `params` - """ - test_params = [ - {"lower_bound": None, "upper_bound": None}, - {"lower_bound": -(10**6), "upper_bound": None}, - {"lower_bound": None, "upper_bound": 10**6}, - {"lower_bound": -(10**6), "upper_bound": 10**6}, - ] - return test_params diff --git a/aeon/transformations/tests/test_compose.py b/aeon/transformations/tests/test_compose.py index f25ecba802..c49f12a57d 100644 --- a/aeon/transformations/tests/test_compose.py +++ b/aeon/transformations/tests/test_compose.py @@ -19,13 +19,13 @@ OptionalPassthrough, TransformerPipeline, ) -from aeon.transformations._legacy.impute import Imputer from aeon.transformations._legacy.subset import _ColumnSelect +from aeon.transformations._legacy.summarize import SummaryTransformer from aeon.transformations._legacy.theta import ( _ThetaLinesTransformer as ThetaLinesTransformer, ) from aeon.transformations.collection.pad import PaddingTransformer -from aeon.transformations.summarize import SummaryTransformer +from aeon.transformations.impute import Imputer def test_dunder_mul(): diff --git a/aeon/transformations/tests/test_featureizer.py b/aeon/transformations/tests/test_featureizer.py deleted file mode 100644 index f6b4b3e03c..0000000000 --- a/aeon/transformations/tests/test_featureizer.py +++ /dev/null @@ -1,31 +0,0 @@ -"""Test YtoX.""" - -__maintainer__ = [] - -from numpy.testing import assert_array_equal - -from aeon.datasets import load_longley -from 
aeon.forecasting.model_selection import temporal_train_test_split -from aeon.testing.mock_estimators import MockTransformer -from aeon.transformations._legacy.compose import YtoX -from aeon.transformations.lag import Lag - -y, X = load_longley() -y_train, y_test, X_train, X_test = temporal_train_test_split(y, X) - - -def test_featurized_values(): - """Test against plain transformation. - - Test to check that the featurized values are same as if transformation - is done without YtoX. - """ - lags = len(y_test) - featurizer = YtoX() * MockTransformer() * Lag(lags) - featurizer.fit(X_train, y_train) - X_hat = featurizer.transform(X_test, y_test) - - exp_transformer = MockTransformer() - expected_len = lags + len(y_test) - y_hat = exp_transformer.fit_transform(y[-expected_len:]) - assert_array_equal(X_hat[f"lag_{lags}__TOTEMP"].values, y_hat.values) diff --git a/aeon/transformations/tests/test_fittedparamextractor.py b/aeon/transformations/tests/test_fittedparamextractor.py deleted file mode 100644 index 524f49255d..0000000000 --- a/aeon/transformations/tests/test_fittedparamextractor.py +++ /dev/null @@ -1,30 +0,0 @@ -"""Tests for FittedParamExtractor.""" - -__maintainer__ = [] -__all__ = [] - -import pytest - -from aeon.datasets import load_gunpoint -from aeon.forecasting.exp_smoothing import ExponentialSmoothing -from aeon.transformations.summarize import FittedParamExtractor -from aeon.utils.validation._dependencies import _check_estimator_deps - -X_train, y_train = load_gunpoint("train", return_type="nested_univ") - - -@pytest.mark.skipif( - not _check_estimator_deps(ExponentialSmoothing, severity="none"), - reason="skip test if required soft dependency for hmmlearn not available", -) -@pytest.mark.parametrize("param_names", ["initial_level"]) -def test_fitted_param_extractor(param_names): - forecaster = ExponentialSmoothing() - t = FittedParamExtractor(forecaster=forecaster, param_names=param_names) - Xt = t.fit_transform(X_train) - assert Xt.shape == 
(X_train.shape[0], len(t._check_param_names(param_names))) - - # check specific value - forecaster.fit(X_train.iloc[47, 0]) - fitted_param = forecaster.get_fitted_params()[param_names] - assert Xt.iloc[47, 0] == fitted_param diff --git a/aeon/transformations/tests/test_lag.py b/aeon/transformations/tests/test_lag.py deleted file mode 100644 index 92e1c8ef78..0000000000 --- a/aeon/transformations/tests/test_lag.py +++ /dev/null @@ -1,92 +0,0 @@ -"""Tests for Lag transformer.""" - -__maintainer__ = [] - -import itertools - -import pandas as pd -import pytest - -from aeon.testing.data_generation._legacy import get_examples, make_series -from aeon.transformations.lag import Lag - -# some examples with range vs time index, univariate vs multivariate (mv) -X_range_idx = get_examples("pd.DataFrame")[0] -X_range_idx_mv = get_examples("pd.DataFrame")[1] -X_time_idx = make_series() -X_time_idx_mv = make_series(n_columns=2) - -# all fixtures -X_fixtures = [X_range_idx, X_range_idx_mv, X_time_idx, X_time_idx_mv] - -# fixtures with time index -X_time_fixtures = [X_time_idx, X_time_idx_mv] - -index_outs = ["original", "extend", "shift"] - - -@pytest.mark.parametrize("X", X_fixtures) -@pytest.mark.parametrize("index_out", index_outs) -def test_lag_fit_transform_out_index(X, index_out): - """Test that index sets of fit_transform output behave as expected.""" - t = Lag(2, index_out=index_out) - Xt = t.fit_transform(X) - - if index_out == "original": - assert Xt.index.equals(X.index) - elif index_out == "extend": - assert X.index.isin(Xt.index).all() - assert len(Xt) == len(X) + 2 - elif index_out == "shift": - assert len(Xt) == len(X) - assert X.index[2:].isin(Xt.index).all() - - -@pytest.mark.parametrize("X", X_fixtures) -@pytest.mark.parametrize("index_out", index_outs) -@pytest.mark.parametrize("lag", [2, [2, 4], [-1, 0, 5]]) -def test_lag_fit_transform_columns(X, index_out, lag): - """Test that columns of fit_transform output behave as expected.""" - t = Lag(lags=lag, 
index_out=index_out) - Xt = t.fit_transform(X) - - if isinstance(lag, list): - len_lag = len(lag) - else: - len_lag = 1 - - def ncols(obj): - if isinstance(obj, pd.DataFrame): - return len(obj.columns) - else: - return 1 - - assert ncols(Xt) == ncols(X) * len_lag - - -@pytest.mark.parametrize("X", X_fixtures) -@pytest.mark.parametrize("index_out", index_outs) -@pytest.mark.parametrize("lags", [2, [2, 4]]) -def test_lag_fit_transform_column_names(X, index_out, lags): - """Test expected column names.""" - t = Lag(lags=lags, index_out=index_out) - Xt = t.fit_transform(X) - - if isinstance(Xt, pd.DataFrame): - lag_col_names = set(Xt.columns) - - if isinstance(X, pd.DataFrame): - col_names = X.columns - elif isinstance(X, pd.Series): - col_names = [X.name if X.name else 0] - else: - pass - - lags = [lags] if isinstance(lags, int) else lags - expected = { - f"lag_{lag}__{col}" for lag, col in itertools.product(lags, col_names) - } - assert lag_col_names == expected - - elif isinstance(Xt, pd.Series): - assert Xt.name is None diff --git a/aeon/transformations/tests/test_plateaufinder.py b/aeon/transformations/tests/test_plateaufinder.py index d94dac896e..b5ebd53f2b 100644 --- a/aeon/transformations/tests/test_plateaufinder.py +++ b/aeon/transformations/tests/test_plateaufinder.py @@ -4,7 +4,7 @@ import pandas as pd import pytest -from aeon.transformations.summarize import PlateauFinder +from aeon.transformations._legacy.summarize import PlateauFinder @pytest.mark.parametrize("value", [np.nan, -10, 10, -0.5, 0.5]) diff --git a/aeon/transformations/tests/test_rockets.py b/aeon/transformations/tests/test_rockets.py deleted file mode 100644 index cac8aeebb5..0000000000 --- a/aeon/transformations/tests/test_rockets.py +++ /dev/null @@ -1,13 +0,0 @@ -"""Tests for the rocket transformers.""" - -import numpy as np - -from aeon.transformations.collection.convolution_based._minirocket import _PPV - - -def test_ppv(): - """Test uncovered PPV function.""" - a = np.float32(10.0) - b = 
np.float32(-5.0) - assert _PPV(a, b) == 1 - assert _PPV(b, a) == 0 diff --git a/aeon/transformations/tests/test_summarize.py b/aeon/transformations/tests/test_summarize.py deleted file mode 100644 index 6585b4f79b..0000000000 --- a/aeon/transformations/tests/test_summarize.py +++ /dev/null @@ -1,94 +0,0 @@ -"""Test functionality of summary transformer.""" - -__maintainer__ = [] -import re - -import numpy as np -import pandas as pd -import pytest - -from aeon.testing.data_generation._legacy import make_series -from aeon.transformations.summarize import ALLOWED_SUM_FUNCS, SummaryTransformer - -# Test individual summary functions + lists and tuples of all summary functions -sum_funcs_to_test = [ALLOWED_SUM_FUNCS[0]] + [ALLOWED_SUM_FUNCS] -sum_funcs_to_test.append(tuple(ALLOWED_SUM_FUNCS)) -quantiles_to_test = [0.7, [0.5, 0.25, 0.1], (0.3, 0.0002, 0.99), None] - -# Incorrect inputs to test to ensure they raise expected errors -incorrect_sum_funcs_to_test = [ - "meen", - "md", - 0.25, - ["mean", "medain"], - np.array(["mean", "max"]), -] -incorrect_quantiles_to_test = [25, "0.25", "median", [0.25, 1.25], [0.25, "median"]] - -# Test functionality on pd.Series and pd.DataFrame (uni- and multi-variate) input -y1 = make_series(n_timepoints=75) -y2 = make_series(n_timepoints=75) -y1.name, y2.name = "y1", "y2" -y_df_uni = pd.DataFrame(y1) -y_df_multi = pd.concat([y1, y2], axis=1) -data_to_test = [y1, y_df_uni, y_df_multi] - - -@pytest.mark.parametrize("y", data_to_test) -@pytest.mark.parametrize("summary_arg", sum_funcs_to_test) -@pytest.mark.parametrize("quantile_arg", quantiles_to_test) -def test_summary_transformer_output_type(y, summary_arg, quantile_arg): - """Test whether output is DataFrame of correct dimensions.""" - transformer = SummaryTransformer( - summary_function=summary_arg, quantiles=quantile_arg - ) - transformer.fit(y) - yt = transformer.transform(y) - - output_is_dataframe = isinstance(yt, pd.DataFrame) - assert output_is_dataframe - - # compute number of 
expected rows and columns - - # all test cases are single series, so single row - expected_instances = 1 - - # expected number of feature types = quantiles plus summaries - expected_sum_features = 1 if isinstance(summary_arg, str) else len(summary_arg) - if quantile_arg is None: - expected_q_features = 0 - elif isinstance(quantile_arg, (int, float)): - expected_q_features = 1 - else: - expected_q_features = len(quantile_arg) - expected_features = expected_sum_features + expected_q_features - - # for multivariate series, columns = no variables * no feature types - if isinstance(y, pd.DataFrame): - expected_features = len(y.columns) * expected_features - - assert yt.shape == (expected_instances, expected_features) - - -@pytest.mark.parametrize("summary_arg", incorrect_sum_funcs_to_test) -def test_summary_transformer_incorrect_summary_function_raises_error(summary_arg): - """Test if correct errors are raised for invalid summary_function input.""" - msg = rf"""`summary_function` must be None, or str or a list or tuple made up of - {ALLOWED_SUM_FUNCS}. - """ - with pytest.raises(ValueError, match=re.escape(msg)): - transformer = SummaryTransformer(summary_function=summary_arg, quantiles=None) - transformer.fit_transform(data_to_test[0]) - - -@pytest.mark.parametrize("quantile_arg", incorrect_quantiles_to_test) -def test_summary_transformer_incorrect_quantile_raises_error(quantile_arg): - """Test if correct errors are raised for invalid quantiles input.""" - msg = """`quantiles` must be None, int, float or a list or tuple made up of - int and float values that are between 0 and 1. 
- """ - with pytest.raises(ValueError, match=msg): - transformer = SummaryTransformer( - summary_function="mean", quantiles=quantile_arg - ) - transformer.fit_transform(data_to_test[0]) diff --git a/aeon/transformations/tests/test_window_summarizer.py b/aeon/transformations/tests/test_window_summarizer.py deleted file mode 100644 index 636e3c78c6..0000000000 --- a/aeon/transformations/tests/test_window_summarizer.py +++ /dev/null @@ -1,171 +0,0 @@ -"""Test extraction of features across (shifted) windows.""" - -__maintainer__ = [] - -import numpy as np -import pandas as pd -import pytest - -from aeon.datasets import load_airline, load_longley -from aeon.forecasting.model_selection import temporal_train_test_split -from aeon.testing.data_generation._legacy import get_examples -from aeon.transformations.summarize import WindowSummarizer - - -def check_eval(test_input, expected): - """Test which columns are returned for different arguments. - - For a detailed description what these arguments do, - and how theyinteract see docstring of DateTimeFeatures. 
- """ - if test_input is not None: - assert len(test_input) == len(expected) - assert all([a == b for a, b in zip(test_input, expected)]) - else: - assert expected is None - - -# Load data that will be the basis of tests -y = load_airline() -y_pd = get_examples("pd.DataFrame")[0] -y_series = get_examples("pd.Series")[0] -y_multi = get_examples("pd-multiindex")[0] -# y Train will be univariate data set -y_train, y_test = temporal_train_test_split(y) - -# Create Panel sample data -mi = pd.MultiIndex.from_product([[0], y.index], names=["instances", "timepoints"]) -y_group1 = pd.DataFrame(y.values, index=mi, columns=["y"]) - -mi = pd.MultiIndex.from_product([[1], y.index], names=["instances", "timepoints"]) -y_group2 = pd.DataFrame(y.values, index=mi, columns=["y"]) - -y_grouped = pd.concat([y_group1, y_group2]) - -mi = pd.MultiIndex.from_product([[0], [0], y.index], names=["h1", "h2", "time"]) -y_hier1 = pd.DataFrame(y.values, index=mi, columns=["y"]) - -mi = pd.MultiIndex.from_product([[0], [1], y.index], names=["h1", "h2", "time"]) -y_hier2 = pd.DataFrame(y.values, index=mi, columns=["y"]) - -mi = pd.MultiIndex.from_product([[1], [0], y.index], names=["h1", "h2", "time"]) -y_hier3 = pd.DataFrame(y.values, index=mi, columns=["y"]) - -mi = pd.MultiIndex.from_product([[1], [1], y.index], names=["h1", "h2", "time"]) -y_hier4 = pd.DataFrame(y.values, index=mi, columns=["y"]) - -y_hierarchical = pd.concat([y_hier1, y_hier2, y_hier3, y_hier4]) - -y_ll, X_ll = load_longley() -y_ll_train, _, X_ll_train, X_ll_test = temporal_train_test_split(y_ll, X_ll) - -# Get different WindowSummarizer functions -kwargs = WindowSummarizer.get_test_params()[0] -kwargs_alternames = WindowSummarizer.get_test_params()[1] -kwargs_variant = WindowSummarizer.get_test_params()[2] - - -def count_gt100(x): - """Count how many observations lie above threshold 100.""" - return np.sum((x > 100)[::-1]) - - -# Cannot be pickled in get_test_params, therefore here explicit -kwargs_custom = { - 
"lag_feature": { - count_gt100: [[3, 2]], - } -} -# Generate named and unnamed y -y_train.name = None -y_train_named = y_train.copy() -y_train_named.name = "y" - -# Target for multivariate extraction -Xtmvar = ["POP_lag_3", "POP_lag_6", "GNP_lag_3", "GNP_lag_6"] -Xtmvar = Xtmvar + ["GNPDEFL", "UNEMP", "ARMED"] -Xtmvar_none = ["GNPDEFL_lag_3", "GNPDEFL_lag_6", "GNP", "UNEMP", "ARMED", "POP"] - - -@pytest.mark.parametrize( - "kwargs, column_names, y, target_cols, truncate", - [ - ( - kwargs, - ["y_lag_1", "y_mean_1_3", "y_mean_1_12", "y_std_1_4"], - y_train_named, - None, - None, - ), - (kwargs_alternames, Xtmvar, X_ll_train, ["POP", "GNP"], None), - (kwargs_alternames, Xtmvar_none, X_ll_train, None, None), - ( - kwargs, - ["y_lag_1", "y_mean_1_3", "y_mean_1_12", "y_std_1_4"], - y_group1, - None, - None, - ), - ( - kwargs, - ["y_lag_1", "y_mean_1_3", "y_mean_1_12", "y_std_1_4"], - y_grouped, - None, - None, - ), - ( - kwargs, - ["y_lag_1", "y_mean_1_3", "y_mean_1_12", "y_std_1_4"], - y_hierarchical, - None, - None, - ), - ( - None, - ["var_0_lag_1", "var_1"], - y_multi, - None, - None, - ), - (None, None, y_train, None, None), - (None, ["a_lag_1"], y_pd, None, None), - (kwargs_custom, ["a_count_gt100_3_2"], y_pd, None, None), - (kwargs_alternames, ["0_lag_3", "0_lag_6"], y_train, None, "bfill"), - ( - kwargs_variant, - ["0_mean_1_7", "0_mean_8_7", "0_cov_1_28"], - y_train, - None, - None, - ), - ], -) -def test_windowsummarizer(kwargs, column_names, y, target_cols, truncate): - """Test columns match kwargs arguments.""" - if kwargs is not None: - transformer = WindowSummarizer( - **kwargs, target_cols=target_cols, truncate=truncate - ) - else: - transformer = WindowSummarizer(target_cols=target_cols, truncate=truncate) - Xt = transformer.fit_transform(y) - if Xt is not None: - if isinstance(Xt, pd.DataFrame): - Xt_columns = Xt.columns.to_list() - else: - Xt_columns = Xt.name - else: - Xt_columns = None - - # check that the index names are preserved - assert 
y.index.names == Xt.index.names - - check_eval(Xt_columns, column_names) - - -@pytest.mark.xfail(raises=ValueError) -def test_wrong_column(): - """Test mismatch between X column names and target_cols.""" - transformer = WindowSummarizer(target_cols=["dummy"]) - Xt = transformer.fit_transform(X_ll_train) - return Xt diff --git a/docs/api_reference/classification.rst b/docs/api_reference/classification.rst index ab36e9a19b..89a6261c3b 100644 --- a/docs/api_reference/classification.rst +++ b/docs/api_reference/classification.rst @@ -32,7 +32,9 @@ Deep learning :toctree: auto_generated/ :template: class.rst + BaseDeepClassifier CNNClassifier + TimeCNNClassifier EncoderClassifier FCNClassifier InceptionTimeClassifier @@ -56,6 +58,7 @@ Dictionary-based ContractableBOSS IndividualBOSS IndividualTDE + MrSQMClassifier MUSE REDCOMETS TemporalDictionaryEnsemble @@ -133,7 +136,6 @@ Shapelet-based :template: class.rst LearningShapeletClassifier - MrSQMClassifier RDSTClassifier SASTClassifier ShapeletTransformClassifier diff --git a/docs/api_reference/clustering.rst b/docs/api_reference/clustering.rst index 1361f8b1a4..737a585631 100644 --- a/docs/api_reference/clustering.rst +++ b/docs/api_reference/clustering.rst @@ -20,6 +20,7 @@ Deep learning BaseDeepClusterer AEFCNClusterer + AEResNetClusterer Clustering Algorithms --------------------- diff --git a/docs/api_reference/distances.rst b/docs/api_reference/distances.rst index ef79912320..c11ca70151 100644 --- a/docs/api_reference/distances.rst +++ b/docs/api_reference/distances.rst @@ -153,6 +153,9 @@ Shape Dynamic Time Warping (Shape DTW) shape_dtw_pairwise_distance shape_dtw_cost_matrix shape_dtw_alignment_path + _pad_ts_collection_edges + _pad_ts_edges + _transform_subsequences Squared ------- diff --git a/docs/api_reference/networks.rst b/docs/api_reference/networks.rst index 0a33613289..65b8aefa14 100644 --- a/docs/api_reference/networks.rst +++ b/docs/api_reference/networks.rst @@ -15,6 +15,7 @@ Deep learning networks 
BaseDeepLearningNetwork CNNNetwork + TimeCNNNetwork EncoderNetwork FCNNetwork InceptionNetwork @@ -22,3 +23,6 @@ Deep learning networks ResNetNetwork TapNetNetwork AEFCNNetwork + AEResNetNetwork + LITENetwork + AEBiGRUNetwork diff --git a/docs/api_reference/regression.rst b/docs/api_reference/regression.rst index 2001e5e4b4..bb0ff961c3 100644 --- a/docs/api_reference/regression.rst +++ b/docs/api_reference/regression.rst @@ -51,16 +51,18 @@ Deep learning :toctree: auto_generated/ :template: class.rst + BaseDeepRegressor CNNRegressor + TimeCNNRegressor EncoderRegressor FCNRegressor InceptionTimeRegressor IndividualLITERegressor IndividualInceptionRegressor LITETimeRegressor - LITETimeRegressor ResNetRegressor TapNetRegressor + MLPRegressor Distance-based -------------- diff --git a/docs/api_reference/transformations.rst b/docs/api_reference/transformations.rst index 5f1ea860a4..9752d7c36a 100644 --- a/docs/api_reference/transformations.rst +++ b/docs/api_reference/transformations.rst @@ -74,21 +74,6 @@ features, usually a vector of floats, but can also be categorical. When applied to collections or hierarchical data, the transformation result is a table with as many rows as time series in the collection and a column for each feature. -Summarization -~~~~~~~~~~~~~ - -These transformers extract simple summary features. - -.. currentmodule:: aeon.transformations.summarize - -.. autosummary:: - :toctree: auto_generated/ - :template: class.rst - - SummaryTransformer - WindowSummarizer - FittedParamExtractor - Shapelets, wavelets and convolution ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -172,23 +157,6 @@ These transformers extract larger collections of features. TSFreshFeatureExtractor Catch22 -Series-to-series transformers ------------------------------ - -Series-to-series transformers transform individual time series into another time series. -When applied to collections or hierarchical data, individual series are transformed -through broadcasting. 
- -Lagging -~~~~~~~ - -.. currentmodule:: aeon.transformations.lag - -.. autosummary:: - :toctree: auto_generated/ - :template: class.rst - - Lag Series transforms ~~~~~~~~~~~~~~~~~~~~~~~ @@ -206,14 +174,6 @@ Depending on the transformer, the transformation parameters can be fitted. BoxCoxTransformer LogTransformer -.. currentmodule:: aeon.transformations.scaledlogit - -.. autosummary:: - :toctree: auto_generated/ - :template: class.rst - - ScaledLogitTransformer - .. currentmodule:: aeon.transformations.exponent .. autosummary:: @@ -386,14 +346,6 @@ Bootstrap transformations Outlier detection, changepoint detection ---------------------------------------- -.. currentmodule:: aeon.transformations.outlier_detection - -.. autosummary:: - :toctree: auto_generated/ - :template: class.rst - - HampelFilter - .. currentmodule:: aeon.transformations.series._clasp .. autosummary:: diff --git a/docs/api_reference/visualisation.rst b/docs/api_reference/visualisation.rst index 1555d6bbaf..4d7be0feb8 100644 --- a/docs/api_reference/visualisation.rst +++ b/docs/api_reference/visualisation.rst @@ -31,3 +31,4 @@ Visualisation plot_time_series_with_change_points plot_time_series_with_profiles plot_cluster_algorithm + plot_network diff --git a/docs/getting_started.md b/docs/getting_started.md index 212c058dca..180d553a38 100644 --- a/docs/getting_started.md +++ b/docs/getting_started.md @@ -72,15 +72,14 @@ Quarter 1971 Q1 1.897371 1.987154 1.909734 3.657771 -0.1 ``` -We commonly refer to the number of observations for a time series as `n_timepoints` or -`n_timepoints`. If a series is multivariate, we refer to the dimensions as channels +We commonly refer to the number of observations for a time series as `n_timepoints`. If a series is multivariate, we refer to the dimensions as channels (to avoid confusion with the dimensions of array) and in code use `n_channels`. Dimensions may also be referred to as variables. 
Different parts of `aeon` work with single series or collections of series. The `anomaly detection` and `segmentation` modules will commonly use single series input, while `classification`, `regression` and `clustering` modules will use collections of time -series. Collections of time series may also be referred to a Panels. Collections of +series. Collections of time series may also be referred to as Panels. Collections of time series will often be accompanied by an array of target variables. ```{code-block} python diff --git a/examples/classification/deep_learning.ipynb b/examples/classification/deep_learning.ipynb index e9fc4e2db9..1e9415c32f 100644 --- a/examples/classification/deep_learning.ipynb +++ b/examples/classification/deep_learning.ipynb @@ -201,6 +201,7 @@ "names.remove(\"IndividualInception\")\n", "names.remove(\"IndividualLITE\")\n", "names.remove(\"MLP\")\n", + "names.remove(\"TimeCNN\")\n", "names.remove(\"TapNet\") # Multivariate only\n", "\n", "\n", diff --git a/examples/transformations/transformations.ipynb b/examples/transformations/transformations.ipynb deleted file mode 100644 index 53ff123f8d..0000000000 --- a/examples/transformations/transformations.ipynb +++ /dev/null @@ -1,890 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Transforming time series with aeon\n", - "\n", - "Transformers are objects that transform data from one representation to another. `aeon`\n", - "contains time series specific transformers which can be used in\n", - "pipelines in conjunction with other estimators.\n", - "Note: the term \"transformer\" is used in deep learning to refer to specific neural\n", - "network architectures. 
`aeon` transformers follow the `scikit-learn` design: they\n", - "have `fit`, `transform` and `fit_transform` methods that combine the two functions.\n", - "Some transformers also have `inverse_transform` that allows you to reverse the change.\n", - "\n", - "`aeon` distinguishes different types of transformer, depending on the input type accepted\n", - "by the `fit` and `transform` methods. The main distinction is whether all series types\n", - "(i.e. single time series, collections of time series, hierarchical time series) are accepted\n", - "and implicitly converted, or whether only a singular input type (i.e. collections) is accepted." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Transformers\n", - "\n", - "General transformers (in the package `aeon/transformations`, `aeon/transformations`,\n", - "`aeon/transformations/bootstrap` and `aeon/transformations/hierarchical`) aim to\n", - "accept all input types, and will attempt to restructure the data or broadcast to multiple transformer\n", - "objects if necessary to fit the input data to the data structure used by the transformer. For example,\n", - "if the class excepts a singular series but is given a collection of series, a separate instance of the\n", - "transformer is applied independently to each series. Transformers all extend the base class\n", - "`BaseTransformer`. General transformations are mostly used for single series tasks such as forecasting\n", - "and annotation, and are best used with `pd.Series` or `pd.DataFrame` input. 
Other valid data types will\n", - "be accepted but are likely to be converted to another format internally, see the\n", - "[data structures](../datasets/data_structures.ipynb) notebook for clarification of how\n", - "best\n", - "to store\n", - "data with aeon.\n", - "\n", - "Transformers differ in terms of whether they convert time series into different time series\n", - "(series-to-series transformation), or whether they convert series into feature vector(s)\n", - "(series-to-vector transformation).\n", - "\n", - "To illustrate the difference, we compare two single-series transformers with different output:\n", - "\n", - "* the Box-Cox transformer `BoxCoxTransformer`, a series-to-series transformer using the\n", - "[Box Cox power transform](https://en.wikipedia.org/wiki/Power_transform#Box%E2%80%93Cox_transformation).\n", - "* the summary transformer `SummaryTransformer`, a series-to-vector transformer that\n", - "finds summary statistics such as the mean an standard deviation of each series.\n" - ] - }, - { - "cell_type": "code", - "execution_count": 29, - "outputs": [], - "source": [ - "import warnings\n", - "\n", - "warnings.filterwarnings(\"ignore\")" - ], - "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2024-03-01T16:16:25.993971Z", - "start_time": "2024-03-01T16:16:25.989981Z" - } - } - }, - { - "cell_type": "code", - "execution_count": 30, - "metadata": { - "ExecuteTime": { - "end_time": "2024-03-01T16:16:26.098690Z", - "start_time": "2024-03-01T16:16:26.088717Z" - } - }, - "outputs": [ - { - "data": { - "text/plain": "pandas.core.series.Series" - }, - "execution_count": 30, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from aeon.datasets import load_airline\n", - "from aeon.transformations._legacy._boxcox import _BoxCoxTransformer as BoxCoxTransformer\n", - "from aeon.transformations.summarize import SummaryTransformer\n", - "from aeon.visualisation import plot_series\n", - "\n", - "boxcox_trans = 
BoxCoxTransformer()\n", - "summary_trans = SummaryTransformer()\n", - "\n", - "# airline is a single time series stored in a pd.Series\n", - "airline = load_airline()\n", - "\n", - "type(airline)" - ] - }, - { - "cell_type": "code", - "execution_count": 31, - "outputs": [ - { - "data": { - "text/plain": "Period\n1949-01 112.0\n1949-02 118.0\n1949-03 132.0\n1949-04 129.0\n1949-05 121.0\nFreq: M, Name: Number of airline passengers, dtype: float64" - }, - "execution_count": 31, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "airline[:5]" - ], - "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2024-03-01T16:16:26.108664Z", - "start_time": "2024-03-01T16:16:26.102680Z" - } - } - }, - { - "cell_type": "code", - "execution_count": 32, - "outputs": [ - { - "data": { - "text/plain": "(
,\n )" - }, - "execution_count": 32, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "text/plain": "
", - "image/png": "" - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "plot_series(airline)" - ], - "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2024-03-01T16:16:26.291176Z", - "start_time": "2024-03-01T16:16:26.109661Z" - } - } - }, - { - "cell_type": "code", - "execution_count": 33, - "metadata": { - "ExecuteTime": { - "end_time": "2024-03-01T16:16:26.302147Z", - "start_time": "2024-03-01T16:16:26.293172Z" - } - }, - "outputs": [ - { - "data": { - "text/plain": "pandas.core.series.Series" - }, - "execution_count": 33, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# this produces a pandas Series containing the transformed series\n", - "airline_bc = boxcox_trans.fit_transform(airline)\n", - "type(airline_bc)" - ] - }, - { - "cell_type": "code", - "execution_count": 34, - "outputs": [ - { - "data": { - "text/plain": "Period\n1949-01 6.827490\n1949-02 6.932822\n1949-03 7.161892\n1949-04 7.114611\n1949-05 6.983787\nFreq: M, dtype: float64" - }, - "execution_count": 34, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "airline_bc[:5]" - ], - "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2024-03-01T16:16:26.313117Z", - "start_time": "2024-03-01T16:16:26.304142Z" - } - } - }, - { - "cell_type": "code", - "execution_count": 35, - "outputs": [ - { - "data": { - "text/plain": "(
, )" - }, - "execution_count": 35, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "text/plain": "
", - "image/png": "" - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "plot_series(airline_bc)" - ], - "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2024-03-01T16:16:26.492637Z", - "start_time": "2024-03-01T16:16:26.315112Z" - } - } - }, - { - "cell_type": "code", - "execution_count": 36, - "metadata": { - "ExecuteTime": { - "end_time": "2024-03-01T16:16:26.504605Z", - "start_time": "2024-03-01T16:16:26.493634Z" - } - }, - "outputs": [ - { - "data": { - "text/plain": "pandas.core.frame.DataFrame" - }, - "execution_count": 36, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# this produces a pandas.DataFrame containing the feature vector for our single series\n", - "airline_summary = summary_trans.fit_transform(airline)\n", - "type(airline_summary)" - ] - }, - { - "cell_type": "code", - "execution_count": 37, - "outputs": [ - { - "data": { - "text/plain": " mean std min max 0.1 0.25 0.5 0.75 0.9\n0 280.298611 119.966317 104.0 622.0 135.3 180.0 265.5 360.5 453.2", - "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
meanstdminmax0.10.250.50.750.9
0280.298611119.966317104.0622.0135.3180.0265.5360.5453.2
\n
" - }, - "execution_count": 37, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "airline_summary" - ], - "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2024-03-01T16:16:26.516574Z", - "start_time": "2024-03-01T16:16:26.505602Z" - } - } - }, - { - "cell_type": "markdown", - "source": [ - "You can get a list of all series-to-series and series-to-vector transformers using\n", - "the output tags. Please consult the API for details on each transformations" - ], - "metadata": { - "collapsed": false - } - }, - { - "cell_type": "code", - "execution_count": 38, - "outputs": [ - { - "data": { - "text/plain": " name \\\n0 Aggregator \n1 AutoCorrelationTransformer \n2 BKFilter \n3 BoxCoxTransformer \n4 ClaSPTransformer \n5 ClearSky \n6 CollectionToSeriesWrapper \n7 ColumnConcatenator \n8 ColumnSelect \n9 ColumnwiseTransformer \n10 ConditionalDeseasonalizer \n11 CosineTransformer \n12 DOBIN \n13 DateTimeFeatures \n14 Deseasonalizer \n15 Detrender \n16 Differencer \n17 EAgglo \n18 ExponentTransformer \n19 FeatureSelection \n20 FeatureUnion \n21 Filter \n22 FitInTransform \n23 FourierFeatures \n24 FunctionTransformer \n25 HampelFilter \n26 Hidalgo \n27 Id \n28 Imputer \n29 IndexSubset \n30 InvertAugmenter \n31 InvertTransform \n32 KalmanFilterTransformer \n33 Lag \n34 LogTransformer \n35 MatrixProfileSeriesTransformer \n36 MatrixProfileTransformer \n37 MultiplexTransformer \n38 OptionalPassthrough \n39 PCATransformer \n40 PandasTransformAdaptor \n41 PartialAutoCorrelationTransformer \n42 PlateauFinder \n43 RandomSamplesAugmenter \n44 Reconciler \n45 ReducerTransform \n46 ReverseAugmenter \n47 STLTransformer \n48 STRAY \n49 ScaledLogitTransformer \n50 SqrtTransformer \n51 TabularToSeriesAdaptor \n52 ThetaLinesTransformer \n53 TimeBinAggregate \n54 TimeSince \n55 TransformerPipeline \n56 WhiteNoiseAugmenter \n57 WindowSummarizer \n58 YtoX \n\n estimator \n0 \n3 \n6 \n13 \n18 \n22 \n27 \n28 \n29 \n30 \n34 \n40 \n49 ", - 
"text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
nameestimator
0Aggregator<class 'aeon.transformations.hierarchical.aggr...
1AutoCorrelationTransformer<class 'aeon.transformations.acf.AutoCorrelati...
2BKFilter<class 'aeon.transformations.bkfilter.BKFilter'>
3BoxCoxTransformer<class 'aeon.transformations.boxcox.BoxCoxTran...
4ClaSPTransformer<class 'aeon.transformations.clasp.ClaSPTransf...
5ClearSky<class 'aeon.transformations.clear_sky.ClearSky'>
6CollectionToSeriesWrapper<class 'aeon.transformations.collection._colle...
7ColumnConcatenator<class 'aeon.transformations.compose.ColumnCon...
8ColumnSelect<class 'aeon.transformations.subset.ColumnSele...
9ColumnwiseTransformer<class 'aeon.transformations.compose.Columnwis...
10ConditionalDeseasonalizer<class 'aeon.transformations.detrend._deseason...
11CosineTransformer<class 'aeon.transformations.cos.CosineTransfo...
12DOBIN<class 'aeon.transformations.dobin.DOBIN'>
13DateTimeFeatures<class 'aeon.transformations.date.DateTimeFeat...
14Deseasonalizer<class 'aeon.transformations.detrend._deseason...
15Detrender<class 'aeon.transformations.detrend._detrend....
16Differencer<class 'aeon.transformations.difference.Differ...
17EAgglo<class 'aeon.annotation.eagglo.EAgglo'>
18ExponentTransformer<class 'aeon.transformations.exponent.Exponent...
19FeatureSelection<class 'aeon.transformations.feature_selection...
20FeatureUnion<class 'aeon.transformations.compose.FeatureUn...
21Filter<class 'aeon.transformations.filter.Filter'>
22FitInTransform<class 'aeon.transformations.compose.FitInTran...
23FourierFeatures<class 'aeon.transformations.fourier.FourierFe...
24FunctionTransformer<class 'aeon.transformations.func_transform.Fu...
25HampelFilter<class 'aeon.transformations.outlier_detection...
26Hidalgo<class 'aeon.transformations.hidalgo.Hidalgo'>
27Id<class 'aeon.transformations.compose.Id'>
28Imputer<class 'aeon.transformations.impute.Imputer'>
29IndexSubset<class 'aeon.transformations.subset.IndexSubset'>
30InvertAugmenter<class 'aeon.transformations.augmenter.InvertA...
31InvertTransform<class 'aeon.transformations.compose.InvertTra...
32KalmanFilterTransformer<class 'aeon.transformations.kalman_filter.Kal...
33Lag<class 'aeon.transformations.lag.Lag'>
34LogTransformer<class 'aeon.transformations.boxcox.LogTransfo...
35MatrixProfileSeriesTransformer<class 'aeon.transformations.series._matrix_pr...
36MatrixProfileTransformer<class 'aeon.transformations.matrix_profile.Ma...
37MultiplexTransformer<class 'aeon.transformations.compose.Multiplex...
38OptionalPassthrough<class 'aeon.transformations.compose.OptionalP...
39PCATransformer<class 'aeon.transformations.pca.PCATransformer'>
40PandasTransformAdaptor<class 'aeon.transformations.adapt.PandasTrans...
41PartialAutoCorrelationTransformer<class 'aeon.transformations.acf.PartialAutoCo...
42PlateauFinder<class 'aeon.transformations.summarize.Plateau...
43RandomSamplesAugmenter<class 'aeon.transformations.augmenter.RandomS...
44Reconciler<class 'aeon.transformations.hierarchical.reco...
45ReducerTransform<class 'aeon.transformations.lag.ReducerTransf...
46ReverseAugmenter<class 'aeon.transformations.augmenter.Reverse...
47STLTransformer<class 'aeon.transformations.detrend._deseason...
48STRAY<class 'aeon.anomaly_detection._stray.STRAY'>
49ScaledLogitTransformer<class 'aeon.transformations.scaledlogit.Scale...
50SqrtTransformer<class 'aeon.transformations.exponent.SqrtTran...
51TabularToSeriesAdaptor<class 'aeon.transformations.adapt.TabularToSe...
52ThetaLinesTransformer<class 'aeon.transformations.theta.ThetaLinesT...
53TimeBinAggregate<class 'aeon.transformations.binning.TimeBinAg...
54TimeSince<class 'aeon.transformations.time_since.TimeSi...
55TransformerPipeline<class 'aeon.transformations.compose.Transform...
56WhiteNoiseAugmenter<class 'aeon.transformations.augmenter.WhiteNo...
57WindowSummarizer<class 'aeon.transformations.summarize.WindowS...
58YtoX<class 'aeon.transformations.compose.YtoX'>
\n
" - }, - "execution_count": 38, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from aeon.registry import all_estimators\n", - "\n", - "all_estimators(\n", - " \"transformer\",\n", - " exclude_estimator_types=\"collection-transformer\",\n", - " filter_tags={\n", - " \"output_data_type\": [\"Series\", \"Collection\"],\n", - " },\n", - " as_dataframe=True,\n", - ")" - ], - "metadata": { - "collapsed": false, - "pycharm": { - "is_executing": true - }, - "ExecuteTime": { - "end_time": "2024-03-01T16:16:26.612317Z", - "start_time": "2024-03-01T16:16:26.517570Z" - } - } - }, - { - "cell_type": "code", - "execution_count": 39, - "outputs": [ - { - "data": { - "text/plain": "Empty DataFrame\nColumns: [name, estimator]\nIndex: []", - "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n
nameestimator
\n
" - }, - "execution_count": 39, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "all_estimators(\n", - " \"transformer\",\n", - " exclude_estimator_types=\"collection-transformer\",\n", - " filter_tags={\n", - " \"output_data_type\": \"Tabular\",\n", - " },\n", - " as_dataframe=True,\n", - ")" - ], - "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2024-03-01T16:16:26.686120Z", - "start_time": "2024-03-01T16:16:26.613314Z" - } - } - }, - { - "cell_type": "markdown", - "source": [ - "If your series is split into training and testing data, you should call `fit` and\n", - "`transform` separately. `BoxCoxTransformer` has a parameter `lambda` that can be\n", - "learned from the train data:" - ], - "metadata": { - "collapsed": false - } - }, - { - "cell_type": "code", - "execution_count": 40, - "metadata": { - "ExecuteTime": { - "end_time": "2024-03-01T16:16:26.695096Z", - "start_time": "2024-03-01T16:16:26.687117Z" - } - }, - "outputs": [ - { - "data": { - "text/plain": "Period\n1958-01 340.0\n1958-02 318.0\n1958-03 362.0\n1958-04 348.0\n1958-05 363.0\nFreq: M, Name: Number of airline passengers, dtype: float64" - }, - "execution_count": 40, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from aeon.forecasting.model_selection import temporal_train_test_split\n", - "\n", - "train, test = temporal_train_test_split(airline)\n", - "boxcox = BoxCoxTransformer(method=\"mle\")\n", - "test[:5]" - ] - }, - { - "cell_type": "markdown", - "source": [ - "You can then apply the model without refitting lambda using just `transform`:" - ], - "metadata": { - "collapsed": false - } - }, - { - "cell_type": "code", - "execution_count": 41, - "metadata": { - "ExecuteTime": { - "end_time": "2024-03-01T16:16:26.709058Z", - "start_time": "2024-03-01T16:16:26.698088Z" - } - }, - "outputs": [ - { - "data": { - "text/plain": "Period\n1958-01 5.597723\n1958-02 5.536036\n1958-03 5.655489\n1958-04 5.619156\n1958-05 
5.658029\nFreq: M, dtype: float64" - }, - "execution_count": 41, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# fit the transformer on the training data\n", - "boxcox.fit(train)\n", - "# apply to test data\n", - "test_new = boxcox.transform(test)\n", - "test_new[:5]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Fitted model components of transformers can be found with the `get_fitted_params()`\n", - "method:" - ] - }, - { - "cell_type": "code", - "execution_count": 42, - "metadata": { - "ExecuteTime": { - "end_time": "2024-03-01T16:16:26.716040Z", - "start_time": "2024-03-01T16:16:26.710056Z" - } - }, - "outputs": [ - { - "data": { - "text/plain": "{'lambda': -0.01398297802065717}" - }, - "execution_count": 42, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "boxcox.get_fitted_params()\n", - "# this is a pandas.DataFrame that contains the fitted transformers" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Collection Transformers\n", - "\n", - "Collection transformers inherit from `BaseCollectionTransformer`, itself a subclass\n", - "of `BaseTransformer`. Collection transformers differ from the other transformers in\n", - "`aeon` in that the only accept collections of series, and they are more likely to not\n", - "transform each series independently. 
A `BaseCollectionTransformer` works best with the same\n", - "data structures used by clusterers, regressors and classifiers: 3D numpy of shape\n", - "`(n_cases, n_channels, n_timepoints)` for equal length series or a list of 2D numpy `[n_cases]`.\n", - " Like before, other valid collection input types can be used.\n", - " See the [data storage notebook](../datasets/data_structures.ipynb) for more\n", - " details.\n" - ] - }, - { - "cell_type": "code", - "execution_count": 48, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Arrows shape (n_cases, n_channels, n_timepoints) = (211, 1, 251)\n", - "Motions shape (n_cases, n_channels, n_timepoints) = (80, 6, 100)\n", - "Covid shape (n_cases, n_channels, n_timepoints) = (201, 1, 84)\n" - ] - } - ], - "source": [ - "from aeon.datasets import load_arrow_head, load_basic_motions, load_covid_3month\n", - "\n", - "# univariate classification\n", - "arrows, arrows_labels = load_arrow_head()\n", - "# multivariate classification\n", - "motions, motions_labels = load_basic_motions()\n", - "# univariate regression\n", - "covid, covid_response = load_covid_3month()\n", - "\n", - "print(\"Arrows shape (n_cases, n_channels, n_timepoints) = \", arrows.shape)\n", - "print(\"Motions shape (n_cases, n_channels, n_timepoints) = \", motions.shape)\n", - "print(\"Covid shape (n_cases, n_channels, n_timepoints) = \", covid.shape)" - ], - "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2024-03-01T16:16:26.999718Z", - "start_time": "2024-03-01T16:16:26.954838Z" - } - } - }, - { - "cell_type": "markdown", - "source": [ - "Collection transformers can also be series-to-series or series-to-vector. Most transformers will\n", - "always transform a collection of $n$ series into a collection of $n$ series or\n", - "vectors. 
For example, `Catch22` transforms each channel of each series into 22 summary features.\n" - ], - "metadata": { - "collapsed": false - } - }, - { - "cell_type": "code", - "execution_count": 49, - "outputs": [ - { - "data": { - "text/plain": "(211, 22)" - }, - "execution_count": 49, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from aeon.transformations.collection.feature_based import Catch22\n", - "\n", - "c22 = Catch22()\n", - "t = c22.fit_transform(arrows)\n", - "t.shape" - ], - "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2024-03-01T16:16:27.121393Z", - "start_time": "2024-03-01T16:16:27.000715Z" - } - } - }, - { - "cell_type": "markdown", - "source": [ - "Series-to-series transformers transform each series into a different series. This can\n", - " mean it has a different number of channels and/or be different length. For example,\n", - " `ElbowClassPairwise` performs a supervised channel selection to reduce\n", - " dimensionality. In the example below, it selects the best two channels from BasicMotions." - ], - "metadata": { - "collapsed": false - } - }, - { - "cell_type": "code", - "execution_count": 50, - "outputs": [ - { - "data": { - "text/plain": "(80, 2, 100)" - }, - "execution_count": 50, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from aeon.transformations.collection import ElbowClassPairwise\n", - "\n", - "ecp = ElbowClassPairwise()\n", - "t2 = ecp.fit_transform(motions, motions_labels)\n", - "t2.shape" - ], - "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2024-03-01T16:16:27.152310Z", - "start_time": "2024-03-01T16:16:27.122391Z" - } - } - }, - { - "cell_type": "markdown", - "source": [ - "series-to-vector Collection transformers return array-like objects of shape `(n_cases, n_features)`, so\n", - "they can be used with sklearn classifiers or regressors directly or in a pipeline. The following are equivalent." 
- ], - "metadata": { - "collapsed": false - } - }, - { - "cell_type": "code", - "execution_count": 51, - "outputs": [ - { - "data": { - "text/plain": "0.6285714285714286" - }, - "execution_count": 51, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from sklearn.linear_model import LogisticRegression\n", - "from sklearn.metrics import accuracy_score\n", - "from sklearn.model_selection import train_test_split\n", - "\n", - "arrows_train, arrows_test, y_train, y_test = train_test_split(\n", - " arrows, arrows_labels, test_size=0.33\n", - ")\n", - "\n", - "c22 = Catch22()\n", - "c22_train = c22.fit_transform(arrows_train, y_train)\n", - "\n", - "lr = LogisticRegression()\n", - "lr.fit(c22_train, y_train)\n", - "\n", - "c22_test = c22.transform(arrows_test, y_test)\n", - "preds = lr.predict(c22_test)\n", - "accuracy_score(y_test, preds)" - ], - "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2024-03-01T16:16:27.300647Z", - "start_time": "2024-03-01T16:16:27.153307Z" - } - } - }, - { - "cell_type": "code", - "execution_count": 52, - "outputs": [ - { - "data": { - "text/plain": "0.6285714285714286" - }, - "execution_count": 52, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from sklearn.pipeline import Pipeline\n", - "\n", - "pipe = Pipeline(steps=[(\"catch22\", c22), (\"logistic\", lr)])\n", - "pipe.fit(arrows_train, y_train)\n", - "preds = pipe.predict(arrows_test)\n", - "accuracy_score(y_test, preds)" - ], - "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2024-03-01T16:16:27.441605Z", - "start_time": "2024-03-01T16:16:27.301645Z" - } - } - }, - { - "cell_type": "markdown", - "source": [ - "Series-to-series collection transformers can be used in an sklearn pipeline with an\n", - "`aeon` classifier or regressor" - ], - "metadata": { - "collapsed": false - } - }, - { - "cell_type": "code", - "execution_count": 53, - "outputs": [ - { - "data": { - "text/plain": 
"Pipeline(steps=[('ECP', ElbowClassPairwise()),\n ('knn', KNeighborsTimeSeriesRegressor(distance='euclidean'))])", - "text/html": "
Pipeline(steps=[('ECP', ElbowClassPairwise()),\n                ('knn', KNeighborsTimeSeriesRegressor(distance='euclidean'))])
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" - }, - "execution_count": 53, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from sklearn.metrics import mean_squared_error\n", - "\n", - "from aeon.regression.distance_based import KNeighborsTimeSeriesRegressor\n", - "\n", - "knn = KNeighborsTimeSeriesRegressor(distance=\"euclidean\")\n", - "pipe = Pipeline(steps=[(\"ECP\", ecp), (\"knn\", knn)])\n", - "covid_train, covid_test, y_train, y_test = train_test_split(\n", - " covid, covid_response, test_size=0.75\n", - ")\n", - "pipe.fit(covid_train, y_train)" - ], - "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2024-03-01T16:16:30.862948Z", - "start_time": "2024-03-01T16:16:27.442603Z" - } - } - }, - { - "cell_type": "code", - "execution_count": 54, - "outputs": [ - { - "data": { - "text/plain": "0.0030565796424194182" - }, - "execution_count": 54, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "preds = pipe.predict(covid_test)\n", - "mean_squared_error(y_test, preds)" - ], - "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2024-03-01T16:16:30.880933Z", - "start_time": "2024-03-01T16:16:30.863946Z" - } - } - }, - { - "cell_type": "markdown", - "source": [ - "### Wrapping as a general transformer\n", - "### Wrapping as a general transformer\n", - "\n", - "Collection transformers can be wrapped to have to same functionality as a `BaseTransformer` using the `CollectionToSeriesWrapper`." 
- ], - "metadata": { - "collapsed": false - } - }, - { - "cell_type": "code", - "execution_count": 1, - "outputs": [ - { - "ename": "NameError", - "evalue": "name 'Catch22' is not defined", - "output_type": "error", - "traceback": [ - "\u001B[1;31m---------------------------------------------------------------------------\u001B[0m", - "\u001B[1;31mNameError\u001B[0m Traceback (most recent call last)", - "Cell \u001B[1;32mIn[1], line 3\u001B[0m\n\u001B[0;32m 1\u001B[0m \u001B[38;5;28;01mfrom\u001B[39;00m \u001B[38;5;21;01maeon\u001B[39;00m\u001B[38;5;21;01m.\u001B[39;00m\u001B[38;5;21;01mtransformations\u001B[39;00m\u001B[38;5;21;01m.\u001B[39;00m\u001B[38;5;21;01mcollection\u001B[39;00m \u001B[38;5;28;01mimport\u001B[39;00m CollectionToSeriesWrapper\n\u001B[1;32m----> 3\u001B[0m c22 \u001B[38;5;241m=\u001B[39m \u001B[43mCatch22\u001B[49m()\n\u001B[0;32m 4\u001B[0m wrapper \u001B[38;5;241m=\u001B[39m CollectionToSeriesWrapper(c22) \u001B[38;5;66;03m# wrap transformer to accept single series\u001B[39;00m\n\u001B[0;32m 6\u001B[0m wrapper\u001B[38;5;241m.\u001B[39mfit_transform(airline)\n", - "\u001B[1;31mNameError\u001B[0m: name 'Catch22' is not defined" - ] - } - ], - "source": [ - "from aeon.transformations.collection import CollectionToSeriesWrapper\n", - "\n", - "c22 = Catch22()\n", - "wrapper = CollectionToSeriesWrapper(c22) # wrap transformer to accept single series\n", - "\n", - "wrapper.fit_transform(airline)" - ], - "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2024-03-01T16:16:30.935785Z", - "start_time": "2024-03-01T16:16:30.881930Z" - } - } - } - ], - "metadata": { - "hide_input": false, - "kernelspec": { - "display_name": "Python 3.8.12 ('aeon-baseobject')", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", 
- "version": "3.8.12" - }, - "latex_envs": { - "LaTeX_envs_menu_present": true, - "autoclose": false, - "autocomplete": true, - "bibliofile": "biblio.bib", - "cite_by": "apalike", - "current_citInitial": 1, - "eqLabelWithNumbers": true, - "eqNumInitial": 1, - "hotkeys": { - "equation": "Ctrl-E", - "itemize": "Ctrl-I" - }, - "labels_anchors": false, - "latex_user_defs": false, - "report_style_numbering": false, - "user_envs_cfg": false - }, - "toc": { - "base_numbering": 1, - "nav_menu": {}, - "number_sections": true, - "sideBar": true, - "skip_h1_title": false, - "title_cell": "Table of Contents", - "title_sidebar": "Contents", - "toc_cell": false, - "toc_position": {}, - "toc_section_display": true, - "toc_window_display": false - }, - "varInspector": { - "cols": { - "lenName": 16, - "lenType": 16, - "lenVar": 40 - }, - "kernels_config": { - "python": { - "delete_cmd_postfix": "", - "delete_cmd_prefix": "del ", - "library": "var_list.py", - "varRefreshCmd": "print(var_dic_list())" - }, - "r": { - "delete_cmd_postfix": ") ", - "delete_cmd_prefix": "rm(", - "library": "var_list.r", - "varRefreshCmd": "cat(var_dic_list()) " - } - }, - "types_to_exclude": [ - "module", - "function", - "builtin_function_or_method", - "instance", - "_Feature" - ], - "window_display": false - }, - "vscode": { - "interpreter": { - "hash": "ff39becaf9fb8fe58d1d34fc2c63ee411a5dd80719d9cd02520236b9e8461a42" - } - } - }, - "nbformat": 4, - "nbformat_minor": 4 -}