From dc34d7e027d9fa3203f15e9b7ba027bb5cbf5e56 Mon Sep 17 00:00:00 2001 From: WinPlay02 Date: Fri, 15 Mar 2024 15:31:47 +0100 Subject: [PATCH 1/5] feat: add deterministic hash methods to the all types test: add tests to all new hash implementations --- poetry.lock | 122 +++++++++++++++++- pyproject.toml | 1 + src/safeds/data/image/containers/_image.py | 12 ++ src/safeds/data/tabular/containers/_column.py | 12 ++ src/safeds/data/tabular/containers/_row.py | 14 ++ src/safeds/data/tabular/containers/_table.py | 12 ++ .../data/tabular/containers/_tagged_table.py | 26 ++++ .../data/tabular/containers/_time_series.py | 26 ++++ .../transformation/_table_transformer.py | 12 ++ src/safeds/data/tabular/typing/_schema.py | 5 +- .../classical/classification/_classifier.py | 12 ++ .../ml/classical/regression/_regressor.py | 12 ++ .../data/image/containers/test_image.py | 38 ++++++ .../tabular/containers/_column/test_hash.py | 36 ++++++ .../_tagged_table/_time_series/test_eq.py | 50 +++++++ .../_tagged_table/_time_series/test_hash.py | 42 ++++++ .../_tagged_table/_time_series/test_sizeof.py | 2 +- .../_table/_tagged_table/test_eq.py | 46 +++++++ .../_table/_tagged_table/test_hash.py | 40 ++++++ .../tabular/containers/_table/test_hash.py | 40 ++++++ .../data/tabular/containers/test_row.py | 33 +++++ .../transformation/test_table_transformer.py | 122 ++++++++++++++++++ .../classification/test_classifier.py | 21 +++ .../ml/classical/regression/test_regressor.py | 21 +++ 24 files changed, 751 insertions(+), 6 deletions(-) create mode 100644 tests/safeds/data/tabular/containers/_column/test_hash.py create mode 100644 tests/safeds/data/tabular/containers/_table/_tagged_table/_time_series/test_eq.py create mode 100644 tests/safeds/data/tabular/containers/_table/_tagged_table/_time_series/test_hash.py create mode 100644 tests/safeds/data/tabular/containers/_table/_tagged_table/test_eq.py create mode 100644 tests/safeds/data/tabular/containers/_table/_tagged_table/test_hash.py create mode 
100644 tests/safeds/data/tabular/containers/_table/test_hash.py create mode 100644 tests/safeds/data/tabular/transformation/test_table_transformer.py diff --git a/poetry.lock b/poetry.lock index 75833ac2b..09160fda7 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand. [[package]] name = "anyio" @@ -2943,7 +2943,6 @@ files = [ {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"}, {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"}, - {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"}, {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"}, {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"}, {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"}, @@ -4144,7 +4143,124 @@ files = [ {file = "widgetsnbextension-4.0.9.tar.gz", hash = "sha256:3c1f5e46dc1166dfd40a42d685e6a51396fd34ff878742a3e47c6f0cc4a2a385"}, ] +[[package]] +name = "xxhash" +version = "3.4.1" +description = "Python binding for xxHash" +optional = false +python-versions = ">=3.7" +files = [ + {file = "xxhash-3.4.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:91dbfa55346ad3e18e738742236554531a621042e419b70ad8f3c1d9c7a16e7f"}, + 
{file = "xxhash-3.4.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:665a65c2a48a72068fcc4d21721510df5f51f1142541c890491afc80451636d2"}, + {file = "xxhash-3.4.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bb11628470a6004dc71a09fe90c2f459ff03d611376c1debeec2d648f44cb693"}, + {file = "xxhash-3.4.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5bef2a7dc7b4f4beb45a1edbba9b9194c60a43a89598a87f1a0226d183764189"}, + {file = "xxhash-3.4.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9c0f7b2d547d72c7eda7aa817acf8791f0146b12b9eba1d4432c531fb0352228"}, + {file = "xxhash-3.4.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:00f2fdef6b41c9db3d2fc0e7f94cb3db86693e5c45d6de09625caad9a469635b"}, + {file = "xxhash-3.4.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:23cfd9ca09acaf07a43e5a695143d9a21bf00f5b49b15c07d5388cadf1f9ce11"}, + {file = "xxhash-3.4.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:6a9ff50a3cf88355ca4731682c168049af1ca222d1d2925ef7119c1a78e95b3b"}, + {file = "xxhash-3.4.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:f1d7c69a1e9ca5faa75546fdd267f214f63f52f12692f9b3a2f6467c9e67d5e7"}, + {file = "xxhash-3.4.1-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:672b273040d5d5a6864a36287f3514efcd1d4b1b6a7480f294c4b1d1ee1b8de0"}, + {file = "xxhash-3.4.1-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:4178f78d70e88f1c4a89ff1ffe9f43147185930bb962ee3979dba15f2b1cc799"}, + {file = "xxhash-3.4.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:9804b9eb254d4b8cc83ab5a2002128f7d631dd427aa873c8727dba7f1f0d1c2b"}, + {file = "xxhash-3.4.1-cp310-cp310-win32.whl", hash = "sha256:c09c49473212d9c87261d22c74370457cfff5db2ddfc7fd1e35c80c31a8c14ce"}, + {file = "xxhash-3.4.1-cp310-cp310-win_amd64.whl", hash = 
"sha256:ebbb1616435b4a194ce3466d7247df23499475c7ed4eb2681a1fa42ff766aff6"}, + {file = "xxhash-3.4.1-cp310-cp310-win_arm64.whl", hash = "sha256:25dc66be3db54f8a2d136f695b00cfe88018e59ccff0f3b8f545869f376a8a46"}, + {file = "xxhash-3.4.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:58c49083801885273e262c0f5bbeac23e520564b8357fbb18fb94ff09d3d3ea5"}, + {file = "xxhash-3.4.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:b526015a973bfbe81e804a586b703f163861da36d186627e27524f5427b0d520"}, + {file = "xxhash-3.4.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:36ad4457644c91a966f6fe137d7467636bdc51a6ce10a1d04f365c70d6a16d7e"}, + {file = "xxhash-3.4.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:248d3e83d119770f96003271fe41e049dd4ae52da2feb8f832b7a20e791d2920"}, + {file = "xxhash-3.4.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2070b6d5bbef5ee031666cf21d4953c16e92c2f8a24a94b5c240f8995ba3b1d0"}, + {file = "xxhash-3.4.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b2746035f518f0410915e247877f7df43ef3372bf36cfa52cc4bc33e85242641"}, + {file = "xxhash-3.4.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2a8ba6181514681c2591840d5632fcf7356ab287d4aff1c8dea20f3c78097088"}, + {file = "xxhash-3.4.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:0aac5010869240e95f740de43cd6a05eae180c59edd182ad93bf12ee289484fa"}, + {file = "xxhash-3.4.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:4cb11d8debab1626181633d184b2372aaa09825bde709bf927704ed72765bed1"}, + {file = "xxhash-3.4.1-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:b29728cff2c12f3d9f1d940528ee83918d803c0567866e062683f300d1d2eff3"}, + {file = "xxhash-3.4.1-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:a15cbf3a9c40672523bdb6ea97ff74b443406ba0ab9bca10ceccd9546414bd84"}, + {file = 
"xxhash-3.4.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:6e66df260fed01ed8ea790c2913271641c58481e807790d9fca8bfd5a3c13844"}, + {file = "xxhash-3.4.1-cp311-cp311-win32.whl", hash = "sha256:e867f68a8f381ea12858e6d67378c05359d3a53a888913b5f7d35fbf68939d5f"}, + {file = "xxhash-3.4.1-cp311-cp311-win_amd64.whl", hash = "sha256:200a5a3ad9c7c0c02ed1484a1d838b63edcf92ff538770ea07456a3732c577f4"}, + {file = "xxhash-3.4.1-cp311-cp311-win_arm64.whl", hash = "sha256:1d03f1c0d16d24ea032e99f61c552cb2b77d502e545187338bea461fde253583"}, + {file = "xxhash-3.4.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:c4bbba9b182697a52bc0c9f8ec0ba1acb914b4937cd4a877ad78a3b3eeabefb3"}, + {file = "xxhash-3.4.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:9fd28a9da300e64e434cfc96567a8387d9a96e824a9be1452a1e7248b7763b78"}, + {file = "xxhash-3.4.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6066d88c9329ab230e18998daec53d819daeee99d003955c8db6fc4971b45ca3"}, + {file = "xxhash-3.4.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:93805bc3233ad89abf51772f2ed3355097a5dc74e6080de19706fc447da99cd3"}, + {file = "xxhash-3.4.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:64da57d5ed586ebb2ecdde1e997fa37c27fe32fe61a656b77fabbc58e6fbff6e"}, + {file = "xxhash-3.4.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7a97322e9a7440bf3c9805cbaac090358b43f650516486746f7fa482672593df"}, + {file = "xxhash-3.4.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bbe750d512982ee7d831838a5dee9e9848f3fb440e4734cca3f298228cc957a6"}, + {file = "xxhash-3.4.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:fd79d4087727daf4d5b8afe594b37d611ab95dc8e29fe1a7517320794837eb7d"}, + {file = "xxhash-3.4.1-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:743612da4071ff9aa4d055f3f111ae5247342931dedb955268954ef7201a71ff"}, + {file = 
"xxhash-3.4.1-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:b41edaf05734092f24f48c0958b3c6cbaaa5b7e024880692078c6b1f8247e2fc"}, + {file = "xxhash-3.4.1-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:a90356ead70d715fe64c30cd0969072de1860e56b78adf7c69d954b43e29d9fa"}, + {file = "xxhash-3.4.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:ac56eebb364e44c85e1d9e9cc5f6031d78a34f0092fea7fc80478139369a8b4a"}, + {file = "xxhash-3.4.1-cp312-cp312-win32.whl", hash = "sha256:911035345932a153c427107397c1518f8ce456f93c618dd1c5b54ebb22e73747"}, + {file = "xxhash-3.4.1-cp312-cp312-win_amd64.whl", hash = "sha256:f31ce76489f8601cc7b8713201ce94b4bd7b7ce90ba3353dccce7e9e1fee71fa"}, + {file = "xxhash-3.4.1-cp312-cp312-win_arm64.whl", hash = "sha256:b5beb1c6a72fdc7584102f42c4d9df232ee018ddf806e8c90906547dfb43b2da"}, + {file = "xxhash-3.4.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:6d42b24d1496deb05dee5a24ed510b16de1d6c866c626c2beb11aebf3be278b9"}, + {file = "xxhash-3.4.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3b685fab18876b14a8f94813fa2ca80cfb5ab6a85d31d5539b7cd749ce9e3624"}, + {file = "xxhash-3.4.1-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:419ffe34c17ae2df019a4685e8d3934d46b2e0bbe46221ab40b7e04ed9f11137"}, + {file = "xxhash-3.4.1-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0e041ce5714f95251a88670c114b748bca3bf80cc72400e9f23e6d0d59cf2681"}, + {file = "xxhash-3.4.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fc860d887c5cb2f524899fb8338e1bb3d5789f75fac179101920d9afddef284b"}, + {file = "xxhash-3.4.1-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:312eba88ffe0a05e332e3a6f9788b73883752be63f8588a6dc1261a3eaaaf2b2"}, + {file = "xxhash-3.4.1-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:e01226b6b6a1ffe4e6bd6d08cfcb3ca708b16f02eb06dd44f3c6e53285f03e4f"}, + {file = 
"xxhash-3.4.1-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:9f3025a0d5d8cf406a9313cd0d5789c77433ba2004b1c75439b67678e5136537"}, + {file = "xxhash-3.4.1-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:6d3472fd4afef2a567d5f14411d94060099901cd8ce9788b22b8c6f13c606a93"}, + {file = "xxhash-3.4.1-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:43984c0a92f06cac434ad181f329a1445017c33807b7ae4f033878d860a4b0f2"}, + {file = "xxhash-3.4.1-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:a55e0506fdb09640a82ec4f44171273eeabf6f371a4ec605633adb2837b5d9d5"}, + {file = "xxhash-3.4.1-cp37-cp37m-win32.whl", hash = "sha256:faec30437919555b039a8bdbaba49c013043e8f76c999670aef146d33e05b3a0"}, + {file = "xxhash-3.4.1-cp37-cp37m-win_amd64.whl", hash = "sha256:c9e1b646af61f1fc7083bb7b40536be944f1ac67ef5e360bca2d73430186971a"}, + {file = "xxhash-3.4.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:961d948b7b1c1b6c08484bbce3d489cdf153e4122c3dfb07c2039621243d8795"}, + {file = "xxhash-3.4.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:719a378930504ab159f7b8e20fa2aa1896cde050011af838af7e7e3518dd82de"}, + {file = "xxhash-3.4.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:74fb5cb9406ccd7c4dd917f16630d2e5e8cbbb02fc2fca4e559b2a47a64f4940"}, + {file = "xxhash-3.4.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5dab508ac39e0ab988039bc7f962c6ad021acd81fd29145962b068df4148c476"}, + {file = "xxhash-3.4.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8c59f3e46e7daf4c589e8e853d700ef6607afa037bfad32c390175da28127e8c"}, + {file = "xxhash-3.4.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8cc07256eff0795e0f642df74ad096f8c5d23fe66bc138b83970b50fc7f7f6c5"}, + {file = "xxhash-3.4.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e9f749999ed80f3955a4af0eb18bb43993f04939350b07b8dd2f44edc98ffee9"}, + {file = 
"xxhash-3.4.1-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:7688d7c02149a90a3d46d55b341ab7ad1b4a3f767be2357e211b4e893efbaaf6"}, + {file = "xxhash-3.4.1-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:a8b4977963926f60b0d4f830941c864bed16aa151206c01ad5c531636da5708e"}, + {file = "xxhash-3.4.1-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:8106d88da330f6535a58a8195aa463ef5281a9aa23b04af1848ff715c4398fb4"}, + {file = "xxhash-3.4.1-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:4c76a77dbd169450b61c06fd2d5d436189fc8ab7c1571d39265d4822da16df22"}, + {file = "xxhash-3.4.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:11f11357c86d83e53719c592021fd524efa9cf024dc7cb1dfb57bbbd0d8713f2"}, + {file = "xxhash-3.4.1-cp38-cp38-win32.whl", hash = "sha256:0c786a6cd74e8765c6809892a0d45886e7c3dc54de4985b4a5eb8b630f3b8e3b"}, + {file = "xxhash-3.4.1-cp38-cp38-win_amd64.whl", hash = "sha256:aabf37fb8fa27430d50507deeab2ee7b1bcce89910dd10657c38e71fee835594"}, + {file = "xxhash-3.4.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:6127813abc1477f3a83529b6bbcfeddc23162cece76fa69aee8f6a8a97720562"}, + {file = "xxhash-3.4.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:ef2e194262f5db16075caea7b3f7f49392242c688412f386d3c7b07c7733a70a"}, + {file = "xxhash-3.4.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:71be94265b6c6590f0018bbf73759d21a41c6bda20409782d8117e76cd0dfa8b"}, + {file = "xxhash-3.4.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:10e0a619cdd1c0980e25eb04e30fe96cf8f4324758fa497080af9c21a6de573f"}, + {file = "xxhash-3.4.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:fa122124d2e3bd36581dd78c0efa5f429f5220313479fb1072858188bc2d5ff1"}, + {file = "xxhash-3.4.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e17032f5a4fea0a074717fe33477cb5ee723a5f428de7563e75af64bfc1b1e10"}, + {file = 
"xxhash-3.4.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ca7783b20e3e4f3f52f093538895863f21d18598f9a48211ad757680c3bd006f"}, + {file = "xxhash-3.4.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:d77d09a1113899fad5f354a1eb4f0a9afcf58cefff51082c8ad643ff890e30cf"}, + {file = "xxhash-3.4.1-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:21287bcdd299fdc3328cc0fbbdeaa46838a1c05391264e51ddb38a3f5b09611f"}, + {file = "xxhash-3.4.1-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:dfd7a6cc483e20b4ad90224aeb589e64ec0f31e5610ab9957ff4314270b2bf31"}, + {file = "xxhash-3.4.1-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:543c7fcbc02bbb4840ea9915134e14dc3dc15cbd5a30873a7a5bf66039db97ec"}, + {file = "xxhash-3.4.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:fe0a98d990e433013f41827b62be9ab43e3cf18e08b1483fcc343bda0d691182"}, + {file = "xxhash-3.4.1-cp39-cp39-win32.whl", hash = "sha256:b9097af00ebf429cc7c0e7d2fdf28384e4e2e91008130ccda8d5ae653db71e54"}, + {file = "xxhash-3.4.1-cp39-cp39-win_amd64.whl", hash = "sha256:d699b921af0dcde50ab18be76c0d832f803034d80470703700cb7df0fbec2832"}, + {file = "xxhash-3.4.1-cp39-cp39-win_arm64.whl", hash = "sha256:2be491723405e15cc099ade1280133ccfbf6322d2ef568494fb7d07d280e7eee"}, + {file = "xxhash-3.4.1-pp310-pypy310_pp73-macosx_10_9_x86_64.whl", hash = "sha256:431625fad7ab5649368c4849d2b49a83dc711b1f20e1f7f04955aab86cd307bc"}, + {file = "xxhash-3.4.1-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fc6dbd5fc3c9886a9e041848508b7fb65fd82f94cc793253990f81617b61fe49"}, + {file = "xxhash-3.4.1-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f3ff8dbd0ec97aec842476cb8ccc3e17dd288cd6ce3c8ef38bff83d6eb927817"}, + {file = "xxhash-3.4.1-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:ef73a53fe90558a4096e3256752268a8bdc0322f4692ed928b6cd7ce06ad4fe3"}, + {file = "xxhash-3.4.1-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:450401f42bbd274b519d3d8dcf3c57166913381a3d2664d6609004685039f9d3"}, + {file = "xxhash-3.4.1-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:a162840cf4de8a7cd8720ff3b4417fbc10001eefdd2d21541a8226bb5556e3bb"}, + {file = "xxhash-3.4.1-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b736a2a2728ba45017cb67785e03125a79d246462dfa892d023b827007412c52"}, + {file = "xxhash-3.4.1-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1d0ae4c2e7698adef58710d6e7a32ff518b66b98854b1c68e70eee504ad061d8"}, + {file = "xxhash-3.4.1-pp37-pypy37_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d6322c4291c3ff174dcd104fae41500e75dad12be6f3085d119c2c8a80956c51"}, + {file = "xxhash-3.4.1-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:dd59ed668801c3fae282f8f4edadf6dc7784db6d18139b584b6d9677ddde1b6b"}, + {file = "xxhash-3.4.1-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:92693c487e39523a80474b0394645b393f0ae781d8db3474ccdcead0559ccf45"}, + {file = "xxhash-3.4.1-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4603a0f642a1e8d7f3ba5c4c25509aca6a9c1cc16f85091004a7028607ead663"}, + {file = "xxhash-3.4.1-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6fa45e8cbfbadb40a920fe9ca40c34b393e0b067082d94006f7f64e70c7490a6"}, + {file = "xxhash-3.4.1-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:595b252943b3552de491ff51e5bb79660f84f033977f88f6ca1605846637b7c6"}, + {file = "xxhash-3.4.1-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:562d8b8f783c6af969806aaacf95b6c7b776929ae26c0cd941d54644ea7ef51e"}, + {file = "xxhash-3.4.1-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = 
"sha256:41ddeae47cf2828335d8d991f2d2b03b0bdc89289dc64349d712ff8ce59d0647"}, + {file = "xxhash-3.4.1-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c44d584afdf3c4dbb3277e32321d1a7b01d6071c1992524b6543025fb8f4206f"}, + {file = "xxhash-3.4.1-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fd7bddb3a5b86213cc3f2c61500c16945a1b80ecd572f3078ddbbe68f9dabdfb"}, + {file = "xxhash-3.4.1-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9ecb6c987b62437c2f99c01e97caf8d25660bf541fe79a481d05732e5236719c"}, + {file = "xxhash-3.4.1-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:696b4e18b7023527d5c50ed0626ac0520edac45a50ec7cf3fc265cd08b1f4c03"}, + {file = "xxhash-3.4.1.tar.gz", hash = "sha256:0379d6cf1ff987cd421609a264ce025e74f346e3e145dd106c0cc2e3ec3f99a9"}, +] + [metadata] lock-version = "2.0" python-versions = "^3.11,<3.13" -content-hash = "bba95c10a3dd3ff93b46a6c90954cb0932bf3fae178bc4d7e0231e63d417d1a1" +content-hash = "aee323d4eab275af4ac0f640c75782aab622b457f38e4d0e8dc6802e0bd633cc" diff --git a/pyproject.toml b/pyproject.toml index 56d671eff..4525700a0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,6 +25,7 @@ scikit-learn = "^1.2.0" seaborn = "^0.13.0" torch = {version = "^2.2.0", source = "torch_cuda121"} torchvision = {version = "^0.17.0", source = "torch_cuda121"} +xxhash = "^3.4.1" [tool.poetry.group.dev.dependencies] pytest = ">=7.2.1,<9.0.0" diff --git a/src/safeds/data/image/containers/_image.py b/src/safeds/data/image/containers/_image.py index f3ff799f6..ca5670830 100644 --- a/src/safeds/data/image/containers/_image.py +++ b/src/safeds/data/image/containers/_image.py @@ -109,6 +109,18 @@ def __eq__(self, other: object) -> bool: and torch.all(torch.eq(self._image_tensor, other._set_device(self.device)._image_tensor)).item() ) + def __hash__(self): + """ + Return a deterministic hash value for this image. 
+ + Returns + ------- + hash : int + The hash value. + """ + import xxhash + return xxhash.xxh3_64(self.width.to_bytes(8) + self.height.to_bytes(8) + self.channel.to_bytes(8)).intdigest() + def __sizeof__(self) -> int: """ Return the complete size of this object. diff --git a/src/safeds/data/tabular/containers/_column.py b/src/safeds/data/tabular/containers/_column.py index 04b998294..bcc79da38 100644 --- a/src/safeds/data/tabular/containers/_column.py +++ b/src/safeds/data/tabular/containers/_column.py @@ -191,6 +191,18 @@ def __getitem__(self, index: int | slice) -> T | Column[T]: data = self._data[index].reset_index(drop=True).rename(self.name) return Column._from_pandas_series(data, self._type) + def __hash__(self): + """ + Return a deterministic hash value for this column. + + Returns + ------- + hash : int + The hash value. + """ + import xxhash + return xxhash.xxh3_64(self.name.encode("utf-8") + self.type.__repr__().encode("utf-8") + self.number_of_rows.to_bytes(8)).intdigest() + def __iter__(self) -> Iterator[T]: r""" Create an iterator for the data of this column. This way e.g. for-each loops can be used on it. diff --git a/src/safeds/data/tabular/containers/_row.py b/src/safeds/data/tabular/containers/_row.py index 1c91a1592..847bea717 100644 --- a/src/safeds/data/tabular/containers/_row.py +++ b/src/safeds/data/tabular/containers/_row.py @@ -216,6 +216,20 @@ def __getitem__(self, column_name: str) -> Any: """ return self.get_value(column_name) + def __hash__(self): + """ + Return a deterministic hash value for this row. + + Returns + ------- + hash : int + The hash value. + """ + import xxhash + import functools + import operator + return xxhash.xxh3_64(hash(self._schema).to_bytes(8) + functools.reduce(operator.add, [xxhash.xxh3_64(str(self.get_value(value))).intdigest().to_bytes(8) for value in self], b"\0")).intdigest() + def __iter__(self) -> Iterator[Any]: """ Create an iterator for the column names of this row. 
diff --git a/src/safeds/data/tabular/containers/_table.py b/src/safeds/data/tabular/containers/_table.py index a189e034d..279024b73 100644 --- a/src/safeds/data/tabular/containers/_table.py +++ b/src/safeds/data/tabular/containers/_table.py @@ -457,6 +457,18 @@ def __eq__(self, other: object) -> bool: return table1.column_names == table2.column_names return table1._schema == table2._schema and table1._data.equals(table2._data) + def __hash__(self): + """ + Return a deterministic hash value for this table. + + Returns + ------- + hash : int + The hash value. + """ + import xxhash + return xxhash.xxh3_64(hash(self._schema).to_bytes(8) + self.number_of_rows.to_bytes(8)).intdigest() + def __repr__(self) -> str: r""" Display the table in only one line. diff --git a/src/safeds/data/tabular/containers/_tagged_table.py b/src/safeds/data/tabular/containers/_tagged_table.py index f50124718..a78af218e 100644 --- a/src/safeds/data/tabular/containers/_tagged_table.py +++ b/src/safeds/data/tabular/containers/_tagged_table.py @@ -165,6 +165,32 @@ def __init__( self._features: Table = _data.keep_only_columns(feature_names) self._target: Column = _data.get_column(target_name) + def __eq__(self, other): + """ + Compare two tagged table instances. + + Returns + ------- + 'True' if contents and tags are equal, 'False' otherwise. + """ + if not isinstance(other, TaggedTable): + return NotImplemented + if self is other: + return True + return self.target == other.target and self.features == other.features and Table.__eq__(self, other) + + def __hash__(self): + """ + Return a deterministic hash value for this tagged table. + + Returns + ------- + hash : int + The hash value. + """ + import xxhash + return xxhash.xxh3_64(hash(self.target).to_bytes(8) + hash(self.features).to_bytes(8) + Table.__hash__(self).to_bytes(8)).intdigest() + def __sizeof__(self) -> int: """ Return the complete size of this object. 
diff --git a/src/safeds/data/tabular/containers/_time_series.py b/src/safeds/data/tabular/containers/_time_series.py index a755885e1..9a51d7616 100644 --- a/src/safeds/data/tabular/containers/_time_series.py +++ b/src/safeds/data/tabular/containers/_time_series.py @@ -194,6 +194,32 @@ def __init__( raise UnknownColumnNameError([time_name]) self._time: Column = _data.get_column(time_name) + def __eq__(self, other): + """ + Compare two time series instances. + + Returns + ------- + 'True' if contents are equal, 'False' otherwise. + """ + if not isinstance(other, TimeSeries): + return NotImplemented + if self is other: + return True + return self.time == other.time and TaggedTable.__eq__(self, other) + + def __hash__(self): + """ + Return a deterministic hash value for this time series. + + Returns + ------- + hash : int + The hash value. + """ + import xxhash + return xxhash.xxh3_64(hash(self.time).to_bytes(8) + TaggedTable.__hash__(self).to_bytes(8)).intdigest() + def __sizeof__(self) -> int: """ Return the complete size of this object. diff --git a/src/safeds/data/tabular/transformation/_table_transformer.py b/src/safeds/data/tabular/transformation/_table_transformer.py index bbd780411..1a2aa3fdd 100644 --- a/src/safeds/data/tabular/transformation/_table_transformer.py +++ b/src/safeds/data/tabular/transformation/_table_transformer.py @@ -10,6 +10,18 @@ class TableTransformer(ABC): """Learn a transformation for a set of columns in a `Table` and transform another `Table` with the same columns.""" + def __hash__(self): + """ + Return a deterministic hash value for a table transformer. + + Returns + ------- + hash : int + The hash value. 
+ """ + import xxhash + return xxhash.xxh3_64(self.__class__.__qualname__.encode("utf-8") + (1 if self.is_fitted() else 0).to_bytes(1)).intdigest() + @abstractmethod def fit(self, table: Table, column_names: list[str] | None) -> TableTransformer: """ diff --git a/src/safeds/data/tabular/typing/_schema.py b/src/safeds/data/tabular/typing/_schema.py index 56ee3c453..4cc32c667 100644 --- a/src/safeds/data/tabular/typing/_schema.py +++ b/src/safeds/data/tabular/typing/_schema.py @@ -66,7 +66,7 @@ def __init__(self, schema: dict[str, ColumnType]): def __hash__(self) -> int: """ - Return a hash value for the schema. + Return a deterministic hash value for the schema. Returns ------- @@ -79,9 +79,10 @@ def __hash__(self) -> int: >>> schema = Schema({"A": Integer(), "B": String()}) >>> hash_value = hash(schema) """ + import xxhash column_names = self._schema.keys() column_types = map(repr, self._schema.values()) - return hash(tuple(zip(column_names, column_types, strict=True))) + return xxhash.xxh3_64(str(tuple(zip(column_names, column_types, strict=True)))).intdigest() def __repr__(self) -> str: """ diff --git a/src/safeds/ml/classical/classification/_classifier.py b/src/safeds/ml/classical/classification/_classifier.py index 915ab8f3c..50664fb16 100644 --- a/src/safeds/ml/classical/classification/_classifier.py +++ b/src/safeds/ml/classical/classification/_classifier.py @@ -17,6 +17,18 @@ class Classifier(ABC): """Abstract base class for all classifiers.""" + def __hash__(self): + """ + Return a deterministic hash value for a classifier. + + Returns + ------- + hash : int + The hash value. 
+ """ + import xxhash + return xxhash.xxh3_64(self.__class__.__qualname__.encode("utf-8") + (1 if self.is_fitted() else 0).to_bytes(1)).intdigest() + @abstractmethod def fit(self, training_set: TaggedTable) -> Classifier: """ diff --git a/src/safeds/ml/classical/regression/_regressor.py b/src/safeds/ml/classical/regression/_regressor.py index 682781e58..427c00f5d 100644 --- a/src/safeds/ml/classical/regression/_regressor.py +++ b/src/safeds/ml/classical/regression/_regressor.py @@ -16,6 +16,18 @@ class Regressor(ABC): """Abstract base class for all regressors.""" + def __hash__(self): + """ + Return a deterministic hash value for a regressor. + + Returns + ------- + hash : int + The hash value. + """ + import xxhash + return xxhash.xxh3_64(self.__class__.__qualname__.encode("utf-8") + (1 if self.is_fitted() else 0).to_bytes(1)).intdigest() + @abstractmethod def fit(self, training_set: TaggedTable) -> Regressor: """ diff --git a/tests/safeds/data/image/containers/test_image.py b/tests/safeds/data/image/containers/test_image.py index 4c7897010..a9acce658 100644 --- a/tests/safeds/data/image/containers/test_image.py +++ b/tests/safeds/data/image/containers/test_image.py @@ -373,6 +373,44 @@ def test_should_raise(self, resource_path: str, device: Device) -> None: assert (image.__eq__(other)) is NotImplemented +class TestHash: + @pytest.mark.parametrize("device", _test_devices(), ids=_test_devices_ids()) + @pytest.mark.parametrize( + "resource_path", + _test_images_all(), + ids=_test_images_all_ids(), + ) + def test_should_hash_be_equal(self, resource_path: str, device: Device) -> None: + _skip_if_device_not_available(device) + image = Image.from_file(resolve_resource_path(resource_path), device) + image2 = Image.from_file(resolve_resource_path(resource_path), device) + assert hash(image) == hash(image2) + + @pytest.mark.parametrize("device", _test_devices(), ids=_test_devices_ids()) + def test_should_hash_not_be_equal(self, device: Device) -> None: + 
_skip_if_device_not_available(device) + image = Image.from_file(resolve_resource_path(_plane_png_path), device) + image2 = Image.from_file(resolve_resource_path(_white_square_png_path), device) + assert hash(image) != hash(image2) + + @pytest.mark.parametrize( + "resource_path", + _test_images_all(), + ids=_test_images_all_ids(), + ) + def test_should_hash_be_equal_different_devices(self, resource_path: str) -> None: + _skip_if_device_not_available(_device_cuda) + image = Image.from_file(resolve_resource_path(resource_path), torch.device("cpu")) + image2 = Image.from_file(resolve_resource_path(resource_path), torch.device("cuda")) + assert hash(image) == hash(image2) + + def test_should_hash_not_be_equal_different_devices(self) -> None: + _skip_if_device_not_available(_device_cuda) + image = Image.from_file(resolve_resource_path(_plane_png_path), torch.device("cpu")) + image2 = Image.from_file(resolve_resource_path(_white_square_png_path), torch.device("cuda")) + assert hash(image) != hash(image2) + + @pytest.mark.parametrize("device", _test_devices(), ids=_test_devices_ids()) class TestResize: @pytest.mark.parametrize( diff --git a/tests/safeds/data/tabular/containers/_column/test_hash.py b/tests/safeds/data/tabular/containers/_column/test_hash.py new file mode 100644 index 000000000..0103b9aeb --- /dev/null +++ b/tests/safeds/data/tabular/containers/_column/test_hash.py @@ -0,0 +1,36 @@ +from typing import Any + +import pytest +from safeds.data.tabular.containers import Column, Row + + +@pytest.mark.parametrize( + ("column1", "column2"), + [ + (Column("a"), Column("a")), + (Column("a", [1, 2, 3]), Column("a", [1, 2, 3])), + (Column("a", [1, 2, 3]), Column("a", [1, 2, 4])), + ], + ids=[ + "empty columns", + "equal columns", + "different values", + ], +) +def test_should_return_same_hash_for_equal_columns(column1: Column, column2: Column) -> None: + assert hash(column1) == hash(column2) + + +@pytest.mark.parametrize( + ("column1", "column2"), + [ + (Column("a"), 
Column("b")), + (Column("a", [1, 2, 3]), Column("a", ["1", "2", "3"])), + ], + ids=[ + "different names", + "different types", + ], +) +def test_should_return_different_hash_for_unequal_columns(column1: Column, column2: Column) -> None: + assert hash(column1) != hash(column2) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/_time_series/test_eq.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/_time_series/test_eq.py new file mode 100644 index 000000000..ccc22ca12 --- /dev/null +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/_time_series/test_eq.py @@ -0,0 +1,50 @@ +from typing import Any + +import pytest +from safeds.data.tabular.containers import Row, Table, TimeSeries, TimeSeries, TaggedTable + + +@pytest.mark.parametrize( + ("table1", "table2", "expected"), + [ + (TimeSeries({"a": [], "b": [], "c": []}, "b", "c", ["a"]), TimeSeries({"a": [], "b": [], "c": []}, "b", "c", ["a"]), True), + (TimeSeries({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}, "b", "c", ["a"]), TimeSeries({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}, "b", "c", ["a"]), True), + (TimeSeries({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9], "d": [10, 11, 12]}, "b", "d", ["a"]), TimeSeries({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9], "d": [10, 11, 12]}, "c", "d", ["a"]), False), + (TimeSeries({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9], "d": [10, 11, 12]}, "b", "c", ["a"]), TimeSeries({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9], "e": [10, 11, 12]}, "b", "c", ["a"]), False), + (TimeSeries({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}, "b", "c", ["a"]), TimeSeries({"a": [1, 1, 3], "b": [4, 5, 6], "c": [7, 8, 9]}, "b", "c", ["a"]), False), + (TimeSeries({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}, "b", "c", ["a"]), TimeSeries({"a": ["1", "2", "3"], "b": [4, 5, 6], "c": [7, 8, 9]}, "b", "c", ["a"]), False), + (TimeSeries({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9], "d": [10, 11, 12]}, "b", "d", ["a"]), TimeSeries({"a": 
[1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9], "d": [10, 11, 12]}, "b", "d", ["c"]), False), + (TimeSeries({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9], "d": [10, 11, 12]}, "b", "d", ["a"]), TimeSeries({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9], "d": [10, 11, 12]}, "b", "c", ["a"]), False), + ], + ids=[ + "rowless table", + "equal tables", + "different target", + "different column names", + "different values", + "different types", + "different features", + "different time", + ], +) +def test_should_return_whether_two_tagged_tables_are_equal(table1: TimeSeries, table2: TimeSeries, expected: bool) -> None: + assert (table1.__eq__(table2)) == expected + + +@pytest.mark.parametrize( + ("table", "other"), + [ + (TimeSeries({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}, "b", "c", ["a"]), None), + (TimeSeries({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}, "b", "c", ["a"]), Row()), + (TimeSeries({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}, "b", "c", ["a"]), Table()), + (TimeSeries({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}, "b", "c", ["a"]), TaggedTable({"a": [1, 2, 3], "b": [4, 5, 6]}, "b", ["a"])) + ], + ids=[ + "TimeSeries vs. None", + "TimeSeries vs. Row", + "TimeSeries vs. Table", + "TimeSeries vs. 
TaggedTable", + ], +) +def test_should_return_not_implemented_if_other_is_not_tagged_table(table: TimeSeries, other: Any) -> None: + assert (table.__eq__(other)) is NotImplemented diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/_time_series/test_hash.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/_time_series/test_hash.py new file mode 100644 index 000000000..59fcdd0fa --- /dev/null +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/_time_series/test_hash.py @@ -0,0 +1,42 @@ +from typing import Any + +import pytest +from safeds.data.tabular.containers import Row, Table, TimeSeries, TimeSeries, TaggedTable + + +@pytest.mark.parametrize( + ("table1", "table2"), + [ + (TimeSeries({"a": [], "b": [], "c": []}, "b", "c", ["a"]), TimeSeries({"a": [], "b": [], "c": []}, "b", "c", ["a"])), + (TimeSeries({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}, "b", "c", ["a"]), TimeSeries({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}, "b", "c", ["a"])), + (TimeSeries({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}, "b", "c", ["a"]), TimeSeries({"a": [1, 1, 3], "b": [4, 5, 6], "c": [7, 8, 9]}, "b", "c", ["a"])), + ], + ids=[ + "rowless table", + "equal tables", + "different values", + ], +) +def test_should_return_same_hash_for_equal_time_series(table1: TimeSeries, table2: TimeSeries) -> None: + assert hash(table1) == hash(table2) + + +@pytest.mark.parametrize( + ("table1", "table2"), + [ + (TimeSeries({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9], "d": [10, 11, 12]}, "b", "d", ["a"]), TimeSeries({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9], "d": [10, 11, 12]}, "c", "d", ["a"])), + (TimeSeries({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9], "d": [10, 11, 12]}, "b", "c", ["a"]), TimeSeries({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9], "e": [10, 11, 12]}, "b", "c", ["a"])), + (TimeSeries({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}, "b", "c", ["a"]), TimeSeries({"a": ["1", "2", "3"], "b": [4, 5, 6], "c": [7, 8, 9]}, 
"b", "c", ["a"])), + (TimeSeries({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9], "d": [10, 11, 12]}, "b", "d", ["a"]), TimeSeries({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9], "d": [10, 11, 12]}, "b", "d", ["c"])), + (TimeSeries({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9], "d": [10, 11, 12]}, "b", "d", ["a"]), TimeSeries({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9], "d": [10, 11, 12]}, "b", "c", ["a"])), + ], + ids=[ + "different target", + "different column names", + "different types", + "different features", + "different time", + ], +) +def test_should_return_different_hash_for_unequal_time_series(table1: TimeSeries, table2: TimeSeries) -> None: + assert hash(table1) != hash(table2) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/_time_series/test_sizeof.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/_time_series/test_sizeof.py index fbe310894..d431dd6f7 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/_time_series/test_sizeof.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/_time_series/test_sizeof.py @@ -1,7 +1,7 @@ import sys import pytest -from safeds.data.tabular.containers import Table, TimeSeries +from safeds.data.tabular.containers import TimeSeries @pytest.mark.parametrize( diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_eq.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_eq.py new file mode 100644 index 000000000..6a610bdf0 --- /dev/null +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_eq.py @@ -0,0 +1,46 @@ +from typing import Any + +import pytest +from safeds.data.tabular.containers import Row, Table, TaggedTable + + +@pytest.mark.parametrize( + ("table1", "table2", "expected"), + [ + (TaggedTable({"a": [], "b": []}, "b", ["a"]), TaggedTable({"a": [], "b": []}, "b", ["a"]), True), + (TaggedTable({"a": [1, 2, 3], "b": [4, 5, 6]}, "b", ["a"]), TaggedTable({"a": [1, 2, 3], "b": [4, 5, 6]}, "b", 
["a"]), True), + (TaggedTable({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}, "b", ["a"]), TaggedTable({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}, "c", ["a"]), False), + (TaggedTable({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}, "b", ["a"]), TaggedTable({"a": [1, 2, 3], "b": [4, 5, 6], "d": [7, 8, 9]}, "b", ["a"]), False), + (TaggedTable({"a": [1, 2, 3], "b": [4, 5, 6]}, "b", ["a"]), TaggedTable({"a": [1, 1, 3], "b": [4, 5, 6]}, "b", ["a"]), False), + (TaggedTable({"a": [1, 2, 3], "b": [4, 5, 6]}, "b", ["a"]), TaggedTable({"a": ["1", "2", "3"], "b": [4, 5, 6]}, "b", ["a"]), False), + (TaggedTable({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}, "b", ["a"]), TaggedTable({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}, "b", ["c"]), False), + ], + ids=[ + "rowless table", + "equal tables", + "different target", + "different column names", + "different values", + "different types", + "different features" + ], +) +def test_should_return_whether_two_tagged_tables_are_equal(table1: TaggedTable, table2: TaggedTable, expected: bool) -> None: + assert (table1.__eq__(table2)) == expected + + +@pytest.mark.parametrize( + ("table", "other"), + [ + (TaggedTable({"a": [1, 2, 3], "b": [4, 5, 6]}, "b", ["a"]), None), + (TaggedTable({"a": [1, 2, 3], "b": [4, 5, 6]}, "b", ["a"]), Row()), + (TaggedTable({"a": [1, 2, 3], "b": [4, 5, 6]}, "b", ["a"]), Table()) + ], + ids=[ + "TaggedTable vs. None", + "TaggedTable vs. Row", + "TaggedTable vs. 
Table", + ], +) +def test_should_return_not_implemented_if_other_is_not_tagged_table(table: TaggedTable, other: Any) -> None: + assert (table.__eq__(other)) is NotImplemented diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_hash.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_hash.py new file mode 100644 index 000000000..46e90f88e --- /dev/null +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_hash.py @@ -0,0 +1,40 @@ +from typing import Any + +import pytest +from safeds.data.tabular.containers import Row, Table, TaggedTable + + +@pytest.mark.parametrize( + ("table1", "table2"), + [ + (TaggedTable({"a": [], "b": []}, "b", ["a"]), TaggedTable({"a": [], "b": []}, "b", ["a"])), + (TaggedTable({"a": [1, 2, 3], "b": [4, 5, 6]}, "b", ["a"]), TaggedTable({"a": [1, 2, 3], "b": [4, 5, 6]}, "b", ["a"])), + (TaggedTable({"a": [1, 2, 3], "b": [4, 5, 6]}, "b", ["a"]), TaggedTable({"a": [1, 1, 3], "b": [4, 5, 6]}, "b", ["a"])), + ], + ids=[ + "rowless table", + "equal tables", + "different values", + ], +) +def test_should_return_same_hash_for_equal_tagged_tables(table1: TaggedTable, table2: TaggedTable) -> None: + assert hash(table1) == hash(table2) + + +@pytest.mark.parametrize( + ("table1", "table2"), + [ + (TaggedTable({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}, "b", ["a"]), TaggedTable({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}, "c", ["a"])), + (TaggedTable({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}, "b", ["a"]), TaggedTable({"a": [1, 2, 3], "b": [4, 5, 6], "d": [7, 8, 9]}, "b", ["a"])), + (TaggedTable({"a": [1, 2, 3], "b": [4, 5, 6]}, "b", ["a"]), TaggedTable({"a": ["1", "2", "3"], "b": [4, 5, 6]}, "b", ["a"])), + (TaggedTable({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}, "b", ["a"]), TaggedTable({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}, "b", ["c"])), + ], + ids=[ + "different target", + "different column names", + "different types", + "different features" + ], +) +def 
test_should_return_different_hash_for_unequal_tagged_tables(table1: TaggedTable, table2: TaggedTable) -> None: + assert hash(table1) != hash(table2) diff --git a/tests/safeds/data/tabular/containers/_table/test_hash.py b/tests/safeds/data/tabular/containers/_table/test_hash.py new file mode 100644 index 000000000..1a5144053 --- /dev/null +++ b/tests/safeds/data/tabular/containers/_table/test_hash.py @@ -0,0 +1,40 @@ +from typing import Any + +import pytest +from safeds.data.tabular.containers import Row, Table + + +@pytest.mark.parametrize( + ("table1", "table2"), + [ + (Table(), Table()), + (Table({"a": [], "b": []}), Table({"a": [], "b": []})), + (Table({"col1": [1]}), Table({"col1": [1]})), + (Table({"col1": [1, 2, 3]}), Table({"col1": [1, 1, 3]})), + ], + ids=[ + "empty table", + "rowless table", + "equal tables", + "different values", + ], +) +def test_should_return_same_hash_for_equal_tables(table1: Table, table2: Table) -> None: + assert hash(table1) == hash(table2) + + +@pytest.mark.parametrize( + ("table1", "table2"), + [ + (Table({"col1": [1]}), Table({"col2": [1]})), + (Table({"col1": [1, 2, 3]}), Table({"col1": ["1", "2", "3"]})), + (Table({"col1": [1, 2, 3]}), Table({"col1": [1, 2, 3, 4]})), + ], + ids=[ + "different column names", + "different types", + "different number of rows" + ], +) +def test_should_return_different_hash_for_unequal_tables(table1: Table, table2: Table) -> None: + assert hash(table1) != hash(table2) diff --git a/tests/safeds/data/tabular/containers/test_row.py b/tests/safeds/data/tabular/containers/test_row.py index 61ee4a04d..fd5a4a56b 100644 --- a/tests/safeds/data/tabular/containers/test_row.py +++ b/tests/safeds/data/tabular/containers/test_row.py @@ -184,6 +184,39 @@ def test_should_return_not_implemented_if_other_is_not_row(self, row: Row, other assert (row.__eq__(other)) is NotImplemented +class TestHash: + @pytest.mark.parametrize( + ("row1", "row2"), + [ + (Row(), Row()), + (Row({"col1": 0}), Row({"col1": 0})), + ], + 
ids=[ + "empty rows", + "equal rows", + ], + ) + def test_should_return_same_hash_for_equal_rows(self, row1: Row, row2: Row) -> None: + assert hash(row1) == hash(row2) + + @pytest.mark.parametrize( + ("row1", "row2"), + [ + + (Row({"col1": 0}), Row({"col1": 1})), + (Row({"col1": 0}), Row({"col2": 0})), + (Row({"col1": 0}), Row({"col1": "a"})), + ], + ids=[ + "different values", + "different columns", + "different types", + ], + ) + def test_should_return_different_hash_for_unequal_rows(self, row1: Row, row2: Row) -> None: + assert hash(row1) != hash(row2) + + class TestGetitem: @pytest.mark.parametrize( ("row", "column_name", "expected"), diff --git a/tests/safeds/data/tabular/transformation/test_table_transformer.py b/tests/safeds/data/tabular/transformation/test_table_transformer.py new file mode 100644 index 000000000..2fd84a8af --- /dev/null +++ b/tests/safeds/data/tabular/transformation/test_table_transformer.py @@ -0,0 +1,122 @@ +import itertools + +import pytest + +from safeds.data.tabular.containers import Table +from safeds.data.tabular.transformation import TableTransformer, StandardScaler, RangeScaler, OneHotEncoder, LabelEncoder, Discretizer, Imputer + + +def transformers_numeric() -> list[TableTransformer]: + """ + Return the list of numeric transformers to test. + + After you implemented a new numeric transformer, add it to this list to ensure its `__hash__` method works as + expected. Place tests of methods that are specific to your transformer in a separate test file. + + Returns + ------- + transformers : list[TableTransformer] + The list of numeric transformers to test. + """ + return [ + StandardScaler(), + RangeScaler(), + Discretizer() + ] + + +def transformers_non_numeric() -> list[TableTransformer]: + """ + Return the list of non-numeric transformers to test. + + After you implemented a new non-numeric transformer, add it to this list to ensure its `__hash__` method works as + expected. 
Place tests of methods that are specific to your transformer in a separate test file. + + Returns + ------- + transformers : list[TableTransformer] + The list of non-numeric transformers to test. + """ + return [ + OneHotEncoder(), + LabelEncoder() + ] + + +def transformers() -> list[TableTransformer]: + """ + Return the list of all transformers to test. + + After you implemented a new transformer (which is either applicable to both numeric and non-numeric data or none), add it to one of the three lists to ensure its `__hash__` method works as + expected. Place tests of methods that are specific to your transformer in a separate test file. + + Returns + ------- + transformers : list[TableTransformer] + The list of all transformers to test. + """ + return transformers_numeric() + transformers_non_numeric() + [ + Imputer(strategy=Imputer.Strategy.Mode()) + ] + + +@pytest.fixture() +def valid_data_numeric() -> Table: + return Table( + { + "col1": [0.0, 5.0, 10.0], + }, + ) + + +@pytest.fixture() +def valid_data_non_numeric() -> Table: + return Table( + { + "col1": ["a", "b", "c"], + }, + ) + +@pytest.fixture() +def valid_data_imputer() -> Table: + return Table( + { + "col1": [1, 1, None], + }, + ) + + +class TestHash: + @pytest.mark.parametrize(("transformer1", "transformer2"), ([(x, y) for x in transformers() for y in transformers() if x.__class__ == y.__class__]), ids=lambda x: x.__class__.__name__) + def test_should_return_same_hash_for_equal_transformer(self, transformer1: TableTransformer, transformer2: TableTransformer) -> None: + assert hash(transformer1) == hash(transformer2) + + @pytest.mark.parametrize(("transformer1", "transformer2"), ([(x, y) for x in transformers() for y in transformers() if x.__class__ != y.__class__]), ids=lambda x: x.__class__.__name__) + def test_should_return_different_hash_for_unequal_transformer(self, transformer1: TableTransformer, transformer2: TableTransformer) -> None: + assert hash(transformer1) != hash(transformer2) + + 
@pytest.mark.parametrize("transformer1", transformers_numeric(), ids=lambda x: x.__class__.__name__) + def test_should_return_different_hash_for_same_numeric_transformer_fit(self, transformer1: TableTransformer, valid_data_numeric: Table) -> None: + transformer1_fit = transformer1.fit(valid_data_numeric, ["col1"]) + assert hash(transformer1) != hash(transformer1_fit) + + @pytest.mark.parametrize("transformer1", transformers_non_numeric(), ids=lambda x: x.__class__.__name__) + def test_should_return_different_hash_for_same_non_numeric_transformer_fit(self, transformer1: TableTransformer, valid_data_non_numeric: Table) -> None: + transformer1_fit = transformer1.fit(valid_data_non_numeric, ["col1"]) + assert hash(transformer1) != hash(transformer1_fit) + + @pytest.mark.parametrize(("transformer1", "transformer2"), (list(itertools.product(transformers_numeric(), transformers()))), ids=lambda x: x.__class__.__name__) + def test_should_return_different_hash_for_numeric_transformer_fit(self, transformer1: TableTransformer, transformer2: TableTransformer, valid_data_numeric: Table) -> None: + transformer1_fit = transformer1.fit(valid_data_numeric, ["col1"]) + assert hash(transformer2) != hash(transformer1_fit) + + @pytest.mark.parametrize(("transformer1", "transformer2"), (list(itertools.product(transformers_non_numeric(), transformers()))), ids=lambda x: x.__class__.__name__) + def test_should_return_different_hash_for_non_numeric_transformer_fit(self, transformer1: TableTransformer, transformer2: TableTransformer, valid_data_non_numeric: Table) -> None: + transformer1_fit = transformer1.fit(valid_data_non_numeric, ["col1"]) + assert hash(transformer2) != hash(transformer1_fit) + + @pytest.mark.parametrize("transformer2", transformers(), ids=lambda x: x.__class__.__name__) + def test_should_return_different_hash_for_imputer_fit(self, transformer2: TableTransformer, valid_data_imputer: Table) -> None: + transformer1 = Imputer(strategy=Imputer.Strategy.Mode()) + 
transformer1_fit = transformer1.fit(valid_data_imputer, ["col1"]) + assert hash(transformer2) != hash(transformer1_fit) diff --git a/tests/safeds/ml/classical/classification/test_classifier.py b/tests/safeds/ml/classical/classification/test_classifier.py index b8f82269a..2338544a0 100644 --- a/tests/safeds/ml/classical/classification/test_classifier.py +++ b/tests/safeds/ml/classical/classification/test_classifier.py @@ -1,5 +1,6 @@ from __future__ import annotations +import itertools from typing import TYPE_CHECKING, Any import pytest @@ -260,6 +261,26 @@ def test_should_return_true_after_fitting(self, classifier: Classifier, valid_da assert fitted_classifier.is_fitted() +class TestHash: + @pytest.mark.parametrize(("classifier1", "classifier2"), ([(x, y) for x in classifiers() for y in classifiers() if x.__class__ == y.__class__]), ids=lambda x: x.__class__.__name__) + def test_should_return_same_hash_for_equal_classifier(self, classifier1: Classifier, classifier2: Classifier) -> None: + assert hash(classifier1) == hash(classifier2) + + @pytest.mark.parametrize(("classifier1", "classifier2"), ([(x, y) for x in classifiers() for y in classifiers() if x.__class__ != y.__class__]), ids=lambda x: x.__class__.__name__) + def test_should_return_different_hash_for_unequal_classifier(self, classifier1: Classifier, classifier2: Classifier) -> None: + assert hash(classifier1) != hash(classifier2) + + @pytest.mark.parametrize("classifier1", classifiers(), ids=lambda x: x.__class__.__name__) + def test_should_return_different_hash_for_same_classifier_fit(self, classifier1: Classifier, valid_data: TaggedTable) -> None: + regressor1_fit = classifier1.fit(valid_data) + assert hash(classifier1) != hash(regressor1_fit) + + @pytest.mark.parametrize(("classifier1", "classifier2"), (list(itertools.product(classifiers(), classifiers()))), ids=lambda x: x.__class__.__name__) + def test_should_return_different_hash_for_classifier_fit(self, classifier1: Classifier, classifier2: 
Classifier, valid_data: TaggedTable) -> None: + classifier1_fit = classifier1.fit(valid_data) + assert hash(classifier1_fit) != hash(classifier2) + + class DummyClassifier(Classifier): """ Dummy classifier to test metrics. diff --git a/tests/safeds/ml/classical/regression/test_regressor.py b/tests/safeds/ml/classical/regression/test_regressor.py index 418d3e682..c1d648018 100644 --- a/tests/safeds/ml/classical/regression/test_regressor.py +++ b/tests/safeds/ml/classical/regression/test_regressor.py @@ -1,5 +1,6 @@ from __future__ import annotations +import itertools from typing import TYPE_CHECKING, Any import pandas as pd @@ -271,6 +272,26 @@ def test_should_return_true_after_fitting(self, regressor: Regressor, valid_data assert fitted_regressor.is_fitted() +class TestHash: + @pytest.mark.parametrize(("regressor1", "regressor2"), ([(x, y) for x in regressors() for y in regressors() if x.__class__ == y.__class__]), ids=lambda x: x.__class__.__name__) + def test_should_return_same_hash_for_equal_regressor(self, regressor1: Regressor, regressor2: Regressor) -> None: + assert hash(regressor1) == hash(regressor2) + + @pytest.mark.parametrize(("regressor1", "regressor2"), ([(x, y) for x in regressors() for y in regressors() if x.__class__ != y.__class__]), ids=lambda x: x.__class__.__name__) + def test_should_return_different_hash_for_unequal_regressor(self, regressor1: Regressor, regressor2: Regressor) -> None: + assert hash(regressor1) != hash(regressor2) + + @pytest.mark.parametrize("regressor1", regressors(), ids=lambda x: x.__class__.__name__) + def test_should_return_different_hash_for_same_regressor_fit(self, regressor1: Regressor, valid_data: TaggedTable) -> None: + regressor1_fit = regressor1.fit(valid_data) + assert hash(regressor1) != hash(regressor1_fit) + + @pytest.mark.parametrize(("regressor1", "regressor2"), (list(itertools.product(regressors(), regressors()))), ids=lambda x: x.__class__.__name__) + def 
test_should_return_different_hash_for_regressor_fit(self, regressor1: Regressor, regressor2: Regressor, valid_data: TaggedTable) -> None: + regressor1_fit = regressor1.fit(valid_data) + assert hash(regressor1_fit) != hash(regressor2) + + class DummyRegressor(Regressor): """ Dummy regressor to test metrics. From dae05fe3262d38da7a28b1d71b0c6fc42e3f7677 Mon Sep 17 00:00:00 2001 From: WinPlay02 Date: Fri, 15 Mar 2024 15:46:34 +0100 Subject: [PATCH 2/5] test: fix non-covered line --- .../_table/_tagged_table/_time_series/test_eq.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/_time_series/test_eq.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/_time_series/test_eq.py index ccc22ca12..e7104ba38 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/_time_series/test_eq.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/_time_series/test_eq.py @@ -31,6 +31,17 @@ def test_should_return_whether_two_tagged_tables_are_equal(table1: TimeSeries, t assert (table1.__eq__(table2)) == expected +@pytest.mark.parametrize( + "table1", + [TimeSeries({"a": [], "b": [], "c": []}, "b", "c", ["a"])], + ids=[ + "any", + ], +) +def test_should_return_true_if_objects_are_identical(table1: TimeSeries) -> None: + assert (table1.__eq__(table1)) is True + + @pytest.mark.parametrize( ("table", "other"), [ From 0726fbfba53300b907a55c967b5306312f39cf77 Mon Sep 17 00:00:00 2001 From: WinPlay02 Date: Fri, 15 Mar 2024 15:48:40 +0100 Subject: [PATCH 3/5] style: add missing return types to __hash__ methods --- src/safeds/data/image/containers/_image.py | 2 +- src/safeds/data/tabular/containers/_column.py | 2 +- src/safeds/data/tabular/containers/_row.py | 2 +- src/safeds/data/tabular/containers/_table.py | 2 +- src/safeds/data/tabular/containers/_tagged_table.py | 4 ++-- src/safeds/data/tabular/containers/_time_series.py | 4 ++-- 
src/safeds/data/tabular/transformation/_table_transformer.py | 2 +- src/safeds/ml/classical/classification/_classifier.py | 2 +- src/safeds/ml/classical/regression/_regressor.py | 2 +- 9 files changed, 11 insertions(+), 11 deletions(-) diff --git a/src/safeds/data/image/containers/_image.py b/src/safeds/data/image/containers/_image.py index ca5670830..45c9f39f7 100644 --- a/src/safeds/data/image/containers/_image.py +++ b/src/safeds/data/image/containers/_image.py @@ -109,7 +109,7 @@ def __eq__(self, other: object) -> bool: and torch.all(torch.eq(self._image_tensor, other._set_device(self.device)._image_tensor)).item() ) - def __hash__(self): + def __hash__(self) -> int: """ Return a deterministic hash value for this image. diff --git a/src/safeds/data/tabular/containers/_column.py b/src/safeds/data/tabular/containers/_column.py index bcc79da38..ead44da0b 100644 --- a/src/safeds/data/tabular/containers/_column.py +++ b/src/safeds/data/tabular/containers/_column.py @@ -191,7 +191,7 @@ def __getitem__(self, index: int | slice) -> T | Column[T]: data = self._data[index].reset_index(drop=True).rename(self.name) return Column._from_pandas_series(data, self._type) - def __hash__(self): + def __hash__(self) -> int: """ Return a deterministic hash value for this column. diff --git a/src/safeds/data/tabular/containers/_row.py b/src/safeds/data/tabular/containers/_row.py index 847bea717..337034441 100644 --- a/src/safeds/data/tabular/containers/_row.py +++ b/src/safeds/data/tabular/containers/_row.py @@ -216,7 +216,7 @@ def __getitem__(self, column_name: str) -> Any: """ return self.get_value(column_name) - def __hash__(self): + def __hash__(self) -> int: """ Return a deterministic hash value for this row. 
diff --git a/src/safeds/data/tabular/containers/_table.py b/src/safeds/data/tabular/containers/_table.py index 279024b73..2ed3a0f92 100644 --- a/src/safeds/data/tabular/containers/_table.py +++ b/src/safeds/data/tabular/containers/_table.py @@ -457,7 +457,7 @@ def __eq__(self, other: object) -> bool: return table1.column_names == table2.column_names return table1._schema == table2._schema and table1._data.equals(table2._data) - def __hash__(self): + def __hash__(self) -> int: """ Return a deterministic hash value for this table. diff --git a/src/safeds/data/tabular/containers/_tagged_table.py b/src/safeds/data/tabular/containers/_tagged_table.py index a78af218e..87618b43c 100644 --- a/src/safeds/data/tabular/containers/_tagged_table.py +++ b/src/safeds/data/tabular/containers/_tagged_table.py @@ -165,7 +165,7 @@ def __init__( self._features: Table = _data.keep_only_columns(feature_names) self._target: Column = _data.get_column(target_name) - def __eq__(self, other): + def __eq__(self, other) -> bool: """ Compare two tagged table instances. @@ -179,7 +179,7 @@ def __eq__(self, other): return True return self.target == other.target and self.features == other.features and Table.__eq__(self, other) - def __hash__(self): + def __hash__(self) -> int: """ Return a deterministic hash value for this tagged table. diff --git a/src/safeds/data/tabular/containers/_time_series.py b/src/safeds/data/tabular/containers/_time_series.py index 9a51d7616..030d8e098 100644 --- a/src/safeds/data/tabular/containers/_time_series.py +++ b/src/safeds/data/tabular/containers/_time_series.py @@ -194,7 +194,7 @@ def __init__( raise UnknownColumnNameError([time_name]) self._time: Column = _data.get_column(time_name) - def __eq__(self, other): + def __eq__(self, other) -> bool: """ Compare two time series instances. 
@@ -208,7 +208,7 @@ def __eq__(self, other): return True return self.time == other.time and TaggedTable.__eq__(self, other) - def __hash__(self): + def __hash__(self) -> int: """ Return a deterministic hash value for this time series. diff --git a/src/safeds/data/tabular/transformation/_table_transformer.py b/src/safeds/data/tabular/transformation/_table_transformer.py index 1a2aa3fdd..dbfcd1c0a 100644 --- a/src/safeds/data/tabular/transformation/_table_transformer.py +++ b/src/safeds/data/tabular/transformation/_table_transformer.py @@ -10,7 +10,7 @@ class TableTransformer(ABC): """Learn a transformation for a set of columns in a `Table` and transform another `Table` with the same columns.""" - def __hash__(self): + def __hash__(self) -> int: """ Return a deterministic hash value for a table transformer. diff --git a/src/safeds/ml/classical/classification/_classifier.py b/src/safeds/ml/classical/classification/_classifier.py index 50664fb16..02e9eefeb 100644 --- a/src/safeds/ml/classical/classification/_classifier.py +++ b/src/safeds/ml/classical/classification/_classifier.py @@ -17,7 +17,7 @@ class Classifier(ABC): """Abstract base class for all classifiers.""" - def __hash__(self): + def __hash__(self) -> int: """ Return a deterministic hash value for a classifier. diff --git a/src/safeds/ml/classical/regression/_regressor.py b/src/safeds/ml/classical/regression/_regressor.py index 427c00f5d..647662752 100644 --- a/src/safeds/ml/classical/regression/_regressor.py +++ b/src/safeds/ml/classical/regression/_regressor.py @@ -16,7 +16,7 @@ class Regressor(ABC): """Abstract base class for all regressors.""" - def __hash__(self): + def __hash__(self) -> int: """ Return a deterministic hash value for a regressor. 
From 4603352271b9760023699234a2abefa0520d49cc Mon Sep 17 00:00:00 2001 From: WinPlay02 Date: Fri, 15 Mar 2024 15:52:50 +0100 Subject: [PATCH 4/5] style: add missing parameter type to __eq__ method --- src/safeds/data/tabular/containers/_tagged_table.py | 2 +- src/safeds/data/tabular/containers/_time_series.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/safeds/data/tabular/containers/_tagged_table.py b/src/safeds/data/tabular/containers/_tagged_table.py index 87618b43c..b6b09b5c5 100644 --- a/src/safeds/data/tabular/containers/_tagged_table.py +++ b/src/safeds/data/tabular/containers/_tagged_table.py @@ -165,7 +165,7 @@ def __init__( self._features: Table = _data.keep_only_columns(feature_names) self._target: Column = _data.get_column(target_name) - def __eq__(self, other) -> bool: + def __eq__(self, other: object) -> bool: """ Compare two tagged table instances. diff --git a/src/safeds/data/tabular/containers/_time_series.py b/src/safeds/data/tabular/containers/_time_series.py index 030d8e098..7a2a89b5c 100644 --- a/src/safeds/data/tabular/containers/_time_series.py +++ b/src/safeds/data/tabular/containers/_time_series.py @@ -194,7 +194,7 @@ def __init__( raise UnknownColumnNameError([time_name]) self._time: Column = _data.get_column(time_name) - def __eq__(self, other) -> bool: + def __eq__(self, other: object) -> bool: """ Compare two time series instances. 
From a3b3837969dc67d87165e77b0d5972b5189df00e Mon Sep 17 00:00:00 2001 From: WinPlay02 Date: Mon, 18 Mar 2024 13:38:52 +0100 Subject: [PATCH 5/5] style: move imports to module --- src/safeds/data/image/containers/_image.py | 2 +- src/safeds/data/tabular/containers/_column.py | 2 +- src/safeds/data/tabular/containers/_row.py | 5 ++--- src/safeds/data/tabular/containers/_table.py | 2 +- src/safeds/data/tabular/containers/_tagged_table.py | 3 ++- src/safeds/data/tabular/containers/_time_series.py | 2 +- src/safeds/data/tabular/transformation/_table_transformer.py | 3 ++- src/safeds/data/tabular/typing/_schema.py | 3 ++- src/safeds/ml/classical/classification/_classifier.py | 2 +- src/safeds/ml/classical/regression/_regressor.py | 2 +- 10 files changed, 14 insertions(+), 12 deletions(-) diff --git a/src/safeds/data/image/containers/_image.py b/src/safeds/data/image/containers/_image.py index 45c9f39f7..3f00430b8 100644 --- a/src/safeds/data/image/containers/_image.py +++ b/src/safeds/data/image/containers/_image.py @@ -8,6 +8,7 @@ import torch import torch.nn.functional as func +import xxhash from PIL.Image import open as pil_image_open from torch import Tensor @@ -118,7 +119,6 @@ def __hash__(self) -> int: hash : int The hash value. """ - import xxhash return xxhash.xxh3_64(self.width.to_bytes(8) + self.height.to_bytes(8) + self.channel.to_bytes(8)).intdigest() def __sizeof__(self) -> int: diff --git a/src/safeds/data/tabular/containers/_column.py b/src/safeds/data/tabular/containers/_column.py index ead44da0b..8eae640e6 100644 --- a/src/safeds/data/tabular/containers/_column.py +++ b/src/safeds/data/tabular/containers/_column.py @@ -10,6 +10,7 @@ import numpy as np import pandas as pd import seaborn as sns +import xxhash from safeds.data.image.containers import Image from safeds.data.tabular.typing import ColumnType @@ -200,7 +201,6 @@ def __hash__(self) -> int: hash : int The hash value. 
""" - import xxhash return xxhash.xxh3_64(self.name.encode("utf-8") + self.type.__repr__().encode("utf-8") + self.number_of_rows.to_bytes(8)).intdigest() def __iter__(self) -> Iterator[T]: diff --git a/src/safeds/data/tabular/containers/_row.py b/src/safeds/data/tabular/containers/_row.py index 337034441..50c670b49 100644 --- a/src/safeds/data/tabular/containers/_row.py +++ b/src/safeds/data/tabular/containers/_row.py @@ -2,10 +2,12 @@ import sys import functools +import operator from collections.abc import Callable, Mapping from typing import TYPE_CHECKING, Any import pandas as pd +import xxhash from safeds.data.tabular.typing import ColumnType, Schema from safeds.exceptions import UnknownColumnNameError @@ -225,9 +227,6 @@ def __hash__(self) -> int: hash : int The hash value. """ - import xxhash - import functools - import operator return xxhash.xxh3_64(hash(self._schema).to_bytes(8) + functools.reduce(operator.add, [xxhash.xxh3_64(str(self.get_value(value))).intdigest().to_bytes(8) for value in self], b"\0")).intdigest() def __iter__(self) -> Iterator[Any]: diff --git a/src/safeds/data/tabular/containers/_table.py b/src/safeds/data/tabular/containers/_table.py index 2ed3a0f92..1ff71abc8 100644 --- a/src/safeds/data/tabular/containers/_table.py +++ b/src/safeds/data/tabular/containers/_table.py @@ -13,6 +13,7 @@ import openpyxl import pandas as pd import seaborn as sns +import xxhash from pandas import DataFrame from scipy import stats @@ -466,7 +467,6 @@ def __hash__(self) -> int: hash : int The hash value. 
""" - import xxhash return xxhash.xxh3_64(hash(self._schema).to_bytes(8) + self.number_of_rows.to_bytes(8)).intdigest() def __repr__(self) -> str: diff --git a/src/safeds/data/tabular/containers/_tagged_table.py b/src/safeds/data/tabular/containers/_tagged_table.py index b6b09b5c5..c9c1b1344 100644 --- a/src/safeds/data/tabular/containers/_tagged_table.py +++ b/src/safeds/data/tabular/containers/_tagged_table.py @@ -3,6 +3,8 @@ import sys from typing import TYPE_CHECKING +import xxhash + from safeds.data.tabular.containers import Column, Row, Table from safeds.exceptions import ( ColumnIsTargetError, @@ -188,7 +190,6 @@ def __hash__(self) -> int: hash : int The hash value. """ - import xxhash return xxhash.xxh3_64(hash(self.target).to_bytes(8) + hash(self.features).to_bytes(8) + Table.__hash__(self).to_bytes(8)).intdigest() def __sizeof__(self) -> int: diff --git a/src/safeds/data/tabular/containers/_time_series.py b/src/safeds/data/tabular/containers/_time_series.py index 7a2a89b5c..1612935ac 100644 --- a/src/safeds/data/tabular/containers/_time_series.py +++ b/src/safeds/data/tabular/containers/_time_series.py @@ -7,6 +7,7 @@ import matplotlib.pyplot as plt import pandas as pd import seaborn as sns +import xxhash from safeds.data.image.containers import Image from safeds.data.tabular.containers import Column, Row, Table, TaggedTable @@ -217,7 +218,6 @@ def __hash__(self) -> int: hash : int The hash value. 
""" - import xxhash return xxhash.xxh3_64(hash(self.time).to_bytes(8) + TaggedTable.__hash__(self).to_bytes(8)).intdigest() def __sizeof__(self) -> int: diff --git a/src/safeds/data/tabular/transformation/_table_transformer.py b/src/safeds/data/tabular/transformation/_table_transformer.py index dbfcd1c0a..3502be576 100644 --- a/src/safeds/data/tabular/transformation/_table_transformer.py +++ b/src/safeds/data/tabular/transformation/_table_transformer.py @@ -3,6 +3,8 @@ from abc import ABC, abstractmethod from typing import TYPE_CHECKING +import xxhash + if TYPE_CHECKING: from safeds.data.tabular.containers import Table @@ -19,7 +21,6 @@ def __hash__(self) -> int: hash : int The hash value. """ - import xxhash return xxhash.xxh3_64(self.__class__.__qualname__.encode("utf-8") + (1 if self.is_fitted() else 0).to_bytes(1)).intdigest() @abstractmethod diff --git a/src/safeds/data/tabular/typing/_schema.py b/src/safeds/data/tabular/typing/_schema.py index 4cc32c667..ff0c15d30 100644 --- a/src/safeds/data/tabular/typing/_schema.py +++ b/src/safeds/data/tabular/typing/_schema.py @@ -4,6 +4,8 @@ from dataclasses import dataclass from typing import TYPE_CHECKING +import xxhash + from safeds.data.tabular.typing import Anything, Integer, Nothing, RealNumber from safeds.data.tabular.typing._column_type import ColumnType from safeds.exceptions import UnknownColumnNameError @@ -79,7 +81,6 @@ def __hash__(self) -> int: >>> schema = Schema({"A": Integer(), "B": String()}) >>> hash_value = hash(schema) """ - import xxhash column_names = self._schema.keys() column_types = map(repr, self._schema.values()) return xxhash.xxh3_64(str(tuple(zip(column_names, column_types, strict=True)))).intdigest() diff --git a/src/safeds/ml/classical/classification/_classifier.py b/src/safeds/ml/classical/classification/_classifier.py index 02e9eefeb..b3a7a852b 100644 --- a/src/safeds/ml/classical/classification/_classifier.py +++ b/src/safeds/ml/classical/classification/_classifier.py @@ -3,6 +3,7 @@ 
from abc import ABC, abstractmethod from typing import TYPE_CHECKING +import xxhash from sklearn.metrics import accuracy_score as sk_accuracy_score from safeds.data.tabular.containers import Table, TaggedTable @@ -26,7 +27,6 @@ def __hash__(self) -> int: hash : int The hash value. """ - import xxhash return xxhash.xxh3_64(self.__class__.__qualname__.encode("utf-8") + (1 if self.is_fitted() else 0).to_bytes(1)).intdigest() @abstractmethod diff --git a/src/safeds/ml/classical/regression/_regressor.py b/src/safeds/ml/classical/regression/_regressor.py index 647662752..54ca497f3 100644 --- a/src/safeds/ml/classical/regression/_regressor.py +++ b/src/safeds/ml/classical/regression/_regressor.py @@ -3,6 +3,7 @@ from abc import ABC, abstractmethod from typing import TYPE_CHECKING +import xxhash from sklearn.metrics import mean_absolute_error as sk_mean_absolute_error from sklearn.metrics import mean_squared_error as sk_mean_squared_error @@ -25,7 +26,6 @@ def __hash__(self) -> int: hash : int The hash value. """ - import xxhash return xxhash.xxh3_64(self.__class__.__qualname__.encode("utf-8") + (1 if self.is_fitted() else 0).to_bytes(1)).intdigest() @abstractmethod