From f59fa76cd46bb4de9c4d61bcf258b222498e8f0d Mon Sep 17 00:00:00 2001 From: Eric Pugh Date: Mon, 19 Feb 2024 09:46:44 -0500 Subject: [PATCH 1/2] Tweaks from David and Scott --- .../examples/Jaccard and RBO Comparison.ipynb | 69 +++++++++++++++++-- 1 file changed, 62 insertions(+), 7 deletions(-) diff --git a/jupyterlite/files/examples/Jaccard and RBO Comparison.ipynb b/jupyterlite/files/examples/Jaccard and RBO Comparison.ipynb index 6f8112d..e86fc58 100644 --- a/jupyterlite/files/examples/Jaccard and RBO Comparison.ipynb +++ b/jupyterlite/files/examples/Jaccard and RBO Comparison.ipynb @@ -23,12 +23,12 @@ "cells": [ { "cell_type": "markdown", - "source": "# Jaccard and RBO Comparison \nTo understand the magnatude of changes to your query result sets, you can compare multiple snapshots together, either from the same case or different cases. \n\nThis notebook provides both Jaccard and Rank Biased Overlap (RBO) metrics.\n\nPlease copy this example and customize it for your own purposes!", + "source": "# Jaccard and RBO Comparison \nTo understand the magnitude of changes to your query result sets, you can compare multiple snapshots to each other.\n\nThis notebook provides both Jaccard and Rank Biased Overlap (RBO) metrics.\n\nPlease copy this example and customize it for your own purposes!", "metadata": {} }, { "cell_type": "code", - "source": "from js import fetch\nfrom typing import List, Optional, Union\n\nimport json\n\nimport matplotlib.pyplot as plt\nimport numpy as np\nimport pandas as pd\n\nimport piplite\nawait piplite.install('seaborn')\nawait piplite.install('rbo')\n\nimport rbo\nimport seaborn as sns\n\nimport os\n\nos.environ[\"TQDM_DISABLE\"] = \"1\"", + "source": "from js import fetch\nfrom typing import List, Optional, Union\n\nimport json\n\nimport matplotlib.pyplot as plt\nimport numpy as np\nimport pandas as pd\n\nimport piplite\nawait piplite.install('seaborn')\nawait piplite.install('rbo')\n\nimport rbo\nimport seaborn as sns\n\nimport os", 
"metadata": { "trusted": true }, @@ -72,16 +72,30 @@ } ] }, + { + "cell_type": "code", + "source": "os.environ[\"TQDM_DISABLE\"] = \"1\"", + "metadata": { + "trusted": true + }, + "execution_count": 5, + "outputs": [] + }, { "cell_type": "code", "source": "def jaccard(l1, l2, max_n):\n if len(l1) == 0 and len(l2) == 0:\n return 1\n max_len = min(len(l1), len(l2), max_n)\n set1 = set(l1[:max_len])\n set2 = set(l2[:max_len])\n intersection = len(set1.intersection(set2))\n union = len(set1) + len(set2) - intersection\n return float(intersection) / union\n\nasync def load_snapshots(case_id1, snapshot_id1, case_id2, snapshot_id2):\n df_a = await load_snapshot(case_id1, snapshot_id1)\n df_b = await load_snapshot(case_id2, snapshot_id2)\n return df_a.merge(df_b, on='query')\n\nasync def compare(case_id1, snapshot_id1, case_id2, snapshot_id2):\n df = await load_snapshots(case_id1, snapshot_id1, case_id2, snapshot_id2)\n \n df['jaccard'] = df.apply(lambda row: jaccard(row['docs_x'], row['docs_y'], 10), axis=1)\n df['rbo'] = df.apply(lambda row: rbo.RankingSimilarity(row['docs_x'], row['docs_y']).rbo(), axis=1)\n df['score_delta'] = df['score_y'] - df['score_x']\n df.name = f\"Case {case_id1} snapshot {snapshot_id1} vs. case {case_id1} snapshot {snapshot_id2}\"\n return df\n\n\n\nawait compare(case_id1=6789, snapshot_id1=2471, case_id2=6789, snapshot_id2=2472)", "metadata": { "trusted": true }, - "execution_count": 7, + "execution_count": 6, "outputs": [ { - "execution_count": 7, + "name": "stderr", + "text": "/lib/python3.11/site-packages/rbo/rbo.py:129: TqdmMonitorWarning: tqdm:disabling monitor support (monitor_interval = 0) due to:\ncan't start new thread\n for d in tqdm(range(1, k), disable=~self.verbose):\n", + "output_type": "stream" + }, + { + "execution_count": 6, "output_type": "execute_result", "data": { "text/plain": " num_results_x score_x \\\nquery \nprojector screen 1 1.0 \nnotebook 1 1.0 \niphone 8 1 1.0 \nprinter 1 1.0 \ncomputer 1 1.0 \n... ... ... 
\nwindows 10 1 1.0 \nmicrowave 1 1.0 \nbluetooth speakers 1 1.0 \ncoffee 1 1.0 \nvans 1 1.0 \n\n docs_x \\\nquery \nprojector screen [1069226, 47471, 490523, 1229109, 1229118, 325... \nnotebook [3851056, 3959000, 1550833, 1684763, 1675257, ... \niphone 8 [2048598, 1648546, 79524888, 1857711, 3613408,... \nprinter [3849563, 2225354, 1569761, 798960, 377837, 13... \ncomputer [560468, 532095, 560475, 523407, 693956, 56047... \n... ... \nwindows 10 [4481689, 3902727, 1560529, 1797902, 3155116, ... \nmicrowave [79513345, 4020048, 1768856, 2936032] \nbluetooth speakers [1993197, 3537784, 279672, 2663204, 558184, 33... \ncoffee [1996660, 2102472, 79583150, 1357989, 656359, ... \nvans [78503576, 79118095, 77388459, 78322005, 79013... \n\n num_results_y score_y \\\nquery \nprojector screen 1 1.0 \nnotebook 1 1.0 \niphone 8 1 1.0 \nprinter 1 1.0 \ncomputer 1 1.0 \n... ... ... \nwindows 10 1 1.0 \nmicrowave 1 1.0 \nbluetooth speakers 1 1.0 \ncoffee 1 1.0 \nvans 1 1.0 \n\n docs_y \\\nquery \nprojector screen [1069226, 47471, 490523, 1229109, 1229118, 325... \nnotebook [3851056, 3959000, 1550833, 1684763, 1675257, ... \niphone 8 [2048598, 1648546, 79524888, 1857711, 3613408,... \nprinter [3849563, 2225354, 1569761, 798960, 377837, 13... \ncomputer [560468, 532095, 560475, 523407, 693956, 56047... \n... ... \nwindows 10 [4481689, 3902727, 1560529, 1797902, 3155116, ... \nmicrowave [79513345, 4020048, 1768856, 2936032] \nbluetooth speakers [1993197, 3537784, 279672, 2663204, 558184, 33... \ncoffee [1996660, 2102472, 79583150, 1357989, 656359, ... \nvans [78503576, 79118095, 77388459, 78322005, 79013... \n\n jaccard rbo score_delta \nquery \nprojector screen 1.0 1.0 0.0 \nnotebook 1.0 1.0 0.0 \niphone 8 1.0 1.0 0.0 \nprinter 1.0 1.0 0.0 \ncomputer 1.0 1.0 0.0 \n... ... ... ... 
\nwindows 10 1.0 1.0 0.0 \nmicrowave 1.0 1.0 0.0 \nbluetooth speakers 1.0 1.0 0.0 \ncoffee 1.0 1.0 0.0 \nvans 1.0 1.0 0.0 \n\n[135 rows x 9 columns]", @@ -93,11 +107,45 @@ }, { "cell_type": "code", - "source": "import matplotlib\nmatplotlib.rc_file_defaults()\n\ndef plot_compare(df):\n figure, axes = plt.subplots(1, 3, figsize=(10, 4))\n figure.suptitle(df.name)\n\n sns.barplot(ax=axes[0], x=df['score_delta'], y=df.index, width=0.3, color='darkgrey')\n axes[0].set(xlim=(-1, 1))\n axes[0].set_xlabel('Change in Score')\n axes[0].set_ylabel('')\n axes[0].set_facecolor((0.90, 0.90, 0.90))\n axes[0].grid(True)\n axes[0].spines['top'].set_visible(False)\n axes[0].spines['right'].set_visible(False)\n axes[0].spines['bottom'].set_visible(False)\n axes[0].spines['left'].set_visible(False)\n axes[0].set_axisbelow(True)\n axes[0].xaxis.grid(color='w', linestyle='solid')\n axes[0].yaxis.grid(color='w', linestyle='solid')\n \n sns.heatmap(df[['jaccard']], ax=axes[1], cmap='crest', annot=True, xticklabels=False, yticklabels=False)\n axes[1].set_xlabel('Jaccard Similiarity')\n axes[1].set_ylabel('')\n \n sns.heatmap(df[['rbo']], ax=axes[2], cmap='crest', annot=True, xticklabels=False, yticklabels=False)\n axes[2].set_xlabel('Rank Biased Overlap')\n axes[2].set_ylabel('')\n \n plt.show()\n \ndf = await compare(case_id1=6789, snapshot_id1=2471, case_id2=6789, snapshot_id2=2473)\nplot_compare(df)", + "source": "import matplotlib\nmatplotlib.rc_file_defaults()\n\ndef plot_compare(df):\n figure, axes = plt.subplots(1, 3, figsize=(10, 4))\n figure.suptitle(df.name)\n\n sns.barplot(ax=axes[0], x=df['score_delta'], y=df.index, width=0.3, color='darkgrey')\n axes[0].set(xlim=(-1, 1))\n axes[0].set_xlabel('Change in Score')\n axes[0].set_ylabel('')\n axes[0].set_facecolor((0.90, 0.90, 0.90))\n axes[0].grid(True)\n axes[0].spines['top'].set_visible(False)\n axes[0].spines['right'].set_visible(False)\n axes[0].spines['bottom'].set_visible(False)\n axes[0].spines['left'].set_visible(False)\n 
axes[0].set_axisbelow(True)\n axes[0].xaxis.grid(color='w', linestyle='solid')\n axes[0].yaxis.grid(color='w', linestyle='solid')\n \n sns.heatmap(df[['jaccard']], ax=axes[1], cmap='crest', annot=True, xticklabels=False, yticklabels=False)\n axes[1].set_xlabel('Jaccard Similiarity')\n axes[1].set_ylabel('')\n \n sns.heatmap(df[['rbo']], ax=axes[2], cmap='crest', annot=True, xticklabels=False, yticklabels=False)\n axes[2].set_xlabel('Rank Biased Overlap')\n axes[2].set_ylabel('')\n \n plt.show()\n \ndf = await compare(case_id1=6789, snapshot_id1=2471, case_id2=6789, snapshot_id2=2473)\n", "metadata": { "trusted": true }, - "execution_count": 6, + "execution_count": 7, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": "## Overall Jaccard and RBO Scores", + "metadata": {} + }, + { + "cell_type": "code", + "source": "print(f\"Overall Jaccard Score: {df['jaccard'].mean()}\\nOverall RBO Score: {df['rbo'].mean()}\")", + "metadata": { + "trusted": true + }, + "execution_count": 8, + "outputs": [ + { + "name": "stdout", + "text": "Overall Jaccard Score: 1.0\nOverall RBO Score: 1.0\n", + "output_type": "stream" + } + ] + }, + { + "cell_type": "markdown", + "source": "## Query Level Jaccard and RBO Scores", + "metadata": {} + }, + { + "cell_type": "code", + "source": "plot_compare(df)", + "metadata": { + "trusted": true + }, + "execution_count": 9, "outputs": [ { "output_type": "display_data", @@ -111,8 +159,15 @@ }, { "cell_type": "markdown", - "source": "_This notebook was last updated 16-FEB-2024_", + "source": "_This notebook was last updated 19-FEB-2024_", "metadata": {} + }, + { + "cell_type": "code", + "source": "", + "metadata": {}, + "execution_count": null, + "outputs": [] } ] } \ No newline at end of file From 98a0ccce7f1f915293ba844a04b340e90fed003b Mon Sep 17 00:00:00 2001 From: Eric Pugh Date: Mon, 19 Feb 2024 10:19:41 -0500 Subject: [PATCH 2/2] Tweaks from the team.. 
--- jupyterlite/files/examples/Fleiss Kappa.ipynb | 29 +-- .../examples/Multiple Raters Analysis.ipynb | 171 +++++++++--------- 2 files changed, 92 insertions(+), 108 deletions(-) diff --git a/jupyterlite/files/examples/Fleiss Kappa.ipynb b/jupyterlite/files/examples/Fleiss Kappa.ipynb index e9df73b..2e1f9fe 100644 --- a/jupyterlite/files/examples/Fleiss Kappa.ipynb +++ b/jupyterlite/files/examples/Fleiss Kappa.ipynb @@ -23,7 +23,7 @@ "cells": [ { "cell_type": "markdown", - "source": "# Fleiss' Kappa \nTo understand how much your raters what? Scott, need some text!\n\nPlease copy this example and customize it for your own purposes!", + "source": "# Fleiss' Kappa \nFleiss' Kappa measures how much your judges agree with each other. It is meant to be used with more than two judges.\n\nRead https://www.datanovia.com/en/blog/kappa-coefficient-interpretation/ to learn more.\n\nPlease copy this example and customize it for your own purposes!", "metadata": {}, "id": "bd7e4efa-eb00-451e-984d-ed6646d8e25f" }, @@ -51,11 +51,11 @@ }, { "cell_type": "code", - "source": "QUEPID_BOOK_NUM = 25\n\n# Not needed if running within Quepid JupyterLite\n# QUEPID_API_TOKEN = \"\"", + "source": "QUEPID_BOOK_NUM = 25", "metadata": { "trusted": true }, - "execution_count": 3, + "execution_count": 2, "outputs": [], "id": "71803a49-4065-4adf-a69e-cb0fe2d00f22" }, @@ -71,7 +71,7 @@ "metadata": { "trusted": true }, - "execution_count": 4, + "execution_count": 3, "outputs": [], "id": "31193536-98eb-4b46-ab98-af04ee07c6d3" }, @@ -81,7 +81,7 @@ "metadata": { "trusted": true }, - "execution_count": 5, + "execution_count": null, "outputs": [], "id": "8fef6231-daa8-467f-ac57-13a144e8a356" }, @@ -97,7 +97,7 @@ "metadata": { "trusted": true }, - "execution_count": 6, + "execution_count": null, "outputs": [], "id": "9a8561fd-2dbf-477e-9ac1-4df6d5ebdc91" }, @@ -113,7 +113,7 @@ "metadata": { "trusted": true }, - "execution_count": 7, + "execution_count": null, "outputs": [], "id": 
"a7598308-129b-4628-ad3a-fc3d703f8205" }, @@ -129,22 +129,13 @@ "metadata": { "trusted": true }, - "execution_count": 8, - "outputs": [ - { - "output_type": "display_data", - "data": { - "text/plain": "", - "text/markdown": "## Fleiss' Kappa: -0.3333" - }, - "metadata": {} - } - ], + "execution_count": null, + "outputs": [], "id": "25a613f9" }, { "cell_type": "markdown", - "source": "_This notebook was last updated 17-FEB-2024_", + "source": "_This notebook was last updated 19-FEB-2024_", "metadata": {}, "id": "5704579e-2321-4629-8de0-6608b428e2b6" }, diff --git a/jupyterlite/files/examples/Multiple Raters Analysis.ipynb b/jupyterlite/files/examples/Multiple Raters Analysis.ipynb index 1a262b4..53c3283 100644 --- a/jupyterlite/files/examples/Multiple Raters Analysis.ipynb +++ b/jupyterlite/files/examples/Multiple Raters Analysis.ipynb @@ -61,7 +61,7 @@ }, { "cell_type": "code", - "source": "# You need to get your book_id from Quepid UI. You should be able to see its content if you open /api/books/1.json\nBOOK_ID = 25", + "source": "# You need to get your book_id from Quepid UI. You should be able to see it's content if you open /api/books/1.json\nBOOK_ID = 25", "metadata": { "trusted": true }, @@ -75,10 +75,10 @@ "metadata": { "trusted": true }, - "execution_count": 4, + "execution_count": 66, "outputs": [ { - "execution_count": 4, + "execution_count": 66, "output_type": "execute_result", "data": { "text/plain": " query docid charlie@flax.co.uk \\\n0 projector screen 325961 NaN \n1 projector screen 47471 NaN \n2 projector screen 126679 NaN \n3 projector screen 254441 NaN \n4 projector screen 325958 NaN \n... ... ... ... \n2415 power supply 1667352 NaN \n2416 power supply 1667804 NaN \n2417 power supply 1667752 NaN \n2418 power supply 1667821 NaN \n2419 power supply 1667357 NaN \n\n epugh@opensourceconnections.com eschramma@cas.org dtaivpp@gmail.com \\\n0 3.0 NaN 3.0 \n1 3.0 NaN 3.0 \n2 3.0 NaN 3.0 \n3 3.0 NaN NaN \n4 3.0 NaN NaN \n... ... ... ... 
\n2415 0.0 NaN NaN \n2416 0.0 NaN NaN \n2417 0.0 NaN NaN \n2418 0.0 NaN NaN \n2419 0.0 NaN NaN \n\n aarora@opensourceconnections.com cmcollier@gmail.com \\\n0 NaN NaN \n1 NaN NaN \n2 NaN NaN \n3 NaN NaN \n4 NaN NaN \n... ... ... \n2415 NaN NaN \n2416 NaN NaN \n2417 NaN NaN \n2418 NaN NaN \n2419 NaN NaN \n\n ben.w.trent@gmail.com jeff@vin.com cmarino@enterprise-knowledge.com \\\n0 NaN NaN NaN \n1 NaN NaN NaN \n2 NaN NaN NaN \n3 NaN NaN NaN \n4 NaN NaN NaN \n... ... ... ... \n2415 NaN NaN NaN \n2416 NaN NaN NaN \n2417 NaN NaN NaN \n2418 NaN NaN NaN \n2419 NaN NaN NaN \n\n msfroh@gmail.com peter@searchintuition.com maximilian.werk@jina.ai \\\n0 NaN NaN NaN \n1 NaN NaN NaN \n2 NaN NaN NaN \n3 NaN NaN NaN \n4 NaN NaN NaN \n... ... ... ... \n2415 NaN NaN NaN \n2416 NaN NaN NaN \n2417 NaN NaN NaN \n2418 NaN NaN NaN \n2419 NaN NaN NaN \n\n ryan.finley@ferguson.com \n0 NaN \n1 NaN \n2 NaN \n3 NaN \n4 NaN \n... ... \n2415 NaN \n2416 NaN \n2417 NaN \n2418 NaN \n2419 NaN \n\n[2420 rows x 15 columns]", @@ -95,13 +95,13 @@ "metadata": { "trusted": true }, - "execution_count": 5, + "execution_count": 28, "outputs": [], "id": "98ad3844-d67f-44a2-8bae-034223de6c68" }, { "cell_type": "code", - "source": "df.dropna(inplace=True)\ndf.shape", + "source": "#df.dropna(inplace=True)\ndf.shape", "metadata": { "trusted": true }, @@ -124,10 +124,10 @@ "metadata": { "trusted": true }, - "execution_count": 7, + "execution_count": 29, "outputs": [ { - "execution_count": 7, + "execution_count": 29, "output_type": "execute_result", "data": { "text/plain": "Empty DataFrame\nColumns: [query, docid, charlie@flax.co.uk, epugh@opensourceconnections.com, eschramma@cas.org, dtaivpp@gmail.com, aarora@opensourceconnections.com, cmcollier@gmail.com, ben.w.trent@gmail.com, jeff@vin.com, cmarino@enterprise-knowledge.com, msfroh@gmail.com, peter@searchintuition.com, maximilian.werk@jina.ai, ryan.finley@ferguson.com]\nIndex: []", @@ -144,10 +144,10 @@ "metadata": { "trusted": true }, - "execution_count": 8, + 
"execution_count": 30, "outputs": [ { - "execution_count": 8, + "execution_count": 30, "output_type": "execute_result", "data": { "text/plain": "['charlie@flax.co.uk',\n 'epugh@opensourceconnections.com',\n 'eschramma@cas.org',\n 'dtaivpp@gmail.com',\n 'aarora@opensourceconnections.com',\n 'cmcollier@gmail.com',\n 'ben.w.trent@gmail.com',\n 'jeff@vin.com',\n 'cmarino@enterprise-knowledge.com',\n 'msfroh@gmail.com',\n 'peter@searchintuition.com',\n 'maximilian.werk@jina.ai',\n 'ryan.finley@ferguson.com']" @@ -157,13 +157,23 @@ ], "id": "72d4481e-ae12-4fff-bbbd-1888a894f69a" }, + { + "cell_type": "code", + "source": "# We need to filter to raters that we THINK might have some overlap\nraters = [\n 'epugh@opensourceconnections.com',\n 'aarora@opensourceconnections.com',\n 'ben.w.trent@gmail.com'\n]", + "metadata": { + "trusted": true + }, + "execution_count": 67, + "outputs": [], + "id": "dcd60629-44fc-4122-95dd-98fe2558489d" + }, { "cell_type": "code", "source": "nb_raters = len(raters)", "metadata": { "trusted": true }, - "execution_count": 9, + "execution_count": 68, "outputs": [], "id": "4c1fc91f-cda6-4e76-8372-3062e6975adb" }, @@ -179,14 +189,14 @@ "metadata": { "trusted": true }, - "execution_count": 10, + "execution_count": 69, "outputs": [ { - "execution_count": 10, + "execution_count": 69, "output_type": "execute_result", "data": { - "text/plain": "Empty DataFrame\nColumns: [query, docid, rating_0, rating_1, rating_2, rating_3, rating_4, rating_5, rating_6, rating_7, rating_8, rating_9, rating_10, rating_11, rating_12, rater_0, rater_1, rater_2, rater_3, rater_4, rater_5, rater_6, rater_7, rater_8, rater_9, rater_10, rater_11, rater_12]\nIndex: []\n\n[0 rows x 28 columns]", - "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
querydocidrating_0rating_1rating_2rating_3rating_4rating_5rating_6rating_7...rater_3rater_4rater_5rater_6rater_7rater_8rater_9rater_10rater_11rater_12
\n

0 rows × 28 columns

\n
" + "text/plain": " query docid charlie@flax.co.uk rating_0 \\\n0 projector screen 325961 NaN 3.0 \n1 projector screen 47471 NaN 3.0 \n2 projector screen 126679 NaN 3.0 \n3 projector screen 254441 NaN 3.0 \n4 projector screen 325958 NaN 3.0 \n... ... ... ... ... \n2415 power supply 1667352 NaN 0.0 \n2416 power supply 1667804 NaN 0.0 \n2417 power supply 1667752 NaN 0.0 \n2418 power supply 1667821 NaN 0.0 \n2419 power supply 1667357 NaN 0.0 \n\n eschramma@cas.org dtaivpp@gmail.com rating_1 cmcollier@gmail.com \\\n0 NaN 3.0 NaN NaN \n1 NaN 3.0 NaN NaN \n2 NaN 3.0 NaN NaN \n3 NaN NaN NaN NaN \n4 NaN NaN NaN NaN \n... ... ... ... ... \n2415 NaN NaN NaN NaN \n2416 NaN NaN NaN NaN \n2417 NaN NaN NaN NaN \n2418 NaN NaN NaN NaN \n2419 NaN NaN NaN NaN \n\n rating_2 jeff@vin.com cmarino@enterprise-knowledge.com \\\n0 NaN NaN NaN \n1 NaN NaN NaN \n2 NaN NaN NaN \n3 NaN NaN NaN \n4 NaN NaN NaN \n... ... ... ... \n2415 NaN NaN NaN \n2416 NaN NaN NaN \n2417 NaN NaN NaN \n2418 NaN NaN NaN \n2419 NaN NaN NaN \n\n msfroh@gmail.com peter@searchintuition.com maximilian.werk@jina.ai \\\n0 NaN NaN NaN \n1 NaN NaN NaN \n2 NaN NaN NaN \n3 NaN NaN NaN \n4 NaN NaN NaN \n... ... ... ... \n2415 NaN NaN NaN \n2416 NaN NaN NaN \n2417 NaN NaN NaN \n2418 NaN NaN NaN \n2419 NaN NaN NaN \n\n ryan.finley@ferguson.com rater_0 \\\n0 NaN epugh@opensourceconnections.com \n1 NaN epugh@opensourceconnections.com \n2 NaN epugh@opensourceconnections.com \n3 NaN epugh@opensourceconnections.com \n4 NaN epugh@opensourceconnections.com \n... ... ... 
\n2415 NaN epugh@opensourceconnections.com \n2416 NaN epugh@opensourceconnections.com \n2417 NaN epugh@opensourceconnections.com \n2418 NaN epugh@opensourceconnections.com \n2419 NaN epugh@opensourceconnections.com \n\n rater_1 rater_2 \n0 aarora@opensourceconnections.com ben.w.trent@gmail.com \n1 aarora@opensourceconnections.com ben.w.trent@gmail.com \n2 aarora@opensourceconnections.com ben.w.trent@gmail.com \n3 aarora@opensourceconnections.com ben.w.trent@gmail.com \n4 aarora@opensourceconnections.com ben.w.trent@gmail.com \n... ... ... \n2415 aarora@opensourceconnections.com ben.w.trent@gmail.com \n2416 aarora@opensourceconnections.com ben.w.trent@gmail.com \n2417 aarora@opensourceconnections.com ben.w.trent@gmail.com \n2418 aarora@opensourceconnections.com ben.w.trent@gmail.com \n2419 aarora@opensourceconnections.com ben.w.trent@gmail.com \n\n[2420 rows x 18 columns]", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
querydocidcharlie@flax.co.ukrating_0eschramma@cas.orgdtaivpp@gmail.comrating_1cmcollier@gmail.comrating_2jeff@vin.comcmarino@enterprise-knowledge.commsfroh@gmail.competer@searchintuition.commaximilian.werk@jina.airyan.finley@ferguson.comrater_0rater_1rater_2
0projector screen325961NaN3.0NaN3.0NaNNaNNaNNaNNaNNaNNaNNaNNaNepugh@opensourceconnections.comaarora@opensourceconnections.comben.w.trent@gmail.com
1projector screen47471NaN3.0NaN3.0NaNNaNNaNNaNNaNNaNNaNNaNNaNepugh@opensourceconnections.comaarora@opensourceconnections.comben.w.trent@gmail.com
2projector screen126679NaN3.0NaN3.0NaNNaNNaNNaNNaNNaNNaNNaNNaNepugh@opensourceconnections.comaarora@opensourceconnections.comben.w.trent@gmail.com
3projector screen254441NaN3.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNepugh@opensourceconnections.comaarora@opensourceconnections.comben.w.trent@gmail.com
4projector screen325958NaN3.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNepugh@opensourceconnections.comaarora@opensourceconnections.comben.w.trent@gmail.com
.........................................................
2415power supply1667352NaN0.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNepugh@opensourceconnections.comaarora@opensourceconnections.comben.w.trent@gmail.com
2416power supply1667804NaN0.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNepugh@opensourceconnections.comaarora@opensourceconnections.comben.w.trent@gmail.com
2417power supply1667752NaN0.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNepugh@opensourceconnections.comaarora@opensourceconnections.comben.w.trent@gmail.com
2418power supply1667821NaN0.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNepugh@opensourceconnections.comaarora@opensourceconnections.comben.w.trent@gmail.com
2419power supply1667357NaN0.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNepugh@opensourceconnections.comaarora@opensourceconnections.comben.w.trent@gmail.com
\n

2420 rows × 18 columns

\n
" }, "metadata": {} } @@ -205,20 +215,30 @@ "metadata": { "trusted": true }, - "execution_count": 11, + "execution_count": 70, "outputs": [ { - "execution_count": 11, + "execution_count": 70, "output_type": "execute_result", "data": { - "text/plain": "Empty DataFrame\nColumns: [query, docid, rating, rater]\nIndex: []", - "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n
querydocidratingrater
\n
" + "text/plain": " query docid rating rater\n4063 iphone 11 1423 NaN aarora@opensourceconnections.com\n6483 iphone 11 1423 NaN ben.w.trent@gmail.com\n1643 iphone 11 1423 NaN epugh@opensourceconnections.com\n4065 iphone 11 1424 NaN aarora@opensourceconnections.com\n6485 iphone 11 1424 NaN ben.w.trent@gmail.com\n... ... ... ... ...\n2383 windows 10 79583170 NaN epugh@opensourceconnections.com\n4803 windows 10 79583170 NaN aarora@opensourceconnections.com\n5879 samsung 79659021 NaN ben.w.trent@gmail.com\n3459 samsung 79659021 3.0 aarora@opensourceconnections.com\n1039 samsung 79659021 3.0 epugh@opensourceconnections.com\n\n[7260 rows x 4 columns]", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
querydocidratingrater
4063iphone 111423NaNaarora@opensourceconnections.com
6483iphone 111423NaNben.w.trent@gmail.com
1643iphone 111423NaNepugh@opensourceconnections.com
4065iphone 111424NaNaarora@opensourceconnections.com
6485iphone 111424NaNben.w.trent@gmail.com
...............
2383windows 1079583170NaNepugh@opensourceconnections.com
4803windows 1079583170NaNaarora@opensourceconnections.com
5879samsung79659021NaNben.w.trent@gmail.com
3459samsung796590213.0aarora@opensourceconnections.com
1039samsung796590213.0epugh@opensourceconnections.com
\n

7260 rows × 4 columns

\n
" }, "metadata": {} } ], "id": "97b0bc0c-a20e-49b2-a65a-ef09eb7e6a58" }, + { + "cell_type": "code", + "source": "df_overall.dropna(inplace=True)", + "metadata": { + "trusted": true + }, + "execution_count": 71, + "outputs": [], + "id": "d58d14a6-6bfb-4c77-8145-514c0030bc53" + }, { "cell_type": "markdown", "source": "### Rating distribution per query\nHe we just want to plot the distribution of ratings for each query:\n", @@ -231,14 +251,14 @@ "metadata": { "trusted": true }, - "execution_count": 12, + "execution_count": 72, "outputs": [ { - "execution_count": 12, + "execution_count": 72, "output_type": "execute_result", "data": { - "text/plain": "Empty DataFrame\nColumns: [(rating, count), (rating, mean), (rating, std)]\nIndex: []", - "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
rating
countmeanstd
query
\n
" + "text/plain": " rating \n count mean std\nquery \n120v power supply 2 0.000000 0.000000\naa 10 1.800000 1.549193\naa battery 10 1.800000 1.135292\naaa 7 1.285714 1.603567\nadapter 10 1.300000 0.674949\n... ... ... ...\nwireless headphones 5 0.800000 1.303840\nwireless mouse 14 2.000000 1.109400\nxbox 15 0.000000 0.000000\nxbox one 7 0.428571 0.786796\nyoutube 12 0.000000 0.000000\n\n[137 rows x 3 columns]", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
rating
countmeanstd
query
120v power supply20.0000000.000000
aa101.8000001.549193
aa battery101.8000001.135292
aaa71.2857141.603567
adapter101.3000000.674949
............
wireless headphones50.8000001.303840
wireless mouse142.0000001.109400
xbox150.0000000.000000
xbox one70.4285710.786796
youtube120.0000000.000000
\n

137 rows × 3 columns

\n
" }, "metadata": {} } @@ -251,32 +271,15 @@ "metadata": { "trusted": true }, - "execution_count": 13, + "execution_count": 73, "outputs": [ { "output_type": "display_data", "data": { "text/plain": "
", - "image/png": "\n" + "image/png": "\n" }, "metadata": {} - }, - { - "ename": "", - "evalue": "zero-size array to reduction operation minimum which has no identity", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[13], line 7\u001b[0m\n\u001b[1;32m 3\u001b[0m dataset \u001b[38;5;241m=\u001b[39m [df_overall[df_overall[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mquery\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m==\u001b[39m q][\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mrating\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;28;01mfor\u001b[39;00m q \u001b[38;5;129;01min\u001b[39;00m queries]\n\u001b[1;32m 5\u001b[0m nb_queries \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mlen\u001b[39m(queries)\n\u001b[0;32m----> 7\u001b[0m \u001b[43maxes\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mviolinplot\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdataset\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m \u001b[49m\u001b[43mdataset\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mshowmeans\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mbw_method\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m0.05\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 8\u001b[0m axes\u001b[38;5;241m.\u001b[39mset_xlabel(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mquery\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[1;32m 9\u001b[0m axes\u001b[38;5;241m.\u001b[39mset_ylabel(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mratings\u001b[39m\u001b[38;5;124m'\u001b[39m)\n", - "File \u001b[0;32m/lib/python3.11/site-packages/matplotlib/__init__.py:1412\u001b[0m, in \u001b[0;36m_preprocess_data..inner\u001b[0;34m(ax, data, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1409\u001b[0m 
\u001b[38;5;129m@functools\u001b[39m\u001b[38;5;241m.\u001b[39mwraps(func)\n\u001b[1;32m 1410\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21minner\u001b[39m(ax, \u001b[38;5;241m*\u001b[39margs, data\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m 1411\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m data \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m-> 1412\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[43max\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;28;43mmap\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43msanitize_sequence\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43margs\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1414\u001b[0m bound \u001b[38;5;241m=\u001b[39m new_sig\u001b[38;5;241m.\u001b[39mbind(ax, \u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[1;32m 1415\u001b[0m auto_label \u001b[38;5;241m=\u001b[39m (bound\u001b[38;5;241m.\u001b[39marguments\u001b[38;5;241m.\u001b[39mget(label_namer)\n\u001b[1;32m 1416\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m bound\u001b[38;5;241m.\u001b[39mkwargs\u001b[38;5;241m.\u001b[39mget(label_namer))\n", - "File \u001b[0;32m/lib/python3.11/site-packages/matplotlib/axes/_axes.py:7938\u001b[0m, in \u001b[0;36mAxes.violinplot\u001b[0;34m(self, dataset, positions, vert, widths, showmeans, showextrema, showmedians, quantiles, points, bw_method)\u001b[0m\n\u001b[1;32m 7935\u001b[0m kde \u001b[38;5;241m=\u001b[39m mlab\u001b[38;5;241m.\u001b[39mGaussianKDE(X, bw_method)\n\u001b[1;32m 7936\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m 
kde\u001b[38;5;241m.\u001b[39mevaluate(coords)\n\u001b[0;32m-> 7938\u001b[0m vpstats \u001b[38;5;241m=\u001b[39m \u001b[43mcbook\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mviolin_stats\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdataset\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m_kde_method\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mpoints\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mpoints\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 7939\u001b[0m \u001b[43m \u001b[49m\u001b[43mquantiles\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mquantiles\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 7940\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mviolin(vpstats, positions\u001b[38;5;241m=\u001b[39mpositions, vert\u001b[38;5;241m=\u001b[39mvert,\n\u001b[1;32m 7941\u001b[0m widths\u001b[38;5;241m=\u001b[39mwidths, showmeans\u001b[38;5;241m=\u001b[39mshowmeans,\n\u001b[1;32m 7942\u001b[0m showextrema\u001b[38;5;241m=\u001b[39mshowextrema, showmedians\u001b[38;5;241m=\u001b[39mshowmedians)\n", - "File \u001b[0;32m/lib/python3.11/site-packages/matplotlib/cbook/__init__.py:1447\u001b[0m, in \u001b[0;36mviolin_stats\u001b[0;34m(X, method, points, quantiles)\u001b[0m\n\u001b[1;32m 1444\u001b[0m stats \u001b[38;5;241m=\u001b[39m {}\n\u001b[1;32m 1446\u001b[0m \u001b[38;5;66;03m# Calculate basic stats for the distribution\u001b[39;00m\n\u001b[0;32m-> 1447\u001b[0m min_val \u001b[38;5;241m=\u001b[39m \u001b[43mnp\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmin\u001b[49m\u001b[43m(\u001b[49m\u001b[43mx\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1448\u001b[0m max_val \u001b[38;5;241m=\u001b[39m np\u001b[38;5;241m.\u001b[39mmax(x)\n\u001b[1;32m 1449\u001b[0m quantile_val \u001b[38;5;241m=\u001b[39m np\u001b[38;5;241m.\u001b[39mpercentile(x, \u001b[38;5;241m100\u001b[39m \u001b[38;5;241m*\u001b[39m q)\n", - "File \u001b[0;32m<__array_function__ internals>:200\u001b[0m, 
in \u001b[0;36mamin\u001b[0;34m(*args, **kwargs)\u001b[0m\n", - "File \u001b[0;32m/lib/python3.11/site-packages/numpy/core/fromnumeric.py:2946\u001b[0m, in \u001b[0;36mamin\u001b[0;34m(a, axis, out, keepdims, initial, where)\u001b[0m\n\u001b[1;32m 2829\u001b[0m \u001b[38;5;129m@array_function_dispatch\u001b[39m(_amin_dispatcher)\n\u001b[1;32m 2830\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mamin\u001b[39m(a, axis\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m, out\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m, keepdims\u001b[38;5;241m=\u001b[39mnp\u001b[38;5;241m.\u001b[39m_NoValue, initial\u001b[38;5;241m=\u001b[39mnp\u001b[38;5;241m.\u001b[39m_NoValue,\n\u001b[1;32m 2831\u001b[0m where\u001b[38;5;241m=\u001b[39mnp\u001b[38;5;241m.\u001b[39m_NoValue):\n\u001b[1;32m 2832\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 2833\u001b[0m \u001b[38;5;124;03m Return the minimum of an array or minimum along an axis.\u001b[39;00m\n\u001b[1;32m 2834\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 2944\u001b[0m \u001b[38;5;124;03m 6\u001b[39;00m\n\u001b[1;32m 2945\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m-> 2946\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_wrapreduction\u001b[49m\u001b[43m(\u001b[49m\u001b[43ma\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mnp\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mminimum\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mmin\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43maxis\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2947\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mkeepdims\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mkeepdims\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43minitial\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43minitial\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mwhere\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mwhere\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m/lib/python3.11/site-packages/numpy/core/fromnumeric.py:86\u001b[0m, in \u001b[0;36m_wrapreduction\u001b[0;34m(obj, ufunc, method, axis, dtype, out, **kwargs)\u001b[0m\n\u001b[1;32m 83\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 84\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m reduction(axis\u001b[38;5;241m=\u001b[39maxis, out\u001b[38;5;241m=\u001b[39mout, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mpasskwargs)\n\u001b[0;32m---> 86\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mufunc\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mreduce\u001b[49m\u001b[43m(\u001b[49m\u001b[43mobj\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43maxis\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdtype\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mout\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mpasskwargs\u001b[49m\u001b[43m)\u001b[49m\n", - "\u001b[0;31mValueError\u001b[0m: zero-size array to reduction operation minimum which has no identity" - ], - "output_type": "error" } ], "id": "70b93e5d-425d-4925-97fd-1b062f7c373f" @@ -293,32 +296,15 @@ "metadata": { "trusted": true }, - "execution_count": 14, + "execution_count": 74, "outputs": [ { "output_type": "display_data", "data": { "text/plain": "
", - "image/png": "\n" + "image/png": "\n" }, "metadata": {} - }, - { - "ename": "", - "evalue": "zero-size array to reduction operation minimum which has no identity", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[14], line 5\u001b[0m\n\u001b[1;32m 2\u001b[0m raters \u001b[38;5;241m=\u001b[39m df_overall[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mrater\u001b[39m\u001b[38;5;124m'\u001b[39m]\u001b[38;5;241m.\u001b[39munique()\n\u001b[1;32m 3\u001b[0m dataset \u001b[38;5;241m=\u001b[39m [df_overall[df_overall[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mrater\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m==\u001b[39m r][\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mrating\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;28;01mfor\u001b[39;00m r \u001b[38;5;129;01min\u001b[39;00m raters]\n\u001b[0;32m----> 5\u001b[0m \u001b[43maxes\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mviolinplot\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdataset\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m \u001b[49m\u001b[43mdataset\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mshowmeans\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mbw_method\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m0.05\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 6\u001b[0m axes\u001b[38;5;241m.\u001b[39mset_xlabel(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mrater\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[1;32m 7\u001b[0m axes\u001b[38;5;241m.\u001b[39mset_ylabel(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mratings\u001b[39m\u001b[38;5;124m'\u001b[39m)\n", - "File \u001b[0;32m/lib/python3.11/site-packages/matplotlib/__init__.py:1412\u001b[0m, in \u001b[0;36m_preprocess_data..inner\u001b[0;34m(ax, 
data, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1409\u001b[0m \u001b[38;5;129m@functools\u001b[39m\u001b[38;5;241m.\u001b[39mwraps(func)\n\u001b[1;32m 1410\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21minner\u001b[39m(ax, \u001b[38;5;241m*\u001b[39margs, data\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m 1411\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m data \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m-> 1412\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[43max\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;28;43mmap\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43msanitize_sequence\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43margs\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1414\u001b[0m bound \u001b[38;5;241m=\u001b[39m new_sig\u001b[38;5;241m.\u001b[39mbind(ax, \u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[1;32m 1415\u001b[0m auto_label \u001b[38;5;241m=\u001b[39m (bound\u001b[38;5;241m.\u001b[39marguments\u001b[38;5;241m.\u001b[39mget(label_namer)\n\u001b[1;32m 1416\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m bound\u001b[38;5;241m.\u001b[39mkwargs\u001b[38;5;241m.\u001b[39mget(label_namer))\n", - "File \u001b[0;32m/lib/python3.11/site-packages/matplotlib/axes/_axes.py:7938\u001b[0m, in \u001b[0;36mAxes.violinplot\u001b[0;34m(self, dataset, positions, vert, widths, showmeans, showextrema, showmedians, quantiles, points, bw_method)\u001b[0m\n\u001b[1;32m 7935\u001b[0m kde \u001b[38;5;241m=\u001b[39m mlab\u001b[38;5;241m.\u001b[39mGaussianKDE(X, bw_method)\n\u001b[1;32m 7936\u001b[0m 
\u001b[38;5;28;01mreturn\u001b[39;00m kde\u001b[38;5;241m.\u001b[39mevaluate(coords)\n\u001b[0;32m-> 7938\u001b[0m vpstats \u001b[38;5;241m=\u001b[39m \u001b[43mcbook\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mviolin_stats\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdataset\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m_kde_method\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mpoints\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mpoints\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 7939\u001b[0m \u001b[43m \u001b[49m\u001b[43mquantiles\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mquantiles\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 7940\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mviolin(vpstats, positions\u001b[38;5;241m=\u001b[39mpositions, vert\u001b[38;5;241m=\u001b[39mvert,\n\u001b[1;32m 7941\u001b[0m widths\u001b[38;5;241m=\u001b[39mwidths, showmeans\u001b[38;5;241m=\u001b[39mshowmeans,\n\u001b[1;32m 7942\u001b[0m showextrema\u001b[38;5;241m=\u001b[39mshowextrema, showmedians\u001b[38;5;241m=\u001b[39mshowmedians)\n", - "File \u001b[0;32m/lib/python3.11/site-packages/matplotlib/cbook/__init__.py:1447\u001b[0m, in \u001b[0;36mviolin_stats\u001b[0;34m(X, method, points, quantiles)\u001b[0m\n\u001b[1;32m 1444\u001b[0m stats \u001b[38;5;241m=\u001b[39m {}\n\u001b[1;32m 1446\u001b[0m \u001b[38;5;66;03m# Calculate basic stats for the distribution\u001b[39;00m\n\u001b[0;32m-> 1447\u001b[0m min_val \u001b[38;5;241m=\u001b[39m \u001b[43mnp\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmin\u001b[49m\u001b[43m(\u001b[49m\u001b[43mx\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1448\u001b[0m max_val \u001b[38;5;241m=\u001b[39m np\u001b[38;5;241m.\u001b[39mmax(x)\n\u001b[1;32m 1449\u001b[0m quantile_val \u001b[38;5;241m=\u001b[39m np\u001b[38;5;241m.\u001b[39mpercentile(x, \u001b[38;5;241m100\u001b[39m \u001b[38;5;241m*\u001b[39m q)\n", - "File 
\u001b[0;32m<__array_function__ internals>:200\u001b[0m, in \u001b[0;36mamin\u001b[0;34m(*args, **kwargs)\u001b[0m\n", - "File \u001b[0;32m/lib/python3.11/site-packages/numpy/core/fromnumeric.py:2946\u001b[0m, in \u001b[0;36mamin\u001b[0;34m(a, axis, out, keepdims, initial, where)\u001b[0m\n\u001b[1;32m 2829\u001b[0m \u001b[38;5;129m@array_function_dispatch\u001b[39m(_amin_dispatcher)\n\u001b[1;32m 2830\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mamin\u001b[39m(a, axis\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m, out\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m, keepdims\u001b[38;5;241m=\u001b[39mnp\u001b[38;5;241m.\u001b[39m_NoValue, initial\u001b[38;5;241m=\u001b[39mnp\u001b[38;5;241m.\u001b[39m_NoValue,\n\u001b[1;32m 2831\u001b[0m where\u001b[38;5;241m=\u001b[39mnp\u001b[38;5;241m.\u001b[39m_NoValue):\n\u001b[1;32m 2832\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 2833\u001b[0m \u001b[38;5;124;03m Return the minimum of an array or minimum along an axis.\u001b[39;00m\n\u001b[1;32m 2834\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 2944\u001b[0m \u001b[38;5;124;03m 6\u001b[39;00m\n\u001b[1;32m 2945\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m-> 2946\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_wrapreduction\u001b[49m\u001b[43m(\u001b[49m\u001b[43ma\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mnp\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mminimum\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mmin\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43maxis\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2947\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mkeepdims\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mkeepdims\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43minitial\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43minitial\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mwhere\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mwhere\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m/lib/python3.11/site-packages/numpy/core/fromnumeric.py:86\u001b[0m, in \u001b[0;36m_wrapreduction\u001b[0;34m(obj, ufunc, method, axis, dtype, out, **kwargs)\u001b[0m\n\u001b[1;32m 83\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 84\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m reduction(axis\u001b[38;5;241m=\u001b[39maxis, out\u001b[38;5;241m=\u001b[39mout, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mpasskwargs)\n\u001b[0;32m---> 86\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mufunc\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mreduce\u001b[49m\u001b[43m(\u001b[49m\u001b[43mobj\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43maxis\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdtype\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mout\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mpasskwargs\u001b[49m\u001b[43m)\u001b[49m\n", - "\u001b[0;31mValueError\u001b[0m: zero-size array to reduction operation minimum which has no identity" - ], - "output_type": "error" } ], "id": "f954846d-54a9-4cf1-a9da-ab0faaa46df9" @@ -341,13 +327,13 @@ "metadata": { "trusted": true }, - "execution_count": 15, + "execution_count": 75, "outputs": [ { "output_type": "display_data", "data": { "text/plain": "
", - "image/png": "\n" + "image/png": "\n" }, "metadata": {} } @@ -366,13 +352,13 @@ "metadata": { "trusted": true }, - "execution_count": 16, + "execution_count": 76, "outputs": [ { "output_type": "display_data", "data": { "text/plain": "
", - "image/png": "\n" + "image/png": "\n" }, "metadata": {} } @@ -385,10 +371,10 @@ "metadata": { "trusted": true }, - "execution_count": 17, + "execution_count": 77, "outputs": [ { - "execution_count": 17, + "execution_count": 77, "output_type": "execute_result", "data": { "text/plain": "Text(0.5, 1.0, 'Agents agreements')" @@ -399,7 +385,7 @@ "output_type": "display_data", "data": { "text/plain": "
", - "image/png": "\n" + "image/png": "\n" }, "metadata": {} } @@ -418,7 +404,7 @@ "metadata": { "trusted": true }, - "execution_count": 18, + "execution_count": 78, "outputs": [ { "name": "stdout", @@ -426,11 +412,11 @@ "output_type": "stream" }, { - "execution_count": 18, + "execution_count": 78, "output_type": "execute_result", "data": { - "text/plain": "Empty DataFrame\nColumns: [query, docid, rating_0, rating_1, rating_2, rating_3, rating_4, rating_5, rating_6, rating_7, rating_8, rating_9, rating_10, rating_11, rating_12, rater_0, rater_1, rater_2, rater_3, rater_4, rater_5, rater_6, rater_7, rater_8, rater_9, rater_10, rater_11, rater_12, nb_distinct_ratings]\nIndex: []\n\n[0 rows x 29 columns]", - "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
querydocidrating_0rating_1rating_2rating_3rating_4rating_5rating_6rating_7...rater_4rater_5rater_6rater_7rater_8rater_9rater_10rater_11rater_12nb_distinct_ratings
\n

0 rows × 29 columns

\n
" + "text/plain": " query docid charlie@flax.co.uk rating_0 \\\n0 projector screen 325961 NaN 3.0 \n1 projector screen 47471 NaN 3.0 \n2 projector screen 126679 NaN 3.0 \n3 projector screen 254441 NaN 3.0 \n4 projector screen 325958 NaN 3.0 \n... ... ... ... ... \n2415 power supply 1667352 NaN 0.0 \n2416 power supply 1667804 NaN 0.0 \n2417 power supply 1667752 NaN 0.0 \n2418 power supply 1667821 NaN 0.0 \n2419 power supply 1667357 NaN 0.0 \n\n eschramma@cas.org dtaivpp@gmail.com rating_1 cmcollier@gmail.com \\\n0 NaN 3.0 NaN NaN \n1 NaN 3.0 NaN NaN \n2 NaN 3.0 NaN NaN \n3 NaN NaN NaN NaN \n4 NaN NaN NaN NaN \n... ... ... ... ... \n2415 NaN NaN NaN NaN \n2416 NaN NaN NaN NaN \n2417 NaN NaN NaN NaN \n2418 NaN NaN NaN NaN \n2419 NaN NaN NaN NaN \n\n rating_2 jeff@vin.com cmarino@enterprise-knowledge.com \\\n0 NaN NaN NaN \n1 NaN NaN NaN \n2 NaN NaN NaN \n3 NaN NaN NaN \n4 NaN NaN NaN \n... ... ... ... \n2415 NaN NaN NaN \n2416 NaN NaN NaN \n2417 NaN NaN NaN \n2418 NaN NaN NaN \n2419 NaN NaN NaN \n\n msfroh@gmail.com peter@searchintuition.com maximilian.werk@jina.ai \\\n0 NaN NaN NaN \n1 NaN NaN NaN \n2 NaN NaN NaN \n3 NaN NaN NaN \n4 NaN NaN NaN \n... ... ... ... \n2415 NaN NaN NaN \n2416 NaN NaN NaN \n2417 NaN NaN NaN \n2418 NaN NaN NaN \n2419 NaN NaN NaN \n\n ryan.finley@ferguson.com rater_0 \\\n0 NaN epugh@opensourceconnections.com \n1 NaN epugh@opensourceconnections.com \n2 NaN epugh@opensourceconnections.com \n3 NaN epugh@opensourceconnections.com \n4 NaN epugh@opensourceconnections.com \n... ... ... 
\n2415 NaN epugh@opensourceconnections.com \n2416 NaN epugh@opensourceconnections.com \n2417 NaN epugh@opensourceconnections.com \n2418 NaN epugh@opensourceconnections.com \n2419 NaN epugh@opensourceconnections.com \n\n rater_1 rater_2 \\\n0 aarora@opensourceconnections.com ben.w.trent@gmail.com \n1 aarora@opensourceconnections.com ben.w.trent@gmail.com \n2 aarora@opensourceconnections.com ben.w.trent@gmail.com \n3 aarora@opensourceconnections.com ben.w.trent@gmail.com \n4 aarora@opensourceconnections.com ben.w.trent@gmail.com \n... ... ... \n2415 aarora@opensourceconnections.com ben.w.trent@gmail.com \n2416 aarora@opensourceconnections.com ben.w.trent@gmail.com \n2417 aarora@opensourceconnections.com ben.w.trent@gmail.com \n2418 aarora@opensourceconnections.com ben.w.trent@gmail.com \n2419 aarora@opensourceconnections.com ben.w.trent@gmail.com \n\n nb_distinct_ratings \n0 3 \n1 3 \n2 3 \n3 3 \n4 3 \n... ... \n2415 3 \n2416 3 \n2417 3 \n2418 3 \n2419 3 \n\n[2148 rows x 19 columns]", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
querydocidcharlie@flax.co.ukrating_0eschramma@cas.orgdtaivpp@gmail.comrating_1cmcollier@gmail.comrating_2jeff@vin.comcmarino@enterprise-knowledge.commsfroh@gmail.competer@searchintuition.commaximilian.werk@jina.airyan.finley@ferguson.comrater_0rater_1rater_2nb_distinct_ratings
0projector screen325961NaN3.0NaN3.0NaNNaNNaNNaNNaNNaNNaNNaNNaNepugh@opensourceconnections.comaarora@opensourceconnections.comben.w.trent@gmail.com3
1projector screen47471NaN3.0NaN3.0NaNNaNNaNNaNNaNNaNNaNNaNNaNepugh@opensourceconnections.comaarora@opensourceconnections.comben.w.trent@gmail.com3
2projector screen126679NaN3.0NaN3.0NaNNaNNaNNaNNaNNaNNaNNaNNaNepugh@opensourceconnections.comaarora@opensourceconnections.comben.w.trent@gmail.com3
3projector screen254441NaN3.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNepugh@opensourceconnections.comaarora@opensourceconnections.comben.w.trent@gmail.com3
4projector screen325958NaN3.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNepugh@opensourceconnections.comaarora@opensourceconnections.comben.w.trent@gmail.com3
............................................................
2415power supply1667352NaN0.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNepugh@opensourceconnections.comaarora@opensourceconnections.comben.w.trent@gmail.com3
2416power supply1667804NaN0.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNepugh@opensourceconnections.comaarora@opensourceconnections.comben.w.trent@gmail.com3
2417power supply1667752NaN0.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNepugh@opensourceconnections.comaarora@opensourceconnections.comben.w.trent@gmail.com3
2418power supply1667821NaN0.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNepugh@opensourceconnections.comaarora@opensourceconnections.comben.w.trent@gmail.com3
2419power supply1667357NaN0.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNepugh@opensourceconnections.comaarora@opensourceconnections.comben.w.trent@gmail.com3
\n

2148 rows × 19 columns

\n
" }, "metadata": {} } @@ -445,11 +431,11 @@ }, { "cell_type": "code", - "source": "print('All agree:')\ndf[df['nb_distinct_ratings']==1].sample(5)", + "source": "# We have none that everyone agrees on\nprint('All agree:')\ndf[df['nb_distinct_ratings']==1].sample(5)", "metadata": { "trusted": true }, - "execution_count": 19, + "execution_count": 79, "outputs": [ { "name": "stdout", @@ -462,7 +448,7 @@ "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[19], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mAll agree:\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[0;32m----> 2\u001b[0m \u001b[43mdf\u001b[49m\u001b[43m[\u001b[49m\u001b[43mdf\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mnb_distinct_ratings\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[38;5;241;43m==\u001b[39;49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m]\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msample\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m5\u001b[39;49m\u001b[43m)\u001b[49m\n", + "Cell \u001b[0;32mIn[79], line 3\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m# We have none that everyone agrees on\u001b[39;00m\n\u001b[1;32m 2\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mAll agree:\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[0;32m----> 3\u001b[0m 
\u001b[43mdf\u001b[49m\u001b[43m[\u001b[49m\u001b[43mdf\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mnb_distinct_ratings\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[38;5;241;43m==\u001b[39;49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m]\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msample\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m5\u001b[39;49m\u001b[43m)\u001b[49m\n", "File \u001b[0;32m/lib/python3.11/site-packages/pandas/core/generic.py:5773\u001b[0m, in \u001b[0;36mNDFrame.sample\u001b[0;34m(self, n, frac, replace, weights, random_state, axis, ignore_index)\u001b[0m\n\u001b[1;32m 5770\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m weights \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 5771\u001b[0m weights \u001b[38;5;241m=\u001b[39m sample\u001b[38;5;241m.\u001b[39mpreprocess_weights(\u001b[38;5;28mself\u001b[39m, weights, axis)\n\u001b[0;32m-> 5773\u001b[0m sampled_indices \u001b[38;5;241m=\u001b[39m \u001b[43msample\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msample\u001b[49m\u001b[43m(\u001b[49m\u001b[43mobj_len\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msize\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mreplace\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mweights\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mrs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 5774\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtake(sampled_indices, axis\u001b[38;5;241m=\u001b[39maxis)\n\u001b[1;32m 5776\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m ignore_index:\n", "File \u001b[0;32m/lib/python3.11/site-packages/pandas/core/sample.py:150\u001b[0m, in \u001b[0;36msample\u001b[0;34m(obj_len, size, replace, weights, random_state)\u001b[0m\n\u001b[1;32m 147\u001b[0m 
\u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 148\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mInvalid weights: weights sum to zero\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m--> 150\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mrandom_state\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mchoice\u001b[49m\u001b[43m(\u001b[49m\u001b[43mobj_len\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msize\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msize\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mreplace\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mreplace\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mp\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mweights\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241m.\u001b[39mastype(\n\u001b[1;32m 151\u001b[0m np\u001b[38;5;241m.\u001b[39mintp, copy\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m\n\u001b[1;32m 152\u001b[0m )\n", "File \u001b[0;32mmtrand.pyx:928\u001b[0m, in \u001b[0;36mnumpy.random.mtrand.RandomState.choice\u001b[0;34m()\u001b[0m\n", @@ -481,11 +467,11 @@ }, { "cell_type": "code", - "source": "print('Majority agree:')\ndf[df['nb_distinct_ratings']==2]", + "source": "# We have none\nprint('Majority agree:')\ndf[df['nb_distinct_ratings']==2]", "metadata": { "trusted": true }, - "execution_count": 20, + "execution_count": 80, "outputs": [ { "name": "stdout", @@ -493,11 +479,11 @@ "output_type": "stream" }, { - "execution_count": 20, + "execution_count": 80, "output_type": "execute_result", "data": { - "text/plain": "Empty DataFrame\nColumns: [query, docid, rating_0, rating_1, rating_2, rating_3, rating_4, rating_5, rating_6, rating_7, rating_8, rating_9, rating_10, rating_11, rating_12, rater_0, rater_1, rater_2, rater_3, rater_4, rater_5, rater_6, rater_7, rater_8, rater_9, rater_10, rater_11, rater_12, 
nb_distinct_ratings]\nIndex: []\n\n[0 rows x 29 columns]", - "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
querydocidrating_0rating_1rating_2rating_3rating_4rating_5rating_6rating_7...rater_4rater_5rater_6rater_7rater_8rater_9rater_10rater_11rater_12nb_distinct_ratings
\n

0 rows × 29 columns

\n
" + "text/plain": " query docid charlie@flax.co.uk rating_0 \\\n6 projector screen 549808 NaN 3.0 \n19 laptop 77031393 NaN 3.0 \n20 iphone 8 79283963 NaN 0.0 \n21 iphone 8 79284190 NaN 0.0 \n24 iphone 8 77911774 NaN 0.0 \n... ... ... ... ... \n1330 coffee 656359 NaN 3.0 \n1331 coffee 77265396 NaN 2.0 \n1334 coffee 2102472 NaN 2.0 \n1340 vans 77129498 NaN 0.0 \n1342 vans 77388459 NaN 0.0 \n\n eschramma@cas.org dtaivpp@gmail.com rating_1 cmcollier@gmail.com \\\n6 NaN NaN 3.0 NaN \n19 NaN NaN NaN NaN \n20 NaN NaN NaN NaN \n21 NaN NaN NaN NaN \n24 NaN NaN 0.0 NaN \n... ... ... ... ... \n1330 NaN NaN 3.0 NaN \n1331 NaN NaN 2.0 NaN \n1334 NaN NaN NaN NaN \n1340 NaN NaN 0.0 NaN \n1342 NaN NaN NaN NaN \n\n rating_2 jeff@vin.com cmarino@enterprise-knowledge.com \\\n6 NaN NaN NaN \n19 3.0 NaN NaN \n20 0.0 NaN NaN \n21 0.0 NaN NaN \n24 NaN NaN NaN \n... ... ... ... \n1330 NaN NaN NaN \n1331 NaN NaN NaN \n1334 2.0 NaN NaN \n1340 NaN NaN NaN \n1342 0.0 NaN NaN \n\n msfroh@gmail.com peter@searchintuition.com maximilian.werk@jina.ai \\\n6 NaN NaN NaN \n19 NaN NaN NaN \n20 NaN NaN NaN \n21 NaN NaN NaN \n24 NaN NaN NaN \n... ... ... ... \n1330 NaN NaN NaN \n1331 NaN NaN NaN \n1334 NaN NaN NaN \n1340 NaN NaN NaN \n1342 NaN NaN NaN \n\n ryan.finley@ferguson.com rater_0 \\\n6 NaN epugh@opensourceconnections.com \n19 NaN epugh@opensourceconnections.com \n20 NaN epugh@opensourceconnections.com \n21 NaN epugh@opensourceconnections.com \n24 NaN epugh@opensourceconnections.com \n... ... ... 
\n1330 NaN epugh@opensourceconnections.com \n1331 NaN epugh@opensourceconnections.com \n1334 NaN epugh@opensourceconnections.com \n1340 NaN epugh@opensourceconnections.com \n1342 NaN epugh@opensourceconnections.com \n\n rater_1 rater_2 \\\n6 aarora@opensourceconnections.com ben.w.trent@gmail.com \n19 aarora@opensourceconnections.com ben.w.trent@gmail.com \n20 aarora@opensourceconnections.com ben.w.trent@gmail.com \n21 aarora@opensourceconnections.com ben.w.trent@gmail.com \n24 aarora@opensourceconnections.com ben.w.trent@gmail.com \n... ... ... \n1330 aarora@opensourceconnections.com ben.w.trent@gmail.com \n1331 aarora@opensourceconnections.com ben.w.trent@gmail.com \n1334 aarora@opensourceconnections.com ben.w.trent@gmail.com \n1340 aarora@opensourceconnections.com ben.w.trent@gmail.com \n1342 aarora@opensourceconnections.com ben.w.trent@gmail.com \n\n nb_distinct_ratings \n6 2 \n19 2 \n20 2 \n21 2 \n24 2 \n... ... \n1330 2 \n1331 2 \n1334 2 \n1340 2 \n1342 2 \n\n[272 rows x 19 columns]", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
querydocidcharlie@flax.co.ukrating_0eschramma@cas.orgdtaivpp@gmail.comrating_1cmcollier@gmail.comrating_2jeff@vin.comcmarino@enterprise-knowledge.commsfroh@gmail.competer@searchintuition.commaximilian.werk@jina.airyan.finley@ferguson.comrater_0rater_1rater_2nb_distinct_ratings
6projector screen549808NaN3.0NaNNaN3.0NaNNaNNaNNaNNaNNaNNaNNaNepugh@opensourceconnections.comaarora@opensourceconnections.comben.w.trent@gmail.com2
19laptop77031393NaN3.0NaNNaNNaNNaN3.0NaNNaNNaNNaNNaNNaNepugh@opensourceconnections.comaarora@opensourceconnections.comben.w.trent@gmail.com2
20iphone 879283963NaN0.0NaNNaNNaNNaN0.0NaNNaNNaNNaNNaNNaNepugh@opensourceconnections.comaarora@opensourceconnections.comben.w.trent@gmail.com2
21iphone 879284190NaN0.0NaNNaNNaNNaN0.0NaNNaNNaNNaNNaNNaNepugh@opensourceconnections.comaarora@opensourceconnections.comben.w.trent@gmail.com2
24iphone 877911774NaN0.0NaNNaN0.0NaNNaNNaNNaNNaNNaNNaNNaNepugh@opensourceconnections.comaarora@opensourceconnections.comben.w.trent@gmail.com2
............................................................
1330coffee656359NaN3.0NaNNaN3.0NaNNaNNaNNaNNaNNaNNaNNaNepugh@opensourceconnections.comaarora@opensourceconnections.comben.w.trent@gmail.com2
1331coffee77265396NaN2.0NaNNaN2.0NaNNaNNaNNaNNaNNaNNaNNaNepugh@opensourceconnections.comaarora@opensourceconnections.comben.w.trent@gmail.com2
1334coffee2102472NaN2.0NaNNaNNaNNaN2.0NaNNaNNaNNaNNaNNaNepugh@opensourceconnections.comaarora@opensourceconnections.comben.w.trent@gmail.com2
1340vans77129498NaN0.0NaNNaN0.0NaNNaNNaNNaNNaNNaNNaNNaNepugh@opensourceconnections.comaarora@opensourceconnections.comben.w.trent@gmail.com2
1342vans77388459NaN0.0NaNNaNNaNNaN0.0NaNNaNNaNNaNNaNNaNepugh@opensourceconnections.comaarora@opensourceconnections.comben.w.trent@gmail.com2
\n

272 rows × 19 columns

\n
" }, "metadata": {} } @@ -516,7 +502,7 @@ "metadata": { "trusted": true }, - "execution_count": 21, + "execution_count": 81, "outputs": [ { "ename": "", @@ -524,7 +510,7 @@ "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[21], line 6\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;241m1\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mmax\u001b[39m(ratings) \u001b[38;5;241m-\u001b[39m \u001b[38;5;28mmin\u001b[39m(ratings) \u001b[38;5;241m>\u001b[39m\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m2\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;241m0\u001b[39m\n\u001b[1;32m 5\u001b[0m df[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mbig_discrepancy\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m=\u001b[39m df\u001b[38;5;241m.\u001b[39mapply(big_discrepancy, axis\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m1\u001b[39m)\n\u001b[0;32m----> 6\u001b[0m \u001b[43mdf\u001b[49m\u001b[43m[\u001b[49m\u001b[43mdf\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mbig_discrepancy\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[38;5;241;43m==\u001b[39;49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m]\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msample\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m2\u001b[39;49m\u001b[43m)\u001b[49m\n", + "Cell \u001b[0;32mIn[81], line 6\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;241m1\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mmax\u001b[39m(ratings) \u001b[38;5;241m-\u001b[39m \u001b[38;5;28mmin\u001b[39m(ratings) \u001b[38;5;241m>\u001b[39m\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m2\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;241m0\u001b[39m\n\u001b[1;32m 5\u001b[0m 
df[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mbig_discrepancy\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m=\u001b[39m df\u001b[38;5;241m.\u001b[39mapply(big_discrepancy, axis\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m1\u001b[39m)\n\u001b[0;32m----> 6\u001b[0m \u001b[43mdf\u001b[49m\u001b[43m[\u001b[49m\u001b[43mdf\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mbig_discrepancy\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[38;5;241;43m==\u001b[39;49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m]\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msample\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m2\u001b[39;49m\u001b[43m)\u001b[49m\n", "File \u001b[0;32m/lib/python3.11/site-packages/pandas/core/generic.py:5773\u001b[0m, in \u001b[0;36mNDFrame.sample\u001b[0;34m(self, n, frac, replace, weights, random_state, axis, ignore_index)\u001b[0m\n\u001b[1;32m 5770\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m weights \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 5771\u001b[0m weights \u001b[38;5;241m=\u001b[39m sample\u001b[38;5;241m.\u001b[39mpreprocess_weights(\u001b[38;5;28mself\u001b[39m, weights, axis)\n\u001b[0;32m-> 5773\u001b[0m sampled_indices \u001b[38;5;241m=\u001b[39m \u001b[43msample\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msample\u001b[49m\u001b[43m(\u001b[49m\u001b[43mobj_len\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msize\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mreplace\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mweights\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mrs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 5774\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtake(sampled_indices, axis\u001b[38;5;241m=\u001b[39maxis)\n\u001b[1;32m 5776\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m 
ignore_index:\n", "File \u001b[0;32m/lib/python3.11/site-packages/pandas/core/sample.py:150\u001b[0m, in \u001b[0;36msample\u001b[0;34m(obj_len, size, replace, weights, random_state)\u001b[0m\n\u001b[1;32m 147\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 148\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mInvalid weights: weights sum to zero\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m--> 150\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mrandom_state\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mchoice\u001b[49m\u001b[43m(\u001b[49m\u001b[43mobj_len\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msize\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msize\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mreplace\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mreplace\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mp\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mweights\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241m.\u001b[39mastype(\n\u001b[1;32m 151\u001b[0m np\u001b[38;5;241m.\u001b[39mintp, copy\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m\n\u001b[1;32m 152\u001b[0m )\n", "File \u001b[0;32mmtrand.pyx:928\u001b[0m, in \u001b[0;36mnumpy.random.mtrand.RandomState.choice\u001b[0;34m()\u001b[0m\n", @@ -547,7 +533,7 @@ "metadata": { "trusted": true }, - "execution_count": 22, + "execution_count": 82, "outputs": [], "id": "6d8a99b9-f823-4829-aed8-9e376a0dfa73" }, @@ -557,15 +543,22 @@ "metadata": { "trusted": true }, - "execution_count": 23, + "execution_count": 83, "outputs": [ { - "output_type": "display_data", - "data": { - "text/plain": "
", - "image/png": "\n" - }, - "metadata": {} + "ename": "", + "evalue": "Input y_true contains NaN.", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[83], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01msklearn\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmetrics\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m confusion_matrix, ConfusionMatrixDisplay\n\u001b[0;32m----> 2\u001b[0m cm \u001b[38;5;241m=\u001b[39m \u001b[43mconfusion_matrix\u001b[49m\u001b[43m(\u001b[49m\u001b[43my1\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43my2\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mlabels\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mrange\u001b[39;49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mlen\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mratings\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 3\u001b[0m disp \u001b[38;5;241m=\u001b[39m ConfusionMatrixDisplay(confusion_matrix\u001b[38;5;241m=\u001b[39mcm,\n\u001b[1;32m 4\u001b[0m display_labels\u001b[38;5;241m=\u001b[39mratings)\n\u001b[1;32m 5\u001b[0m plt\u001b[38;5;241m.\u001b[39mfigure(figsize\u001b[38;5;241m=\u001b[39m(\u001b[38;5;241m8\u001b[39m,\u001b[38;5;241m8\u001b[39m))\n", + "File \u001b[0;32m/lib/python3.11/site-packages/sklearn/metrics/_classification.py:317\u001b[0m, in \u001b[0;36mconfusion_matrix\u001b[0;34m(y_true, y_pred, labels, sample_weight, normalize)\u001b[0m\n\u001b[1;32m 232\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mconfusion_matrix\u001b[39m(\n\u001b[1;32m 233\u001b[0m y_true, y_pred, \u001b[38;5;241m*\u001b[39m, labels\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m, sample_weight\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m, 
normalize\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 234\u001b[0m ):\n\u001b[1;32m 235\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Compute confusion matrix to evaluate the accuracy of a classification.\u001b[39;00m\n\u001b[1;32m 236\u001b[0m \n\u001b[1;32m 237\u001b[0m \u001b[38;5;124;03m By definition a confusion matrix :math:`C` is such that :math:`C_{i, j}`\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 315\u001b[0m \u001b[38;5;124;03m (0, 2, 1, 1)\u001b[39;00m\n\u001b[1;32m 316\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m--> 317\u001b[0m y_type, y_true, y_pred \u001b[38;5;241m=\u001b[39m \u001b[43m_check_targets\u001b[49m\u001b[43m(\u001b[49m\u001b[43my_true\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43my_pred\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 318\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m y_type \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m (\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mbinary\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmulticlass\u001b[39m\u001b[38;5;124m\"\u001b[39m):\n\u001b[1;32m 319\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[38;5;124m is not supported\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;241m%\u001b[39m y_type)\n", + "File \u001b[0;32m/lib/python3.11/site-packages/sklearn/metrics/_classification.py:87\u001b[0m, in \u001b[0;36m_check_targets\u001b[0;34m(y_true, y_pred)\u001b[0m\n\u001b[1;32m 60\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"Check that y_true and y_pred belong to the same classification task.\u001b[39;00m\n\u001b[1;32m 61\u001b[0m \n\u001b[1;32m 62\u001b[0m \u001b[38;5;124;03mThis converts multiclass or binary types to a common shape, and raises a\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 84\u001b[0m 
\u001b[38;5;124;03my_pred : array or indicator matrix\u001b[39;00m\n\u001b[1;32m 85\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 86\u001b[0m check_consistent_length(y_true, y_pred)\n\u001b[0;32m---> 87\u001b[0m type_true \u001b[38;5;241m=\u001b[39m \u001b[43mtype_of_target\u001b[49m\u001b[43m(\u001b[49m\u001b[43my_true\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43minput_name\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43my_true\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 88\u001b[0m type_pred \u001b[38;5;241m=\u001b[39m type_of_target(y_pred, input_name\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124my_pred\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 90\u001b[0m y_type \u001b[38;5;241m=\u001b[39m {type_true, type_pred}\n", + "File \u001b[0;32m/lib/python3.11/site-packages/sklearn/utils/multiclass.py:381\u001b[0m, in \u001b[0;36mtype_of_target\u001b[0;34m(y, input_name)\u001b[0m\n\u001b[1;32m 379\u001b[0m data \u001b[38;5;241m=\u001b[39m y\u001b[38;5;241m.\u001b[39mdata \u001b[38;5;28;01mif\u001b[39;00m issparse(y) \u001b[38;5;28;01melse\u001b[39;00m y\n\u001b[1;32m 380\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m xp\u001b[38;5;241m.\u001b[39many(data \u001b[38;5;241m!=\u001b[39m data\u001b[38;5;241m.\u001b[39mastype(\u001b[38;5;28mint\u001b[39m)):\n\u001b[0;32m--> 381\u001b[0m \u001b[43m_assert_all_finite\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdata\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43minput_name\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43minput_name\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 382\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcontinuous\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;241m+\u001b[39m suffix\n\u001b[1;32m 384\u001b[0m \u001b[38;5;66;03m# Check multiclass\u001b[39;00m\n", + "File 
\u001b[0;32m/lib/python3.11/site-packages/sklearn/utils/validation.py:161\u001b[0m, in \u001b[0;36m_assert_all_finite\u001b[0;34m(X, allow_nan, msg_dtype, estimator_name, input_name)\u001b[0m\n\u001b[1;32m 144\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m estimator_name \u001b[38;5;129;01mand\u001b[39;00m input_name \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mX\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01mand\u001b[39;00m has_nan_error:\n\u001b[1;32m 145\u001b[0m \u001b[38;5;66;03m# Improve the error message on how to handle missing values in\u001b[39;00m\n\u001b[1;32m 146\u001b[0m \u001b[38;5;66;03m# scikit-learn.\u001b[39;00m\n\u001b[1;32m 147\u001b[0m msg_err \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m (\n\u001b[1;32m 148\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;132;01m{\u001b[39;00mestimator_name\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m does not accept missing values\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 149\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m encoded as NaN natively. For supervised learning, you might want\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 159\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m#estimators-that-handle-nan-values\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 160\u001b[0m )\n\u001b[0;32m--> 161\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(msg_err)\n", + "\u001b[0;31mValueError\u001b[0m: Input y_true contains NaN." + ], + "output_type": "error" } ], "id": "1a4f7484-185c-42a3-8931-32a28a6d6964"