-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Update the data extraction to process the csv book variant.
- Loading branch information
David Fisher
committed
Jan 17, 2025
1 parent
84d89eb
commit d650256
Showing
1 changed file
with
161 additions
and
216 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,217 +1,162 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "markdown", | ||
"id": "bd7e4efa-eb00-451e-984d-ed6646d8e25f", | ||
"metadata": {}, | ||
"source": [ | ||
"# Fleiss' Kappa \n", | ||
"To understand how much your judges agree with each other. It is meant to be used with more than two judges.\n", | ||
"\n", | ||
"Read https://www.datanovia.com/en/blog/kappa-coefficient-interpretation/ to learn more.\n", | ||
"\n", | ||
"Please copy this example and customize it for your own purposes!" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"id": "e3412382", | ||
"metadata": {}, | ||
"source": [ | ||
"## Imports" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 1, | ||
"id": "4972936a", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"import pandas as pd\n", | ||
"from js import fetch\n", | ||
"import json\n", | ||
"\n", | ||
"from collections import defaultdict\n", | ||
"from statsmodels.stats.inter_rater import aggregate_raters\n", | ||
"from statsmodels.stats.inter_rater import fleiss_kappa\n", | ||
"from IPython.display import display, Markdown" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"id": "6da26c5e", | ||
"metadata": {}, | ||
"source": [ | ||
"## Step 0: Configuration" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 2, | ||
"id": "71803a49-4065-4adf-a69e-cb0fe2d00f22", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"QUEPID_BOOK_NUM = 25" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"id": "420416df-9e6a-41b4-987b-7a03c9dd38b3", | ||
"metadata": {}, | ||
"source": [ | ||
"## Step 1: Download the Quepid Book" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 3, | ||
"id": "31193536-98eb-4b46-ab98-af04ee07c6d3", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Generic GET call to a JSON endpoint \n", | ||
"async def get_json(url):\n", | ||
" resp = await fetch(url)\n", | ||
" resp_text = await resp.text()\n", | ||
" return json.loads(resp_text)\n", | ||
"\n", | ||
"async def get_text(url):\n", | ||
" resp = await fetch(url)\n", | ||
" resp_text = await resp.text()\n", | ||
" return resp_text\n" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "8fef6231-daa8-467f-ac57-13a144e8a356", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"data = await get_text(f'/api/books/{QUEPID_BOOK_NUM}.csv')" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"id": "79d985ad-cd11-44a9-a7e1-0851bc99aef3", | ||
"metadata": {}, | ||
"source": [ | ||
"## Step 2: Extract and Prepare Data" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "9a8561fd-2dbf-477e-9ac1-4df6d5ebdc91", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Initialize a list to hold the tuples of (doc_id, rating, count)\n", | ||
"ratings_data = []\n", | ||
"\n", | ||
"# Iterate through each query-doc pair\n", | ||
"for pair in data['query_doc_pairs']:\n", | ||
" # Initialize a dictionary to count the ratings for this pair\n", | ||
" ratings_count = defaultdict(int)\n", | ||
" \n", | ||
" # Extract judgements and count the ratings\n", | ||
" for judgement in pair['judgements']:\n", | ||
" rating = judgement['rating']\n", | ||
" ratings_count[rating] += 1\n", | ||
"\n", | ||
" # Append the counts to the ratings_data list\n", | ||
" for rating, count in ratings_count.items():\n", | ||
" ratings_data.append((pair['doc_id'], rating, count))\n" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"id": "caf5632b-132a-4e1b-80fe-c8c5ab7f2f3a", | ||
"metadata": {}, | ||
"source": [ | ||
"## Step 3: Aggregate Raters' Data" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "a7598308-129b-4628-ad3a-fc3d703f8205", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Convert ratings_data to a DataFrame\n", | ||
"df = pd.DataFrame(ratings_data, columns=['doc_id', 'rating', 'count'])\n", | ||
"\n", | ||
"# Use crosstab to create a contingency table\n", | ||
"data_crosstab = pd.crosstab(index=df['doc_id'], columns=df['rating'], values=df['count'], aggfunc='sum')\n", | ||
"\n", | ||
"# Drop any rows missing judgements\n", | ||
"data_crosstab = data_crosstab.dropna(how='any')\n", | ||
"\n", | ||
"# Convert the DataFrame to the format expected by aggregate_raters\n", | ||
"data_for_aggregation = data_crosstab.values\n", | ||
"\n", | ||
"# Aggregate the raters' data\n", | ||
"table, _ = aggregate_raters(data_for_aggregation)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"id": "25c79fbc", | ||
"metadata": {}, | ||
"source": [ | ||
"## Step 4: Compute Fleiss' Kappa" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "25a613f9", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"kappa = fleiss_kappa(table, method='fleiss')\n", | ||
"display(Markdown(f\"## Fleiss' Kappa: {kappa:.4f}\"))" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"id": "5704579e-2321-4629-8de0-6608b428e2b6", | ||
"metadata": {}, | ||
"source": [ | ||
"_This notebook was last updated 16_January_2025_" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "7203f6cc-c068-4f75-a59a-1f49c5555319", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Python 3 (ipykernel)", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.12.8" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 5 | ||
} | ||
"metadata": { | ||
"kernelspec": { | ||
"name": "python", | ||
"display_name": "Python (Pyodide)", | ||
"language": "python" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "python", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.8" | ||
} | ||
}, | ||
"nbformat_minor": 5, | ||
"nbformat": 4, | ||
"cells": [ | ||
{ | ||
"cell_type": "markdown", | ||
"source": "# Fleiss' Kappa \nTo understand how much your judges agree with each other. It is meant to be used with more than two judges.\n\nRead https://www.datanovia.com/en/blog/kappa-coefficient-interpretation/ to learn more.\n\nPlease copy this example and customize it for your own purposes!", | ||
"metadata": {}, | ||
"id": "bd7e4efa-eb00-451e-984d-ed6646d8e25f" | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"source": "## Imports", | ||
"metadata": {}, | ||
"id": "e3412382" | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"source": "import pandas as pd\nfrom js import fetch\nimport json\n\nfrom collections import defaultdict\nfrom statsmodels.stats.inter_rater import aggregate_raters\nfrom statsmodels.stats.inter_rater import fleiss_kappa\nfrom IPython.display import display, Markdown", | ||
"metadata": { | ||
"trusted": true | ||
}, | ||
"execution_count": 1, | ||
"outputs": [], | ||
"id": "4972936a" | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"source": "## Step 0: Configuration", | ||
"metadata": {}, | ||
"id": "6da26c5e" | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"source": "QUEPID_BOOK_NUM = 25", | ||
"metadata": { | ||
"trusted": true | ||
}, | ||
"execution_count": 2, | ||
"outputs": [], | ||
"id": "71803a49-4065-4adf-a69e-cb0fe2d00f22" | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"source": "## Step 1: Download the Quepid Book", | ||
"metadata": {}, | ||
"id": "420416df-9e6a-41b4-987b-7a03c9dd38b3" | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"source": "async def get_text(url):\n resp = await fetch(url)\n resp_text = await resp.text()\n return resp_text", | ||
"metadata": { | ||
"trusted": true | ||
}, | ||
"execution_count": 3, | ||
"outputs": [], | ||
"id": "31193536-98eb-4b46-ab98-af04ee07c6d3" | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"source": "data = await get_text(f'/api/books/{QUEPID_BOOK_NUM}.csv')", | ||
"metadata": { | ||
"trusted": true | ||
}, | ||
"execution_count": 4, | ||
"outputs": [], | ||
"id": "8fef6231-daa8-467f-ac57-13a144e8a356" | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"source": "## Step 2: Extract and Prepare Data", | ||
"metadata": {}, | ||
"id": "79d985ad-cd11-44a9-a7e1-0851bc99aef3" | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"source": "from io import StringIO\ndf = pd.read_csv(StringIO(data))\ndf", | ||
"metadata": { | ||
"trusted": true | ||
}, | ||
"execution_count": 5, | ||
"outputs": [ | ||
{ | ||
"execution_count": 5, | ||
"output_type": "execute_result", | ||
"data": { | ||
"text/plain": " query docid David Tippett Eric Pugh Atita Arora \\\n0 projector screen 325961 3.0 3.0 NaN \n1 projector screen 47471 3.0 3.0 NaN \n2 projector screen 126679 3.0 3.0 NaN \n3 projector screen 254441 NaN 3.0 NaN \n4 projector screen 325958 NaN 3.0 NaN \n... ... ... ... ... ... \n2415 power supply 1667352 NaN 0.0 NaN \n2416 power supply 1667804 NaN 0.0 NaN \n2417 power supply 1667752 NaN 0.0 NaN \n2418 power supply 1667821 NaN 0.0 NaN \n2419 power supply 1667357 NaN 0.0 NaN \n\n Cody Collier Benjamin Trent Jeff Alexander Chris Marino \\\n0 NaN NaN NaN NaN \n1 NaN NaN NaN NaN \n2 NaN NaN NaN NaN \n3 NaN NaN NaN NaN \n4 NaN NaN NaN NaN \n... ... ... ... ... \n2415 NaN NaN NaN NaN \n2416 NaN NaN NaN NaN \n2417 NaN NaN NaN NaN \n2418 NaN NaN NaN NaN \n2419 NaN NaN NaN NaN \n\n [email protected] Michael Froh [email protected] \\\n0 NaN NaN NaN \n1 NaN NaN NaN \n2 NaN NaN NaN \n3 NaN NaN NaN \n4 NaN NaN NaN \n... ... ... ... \n2415 NaN NaN NaN \n2416 NaN NaN NaN \n2417 NaN NaN NaN \n2418 NaN NaN NaN \n2419 NaN NaN NaN \n\n Maximilian Werk David Fisher Ryan Finley Erica Schramma Peter Fries \n0 NaN NaN NaN NaN NaN \n1 NaN NaN NaN NaN NaN \n2 NaN NaN NaN NaN NaN \n3 NaN NaN NaN NaN NaN \n4 NaN NaN NaN NaN NaN \n... ... ... ... ... ... \n2415 NaN NaN NaN NaN NaN \n2416 NaN NaN NaN NaN NaN \n2417 NaN NaN NaN NaN NaN \n2418 NaN NaN NaN NaN NaN \n2419 NaN NaN NaN NaN NaN \n\n[2420 rows x 17 columns]", | ||
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>query</th>\n <th>docid</th>\n <th>David Tippett</th>\n <th>Eric Pugh</th>\n <th>Atita Arora</th>\n <th>Cody Collier</th>\n <th>Benjamin Trent</th>\n <th>Jeff Alexander</th>\n <th>Chris Marino</th>\n <th>[email protected]</th>\n <th>Michael Froh</th>\n <th>[email protected]</th>\n <th>Maximilian Werk</th>\n <th>David Fisher</th>\n <th>Ryan Finley</th>\n <th>Erica Schramma</th>\n <th>Peter Fries</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>projector screen</td>\n <td>325961</td>\n <td>3.0</td>\n <td>3.0</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n </tr>\n <tr>\n <th>1</th>\n <td>projector screen</td>\n <td>47471</td>\n <td>3.0</td>\n <td>3.0</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n </tr>\n <tr>\n <th>2</th>\n <td>projector screen</td>\n <td>126679</td>\n <td>3.0</td>\n <td>3.0</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n </tr>\n <tr>\n <th>3</th>\n <td>projector screen</td>\n <td>254441</td>\n <td>NaN</td>\n <td>3.0</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n </tr>\n <tr>\n <th>4</th>\n <td>projector screen</td>\n <td>325958</td>\n <td>NaN</td>\n <td>3.0</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n </tr>\n <tr>\n <th>...</th>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n </tr>\n <tr>\n <th>2415</th>\n <td>power supply</td>\n <td>1667352</td>\n <td>NaN</td>\n <td>0.0</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n </tr>\n <tr>\n <th>2416</th>\n <td>power supply</td>\n <td>1667804</td>\n <td>NaN</td>\n <td>0.0</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n </tr>\n <tr>\n <th>2417</th>\n <td>power supply</td>\n <td>1667752</td>\n <td>NaN</td>\n <td>0.0</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n </tr>\n <tr>\n <th>2418</th>\n <td>power supply</td>\n <td>1667821</td>\n <td>NaN</td>\n <td>0.0</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n </tr>\n <tr>\n <th>2419</th>\n <td>power supply</td>\n <td>1667357</td>\n <td>NaN</td>\n <td>0.0</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n </tr>\n </tbody>\n</table>\n<p>2420 rows × 17 columns</p>\n</div>" | ||
}, | ||
"metadata": {} | ||
} | ||
], | ||
"id": "3ab80ff9-a9cb-4007-80e6-3d79a8bf762b" | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"source": "## Step 3: Aggregate Raters' Data", | ||
"metadata": {}, | ||
"id": "caf5632b-132a-4e1b-80fe-c8c5ab7f2f3a" | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"source": "# Count the ratings values\nraters = list(df.columns[2:])\ndf['judgments'] = df[raters].values.tolist()\ndf['judgments'] = df['judgments'].apply(lambda x: pd.Series(x).dropna().tolist())\nrated = df[['query', 'docid', 'judgments']].explode('judgments')\nrated['count'] = rated.groupby(['query', 'docid'])['judgments'].transform('count')\n\n# Use crosstab to create a contingency table\ndata_crosstab = pd.crosstab(index=rated['docid'], columns=rated['judgments'], values=rated['count'], aggfunc='sum')\n\n# Drop any rows missing judgements\ndata_crosstab = data_crosstab.dropna(how='any')\n\n# Convert the DataFrame to the format expected by aggregate_raters\ndata_for_aggregation = data_crosstab.values\n\n# Aggregate the raters' data\ntable, _ = aggregate_raters(data_for_aggregation)", | ||
"metadata": { | ||
"trusted": true | ||
}, | ||
"execution_count": 6, | ||
"outputs": [], | ||
"id": "b170c198-ef0f-4974-bb4e-4b4311965c1e" | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"source": "## Step 4: Compute Fleiss' Kappa", | ||
"metadata": {}, | ||
"id": "25c79fbc" | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"source": "kappa = fleiss_kappa(table, method='fleiss')\ndisplay(Markdown(f\"## Fleiss' Kappa: {kappa:.4f}\"))", | ||
"metadata": { | ||
"trusted": true | ||
}, | ||
"execution_count": 7, | ||
"outputs": [ | ||
{ | ||
"output_type": "display_data", | ||
"data": { | ||
"text/plain": "<IPython.core.display.Markdown object>", | ||
"text/markdown": "## Fleiss' Kappa: -0.2632" | ||
}, | ||
"metadata": {} | ||
} | ||
], | ||
"id": "25a613f9" | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"source": "_This notebook was last updated 17-JAN-2025_", | ||
"metadata": {}, | ||
"id": "5704579e-2321-4629-8de0-6608b428e2b6" | ||
} | ||
] | ||
} |