Skip to content

Commit

Permalink
update drug response notebook
Browse files Browse the repository at this point in the history
  • Loading branch information
colleenXu committed Mar 23, 2022
1 parent 2c1a970 commit 860c20c
Showing 1 changed file with 98 additions and 30 deletions.
128 changes: 98 additions & 30 deletions ExploringDrugResponseData.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,11 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 2,
"metadata": {
"ExecuteTime": {
"end_time": "2022-03-23T06:07:20.597680Z",
"start_time": "2022-03-23T06:07:20.311775Z"
"end_time": "2022-03-23T06:23:06.680479Z",
"start_time": "2022-03-23T06:23:06.320579Z"
}
},
"outputs": [],
Expand All @@ -40,11 +40,11 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 3,
"metadata": {
"ExecuteTime": {
"end_time": "2022-03-23T06:07:44.110657Z",
"start_time": "2022-03-23T06:07:21.604415Z"
"end_time": "2022-03-23T06:23:30.352564Z",
"start_time": "2022-03-23T06:23:07.123019Z"
}
},
"outputs": [
Expand All @@ -63,7 +63,7 @@
" dtype='object')"
]
},
"execution_count": 2,
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
},
Expand All @@ -73,7 +73,7 @@
"(5667705, 22)"
]
},
"execution_count": 2,
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -88,11 +88,11 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 4,
"metadata": {
"ExecuteTime": {
"end_time": "2022-03-23T06:08:28.760954Z",
"start_time": "2022-03-23T06:08:20.850820Z"
"end_time": "2022-03-23T06:23:44.392619Z",
"start_time": "2022-03-23T06:23:36.346842Z"
}
},
"outputs": [
Expand All @@ -102,29 +102,30 @@
"(5667705, 22)"
]
},
"execution_count": 3,
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"## are there lines where everything is duplicated? no\n",
"## are there lines where every value is duplicated? no\n",
"\n",
"drug_response.drop_duplicates().shape"
]
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 5,
"metadata": {
"ExecuteTime": {
"end_time": "2022-03-23T06:09:01.990867Z",
"start_time": "2022-03-23T06:08:59.426279Z"
"end_time": "2022-03-23T06:23:48.558269Z",
"start_time": "2022-03-23T06:23:45.982333Z"
}
},
"outputs": [],
"source": [
"## there are lines where everything we focus on is duplicated\n",
"## there are lines where everything we focus on is duplicated: subject, object, predicate, disease context\n",
"## yes\n",
"duplicates = drug_response[drug_response.duplicated(subset=[\"Subject_Ensembl_gene_ID\",\n",
" \"Subject_NCBI_Gene_ID\",\n",
" \"Object_id\",\n",
Expand All @@ -134,11 +135,36 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 6,
"metadata": {
"ExecuteTime": {
"end_time": "2022-03-23T06:23:54.880837Z",
"start_time": "2022-03-23T06:23:54.876704Z"
}
},
"outputs": [
{
"data": {
"text/plain": [
"(10768, 22)"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"duplicates.shape"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"ExecuteTime": {
"end_time": "2022-03-23T06:09:04.608770Z",
"start_time": "2022-03-23T06:09:04.591452Z"
"end_time": "2022-03-23T06:23:56.694550Z",
"start_time": "2022-03-23T06:23:56.675307Z"
}
},
"outputs": [],
Expand All @@ -153,11 +179,11 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 8,
"metadata": {
"ExecuteTime": {
"end_time": "2022-03-23T06:09:05.429827Z",
"start_time": "2022-03-23T06:09:05.410093Z"
"end_time": "2022-03-23T06:23:57.499037Z",
"start_time": "2022-03-23T06:23:57.479394Z"
}
},
"outputs": [
Expand Down Expand Up @@ -398,7 +424,7 @@
"[5 rows x 22 columns]"
]
},
"execution_count": 6,
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -415,27 +441,69 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 12,
"metadata": {
"ExecuteTime": {
"end_time": "2022-03-23T06:26:13.234740Z",
"start_time": "2022-03-23T06:26:13.214329Z"
}
},
"outputs": [],
"source": [
"## sets of duplicates: 5384 \n",
"\n",
"sets_of_dups = duplicates.value_counts(subset = [\"Subject_Ensembl_gene_ID\",\"Subject_NCBI_Gene_ID\",\n",
" \"Object_id\",\"Predicate\", \"Edge_attribute_MONDO_ID\"])"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"ExecuteTime": {
"end_time": "2022-03-23T06:09:37.986230Z",
"start_time": "2022-03-23T06:09:37.982258Z"
"end_time": "2022-03-23T06:26:16.342056Z",
"start_time": "2022-03-23T06:26:16.338543Z"
}
},
"outputs": [
{
"data": {
"text/plain": [
"(10768, 22)"
"(5384,)"
]
},
"execution_count": 8,
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"duplicates.shape"
"sets_of_dups.shape"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"ExecuteTime": {
"end_time": "2022-03-23T06:26:39.157796Z",
"start_time": "2022-03-23T06:26:39.154206Z"
}
},
"outputs": [
{
"data": {
"text/plain": [
"10768"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"5384 *2 ## so these are likely 5384 pairs (2 duplicates for each set)"
]
},
{
Expand Down

0 comments on commit 860c20c

Please sign in to comment.