forked from fivethirtyeight/data
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
fb54c1e
commit 26bbe40
Showing
18 changed files
with
27,935 additions
and
0 deletions.
There are no files selected for viewing
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
{ | ||
"path" : "~/editing/dieting-project", | ||
"sortOrder" : [ | ||
{ | ||
"ascending" : true, | ||
"columnIndex" : 2 | ||
} | ||
] | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
{ | ||
"activeTab" : 0 | ||
} |
14 changes: 14 additions & 0 deletions
14
nutrition-studies/.Rproj.user/1ED50CBF/pcs/windowlayoutstate.pper
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
{ | ||
"left" : { | ||
"panelheight" : 674, | ||
"splitterpos" : 283, | ||
"topwindowstate" : "NORMAL", | ||
"windowheight" : 713 | ||
}, | ||
"right" : { | ||
"panelheight" : 674, | ||
"splitterpos" : 421, | ||
"topwindowstate" : "NORMAL", | ||
"windowheight" : 713 | ||
} | ||
} |
4 changes: 4 additions & 0 deletions
4
nutrition-studies/.Rproj.user/1ED50CBF/pcs/workbench-pane.pper
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
{ | ||
"TabSet1" : 0, | ||
"TabSet2" : 3 | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
{"active_set":"","sets":[]} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
{ | ||
"tempName" : "Untitled1" | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
{ | ||
"tempName" : "Untitled1" | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
{ | ||
"tempName" : "Untitled1" | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
{ | ||
"tempName" : "Untitled1" | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
~%2Fediting%2Fdieting-project%2Fdiet_p_hacking.R="194B460A" | ||
~%2Fediting%2Fdieting-project%2Fscratch_work.R="27AAD271" | ||
~%2Fprivate-data%2Fdieting-project%2Fdiet_p_hacking.R="DDD9C5D8" | ||
~%2Fprivate-data%2Fdieting-project%2Fdiet_p_hacking_final.R="E17005B0" |
25 changes: 25 additions & 0 deletions
25
nutrition-studies/.Rproj.user/1ED50CBF/sdb/s-8D6D153C/3EF212EB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
{ | ||
"contents" : "", | ||
"created" : 1452009684761.000, | ||
"dirty" : false, | ||
"encoding" : "", | ||
"folds" : "", | ||
"hash" : "0", | ||
"id" : "3EF212EB", | ||
"lastKnownWriteTime" : 140735085965160, | ||
"path" : null, | ||
"project_path" : null, | ||
"properties" : { | ||
"cacheKey" : "l91i3fvvp3", | ||
"caption" : "regAnalysis", | ||
"contentUrl" : "grid_resource/gridviewer.html?env=&obj=regAnalysis&cache_key=l91i3fvvp3", | ||
"displayedObservations" : "27716", | ||
"environment" : "", | ||
"object" : "regAnalysis", | ||
"totalObservations" : "27716", | ||
"variables" : "3" | ||
}, | ||
"relative_order" : 2, | ||
"source_on_save" : false, | ||
"type" : "r_dataframe" | ||
} |
18 changes: 18 additions & 0 deletions
18
nutrition-studies/.Rproj.user/1ED50CBF/sdb/s-8D6D153C/BC3CEA54
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
{ | ||
"contents" : "# Diet P-hacking\n# Andrew Flowers <[email protected]>\n\nsetwd(\"~/private-data//dieting-project/\")\n\nrequire(readr)\nrequire(plyr)\nrequire(dplyr)\nrequire(tidyr)\n\nrawData <- read.csv(\"raw_anonymized_data.csv\")\n\n# Fix innie/out characteristics\n\nrawData$belly <- revalue(rawData$belly, c(\"Innie\"=\"Yes\", \"Outie\"=\"No\"))\n\n# FFQ variable names (should total 1066)\n\nffq <- names(rawData)[28:1093]\n\n# Characteristic variable names (should total 26)\n\ncharacteristics <- names(rawData)[2:27]\n\n# Linear regressions with respondent characteristic predicting food frequency\n\nregValues <- data.frame(food=ffq)\n\nfor (c in characteristics) regValues[,c] <- NA # Add characteristics as blank columns to regValues data frame\n\nfor (f in ffq){\n for (c in characteristics){\n \n frm <- formula(paste0(f, \"~\", c))\n reg <- summary(lm(data=rawData, formula=frm))\n regValues[which(regValues$food==f), c] <- reg$coefficients[8]\n }\n}\n\n# Extract p-values\n\nregAnalysis <- regValues %>%\n gather(\"characteristic\", \"p_values\", 2:27) %>% \n arrange(p_values)\n\n# Write out p-values\n\nwrite_csv(regAnalysis, \"p_values_analysis.csv\")\n\n# Note: This is an intentionally shady regression analysis. Both because of the \"p-hacking\" or \n# \"data mining\" behind running over 27,000 regresison, but also in that only the statistics reported \n# were the p-values of the characteristics (the independent variables).\n\n# IN OTHER WORDS: DO NOT TRY THIS AT HOME (AKA, THIS IS NOT AN EXAMPLE OF SOUND DATA ANALYSIS)\n", | ||
"created" : 1450735440535.000, | ||
"dirty" : false, | ||
"encoding" : "UTF-8", | ||
"folds" : "", | ||
"hash" : "2220922350", | ||
"id" : "BC3CEA54", | ||
"lastKnownWriteTime" : 1452010952, | ||
"path" : "~/private-data/dieting-project/diet_p_hacking_final.R", | ||
"project_path" : "diet_p_hacking_final.R", | ||
"properties" : { | ||
"tempName" : "Untitled1" | ||
}, | ||
"relative_order" : 1, | ||
"source_on_save" : false, | ||
"type" : "r_source" | ||
} |
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
### Nutrition Studies | ||
|
||
This directory contains data and code behind the story [You Can’t Trust What You Read About Nutrition](http://fivethirtyeight.com/features/you-cant-trust-what-you-read-about-nutrition). | ||
|
||
Many studies of diet and nutrition include multiple variables with vast amounts of data, making it easy to p-hack your way to sexy (and false) results. We learned this firsthand when we invited readers to take a survey about their eating habits known as the food frequency questionnaire and answer a few other questions about themselves. We ended up with 54 complete responses and looked for associations much as researchers look for links between foods and dreaded diseases. It was easy to find them. | ||
|
||
*Warning*: This is evil (statistical) work. Do not go to the dark side. Do not try this at home. | ||
|
||
This directory contains three files: | ||
|
||
File | Description | ||
--- | ----- | ||
`raw_anonymized_data.csv` | The FFQ and survey data from 54 respondents | ||
`p_hacking_final.R` | An R script that performs 27,716 regressions | ||
`p_values_analysis.csv` | The output data file listing the p-values | ||
|
||
**Note:** This is an intentionally shady regression analysis, both because of the "p-hacking" or "data mining" behind running more than 27,000 regressions and because the only statistics reported were the p-values of the characteristics (the independent variables). | ||
|
||
**IN OTHER WORDS: THIS IS NOT AN EXAMPLE OF SOUND DATA ANALYSIS.** |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
# Diet P-hacking
# Andrew Flowers <[email protected]>
#
# Regresses every food frequency questionnaire (FFQ) variable on every
# respondent characteristic, one pair at a time, collects the p-value of the
# characteristic from each fit, and writes all p-values (sorted ascending) to
# "p_values_analysis.csv".

# NOTE(review): machine-specific path, kept so the script keeps reading and
# writing in the same place. Point it at the directory that holds
# raw_anonymized_data.csv before running elsewhere.
setwd("~/private-data//dieting-project/")

# library() stops right away when a package is missing; require() would only
# return FALSE and let the script fail later with a less helpful error.
# plyr is attached before dplyr so that dplyr's verbs are the ones on top.
library(readr)
library(plyr)
library(dplyr)
library(tidyr)

rawData <- read.csv("raw_anonymized_data.csv")

# The column ranges below are positional. Fail early if the file is narrower
# than expected instead of silently building formulas from NA column names.
stopifnot(ncol(rawData) >= 1093)

# Fix innie/outie characteristics

rawData$belly <- revalue(rawData$belly, c("Innie" = "Yes", "Outie" = "No"))

# FFQ variable names (should total 1066)

ffq <- names(rawData)[28:1093]

# Characteristic variable names (should total 26)

characteristics <- names(rawData)[2:27]

# Returns the p-value of the first predictor term of a fitted lm.
#
# The coefficient table has one row per estimated coefficient (row 1 is the
# intercept) and the p-value in the column named "Pr(>|t|)". Selecting the
# cell by row and column name is equivalent to the positional index [8] only
# when the table is exactly 2 x 4; with more rows, [8] lands on a t value.
# When the predictor could not be estimated (intercept-only table) the result
# is NA, which is what the positional index produced as well.
slopePValue <- function(fit) {
  coefTable <- coef(summary(fit))
  if (nrow(coefTable) < 2) {
    return(NA_real_)
  }
  coefTable[2, "Pr(>|t|)"]
}

# Linear regressions with respondent characteristic predicting food frequency

regValues <- data.frame(food = ffq)

# Add characteristics as blank columns to regValues data frame
regValues[characteristics] <- NA_real_

# Row i of regValues corresponds to ffq[i], so rows are addressed by position.
for (i in seq_along(ffq)) {
  for (characteristic in characteristics) {
    frm <- formula(paste0(ffq[i], "~", characteristic))
    regValues[i, characteristic] <- slopePValue(lm(frm, data = rawData))
  }
}

# Extract p-values: reshape to one row per (food, characteristic) pair and
# sort so the smallest p-values come first.

regAnalysis <- regValues %>%
  gather("characteristic", "p_values", 2:27) %>%
  arrange(p_values)

# Write out p-values

write_csv(regAnalysis, "p_values_analysis.csv")

# Note: This is an intentionally shady regression analysis, both because of
# the "p-hacking" or "data mining" behind running over 27,000 regressions, and
# because the only statistics reported were the p-values of the
# characteristics (the independent variables).

# IN OTHER WORDS: DO NOT TRY THIS AT HOME (AKA, THIS IS NOT AN EXAMPLE OF SOUND DATA ANALYSIS)
Oops, something went wrong.