Skip to content

Commit

Permalink
add nutrition studies repo
Browse files Browse the repository at this point in the history
  • Loading branch information
andrewflowers committed Jan 5, 2016
1 parent fb54c1e commit 26bbe40
Show file tree
Hide file tree
Showing 18 changed files with 27,935 additions and 0 deletions.
Binary file added nutrition-studies/.RData
Binary file not shown.
9 changes: 9 additions & 0 deletions nutrition-studies/.Rproj.user/1ED50CBF/pcs/files-pane.pper
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
{
"path" : "~/editing/dieting-project",
"sortOrder" : [
{
"ascending" : true,
"columnIndex" : 2
}
]
}
3 changes: 3 additions & 0 deletions nutrition-studies/.Rproj.user/1ED50CBF/pcs/source-pane.pper
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{
"activeTab" : 0
}
14 changes: 14 additions & 0 deletions nutrition-studies/.Rproj.user/1ED50CBF/pcs/windowlayoutstate.pper
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
{
"left" : {
"panelheight" : 674,
"splitterpos" : 283,
"topwindowstate" : "NORMAL",
"windowheight" : 713
},
"right" : {
"panelheight" : 674,
"splitterpos" : 421,
"topwindowstate" : "NORMAL",
"windowheight" : 713
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{
"TabSet1" : 0,
"TabSet2" : 3
}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"active_set":"","sets":[]}
3 changes: 3 additions & 0 deletions nutrition-studies/.Rproj.user/1ED50CBF/sdb/prop/194B460A
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{
"tempName" : "Untitled1"
}
3 changes: 3 additions & 0 deletions nutrition-studies/.Rproj.user/1ED50CBF/sdb/prop/27AAD271
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{
"tempName" : "Untitled1"
}
3 changes: 3 additions & 0 deletions nutrition-studies/.Rproj.user/1ED50CBF/sdb/prop/DDD9C5D8
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{
"tempName" : "Untitled1"
}
3 changes: 3 additions & 0 deletions nutrition-studies/.Rproj.user/1ED50CBF/sdb/prop/E17005B0
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{
"tempName" : "Untitled1"
}
4 changes: 4 additions & 0 deletions nutrition-studies/.Rproj.user/1ED50CBF/sdb/prop/INDEX
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
~%2Fediting%2Fdieting-project%2Fdiet_p_hacking.R="194B460A"
~%2Fediting%2Fdieting-project%2Fscratch_work.R="27AAD271"
~%2Fprivate-data%2Fdieting-project%2Fdiet_p_hacking.R="DDD9C5D8"
~%2Fprivate-data%2Fdieting-project%2Fdiet_p_hacking_final.R="E17005B0"
25 changes: 25 additions & 0 deletions nutrition-studies/.Rproj.user/1ED50CBF/sdb/s-8D6D153C/3EF212EB
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
{
"contents" : "",
"created" : 1452009684761.000,
"dirty" : false,
"encoding" : "",
"folds" : "",
"hash" : "0",
"id" : "3EF212EB",
"lastKnownWriteTime" : 140735085965160,
"path" : null,
"project_path" : null,
"properties" : {
"cacheKey" : "l91i3fvvp3",
"caption" : "regAnalysis",
"contentUrl" : "grid_resource/gridviewer.html?env=&obj=regAnalysis&cache_key=l91i3fvvp3",
"displayedObservations" : "27716",
"environment" : "",
"object" : "regAnalysis",
"totalObservations" : "27716",
"variables" : "3"
},
"relative_order" : 2,
"source_on_save" : false,
"type" : "r_dataframe"
}
18 changes: 18 additions & 0 deletions nutrition-studies/.Rproj.user/1ED50CBF/sdb/s-8D6D153C/BC3CEA54
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
{
"contents" : "# Diet P-hacking\n# Andrew Flowers <[email protected]>\n\nsetwd(\"~/private-data//dieting-project/\")\n\nrequire(readr)\nrequire(plyr)\nrequire(dplyr)\nrequire(tidyr)\n\nrawData <- read.csv(\"raw_anonymized_data.csv\")\n\n# Fix innie/out characteristics\n\nrawData$belly <- revalue(rawData$belly, c(\"Innie\"=\"Yes\", \"Outie\"=\"No\"))\n\n# FFQ variable names (should total 1066)\n\nffq <- names(rawData)[28:1093]\n\n# Characteristic variable names (should total 26)\n\ncharacteristics <- names(rawData)[2:27]\n\n# Linear regressions with respondent characteristic predicting food frequency\n\nregValues <- data.frame(food=ffq)\n\nfor (c in characteristics) regValues[,c] <- NA # Add characteristics as blank columns to regValues data frame\n\nfor (f in ffq){\n for (c in characteristics){\n \n frm <- formula(paste0(f, \"~\", c))\n reg <- summary(lm(data=rawData, formula=frm))\n regValues[which(regValues$food==f), c] <- reg$coefficients[8]\n }\n}\n\n# Extract p-values\n\nregAnalysis <- regValues %>%\n gather(\"characteristic\", \"p_values\", 2:27) %>% \n arrange(p_values)\n\n# Write out p-values\n\nwrite_csv(regAnalysis, \"p_values_analysis.csv\")\n\n# Note: This is an intentionally shady regression analysis. Both because of the \"p-hacking\" or \n# \"data mining\" behind running over 27,000 regresison, but also in that only the statistics reported \n# were the p-values of the characteristics (the independent variables).\n\n# IN OTHER WORDS: DO NOT TRY THIS AT HOME (AKA, THIS IS NOT AN EXAMPLE OF SOUND DATA ANALYSIS)\n",
"created" : 1450735440535.000,
"dirty" : false,
"encoding" : "UTF-8",
"folds" : "",
"hash" : "2220922350",
"id" : "BC3CEA54",
"lastKnownWriteTime" : 1452010952,
"path" : "~/private-data/dieting-project/diet_p_hacking_final.R",
"project_path" : "diet_p_hacking_final.R",
"properties" : {
"tempName" : "Untitled1"
},
"relative_order" : 1,
"source_on_save" : false,
"type" : "r_source"
}
Empty file.
19 changes: 19 additions & 0 deletions nutrition-studies/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
### Nutrition Studies

This directory contains data and code behind the story [You Can’t Trust What You Read About Nutrition](http://fivethirtyeight.com/features/you-cant-trust-what-you-read-about-nutrition).

Many studies of diet and nutrition include multiple variables with vast amounts of data, making it easy to p-hack your way to sexy (and false) results. We learned this firsthand when we invited readers to take a survey about their eating habits known as the food frequency questionnaire and answer a few other questions about themselves. We ended up with 54 complete responses and looked for associations much as researchers look for links between foods and dreaded diseases. It was easy to find them.

*Warning*: This is evil (statistical) work. Do not go to the dark side. Do not try this at home.

This directory contains three files:

File | Description
--- | -----
`raw_anonymized_data.csv` | The FFQ and survey data from 54 respondents
`p_hacking.R` | An R script that performs 27,716 regressions
`p_values_analysis.csv` | The output data file listing the p-values

**Note:** This is an intentionally shady regression analysis, both because of the "p-hacking" or "data mining" behind running more than 27,000 regressions and because the only statistics reported were the p-values of the characteristics (the independent variables).

**IN OTHER WORDS: THIS IS NOT AN EXAMPLE OF SOUND DATA ANALYSIS.**
54 changes: 54 additions & 0 deletions nutrition-studies/p_hacking.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# Diet P-hacking
# Andrew Flowers <[email protected]>

# NOTE(review): machine-specific working directory. The relative file names
# used below (input and output CSVs) resolve against it, so adjust this path
# to wherever raw_anonymized_data.csv lives before running.
setwd("~/private-data//dieting-project/")

# library() stops immediately when a package is missing; require() would only
# warn and return FALSE, deferring the failure to the first use of the package.
# plyr is attached before dplyr so that dplyr's verbs are not masked by plyr's.
library(readr)
library(plyr)
library(dplyr)
library(tidyr)

rawData <- read.csv("raw_anonymized_data.csv")

# Fix innie/outie characteristic: recode the `belly` answers as "Yes"/"No"

rawData$belly <- revalue(rawData$belly, c("Innie" = "Yes", "Outie" = "No"))

# FFQ variable names (should total 1066)

ffq <- names(rawData)[28:1093]

# Characteristic variable names (should total 26)

characteristics <- names(rawData)[2:27]

# Linear regressions with respondent characteristic predicting food frequency

# Fit `response ~ predictor` on `data` and return the p-value of the first
# predictor term, i.e. row 2 (the row after the intercept) of the coefficient
# table produced by summary(lm(...)).
#
# The table is indexed by row and column name rather than by a linear position:
# position 8 only coincides with [2, "Pr(>|t|)"] when the table is exactly
# 2 x 4, and points at a different statistic as soon as the predictor expands
# to more than one coefficient (e.g. a factor with three or more levels).
#
# Returns NA when the table has only the intercept row, which matches what the
# out-of-range linear index used to yield in that case.
predictor_p_value <- function(response, predictor, data) {
  frm <- formula(paste0(response, "~", predictor))
  coefs <- summary(lm(formula = frm, data = data))$coefficients
  if (nrow(coefs) < 2) {
    return(NA_real_)
  }
  coefs[2, "Pr(>|t|)"]
}

regValues <- data.frame(food = ffq)

# Add characteristics as blank columns to regValues data frame
for (trait in characteristics) {
  regValues[, trait] <- NA
}

# Row i of regValues corresponds to ffq[i], so rows are addressed directly.
for (i in seq_along(ffq)) {
  for (trait in characteristics) {
    regValues[i, trait] <- predictor_p_value(ffq[i], trait, rawData)
  }
}

# Extract p-values

# Reshape to one row per (food, characteristic) pair, smallest p-values first.
# Every column except `food` holds p-values, so the columns are selected by
# excluding `food` instead of by hard-coded positions.
regAnalysis <- regValues %>%
  gather("characteristic", "p_values", -food) %>%
  arrange(p_values)

# Write out p-values

write_csv(regAnalysis, "p_values_analysis.csv")

# Note: This is an intentionally shady regression analysis, both because of the "p-hacking" or
# "data mining" behind running over 27,000 regressions and because the only statistics reported
# were the p-values of the characteristics (the independent variables).

# IN OTHER WORDS: DO NOT TRY THIS AT HOME (AKA, THIS IS NOT AN EXAMPLE OF SOUND DATA ANALYSIS)
Loading

0 comments on commit 26bbe40

Please sign in to comment.