Skip to content
This repository has been archived by the owner on Sep 14, 2021. It is now read-only.

Commit

Permalink
Update to Standard SQL.
Browse files Browse the repository at this point in the history
Change-Id: I5e7aeb7994ff0342bd06f8a5c5de91628e5ff4a3
  • Loading branch information
deflaux committed Nov 8, 2016
1 parent b048931 commit ff4cc44
Show file tree
Hide file tree
Showing 13 changed files with 145 additions and 152 deletions.
5 changes: 4 additions & 1 deletion R/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,10 @@
1. If needed, install the [bigrquery](https://github.com/hadley/bigrquery#authentication) package.

```
install.packages("bigrquery")
# The currently released version (0.3.0) of bigrquery does not yet have the
# parameter to use Standard SQL instead of Legacy SQL, so we install from GitHub.
library(devtools)
install_github('rstats-db/bigrquery')
```

2. From the R prompt set the working directory and run the script:
Expand Down
6 changes: 3 additions & 3 deletions R/getting-started.R
Original file line number Diff line number Diff line change
Expand Up @@ -17,15 +17,15 @@ library(bigrquery)

# Provide a table if you want to query your own data
# Reads a SQL template, substitutes the table name into it, prints the
# resulting SQL, and runs it on BigQuery.
#
# Args:
#   project:  passed as the second argument to query_exec().
#   queryUri: file path or URL of the SQL text; at most 1e6 characters are read.
#   table:    text substituted for the table-name placeholder in the SQL.
#
# Returns:
#   The value of the final query_exec() call.
#
# NOTE(review): this span is a commit-diff view, so several statements appear
# as before/after pairs (Legacy SQL form first, Standard SQL form second).
DisplayAndDispatchQuery <- function(project, queryUri,
table="genomics-public-data:platinum_genomes.variants") {
table="genomics-public-data.platinum_genomes.variants") {
# Read in the SQL from a file or URL.
querySql <- readChar(queryUri, nchars=1e6)
# Find and replace the table name placeholder with the table name.
# sub() with fixed=TRUE treats the placeholder literally (no regex) and
# replaces only its first occurrence.
querySql <- sub("_THE_TABLE_", table, querySql, fixed=TRUE)
querySql <- sub("@THE_TABLE", table, querySql, fixed=TRUE)
# Display the updated SQL.
cat(querySql)
# Dispatch the query to BigQuery for execution.
# useLegacySql = FALSE requests Standard SQL rather than Legacy SQL.
query_exec(querySql, project)
query_exec(querySql, project, useLegacySql = FALSE)
}

GettingStarted <- function(project) {
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ SELECT
COUNT(reference_name) AS num_records,
COUNT(call.call_set_name) AS num_calls
FROM
[genomics-public-data:platinum_genomes.variants]
`genomics-public-data.platinum_genomes.variants` v, v.call call
GROUP BY
reference_name
ORDER BY
Expand Down
4 changes: 2 additions & 2 deletions RMarkdown/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,12 @@ line by line in R [literate-programming-demo.R](./literate-programming-demo.R)

[literate-programming-demo.md](./literate-programming-demo.md) was created from [literate-programming-demo.Rmd](./literate-programming-demo.Rmd) via
```
require(knitr)
library(knitr)
knit("./literate-programming-demo.Rmd")
```

[literate-programming-demo.R](./literate-programming-demo.R) was created from [literate-programming-demo.Rmd](./literate-programming-demo.Rmd) via
```
require(knitr)
library(knitr)
purl("./literate-programming-demo.Rmd", documentation=1)
```
Binary file modified RMarkdown/figure/viz-1.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
25 changes: 7 additions & 18 deletions RMarkdown/literate-programming-demo.R
Original file line number Diff line number Diff line change
@@ -1,15 +1,14 @@

## ----one time setup, eval=FALSE------------------------------------------
## ### To install the bigrquery package
## install.packages("bigrquery")

## ### To install the bigrquery package. The currently released version (0.3.0) does not yet
## ### have the parameter to use Standard SQL instead of Legacy SQL, so we install from GitHub.
## library(devtools)
## install_github('rstats-db/bigrquery')

## ----initialize, message=FALSE, warning=FALSE----------------------------
library(bigrquery)
library(ggplot2)
library(xtable)


## ----eval=FALSE----------------------------------------------------------
## ######################[ CHANGE ME ]##################################
## # This codelab assumes that the current working directory is where the Rmd file resides.
Expand All @@ -19,58 +18,48 @@ library(xtable)
## project <- "YOUR-PROJECT-ID"
## #####################################################################


## ------------------------------------------------------------------------
# By default this codelab runs upon the Illumina Platinum Genomes Variants.
# Change the table here if you wish to run these queries against a different table.
# theTable is the text DisplayAndDispatchQuery() substitutes for the table
# placeholder in each SQL file.
# NOTE(review): the two assignments below look like the before/after pair from
# the commit diff (colon-separated project:dataset.table form, then the
# dot-separated project.dataset.table form); the second assignment wins.
theTable <- "genomics-public-data:platinum_genomes.variants"

theTable <- "genomics-public-data.platinum_genomes.variants"

## ------------------------------------------------------------------------
# Reads a SQL template, substitutes the table name into it, prints the
# resulting SQL, and runs it on BigQuery.
#
# Args:
#   queryUri: file path or URL of the SQL text; at most 1e6 characters are read.
#
# Returns:
#   The value of the final query_exec() call.
#
# `theTable` and `project` are not parameters; they are free variables that
# must already be defined outside this function when it is called.
#
# NOTE(review): this span is a commit-diff view, so the sub() and query_exec()
# statements appear as before/after pairs (Legacy SQL form first, Standard SQL
# form second).
DisplayAndDispatchQuery <- function(queryUri) {
# Read in the SQL from a file or URL.
querySql <- readChar(queryUri, nchars=1e6)
# Find and replace the table name placeholder with our table name.
# sub() with fixed=TRUE treats the placeholder literally (no regex) and
# replaces only its first occurrence.
querySql <- sub("_THE_TABLE_", theTable, querySql, fixed=TRUE)
querySql <- sub("@THE_TABLE", theTable, querySql, fixed=TRUE)
# Display the updated SQL.
cat(querySql)
# Dispatch the query to BigQuery for execution.
# useLegacySql = FALSE requests Standard SQL rather than Legacy SQL.
query_exec(querySql, project)
query_exec(querySql, project, useLegacySql = FALSE)
}


## ----comment=NA----------------------------------------------------------
# Display and run the SQL in sample-variant-counts-for-brca1.sql; the returned
# data is kept in `result` for the inspection and plot chunks below.
result <- DisplayAndDispatchQuery("../sql/sample-variant-counts-for-brca1.sql")

## ----result, comment=NA--------------------------------------------------
# Inspect the query result: first rows, per-column summary, and structure.
head(result)
summary(result)
str(result)

## ----viz, fig.align="center", fig.width=10-------------------------------
# Horizontal bar chart of variant_count for each call_set_name.
# stat="identity" plots the values as given instead of counting rows.
ggplot(result, aes(x=call_set_name, y=variant_count)) +
  geom_bar(stat="identity") + coord_flip() +
  ggtitle("Count of Variants Per Sample")

## ----comment=NA----------------------------------------------------------
# Display and run the SQL in variant-level-data-for-brca1.sql.
result <- DisplayAndDispatchQuery("../sql/variant-level-data-for-brca1.sql")

## ----echo=FALSE, message=FALSE, warning=FALSE, comment=NA, results="asis"----
# Render the first rows as an HTML table without row names.
# FALSE is spelled out because `F` is an ordinary variable that can be reassigned.
print(xtable(head(result)), type="html", include.rownames=FALSE)

## ----comment=NA----------------------------------------------------------
# Display and run the SQL in sample-level-data-for-brca1.sql.
result <- DisplayAndDispatchQuery("../sql/sample-level-data-for-brca1.sql")

## ----echo=FALSE, message=FALSE, warning=FALSE, comment=NA, results="asis"----
# Render the first rows as an HTML table without row names.
print(xtable(head(result)), type="html", include.rownames=FALSE)

## ----provenance, comment=NA----------------------------------------------
# Record the R version and attached package versions used for this run.
sessionInfo()

12 changes: 7 additions & 5 deletions RMarkdown/literate-programming-demo.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,10 @@ Now let's proceed with a specific example of [literate programming](http://en.wi
If you have not used the [bigrquery](https://github.com/hadley/bigrquery) package previously, you will likely need to do something like the following to get it installed:

```{r one time setup, eval=FALSE}
### To install the bigrquery package
install.packages("bigrquery")
### To install the bigrquery package. The currently released version (0.3.0) does not yet
### have the parameter to use Standard SQL instead of Legacy SQL, so we install from GitHub.
library(devtools)
install_github('rstats-db/bigrquery')
```

Next we will load our needed packages into our session:
Expand All @@ -54,7 +56,7 @@ project <- "YOUR-PROJECT-ID"
```{r}
# By default this codelab runs upon the Illumina Platinum Genomes Variants.
# Change the table here if you wish to run these queries against a different table.
theTable <- "genomics-public-data:platinum_genomes.variants"
theTable <- "genomics-public-data.platinum_genomes.variants"
```


Expand All @@ -64,11 +66,11 @@ DisplayAndDispatchQuery <- function(queryUri) {
# Purpose: read the SQL at queryUri (at most 1e6 characters), substitute the
# table name, print the resulting SQL, and return the value of the final
# query_exec() call.
# `theTable` and `project` are not parameters; they are free variables that
# must already be defined outside this function when it is called.
# NOTE(review): this span is a commit-diff view, so the sub() and query_exec()
# statements appear as before/after pairs (Legacy SQL form first, Standard SQL
# form second).
# Read in the SQL from a file or URL.
querySql <- readChar(queryUri, nchars=1e6)
# Find and replace the table name placeholder with our table name.
# sub() with fixed=TRUE treats the placeholder literally (no regex) and
# replaces only its first occurrence.
querySql <- sub("_THE_TABLE_", theTable, querySql, fixed=TRUE)
querySql <- sub("@THE_TABLE", theTable, querySql, fixed=TRUE)
# Display the updated SQL.
cat(querySql)
# Dispatch the query to BigQuery for execution.
# useLegacySql = FALSE requests Standard SQL rather than Legacy SQL.
query_exec(querySql, project)
query_exec(querySql, project, useLegacySql = FALSE)
}
```

Expand Down
Loading

0 comments on commit ff4cc44

Please sign in to comment.