diff --git a/0_generate_QC_reports_DRIVER.pl b/0_generate_QC_reports_DRIVER.pl new file mode 100755 index 0000000..ef70fbc --- /dev/null +++ b/0_generate_QC_reports_DRIVER.pl @@ -0,0 +1,24 @@ +#!/usr/bin/perl -w + +use strict; +use autodie qw(:all); + +use File::Rsync; + +use Settings; + +my $settings = new Settings(); + +# Generate the report +my $command = "perl 1_download_files_and_generate_report.pl"; +system("$command"); + +# Copy to distribution path +my $rsync = new File::Rsync({ + archive => 1, + delete => 1 +}); +$rsync->exec({ + src => 'reports/', + exclude => ['_README.md'], + dest => $settings->{'dist_path'}}) or die("Rsync failed $!"); diff --git a/1_download_files_and_generate_report.pl b/1_download_files_and_generate_report.pl new file mode 100755 index 0000000..3388790 --- /dev/null +++ b/1_download_files_and_generate_report.pl @@ -0,0 +1,88 @@ +#!/usr/bin/perl -w + +# Results written to working/* with the current date + +use strict; +use autodie qw(:all); +use DBI; +use DBD::Oracle; +use POSIX qw/strftime/; +use POSIX::strptime qw(strptime); +use Settings; +use File::Copy qw(copy); + +# takes a string and date format +# returns a time-tuple +sub parse_date { + my ($mday, $mon, $year) = (strptime($_[0], $_[1]))[3,4,5]; + return (0, 0, 0, $mday, $mon, $year, 0, 0, 0); +} + +my $settings = new Settings(); +$ENV{ORACLE_HOME} = $settings->{'env_oracle_home'}; +my $db=DBI->connect( + "dbi:Oracle:host=$settings->{'host'};sid=$settings->{'sid'};port=$settings->{'port'}", + $settings->{'user'}, + $settings->{'password'}, + {RaiseError => 1}); + ++# Get latest run date. +my $query = "SELECT MAX(TO_DATE(L.RUNNAME, 'DD-MON-YY')) " . + "FROM $settings->{'schema'}.Lab_Miseq_Run L "; +my $sth = $db->prepare($query); +$sth->execute(); +my @lastRunInDB = parse_date($sth->fetchrow(), '%d-%b-%y'); + +# Only report on runs we haven't done before. +# TODO: this seems like an amaturish solution. +my @lastRunReported = undef; +my $timestamp = undef; +my ($mday, $mon, $year) = undef;; +if (-e "last_run.txt") { + open $timestamp, ", $settings->{'date_format'}); + close($timestamp); +} +if (@lastRunReported && @lastRunReported == @lastRunInDB) { + print "Reports up to date. Not creating a new report.\n"; + exit 0; +} + +my $fileName = strftime($settings->{'date_format'}, localtime) . ".csv"; +my $dir = "working"; +unless (-e $dir) { mkdir($dir); } +open(my $output, ">$dir/$fileName"); + +# Dump data to a CSV file. +$query = "SELECT L.RUNNAME, R.*, I.* FROM $settings->{'schema'}.Lab_MiSeq_Run L " . + "LEFT JOIN $settings->{'schema'}.MiSeqQC_RunParameters R " . + "ON TO_DATE(L.RUNNAME, 'DD-MON-YYYY') = R.RUNSTARTDATE " . + "LEFT JOIN $settings->{'schema'}.MiSeqQC_InterOpSummary I " . + "ON R.RUNID = I.RUNID " . + "WHERE R.RUNSTARTDATE IS NULL OR R.RUNSTARTDATE >= TO_DATE('$settings->{'start_date'}', 'DD-MON-YY')"; +$sth = $db->prepare($query); +$sth->execute(); + +print $output join(",", @{$sth->{NAME}}) . "\n"; +while (my @row = $sth->fetchrow_array()) { + @row = map { defined($_)? $_ : "NA" } @row; + print $output join(",", @row) . "\n"; +} +$sth->finish(); + +# Generate report from csv, and move it to /reports, and delete the csv file +my $sitesFolder = $settings->{'sites_path'}; +my $folderName = strftime($settings->{'date_format'}, localtime); +my $reportFolder = "$sitesFolder/$folderName"; +unless (-e $reportFolder) { mkdir($reportFolder); } + +system("/usr/bin/env Rscript 2_generate_report.R $dir/$fileName $reportFolder > /dev/null 2>&1"); +rename("report.html", "$reportFolder/index.html"); +copy("R2HTML.css", "$reportFolder/R2HTML.css"); +unlink("$dir/$fileName"); # delete csv + +open $timestamp, ">last_run.txt"; +print $timestamp strftime($settings->{'date_format'}, @lastRunInDB); +close($timestamp); + +exit; diff --git a/2_generate_report.R b/2_generate_report.R new file mode 100755 index 0000000..f8d4d0f --- /dev/null +++ b/2_generate_report.R @@ -0,0 +1,280 @@ +#!/usr/bin/env Rscript + +library(R2HTML) +library(Cairo) +source(file="westgard.R") + +############################################################################### +# Settings/constants # +############################################################################### + +# Absolute paths to make this R script launchd / crontab friendly +currPath = "./" +scriptName = "2_generate_report.R" + +# Customization. +clusterdensity.min <- 500 +clusterdensity.max <- 1200 + +# Plot settings. +plotWidth <- 800 +plotHeight <- 500 +plotColor <- c("black", "forestgreen") +plotType <- "o" +plotLwd <- 2 +plotPch <- c(16, 17) +flagColor <- c("red", "red") +flagPch <- c(1, 17) +plotColumns <- 3 +plotRows <- 2 + +# Westgard rules in use (defined in westgard.R). +westgardRules <- c(westgard.1_3s, westgard.2_2s, westgard.4_1s, westgard.R4s, westgard.10x) + +############################################################################### +# Data-related/utility functions # +############################################################################### + +westgard <- function (x) { + z <- standardize(x) + Reduce("|", lapply(westgardRules, function (rule) rule(z))) +} + +# Format a number for display in a table. +table.format <- function (n) { + formatC(n, format="f", digits=2) +} + +# Scale down outliers which are more than dev.max standard deviations +# away from the mean. +shrink.outliers <- function(x, dev.max) { + xmean <- mean(x) + xsd <- sd(x) + xmin <- xmean - dev.max*xsd + xmax <- xmean + dev.max*xsd + x[x < xmin] <- xmin + x[x > xmax] <- xmax + x +} + + +############################################################################### +# Plotting-related functions # +############################################################################### + +# Add circles to indicate points have been flagged. +flag.points <- function (x, y, flags, pch, col) { + points(x[flags], y[flags], pch=pch, cex=2, lwd=2, col=col) +} + +# Add reference lines for different standard deviation values +lines.mean.sd <- function (y) { + my.mean <- mean(y) + my.sd <- sd(y) + abline(h=my.mean, lwd=2) + abline(h=my.mean + my.sd, lty="dotted", lwd=2, col="green3") + abline(h=my.mean - my.sd, lty="dotted", lwd=2, col="green3") + abline(h=my.mean + 2*my.sd, lty="longdash", lwd=2, col="darkgoldenrod1") + abline(h=my.mean - 2*my.sd, lty="longdash", lwd=2, col="darkgoldenrod1") + abline(h=my.mean + 3*my.sd, lwd=2, col="red") + abline(h=my.mean - 3*my.sd, lwd=2, col="red") +} + +############################################################################### +# HTML-related functions # +############################################################################### + +# Make an HTML image map for a scatterplot. +make.image.map <- function (x, y, titles, map.name) { + x.dev <- as.integer(grconvertX(x, to="device")) + y.dev <- as.integer(grconvertY(y, to="device")) + + html <- c(paste0('')) + areas <- apply(cbind(x.dev, y.dev, titles), 1, function (row) { + sprintf('', + row[1], row[2], row[3]) + }) + html <- c(html, areas) + c(html, "") +} + +# Make an HTML list of items +make.html.list <- function (items) { + c("") +} + +############################################################################### +# Read and process data # +############################################################################### + +# Read the CSV file (dump of MiSeqQC_RunParameters JOIN MiSeqQC_InteropSummary). +args<-commandArgs(TRUE) +if (length(args) != 2) { stop(paste("Syntax: ./", scriptName, " ", sep="")) } +filePath <- args[1] +fileName <- gsub(".csv", "", basename(filePath)) +reportDir <- args[2] +data <- read.csv(filePath, header=TRUE) + +# Read list of parameters (name,desc,unit). +param.list <- read.csv("parameter_list.csv", header=T, stringsAsFactors=F) +rownames(param.list) <- param.list$parameter +param.list$ylab <- with(param.list, + ifelse(!is.na(param.unit), + paste0(param.desc, " (", param.unit, ")"), + param.desc)) + +# Read list of reagents (name,desc). +reagent.list <- read.csv("reagent_list.csv", header=T, stringsAsFactors=F) +rownames(reagent.list) <- reagent.list$reagent + +# Parse dates. +data$RUNSTARTDATE <- as.Date(data$RUNSTARTDATE, "%d-%b-%y") +expiration.cols <- paste0(reagent.list$reagent, "_EXPIRATION") +data[,expiration.cols] <- lapply(data[,expiration.cols], as.Date, format="%d-%b-%y") +data <- data[order(data$RUNSTARTDATE),] + +# Collect missing runs, and remove from further analysis. +missing.runs <- data[is.na(data$RUNID.1),] +data <- data[!is.na(data$RUNID.1),] + +# Change proportions to percentages. +data$Q30_1 <- data$Q30_1*100 +data$Q30_2 <- data$Q30_2*100 + +# Apply Westgard rules. +flags <- apply(data[,param.list$parameter], 2, westgard) +colnames(flags) <- paste0(colnames(flags), ".FLAG") +data <- cbind(data, flags) + +# Number of days until expiration dates. +reagent.cols <- paste0(reagent.list$reagent, "_EXPIRATION") +days.left <- lapply(data[,reagent.cols], function(col) as.numeric(col-Sys.Date())) +names(days.left) <- sub("_EXPIRATION", ".DAYSLEFT", names(days.left)) +data <- cbind(data, days.left) + +# Create alerts. +# Runs missing from QC tables. +alerts <- apply(missing.runs, 1, function (row) { + sprintf("Sample sheet created on %s has no associated QC data", row["RUNNAME"], + strftime(row["RUNSTARTDATE"], "%b %d, %Y")) +}) +# Parameters flagged on most recent run. +flag.cols <- colnames(data)[grepl("[.]FLAG$", colnames(data))] +most.recent <- data[nrow(data), flag.cols] +most.recent <- most.recent[,which(t(most.recent))] +alerts <- c(alerts, sapply(names(most.recent), function (col) { + sprintf("Parameter %s has been flagged", + sub(".FLAG", "", col, fixed=T)) +})) +# Reagents which are about to expire or have already expired. +expire.cols <- colnames(data)[grepl("[.]DAYSLEFT$", colnames(data))] +most.recent <- data[nrow(data), expire.cols] +most.recent <- most.recent[,which(t(most.recent) < 31)] +alerts <- c(alerts, sapply(names(most.recent), function (col) { + sprintf("The %s will expire in %d days", + sub(".DAYSLEFT", "", col, fixed=T), + most.recent[,col]) +})) + +############################################################################### +# Make plots # +############################################################################### + +x <- data$RUNSTARTDATE +xlab <- "run date" +point.labels <- strftime(data$RUNSTARTDATE, "%b %d, %Y") + +nup <- plotRows*plotColumns +nplots <- as.integer(nrow(param.list)/(nup))+1 +. <- sapply(seq(0, nplots-1), function (i) { + params <- param.list[(i*nup+1):(i*nup+nup),"parameter"] + params <- params[!is.na(params)] + plotName <- paste0("parameters", i, ".png") + CairoPNG(file.path(reportDir, plotName), width=plotWidth, height=plotHeight) + par(yaxs="i", mfrow=c(plotRows, plotColumns), mar=c(2, 2, 1, 1), oma=c(1, 1, 0, 4)) + sapply(1:length(params), function (i) { + y <- data[,params[i]] + flag <- data[,paste0(params[i], ".FLAG")] + ylim <- c(mean(y)-4*sd(y), mean(y)+4*sd(y)) + y.scaled <- shrink.outliers(y, 4) + title <- param.list[params[i],"ylab"] + plot(x, y.scaled, ylim=ylim, type="n") + lines.mean.sd(y) + points(x, y.scaled, type=plotType, pch=plotPch[1], col=plotColor[1], lwd=plotLwd, + xlab=NA, ylab=NA, ylim=ylim) + flag.points(x, y.scaled, flag, flagPch[1], flagColor[1]) + if (i %% plotColumns == 0 | i == length(params)) { + axis(4, at=mean(y)+sd(y)*seq(-3, 3), labels=seq(-3, 3)) + } + + limits <- par("usr") + xmiddle <- (limits[1] + limits[2])/2 + ymax <- limits[4] + text(xmiddle, ymax, title, font=2, cex=1.5, pos=1) + }) + mtext("standard deviations from mean", side=4, outer=T, line=2) + dev.off() +}) + +image.maps <- c() + +# Report 1: Cluster Density +CairoPNG(file.path(reportDir, "REPORT-1.png"), + width=plotWidth, height=plotHeight, bg="transparent") +y <- data$CLUSTERDENSITY +ymin <- min(c(clusterdensity.min, y)) +ymax <- max(c(clusterdensity.max, y)) +plot(x, y, type=plotType, pch=plotPch[1], col=plotColor[1], lwd=plotLwd, + ylim=c(ymin, ymax), xlab=xlab, ylab=param.list["CLUSTERDENSITY","ylab"]) + +flag <- data$CLUSTERDENSITY.FLAG +flag.points(x, y, flag, flagPch[1], flagColor[1]) +flag <- y > clusterdensity.max | y < clusterdensity.min +flag.points(x, y, flag, flagPch[2], flagColor[2]) + +abline(h=clusterdensity.min, lty="longdash", lwd=3) +abline(h=clusterdensity.max, lty="longdash", lwd=3) +text(max(x), clusterdensity.min, labels=clusterdensity.min, pos=3, cex=1.5) +text(max(x), clusterdensity.max, labels=clusterdensity.max, pos=3, cex=1.5) + +title(main="Cluster density") +grid(nx=0, ny=NULL, col="black") +legend(par("usr")[2], par("usr")[4], xjust=1, yjust=0, xpd=T, + legend=c("Tolerance", "Flagged (Westgard)", "Flagged (outside tolerance)"), + lty=c("longdash", NA, NA), pch=c(NA, flagPch), col=c("black", flagColor)) +image.maps <- c(image.maps, make.image.map(x, y, point.labels, "REPORT1Map")) +dev.off() + + +# Report 2: % bases passing Q30 cutoff +CairoPNG(file.path(reportDir, "REPORT-2.png"), + width=plotWidth, height=plotHeight, bg="transparent") +par(yaxs="i") +q30.means <- c(mean(data$Q30_1), mean(data$Q30_2)) +q30.stdevs <- c(sd(data$Q30_1), sd(data$Q30_2)) +ymin <- min(q30.means-4*q30.stdevs) +ymax <- max(c(q30.means+4*q30.stdevs, 100)) + +y <- data$Q30_1 +flag <- data$Q30_1.FLAG +plot(x, y, type=plotType, col=plotColor[1], lwd=plotLwd, pch=plotPch[1], + ylim=c(ymin, ymax), ylab="bases passing Q30 cutoff (%)", xlab=xlab) +flag.points(x, y, flag, flagPch[1], flagColor[1]) + +y <- data$Q30_2 +flag <- data$Q30_2.FLAG +lines(x, y, type=plotType, pch=plotPch[2], col=plotColor[2], lwd=plotLwd) +flag.points(x, y, flag, flagPch[1], flagColor[1]) + +title(main="Bases passing Q30 cutoffs") +grid(nx=0, ny=NULL, col="black") +legend(par("usr")[2], par("usr")[4], xjust=1, yjust=0, xpd=T, + legend=c("read 1", "read 2", "Flagged (Westgard)"), + col=c(plotColor, flagColor[1]), pch=c(plotPch, flagPch[1])) +image.maps <- c(image.maps, make.image.map(c(x, x), c(data$Q30_1, data$Q30_2), + c(point.labels, point.labels), + "REPORT2Map")) +dev.off() + +# Finally, generate the report. +Sweave("report.Rnw", driver=RweaveHTML, syntax="SweaveSyntaxNoweb") diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..92ebae6 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,192 @@ +Setting up a developer workstation +================================== + +This will document the installation steps to get the MiSeq QC reports running +locally on your workstation. + +The steps are for Eclipse with EPIC on Ubuntu, adapt as needed to your preferred +IDE or operating system. + +1. Check the version of Java you have installed: + + java -version + +2. If the java version is lower than 1.7, then install JDK7: + + sudo apt-get install openjdk-7-source + +3. Check what version of perl you have installed. These reports were tested + with version 5.18.2, but earlier versions may also work. + + perl -v + +4. If it's not there, install it. + + sudo apt-get install perl + +5. Install Eclipse, although you might prefer a more recent version from the + [Eclipse web site][eclipse]: + + sudo apt-get install eclipse + + [eclipse]: https://www.eclipse.org/downloads/ + +6. Launch Eclipse. From the Help menu, choose either Eclipse Marketplace... or + Install New Software.... + +7. In the marketplace, just type EPIC and search. In the install wizard, use + the [EPIC update site][epic]. + [epic]: http://e-p-i-c.sf.net/updates/ + +7. From the Window menu, choose Preferences, and navigate down to Perl EPIC: + Editor. + * Use spaces instead of tabs. + * Insert 4 spaces on indent. + * Show line numbers (your choice) + * Show print margin (your choice) + +8. From the File menu, choose Import.... Navigate down to Git: Projects from Git. + +9. Choose Clone URI, and paste this URI: + https://github.com/cfe-lab/MiSeqQCReport.git + +10. Ask your supervisor for the password, and use the defaults for everything + else. Select the new project wizard with a Perl project. + +11. Change the folder to point at the new miseq_qc_report folder created by git, + and finish the import. + +12. Install [Oracle Instant Client][oracle]. Use the basic lite version, and + test that sqlplus works. You will probably have to follow the steps to set + up the libraries, and you may have to run sqlplus64 instead of sqlplus. + + sqlplus USER@\"//192.168.?.?:1521/SID\" + + If you want to have history and tab expansion in sqlplus, install rlwrap: + + sudo apt-get install rlwrap + alias sqlplus="rlwrap sqlplus" + + You also need to set the `ORACLE_HOME` environment variable. + + sudo vi /etc/profile.d/oracle.sh # Add the following line: + export ORACLE_HOME=/usr/lib/oracle/12.1/client64 + + [oracle]: https://help.ubuntu.com/community/Oracle%20Instant%20Client + +13. Install Perl's Database Interface package (DBI), File::Rsync, and XML::Simple. + + sudo apt-get install libdbi-perl libfile-rsync-perl libxml-simple-perl + +14. Install DBD::Oracle CPAN module. The first command will begin the + installation of CPAN, just accept the defaults. It will eventually open a + `cpan>` prompt where you can enter the second command. That eventually + opens a root shell where you can enter the rest. + + sudo cpan + look DBD::Oracle + . /etc/profile.d/oracle.sh + perl Makefile.PL + make + make test # Won't be able to connect to database, that's fine. + make install + exit + exit + + If you encounter the error "Unable to locate an oracle.mk or other + suitable *.mk", replace "perl Makefile.PL" with "perl Makefile.PL + -l". + +14. Repeat the above commands, without the oracle.sh part, for the + packages IPC::System::Simple, File::Rsync, and POSIX::strptime. + +14. Copy the `QC_Reports/Settings_template.pm` file, and modify the settings to + match your environment. + +14. Install the Cairo development library. + + sudo apt-get install libcairo2-dev + +15. Install R. The last two commands are run in the R console, and you should + check the [StatET installation page][statet] to see exactly which version + of the rj package is compatible with the version of StatET you are going to + install. You also need to install the R2HTML and Cairo packages. + + sudo apt-get install r-base r-base-dev + sudo R + install.packages(c("rj", "rj.gd"), repos="http://download.walware.de/rj-1.1") + install.packages("R2HTML") + install.packages("Cairo") + q() + + [statet]: http://www.walware.de/it/statet/installation.mframe + +16. Launch Eclipse. For some reason, you can't currently install StatET from the + Eclipse Marketplace, so from the Help menu, choose Install New Software.... + +17. Go to the [StatET installation page][statet], and find the update site for + your version of Eclipse. Paste that address in the install wizard, and + select the StatET for R component. Finish the installation. + +18. From the Window menu, choose Preferences. Navigate down to StatET: + Run/Debug: R Environments. + +19. Click the Add... button. + +20. Next to the Location (R_HOME) field, press the + button, and choose Try + find automatically. It should find the R you just installed. + +21. Click the Detect Default Properties/Settings button. Click OK. Click OK. + +22. If you want an R console, open the Run menu, and choose + Run Configurations.... Select R Console and click the add button. Click Run. + +23. To run an R script with command-line arguments, modify the R console + configuration by setting the working directory and adding this to the + Options/Arguments field with whatever CSV file name was created by the + previous step: + + --args working/2014-06-25.csv + + Then you can use `source("2_generate_report.R")` in the console to launch it. + +24. If you get an error about a missing font -adobe-helvetica-..., + install the gsfonts-x11 package and refresh your font cache. + + sudo apt-get install gsfonts-x11 + xset fp rehash + +The reports are currently run on a virtual machine by a certain user, +and then displayed on the local network. They are scheduled under that +virtual machine's user's crontab at 8:00 AM each day. You can see the +tasks by logging in as that user and then typing `crontab -l`. For the +IP addresses of the machine, and the user as whom you must log on, ask +your supervisor. + + +Setting up Oracle +================= + +To run the scripts, you will need access to certain Oracle tables. + +1. Ask the database administrator to create an account for you, with + read access to the MiSeqQC_* tables. This will need to be okayed by + the lab director. You will be told the default password. Also ask for + the IP address, port, and SID for the Oracle database. + +2. The account will be created with a default password. Log in for the + first time, and change the password. + + sqlplus username/password@//dbhost:1521/SID + + You will be prompted to enter a new password. Oracle passwords at the + CfE must conform to the following guidelines: + * Password can not be same as username + * Password must be different than previous 3 passwords + * Password must be at least 8 characters long + * Password must begin with letter (all letters in uppercase) + * Password must contain at least two digits + * Password must contain at least one punctuation !"#$%&()*+,-/:;<=>?_ + If you have chosen a password which fulfills all these criteria, but + get errors about an invalid login, contact the database administrator + and ask to set your password on her computer. diff --git a/R2HTML.css b/R2HTML.css new file mode 100644 index 0000000..d9f6002 --- /dev/null +++ b/R2HTML.css @@ -0,0 +1,59 @@ +html, body { + width: 800; + margin: auto; +} + +h1, h2 { + font-family: sans-serif; +} + +img { + margin: auto; + padding: 0px; +} + +table, th, td { + border: 1px solid grey; +} + +th, td { + padding: 5px; +} + +table { + width: 100%; + border-collapse: collapse; +} + +.image_cell { + margin: auto; + text-align: center; +} + +.number_cell { + text-align: right; +} + +area { + cursor: pointer; + display: block; +} + +dt { + font-family: sans-serif; + font-weight: bold; +} + +dt:not(.noclear) { + float: left; + clear: left; + margin-right: 5px; +} + +dt:after { + content: ":"; +} + +dd { + padding: 0 0 0.5em 0; +} diff --git a/README.md b/README.md new file mode 100644 index 0000000..40037c4 --- /dev/null +++ b/README.md @@ -0,0 +1,75 @@ +How to Change QC Report Parameters +================================== + +Settings for this project are stored in two files: + - database-related settings are in Settings.pm + - report-related settings are in the top of 2_generate_report.R + + +Changing the run start date +--------------------------- +Open "Settings.pm" in a text editor. Look for these lines near the +bottom of the file. + + # When to start counting from. + start_date => "01-NOV-13" + +Here, "01-NOV-13" is the start date (it may be a different date in the +file). Change that to the start date you want to use, but keep the +formatting of the date the same. For example, to change the start date +to January 31, 2014, I would edit the lines to look like this. + + # When to start counting from. + start_date => "31-JAN-14" + + +Changing the Westgard rules +--------------------------- +Edit "2_generate_report.R", and look for the variable "westgardRules" in +the top section. The available rules are defined in the file +"westgard.R". As of this writing, all the "common" rules (taken from the +website http://www.westgard.com/mltirule.htm) were defined. Put the +functions for whichever rules you want to use in this array. + +Next, edit "report.Rnw", which is an html file with some R code chunks, +to describe the new Westgard rules you are using. The relevant part is +in a tag near the bottom of the body (search for Westgard). + + +Changing the cluster density tolerances +--------------------------------------- +Edit "2_generate_report.R", and look for the variables +"clusterdensity.min" and "clusterdensity.max" in the top section. Edit +these to whatever values you like. The changes will be automatically +shown in the report (you do not need to edit "report.Rnw"). + + +Developer notes +=============== + +If you are not a programmer, you do not need to read this section. + +Database information +-------------------- + +The MiSeq QC report primarily uses information from two Oracle tables. +- MISEQQC_RUNPARAMETERS contains the reagent expration dates. +- MISEQQC_INTEROPSUMMARY contains the run parameters (CLUSTERDENSITY + etc.) +These two tables are linked by the RUNID field. In theory, there should +be one row of MISEQQC_INTEROPSUMMARY for each row in +MISEQQC_RUNPARAMETERS, but this is not currently the case. These tables +get populated after a run is complete and Eric's Interop parsing scripts +run. Evidently, there is something wrong with the scripts. + +Additionally, the LAB_MISEQ_RUN table has an entry for each run. It is +populated whenever anybody creates a new sample sheet in the QAI. Again, +there be a one-to-one relationship between LAB_MISEQ_RUN.RUNNAME +and MISEQQC_RUNPARAMETERS.EXPERIMENNAME (and by extension +MISEQQC_INTEROPSUMMARY.RUNID). However, the MISEQQC* tables get +populated by scripts after the run has completed, so it's possible that +runs don't appear there if the scripts break. + +Also, if a run is aborted, then it will be in the LAB_MISEQ_RUN table +but not in the MISEQQC* tables. Currently, it is not recorded anywhere +if a run was aborted or not. diff --git a/Settings_template.pm b/Settings_template.pm new file mode 100644 index 0000000..09a8fc5 --- /dev/null +++ b/Settings_template.pm @@ -0,0 +1,37 @@ +# Copy this file to Settings.pm, then edit all the configuration entries to +# match your environment. + +package Settings; + +sub new { + my ($class_name) = @_; + my ($self) = { + # Database client + env_oracle_home => "/usr/oracle_instantClient64", + + # Database connection params + host => "192.168.?.?", + port => "1521", + sid => "??????", + user => "??????", + password => "??????", + schema => "??????", + + # This is an rsync path. + dist_path => "dist", + + # Where to put reports. + sites_path => "reports", + + # Date format for file names. To the day in prod, to the minute in dev. + date_format => "%Y-%m-%d_%H%M", + + # When to start counting from. + start_date => "01-NOV-13" + }; + + bless ($self, $class_name); + return $self; +} + +1; diff --git a/dist/_README.md b/dist/_README.md new file mode 100644 index 0000000..241a807 --- /dev/null +++ b/dist/_README.md @@ -0,0 +1,2 @@ +This folder is a dummy target for rsync in development. In production, rsync +will send the files to the public web server. diff --git a/parameter_list.csv b/parameter_list.csv new file mode 100644 index 0000000..3238b80 --- /dev/null +++ b/parameter_list.csv @@ -0,0 +1,29 @@ +parameter,param.desc,param.unit +ALIGNED_F,"reads aligned, forward",% +ALIGNED_R,"reads aligned, reverse",% +Q30_1,"bases passing Q30, read 1",% +Q30_2,"bases passing Q30, read 2",% +CLUSTERDENSITY,cluster density,1000/mm^2 +PERCENTCLUSTERSPF,clusters passing filters,% +PHASING_F,"phasing, forward",% +PHASING_R,"phasing, reverse",% +PREPHASING_F,"prephasing, forward",% +PREPHASING_R,"prephasing, reverse",% +ERRORRATE_F,"error rate, forward",% +ERRORRATE_R,"error rate, reverse",% +INT_1_1_A,"A intensity, read 1, cycle 1",NA +INT_1_1_C,"C intensity, read 1, cycle 1",NA +INT_1_1_G,"G intensity, read 1, cycle 1",NA +INT_1_1_T,"T intensity, read 1, cycle 1",NA +INT_1_20_A,"A intensity, read 1, cycle 20",NA +INT_1_20_C,"C intensity, read 1, cycle 20",NA +INT_1_20_G,"G intensity, read 1, cycle 20",NA +INT_1_20_T,"T intensity, read 1, cycle 20",NA +INT_2_1_A,"A intensity, read 2, cycle 1",NA +INT_2_1_C,"C intensity, read 2, cycle 1",NA +INT_2_1_G,"G intensity, read 2, cycle 1",NA +INT_2_1_T,"T intensity, read 2, cycle 1",NA +INT_2_20_A,"A intensity, read 2, cycle 20",NA +INT_2_20_C,"C intensity, read 2, cycle 20",NA +INT_2_20_G,"G intensity, read 2, cycle 20",NA +INT_2_20_T,"T intensity, read 2, cycle 20",NA diff --git a/reagent_list.csv b/reagent_list.csv new file mode 100644 index 0000000..b7f9f70 --- /dev/null +++ b/reagent_list.csv @@ -0,0 +1,4 @@ +reagent,description +FLOWCELL,flow cell +PR2,PR2 incorporation buffer +REAGENTKIT,reagent kit diff --git a/report.Rnw b/report.Rnw new file mode 100644 index 0000000..4a29591 --- /dev/null +++ b/report.Rnw @@ -0,0 +1,116 @@ + + + + MiSeq QC Report for \Sexpr{strftime(Sys.Date(), format="%B %d, %Y")} + + + + + +

MiSeq QC Report for \Sexpr{strftime(Sys.Date(),, format="%B %d, %Y")}

+ +
+
Run ID
+
\Sexpr{data[nrow(data), "RUNID"]}
+
Run Date
+
\Sexpr{strftime(data[nrow(data), "RUNSTARTDATE"], "%B %d, %Y")}
+
Cutoff Date
+
\Sexpr{strftime(min(data$RUNSTARTDATE), "%B %d, %Y")}
+<>= + html <- c(apply(reagent.list, 1, function (row) { + daysleft <- data[nrow(data), paste0(row["reagent"], ".DAYSLEFT")] + desc <- paste0("
Days until ", row["description"], " expiration
") + item <- paste0("
", daysleft, "
") + c(desc, item) + })) + paste0(html, collapse="\n") +@ +
+ +

Alerts

+<>= + if (length(alerts) > 0) { + paste0(make.html.list(alerts), collapse="\n") + } else { + "

Nothing to report.

" + } +@ +

Reports

+ + +
+ +
+ +

Parameters

+

Values more than 4 standard deviations away from the mean are shown at 4 standard deviations.

+<>= + html <- sapply(0:(nplots-1), function (i) { + paste0('
') + }) + paste0(html, collapse="\n") +@ + + + + + + + + + + + +<>= + tbody <- c(sapply(param.list$parameter, function (p) { + cells <- c(paste0('")) + cells <- c(cells, paste0('")) + cells <- c(cells, paste0('")) + cells <- c(cells, paste0('")) + cells <- c(cells, paste0('")) + c('', cells, "") + })) + paste0(tbody, collaspe="\n") +@ + +
ParameterDescriptionMeanMedianStandard Deviation
', p, "', param.list[p, "param.desc"], "', table.format(mean(data[,p])), "', table.format(median(data[,p])), "', table.format(sd(data[,p])), "
+
+ + +

Using the following Westgard rules: 13s (one measurement + >3σ away from the mean), 22s (two consecutive + measurements >2σ in the same direction), 41s (four + consecutive measurements >1σ in the same direction), + R4s (change from >2σ to <2σ or vice versa), 10x + (ten or more consecutive measurements on one side of the mean).

+ +

Using tolerances of \Sexpr{clusterdensity.min} and + \Sexpr{clusterdensity.max} to flag cluster density.

+
+ +<>= + paste0(image.maps, collapse="\n") +@ + + diff --git a/reports/_README.md b/reports/_README.md new file mode 100644 index 0000000..2f36327 --- /dev/null +++ b/reports/_README.md @@ -0,0 +1 @@ +This folder holds the reports that get generated. \ No newline at end of file diff --git a/westgard.R b/westgard.R new file mode 100644 index 0000000..38a3503 --- /dev/null +++ b/westgard.R @@ -0,0 +1,53 @@ +# http://www.westgard.com/mltirule.htm +standardize <- function (x) { + (x-mean(x))/sd(x) +} + +# x is a boolean vector +# replace all runs of TRUE in x which are shorter than minlen with FALSE +conseq.trues <- function (x, minlen) { + r <- rle(x) + r$values <- r$values & r$lengths >= minlen + inverse.rle(r) +} + +# In each of the rule functions, z is assumed to be standardized, +# that is, z is a vector of standard deviations away from the mean. + +westgard.1_3s <- function (z) { + abs(z) > 3 +} + +westgard.1_2s <- function (z) { + abs(z) > 2 +} + +westgard.2_2s <- function (z) { + conseq.trues(z > 2, 2) | conseq.trues(z < -2, 2) +} + +westgard.R4s <- function (z) { + d <- abs(diff(z)) > 4 + s <- diff(sign(z)) > 0 + conseq.trues((c(d, F) | c(F, d)) & (c(F, s) | c(s, F)) & abs(z) > 2, 2) +} + +westgard.3_1s <- function (z) { + conseq.trues(z > 2, 3) | conseq.trues(z < -2, 3) +} + +westgard.4_1s <- function (z) { + conseq.trues(z > 2, 4) | conseq.trues(z < -2, 4) +} + +westgard.8x <- function (z) { + conseq.trues(z > 0, 8) | conseq.trues(z < 0, 8) +} + +westgard.10x <- function (z) { + conseq.trues(z > 0, 10) | conseq.trues(z < 0, 10) +} + +westgard.12x <- function (z) { + conseq.trues(z > 0, 12) | conseq.trues(z < 0, 12) +} diff --git a/working/_README.md b/working/_README.md new file mode 100644 index 0000000..a521525 --- /dev/null +++ b/working/_README.md @@ -0,0 +1 @@ +This folder holds working files that are used to prepare a report. \ No newline at end of file