diff --git a/.github/workflows/check-full.yaml b/.github/workflows/check-full.yaml index 093bbc4f..6dc8651e 100644 --- a/.github/workflows/check-full.yaml +++ b/.github/workflows/check-full.yaml @@ -26,8 +26,8 @@ jobs: # for windows and mac all builds are pushed to drat repo on merge to master (except pre-release builds), except 'next' # 2023-08-23 fails, not getting Reca. Because it is not built yet for macos and next or 4.3. - #- {os: macOS-latest, r: 'next', pkgext: '.tgz'} - #- {os: macOS-latest, r: '4.3', pkgext: '.tgz'} + - {os: macOS-latest, r: 'next', pkgext: '.tgz'} #tested without Reca + - {os: macOS-latest, r: '4.3', pkgext: '.tgz'} #tested without Reca - {os: macOS-latest, r: '4.2', pkgext: '.tgz'} - {os: macOS-latest, r: '4.1', pkgext: '.tgz'} - {os: macOS-latest, r: '4.0', pkgext: '.tgz'} @@ -181,15 +181,26 @@ jobs: sessioninfo::session_info(pkgs, include_base = TRUE) shell: Rscript {0} - - name: Check + # turn off testing of suggestions for configurations where Reca is not provided in StoX package repositories + - name: Check without suggest-dependencies + if: runner.os == 'macOS' && (matrix.config.r== 'next' || matrix.config.r== '4.3') env: _R_CHECK_CRAN_INCOMING_: false + _R_CHECK_FORCE_SUGGESTS_ : false run: rcmdcheck::rcmdcheck(args = c("--no-manual", "--as-cran"), error_on = "error", check_dir = "check") shell: Rscript {0} - - name: Show testthat output + - name: Check with suggest-dependencies + if: runner.os != 'macOS' || (matrix.config.r!= 'next' && matrix.config.r!= '4.3') + env: + _R_CHECK_CRAN_INCOMING_: false + _R_CHECK_FORCE_SUGGESTS_ : true + run: rcmdcheck::rcmdcheck(args = c("--no-manual", "--as-cran"), error_on = "error", check_dir = "check") + shell: Rscript {0} + + - name: Show unit test output if: always() - run: find check -name 'testthat.Rout*' -exec cat '{}' \; || true + run: find check -name 'tinytest.Rout*' -exec cat '{}' \; || true shell: bash - name: Upload check results diff --git a/.github/workflows/test-coverage.yaml b/.github/workflows/test-coverage.yaml index e0ba4b36..964b3d0e 100644 --- a/.github/workflows/test-coverage.yaml +++ b/.github/workflows/test-coverage.yaml @@ -2,8 +2,10 @@ on: push: branches: - master + - testing pull_request: branches: + - testing - develop name: test-coverage diff --git a/DESCRIPTION b/DESCRIPTION index 4275f047..50a84f4c 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,5 +1,5 @@ Package: RstoxFDA -Version: 1.3.0-9002 +Version: 1.3.0-9003 Date: 2023-12-07 Title: Fisheries Dependent Analysis with RstoX Authors@R: c( diff --git a/R/CatchLotteryExample-datadoc.R b/R/CatchLotteryExample-datadoc.R new file mode 100644 index 00000000..c3e3f04c --- /dev/null +++ b/R/CatchLotteryExample-datadoc.R @@ -0,0 +1,23 @@ +#' Data from the Norwegian catch lottery sampling program. +#' +#' Example of data formatted as \code{\link[RstoxData]{StoxBioticData}} +#' Hauls are primary sampling units, selected by Poission sampling with selection probabilities proportional to the catch size. +#' The data contain North Sea herring samples from catch lottery sampling in 2022. +#' +#' @docType data +#' +#' @usage data(CatchLotteryExample) +#' +#' @format \code{\link[RstoxData]{StoxBioticData}} +#' +#' @keywords datasets +#' @concept Analytical estimation +#' +#' @examples +#' RstoxFDA::plotArea(RstoxFDA::CatchLotteryExample$Station, +#' areaDef=RstoxFDA::mainareaFdir2018, +#' latCol = "Latitude", +#' lonCol = "Longitude", +#' areaLabels = TRUE) +"CatchLotteryExample" + diff --git a/R/CatchLotterySamplingExample-datadoc.R b/R/CatchLotterySamplingExample-datadoc.R new file mode 100644 index 00000000..80d881f7 --- /dev/null +++ b/R/CatchLotterySamplingExample-datadoc.R @@ -0,0 +1,24 @@ +#' Sampling parameters from the Norwegian catch lottery sampling program. +#' +#' Example of data formatted as \code{\link[RstoxFDA]{PSUSamplingParametersData}} +#' Hauls are primary sampling units, selected by Poission sampling with selection probabilities proportional to the catch size. +#' The data contain sampling parameters for North Sea herring samples from catch lottery sampling in 2022. +#' +#' The corresponding samples are provided in \code{\link[RstoxFDA]{CatchLotteryExample}} +#' +#' @docType data +#' +#' @usage data(CatchLotterySamplingExample) +#' +#' @format \code{\link[RstoxData]{StoxBioticData}} +#' +#' @keywords datasets +#' @concept Analytical estimation +#' +#' @examples +#' #all selected PSU that where actuall sampled are provided in CatchLotteryExample +#' sum(!is.na(CatchLotterySamplingExample$SelectionTable$SamplingUnitId)) +#' sum(CatchLotterySamplingExample$SelectionTable$SamplingUnitId %in% +#' CatchLotteryExample$Haul$HaulKey) +"CatchLotterySamplingExample" + diff --git a/R/RecaFormatChecks.R b/R/RecaFormatChecks.R index cdffe167..f82bc89c 100644 --- a/R/RecaFormatChecks.R +++ b/R/RecaFormatChecks.R @@ -185,7 +185,6 @@ checkWeightLength<-function(weightlength, landings){ checkCovariateConsistency <- function(modelobj, landingscov){ inlandings <- rownames(modelobj$info[modelobj$info[,"in.landings"]==1,]) if (any(!(inlandings %in% names(landingscov)))){ - browser() stop("some covariates labeled as in.landings are not found in corresponding covariate matrix in landings") } diff --git a/R/RecaWrap.R b/R/RecaWrap.R index 224dec76..e5e9825f 100644 --- a/R/RecaWrap.R +++ b/R/RecaWrap.R @@ -1108,3 +1108,9 @@ rEcaDataReport <- function(samples, landings, covariates){ out <- data.table::as.data.table(out) return(out) } + +#' Runs Reca::eca.estimate. Provided so that tinytest unittest can be implemented in a way that is dependent on Reca being available +#' @noRd +eca.estimate <- function(AgeLength, WeightLength, Landings, GlobalParameters){ + return(Reca::eca.estimate(AgeLength, WeightLength, Landings, GlobalParameters)) +} diff --git a/R/StoxAnalyticalAnalysisFunctions.R b/R/StoxAnalyticalAnalysisFunctions.R new file mode 100644 index 00000000..6169a515 --- /dev/null +++ b/R/StoxAnalyticalAnalysisFunctions.R @@ -0,0 +1,3 @@ +#' @noRd +PrepareHorvitzThompsonDomainEstimate <- function(){} + diff --git a/R/StoxAnalyticalBaselineFunctions.R b/R/StoxAnalyticalBaselineFunctions.R new file mode 100644 index 00000000..4d04a195 --- /dev/null +++ b/R/StoxAnalyticalBaselineFunctions.R @@ -0,0 +1,513 @@ +#' Construct design parameters assuming FSWOR, non-finite, equal prob, potentially stratified +#' @noRd +assumeDesignParametersStoxBiotic <- function(StoxBioticData, SamplingUnitId, StratificationColumns=c()){ + targetTable <- NULL + for (n in names(StoxBioticData)){ + if (SamplingUnitId %in% names(StoxBioticData[[n]])){ + targetTable=n + } + } + + if (is.null(targetTable)){ + stop(paste("The SamplingUnitId", SamplingUnitId, "was not found in StoxBioticData")) + } + + flatStox <- StoxBioticData[[targetTable]] + if (isGiven(StratificationColumns) & length(StratificationColumns)>0 & !all(StratificationColumns %in% names(flatStox))){ + stop("Not all stratification columns were found at ", targetTable, ", where the SamplingUnitId ", SamplingUnitId, " is found.") + } + + if (any(is.na(flatStox[[SamplingUnitId]]))){ + stop(paste("Cannot construct design parameters for missing SamplingUnitIds. Missing values (NA) found for", SamplingUnitId)) + } + for (n in StratificationColumns){ + if (any(is.na(flatStox[[n]]))){ + stop(paste("Cannot construct design parameters with missing strata information. Missing values (NA) found for stratification column", n)) + } + } + + flatStox$Stratum <- "All" + if (length(StratificationColumns)>0){ + flatStox$Stratum <- apply(flatStox[,.SD, .SDcol=StratificationColumns], 1, paste, collapse="/") + } + flatStox$SamplingUnitId <- flatStox[[SamplingUnitId]] + flatStox$Order <- as.numeric(NA) + + CommonSelectionData <- flatStox[,list(InclusionProbability=as.numeric(NA), HTsamplingWeight=1/length(unique(get("SamplingUnitId"))), SelectionProbability=as.numeric(NA), HHsamplingWeight=as.numeric(NA), SelectionDescription=as.character(NA)), by=c("Stratum")] + selectionUnits <- flatStox[,.SD, .SDcol=c("Stratum", "Order", "SamplingUnitId")] + selectionUnits <- selectionUnits[!duplicated(selectionUnits$SamplingUnitId),] + selectionTable <- merge(flatStox[,.SD, .SDcol=c("Stratum", "Order", "SamplingUnitId")], CommonSelectionData) + sampleTable <- flatStox[,list(N=as.numeric(NA), n=length(unique(get("SamplingUnitId"))), SelectionMethod="FSWR", FrameDescription=as.character(NA)), by=c("Stratum")] + sampleTable <- sampleTable[,.SD,.SDcol=c("Stratum", "N", "n", "SelectionMethod", "FrameDescription")] + stratificationTable <- flatStox[,.SD,.SDcol=c("Stratum", StratificationColumns)] + stratificationTable <- stratificationTable[!duplicated(stratificationTable$Stratum),] + assignmentTable <- data.table::data.table(DataRecordsId=character()) + + designParameters <- list() + designParameters$SampleTable <- sampleTable + designParameters$SelectionTable <- selectionTable + designParameters$StratificationVariables <- stratificationTable + designParameters$Assignment <- assignmentTable + + return(designParameters) + +} + +#' parse design parameters from tab delimited file +#' @noRd +parseDesignParameters <- function(filename){ + + colClasses <- c(Stratum="character", N="numeric", n="numeric", SelectionMethod="character", FrameDescription="character", Order="numeric", SamplingUnitId="character", InclusionProbability="numeric", HTsamplingWeight="numeric", SelectionProbability="numeric", HHsamplingWeight="numeric", SelectionDescription="character") + headers <- data.table::fread(filename, sep="\t", dec=".", header = T, nrows = 1) + if (!all(names(colClasses) %in% names(headers))){ + missing <- names(colClasses)[!(names(colClasses) %in% names(headers)),] + stop(paste("Invalid format. Missing columns:", paste(missing, collapse=","))) + } + stratificationColumns <- c() + for (n in names(headers)){ + if (!n %in% names(colClasses)){ + stratificationColumns <- c(stratificationColumns, n) + newnames <- c(names(colClasses), n) + colClasses <- c(colClasses, "character") + names(colClasses) <- newnames + } + } + + designParameters <- data.table::fread(filename, sep="\t", dec=".", header = T, colClasses = colClasses, na.strings = c("")) + + selectionTable <- designParameters[,.SD,.SDcol=c("Stratum", "Order", "SamplingUnitId", "InclusionProbability", "HTsamplingWeight", "SelectionProbability", "HHsamplingWeight", "SelectionDescription")] + sampleTable <- designParameters[,.SD,.SDcol=c("Stratum", "N", "n", "SelectionMethod", "FrameDescription")] + stratificationTable <- designParameters[,.SD,.SDcol=c("Stratum", names(designParameters)[!(names(designParameters) %in% names(selectionTable)) & !(names(designParameters) %in% names(sampleTable))])] + assignmentTable <- data.table::data.table(DataRecordsId=character()) + + if (any(is.na(sampleTable$Stratum)) | any(is.na(selectionTable$Stratum))){ + stop("Invalid design specification. The mandatory column 'Stratum' may not contain missing values (NA).") + } + if (any(is.na(sampleTable$SelectionMethod))){ + stop("Invalid design specification. The mandatory column 'SelectionMethod' may not contain missing values (NA).") + } + + for (n in stratificationColumns){ + stopifnot(n %in% names(sampleTable)) + if (any(is.na(sampleTable[[n]]))){ + stop(paste("Invalid design specification. The stratification column", n, "may not contain missing values (NA).")) + } + } + + sampleTableStrings <- apply(sampleTable, 1, paste, collapse="/") + sampleTable <- sampleTable[!duplicated(sampleTableStrings),] + duplicatedStrata <- sampleTable$Stratum[duplicated(sampleTable$Stratum)] + if (length(duplicatedStrata)>0){ + stop(paste("Invalid design specification. The column stratum must uniquely identify all sample table variables. Duplicates found for:", paste(duplicatedStrata, collapse=","))) + } + + if (length(stratificationColumns) > 0){ + stratificationVariableStrings <- apply(stratificationTable[,.SD, .SDcol=stratificationColumns], 1, paste, collapse="/") + duplicatedStrata <- stratificationTable$Stratum[duplicated(stratificationVariableStrings)] + + if (length(duplicatedStrata)>0){ + stop(paste("Invalid design specification. The stratification variables must uniquely identify a stratum. Duplicates found for:", paste(duplicatedStrata, collapse=","))) + } + } + + if (any(!is.na(selectionTable$SampleUnitId) & duplicated(paste(selectionTable$Stratum, selectionTable$SampleUnitId)))){ + stop("Invalid design specification. Some strata contain duplicated SampleUnitIds.") + } + + stratificationTable <- stratificationTable[!duplicated(stratificationTable$Stratum),] + + validSelectionMethod <- c("Poisson", "FSWR", "FSWOR") + if (!all(sampleTable$SelectionMethod %in% validSelectionMethod)){ + invalid <- sampleTable$SelectionMethod[!(sampleTable$SelectionMethod %in% validSelectionMethod)] + stop(paste("Invalid design specification. Unkown selection method:", paste(invalid, collapse=","))) + } + + designParameters <- list() + designParameters$SampleTable <- sampleTable + designParameters$SelectionTable <- selectionTable + designParameters$StratificationVariables <- stratificationTable + designParameters$Assignment <- assignmentTable + + return(designParameters) +} + +#' Define PSU Sampling Design Parameters +#' @description +#' Define sampling parameters for Primary Sampling Units in multi-stage sampling. +#' @details +#' The DefintionMethod 'ResourceFile' reads sampling parameters from a tab delimited file with headers corresponding to those listed in +#' \code{\link[RstoxFDA]{PSUSamplingParametersData}}. The data is provided as one table, so that the information in 'sampleTable' is repeated for each entry in 'selectionTable'. +#' Any columns not named in \code{\link[RstoxFDA]{PSUSamplingParametersData}} are assumed to be stratification variables. +#' The conditions listed for the variables in \code{\link[RstoxFDA]{PSUSamplingParametersData}} are checked upon reading the data, and +#' execution halts with error if any are violated. +#' +#' The DefinitionMethod 'AdHocStoxBiotic' constructs Sampling Design Parameters from data, +#' assuming equal probability sampling with fixed sample size, selection without replacement and complete response. +#' This is a reasonable approximation if within-strata sampling is approximately simple random selections, +#' and non-response is believed to be at random. +#' @param processData \code{\link[RstoxFDA]{PSUSamplingParametersData}} as returned from this function. +#' @param DefinitionMethod 'ResourceFile' or 'AdHocStoxBiotic' +#' @param FileName path to resource file +#' @param StoxBioticData \code{\link[RstoxData]{StoxBioticData}} Sample data to construct design parameters from +#' @param SamplingUnitId name of column in 'StoxBioticData' that identifies the Primary Sampling Unit the design is constructed for. +#' @param StratificationColumns name of any column (at the same table as 'SamplingUnitId') that are to be used to define Strata for sampling. +#' @param UseProcessData If TRUE, bypasses execution of function and returns existing 'processData' +#' @return \code{\link[RstoxFDA]{PSUSamplingParametersData}} +#' @noRd +#' @concept StoX-functions +#' @concept Analytical estimation +#' @md +DefinePSUSamplingParameters <- function(processData, DefinitionMethod=c("ResourceFile", "AdHocStoxBiotic"), FileName=character(), StoxBioticData, SamplingUnitId=character(), StratificationColumns=character(), UseProcessData=F){ + + if (UseProcessData){ + return(processData) + } + + DefinitionMethod <- checkOptions(DefinitionMethod, "DefinitionMethod", c("ResourceFile", "AdHocStoxBiotic")) + + if (DefinitionMethod == "ResourceFile"){ + return(parseDesignParameters(FileName)) + } + if (DefinitionMethod == "AdHocStoxBiotic"){ + return(assumeDesignParametersStoxBiotic(StoxBioticData, SamplingUnitId, StratificationColumns)) + } +} + +#' collapse strata, recalulate n/N and sampling weights +#' For strata with unknown inclusion and selection probability, sampling weights will be NA. +#' @noRd +collapseStrataIndividualDesignParamaters <- function(designParam, collapseVariables=c()){ + + sv <- names(designParam$StratificationVariables)[!names(designParam$StratificationVariables) %in% c("SampleId", "Stratum")] + if (!all(collapseVariables %in% sv)){ + missing <- collapseVariables[!(collapseVariables %in% sv)] + stop("The following are specified as strata to collapse, but are not StratificationVariables:", paste(missing, collapse=",")) + } + + Nstrata <- designParam$StratificationVariables[,list(Nstrata=get(".N")), by="SampleId"] + if (all(Nstrata$Nstrata==1)){ + return(designParam) + } + + retain <- sv[!(sv %in% collapseVariables)] + selectionStratumIndex <- match(paste(designParam$SelectionTable$Stratum, designParam$SelectionTable$SampleId), paste(designParam$StratificationVariables$Stratum, designParam$StratificationVariables$SampleId)) + sampleStratumIndex <- match(paste(designParam$SampleTable$Stratum, designParam$SampleTable$SampleId), paste(designParam$StratificationVariables$Stratum, designParam$StratificationVariables$SampleId)) + + if (length(retain)==0){ + designParam$StratificationVariables$Stratum <- "All" + } + else{ + designParam$StratificationVariables$Stratum <- apply(designParam$StratificationVariables[,.SD, .SDcol=retain], 1, paste, collapse="/") + } + + designParam$SampleTable$Stratum <- designParam$StratificationVariables$Stratum[sampleStratumIndex] + designParam$SelectionTable$Stratum <- designParam$StratificationVariables$Stratum[selectionStratumIndex] + + NselectionMethods <- designParam$SampleTable[,list(NselMet=length(unique(get("SelectionMethod")))), by=c("SampleId", "Stratum")] + if (any(NselectionMethods$NselMet>1)){ + stop("Cannot collapse strate with heterogenous selection methods") + } + + weights <- designParam$SelectionTable[,list(HTsum=sum(1/get("InclusionProbability")), HHsum=sum(1/get("SelectionProbability"))),by=c("SampleId", "Stratum")] + designParam$SelectionTable <- merge(designParam$SelectionTable, weights, by=c("SampleId", "Stratum")) + designParam$SelectionTable$HTsamplingWeight <- 1/(designParam$SelectionTable$InclusionProbability * designParam$SelectionTable$HTsum) + designParam$SelectionTable$HHsamplingWeight <- 1/(designParam$SelectionTable$SelectionProbability * designParam$SelectionTable$HHsum) + designParam$SelectionTable$HHsum <- NULL + designParam$SelectionTable$HTsum <- NULL + + designParam$SampleTable <- designParam$SampleTable[,list(N=sum(get("N")), n=sum(get("n")), SelectionMethod=get("SelectionMethod")[1], SampleDescription=as.character(NA)), by=c("SampleId", "Stratum")] + designParam$StratificationVariables <- designParam$StratificationVariables[!duplicated(paste(designParam$StratificationVariables$SampleId, designParam$StratificationVariables$Stratum)),.SD, .SDcol=c("SampleId", "Stratum", retain)] + + return(designParam) +} + +#' make IndividualDesignParameters for stratified selection of Individuals +#' @noRd +extractIndividualDesignParametersStoxBiotic <- function(StoxBioticData, StratificationColumns, Parameters){ + + StratificationColumns <- c("SpeciesCategory", StratificationColumns) + + individuals <- RstoxData::mergeByIntersect(StoxBioticData$Individual, StoxBioticData$Sample) + individuals <- RstoxData::mergeByIntersect(individuals, StoxBioticData$SpeciesCategory) + individuals <- RstoxData::mergeByIntersect(individuals, StoxBioticData$Haul) + + if (any(is.na(individuals$CatchFractionNumber))){ + missing <- unique(individuals$Sample[is.na(individuals$CatchFractionNumber)]) + stop(paste("Cannot infer sampling parameters for individuals from Samples with missing total number. CatchFractionNumber missing for Sample:", paste(truncateStringVector(missing), collapse=","))) + } + + #check first, so no restrictions need to be put on names of Parameters. + hasParam <- rep(FALSE, nrow(individuals)) + for (p in Parameters){ + hasParam <- hasParam | !is.na(individuals[[p]]) + } + + if (length(StratificationColumns)>0){ + individuals$Stratum <- apply(individuals[,.SD, .SDcol=StratificationColumns], 1, paste, collapse="/") + } + else{ + individuals$Stratum <- "All" + } + + individuals$SampleId <- individuals$Haul + + stratificationTable <- individuals[!duplicated(paste(individuals$SampleId, individuals$Stratum)), .SD,.SDcol=c("SampleId", "Stratum", StratificationColumns)] + + individuals$Sampled <- hasParam + stratumTotals <- individuals[,list(totalInStratum=get(".N"), sampledInStratum=sum(get("Sampled"))), by=c("Stratum", "SampleId")] + sampleTotals <- individuals[,list(totalInSample=get(".N")), by=c("SampleId")] + stratumFraction <- merge(stratumTotals, sampleTotals, by="SampleId") + stratumFraction$StratumFraction <- stratumFraction$totalInStratum / stratumFraction$totalInSample + individuals <- merge(individuals, stratumFraction, by=c("SampleId", "Stratum")) + individuals$N <- individuals$CatchFractionNumber*individuals$StratumFraction + individuals$n <- individuals$sampledInStratum + + sampleTable <- individuals[!duplicated(paste(individuals$Stratum, individuals$SampleId)), .SD, .SDcol=c("SampleId", "Stratum", "N", "n")] + sampleTable$SelectionMethod <- "FSWOR" + sampleTable$SampleDescription <- as.character(NA) + + selectedIndividuals <- individuals[individuals$Sampled,] + selectedIndividuals$IndividualId <- selectedIndividuals$Individual + selectedIndividuals$Order <- as.numeric(NA) + selectedIndividuals$InclusionProbability <- selectedIndividuals$n/selectedIndividuals$N + selectedIndividuals$HTsamplingWeight <- 1/selectedIndividuals$n + selectedIndividuals$SelectionProbability <- as.numeric(NA) #Need order. Could possibly be obtained by convention from StoxBioticiIndividual$IndividualKey, would have to be user choice. + selectedIndividuals$HHsamplingWeight <- as.numeric(NA) + selectedIndividuals$SelectionDescription <- as.character(NA) + + selectionTable <- selectedIndividuals[,.SD,.SDcol=c("SampleId", "Stratum", "Order", "IndividualId", "InclusionProbability", "HTsamplingWeight", "SelectionProbability", "HHsamplingWeight", "SelectionDescription")] + + designParams <- list() + designParams$SampleTable <- sampleTable + designParams$SelectionTable <- selectionTable + designParams$StratificationVariables <- stratificationTable + + return(designParams) +} + +#' Define Sampling Parameters for Individuals +#' @description +#' Define approximate sampling parameters for the selection of individuals from a haul. Design parameters are inferred from data provided in ~\code{\link[RstoxData]{StoxBioticData}}, +#' and specify how a set of individuals recorded on the Individual table were selected for observation/measurement from a Haul (the table Haul in StoxBioticData). +#' @details +#' StoxBioticData represents sorting of species as a separate level in the hierarchy (SpeciesCategory) and Samples are selected in Stratified from the species categories. +#' This represent sampling stratified on taxons in addition to some additional stratification criteria in the cases where more than one sample is present for +#' a species-category in a Haul. The exact criteria for stratification is not important for the calculation of sampling parameters, but only clearly encoded criteria can be used +#' in subsequent analysis, so sampling parameters are reported stratified only on SpeciesCategory. Any other stratification has been incorporated into selection or inclusion probabilities. +#' +#' Sampling parameters are approximately inferred, assuming that all selected individuals are recorded, and based on some user-controllable assumptions about the selection process, +#' specified by the appropriate 'DefinitionMethod'. +#' +#' Individuals with a non-missing value for any of the parameters in 'Parameters' are treated as selected for observation. +#' In this way selection of individuals may be specified differently for different parameters. +#' For instance one may define one design for length-measurements and another for length-stratified age, weight and sex observations. +#' +#' The available DefinitionMethods specifies how Individuals are selected from a Sample, and are: +#' \describe{ +#' \item{SRS}{Simple Random Selection. Individuals are selected for measurment by simple random selection without replacement from each Sample.} +#' \item{Stratified}{Stratified Selection. Individuals are selected for measurement by stratified random selection without replacement from each Sample. Strata are specified as the combination of columns provided in 'StratificationColumns'. The number of fish in each stratum is estimated by the total in sample and the proportion of measured fish in each stratum.} +#' \item{LengthStratified}{Length stratified selection. Individuals are selected for measurement by stratified random selection without replacement from each Sample. Strata are length groups, specified by the left closed intervals starting with [0,'LengthInterval'>.} +#' } +#' +#' +#' @param processData \code{\link[RstoxFDA]{IndividualSamplingParametersData}} as returned from this function. +#' @param StoxBioticData Data to define individual sampling parameters for +#' @param DefinitionMethod Method to infer sampling parameters, 'SRS', 'Stratified' or 'LengthStratified'. See details. +#' @param Parameters Measurements / observations of individuals included in the design specification. Must all be columns on the Individual-table of StoxBioticData. +#' @param LengthInterval width of length strata in cm. Specifies left closed intervals used for Length Stratified selection (DefinitionMethod 'Stratified'). A value of 5 indicates that observation are selected stratified on length groups [0 cm,5 cm>, [5 cm, 10 cm>, and so on. +#' @param StratificationColumns names of columns in the Individual table of StoxBioticData that identify strata for Stratified selection (DefinitionMethod 'Stratified'). +#' @param UseProcessData If TRUE, bypasses execution of function and returns existing 'processData' +#' @return \code{\link[RstoxFDA]{IndividualSamplingParametersData}} where SampleId refers to the variable 'Haul' on the 'Haul' table in StoxBioticData, and IndividualId refers to the variable 'Individual' on the 'Individual' table of StoxBioticData. +#' @noRd +#' @concept StoX-functions +#' @concept Analytical estimation +#' @md +DefineIndividualSamplingParameters <- function(processData, StoxBioticData, DefinitionMethod=c("SRS", "Stratified", "LengthStratified"), Parameters=c(), LengthInterval=numeric(), StratificationColumns=character(), UseProcessData=FALSE){ + + #May want to expose this option if DefinitionMethods are added that only provides relative selection probabilities. + CollapseStrata=TRUE + + if (UseProcessData){ + return(processData) + } + DefinitionMethod <- checkOptions(DefinitionMethod, "DefinitionMethod", c("SRS", "Stratified", "LengthStratified")) + checkMandatory(StoxBioticData, "StoxBioticData") + checkMandatory(Parameters, "Parameters") + + if (!all(Parameters %in% names(StoxBioticData$Individual))){ + stop("All values for the argument 'Parameters' must be names columns of the Individual table in 'StoxBioticData'") + } + + # + # Perform checks and + # Set stratification columns in accordance with DefinitionMethod + # + + if (DefinitionMethod == "SRS"){ + if (isGiven(LengthInterval) | isGiven(StratificationColumns)){ + stop("The arguments 'LengthInterval' and 'StratificationColumns' should not be provided in combination with DefinitionMethod SRS.") + } + } + + if (DefinitionMethod == "LengthStratified"){ + if (isGiven(StratificationColumns)){ + stop("The argument 'StratificationColumns' should not be provided in combination with DefinitionMethod 'LengthStratified'.") + } + if (!isGiven(LengthInterval)){ + stop("The argument 'LengthInterval' must be provided when DefinitionMethod is 'LengthStratified") + } + if (LengthInterval <=0){ + stop("LengthInterval must be a positive value.") + } + if ("IndividualTotalLength" %in% Parameters){ + stop("'IndividualTotalLength' may not be among the variables in 'Parameters' for length-stratified sampling.") + } + if ("LengthStratum" %in% Parameters){ + stop("'LengthStratum' may not be used as a 'Parameter' with DefinitionMethod 'LengthStratified'. Consider renaming or using the DefinitionMethod 'Stratified'") + } + if (any(is.na(StoxBioticData$Individual$IndividualTotalLength))){ + missing <- StoxBioticData$Individual$Individual[is.na(StoxBioticData$Individual$IndividualTotalLength)] + stop(paste("Cannot specify length stratified selection when some individuals are not measured. Missing IndividualTotalLength for:", paste(truncateStringVector(missing), collapse=","))) + } + + lengthGroups <- seq(0,max(StoxBioticData$Individual$IndividualTotalLength)+LengthInterval,LengthInterval) + StoxBioticData$Individual$LengthStratum <- paste(as.character(cut(StoxBioticData$Individual$IndividualTotalLength, lengthGroups, right=F)), "cm") + StratificationColumns <- c("LengthStratum") + } + + if (DefinitionMethod == "Stratified"){ + if (!isGiven(StratificationColumns)){ + stop("The argument 'StratificationColumns' must be provided when DefinitionMethod is 'Stratified") + } + if (!all(StratificationColumns %in% names(StoxBioticData$Individual))){ + stop("All values for the argument 'StratificationColumns' must be names of columns of the Individual table in 'StoxBioticData'") + } + for (st in StratificationColumns){ + if (any(is.na(StoxBioticData$Individual[[st]]))){ + stop("Cannot specify stratified selection when some individuals are not assigned a stratum. Missing values for:", st) + } + } + reserved_names <- c("Stratum", "SampleId") + if (any(reserved_names %in% StratificationColumns)){ + stop(paste(paste(reserved_names, collapse=","), "are reserved names in IndividualSamplingParametersData and cannot be specified as StratificationColumns")) + } + } + + params <- extractIndividualDesignParametersStoxBiotic(StoxBioticData, StratificationColumns, Parameters) + if (CollapseStrata){ + #SpeciesCategory is added to Stratification in extractIndividualDesignParametersStoxBiotic, and is not retained (not in 'StratificationColumns') + params <- collapseStrataIndividualDesignParamaters(params, StratificationColumns) + } + return(params) + +} + +#' Extend IndividualSamplingParametersData to reflect selection from PSU by specifying intermediate selection. +#' @return \code{\link[RstoxFDA]{IndividualSamplingParametersData}} +#' @noRd +DefineSamplingHierarchy <- function(IndividualSamplingParametersData, Hierarchy=character(), Stratification=character(), StrataSizes=character(), SelectionMetod=character(), CollapseStrata=character()){ + +} + +#' Assign PSU Sampling Parameters +#' @description +#' Assigns data records to PSU Sampling Parameters and provides non-response adjustments for +#' selected PSUs that was not sampled. +#' @details +#' Some sampling parameters provided in ~\code{\link[RstoxFDA]{PSUSamplingParametersData}} are only +#' interpretable for sampling with complete response. This function adjusts these parameters, removes non-respondents from the +#' ~\code{\link[RstoxFDA]{PSUSamplingParametersData}}, and checks that all responding PSUs are present in data records. +#' +#' If any respondants (rows of the SelectionTable of PSUSamplingParametersData that does not have NA for SamplingUnitId) are not +#' found in 'SamplingUnitId', execution halts with an error. +#' +#' Response after selection can generally be considered a process that modifies the sampling parameters that are set by design. +#' Typically sample size, InclusionProbabilities and normalized SamplingWeights need to be adjusted as non-respondents are removed, +#' since these are depend of the entire sample, not just the sampling unit they are assigned to. +#' SelectionProbabilites are by definition set for a single draw of a single sampling unit from the population and are valid even +#' when response is not complete. +#' +#' Treatment of non-response requires some assumption about systematic differences between respondents and non-respondents. +#' These assumptions are specified via the argument 'DefinitionMethod' and the following options are available: +#' \describe{ +#' \item{MissingAtRandom}{A response propensity is estimated for each stratum as the fraction of the sample resonding, and sample size (n) and InclusionProbability are adjusted by multiplying with this propensity. Sampling weights are adjusted by dividing them with their sum over repsondents in a stratum.} +#' } +#' +#' @param PSUSamplingParametersData ~\code{\link[RstoxFDA]{PSUSamplingParametersData}} with sampling parameters for PSU selection +#' @param StoxBioticData ~\code{\link[RstoxData]{StoxBioticData}} with data records for responding PSUs. +#' @param DataRecordsId name of Variable in ~\code{\link[RstoxData]{StoxBioticData}} that represent records of sampled PSUs +#' @param DefinitionMethod The method for dealing with non-response, e.g. 'MissingAtRandon' +#' @return ~\code{\link[RstoxFDA]{PSUSamplingParametersData}} +#' @concept StoX-functions +#' @concept Analytical estimation +#' @md +#' @noRd +AssignPSUSamplingParameters <- function(PSUSamplingParametersData, StoxBioticData, DataRecordsId, DefinitionMethod=c("MissingAtRandom")){ + checkMandatory(PSUSamplingParametersData, "PSUSamplingParametersData") + checkMandatory(StoxBioticData, "StoxBioticData") + checkMandatory(DataRecordsId, "DataRecordsId") + checkOptions(DefinitionMethod, "DefinitionMethod", c("MissingAtRandom")) + + level <- NULL + for (l in names(StoxBioticData)){ + if (DataRecordsId %in% names(StoxBioticData[[l]])){ + level <- l + } + } + if (is.null(level)){ + stop(paste("The variable provided for DataRecordsId (", DataRecordsId,") is not a variable in 'StoxBioticData'"), sep="") + } + + records <- PSUSamplingParametersData$SelectionTable$SamplingUnitId[!is.na(PSUSamplingParametersData$SelectionTable$SamplingUnitId)] + if (!all(records %in% StoxBioticData[[l]][[DataRecordsId]])){ + missing <- records[!(records %in% StoxBioticData[[l]][[DataRecordsId]])] + stop(paste("Records are not found for all sampled PSUs. Missing for the following SamplingUnitIds (", DataRecordsId,"): ", paste(truncateStringVector(missing), collapse=","), sep="")) + } + + if (DefinitionMethod == "MissingAtRandom"){ + responsePropensity <- PSUSamplingParametersData$SelectionTable[,list(ResponsePropensity=sum(!is.na(get("SamplingUnitId")))/get(".N")), by=c("Stratum")] + + PSUSamplingParametersData$SampleTable$n <- PSUSamplingParametersData$SampleTable$n * responsePropensity$ResponsePropensity[match(PSUSamplingParametersData$SampleTable$Stratum, responsePropensity$Stratum)] + + # correct sampling probabilities + PSUSamplingParametersData$SelectionTable$InclusionProbability <- PSUSamplingParametersData$SelectionTable$InclusionProbability * responsePropensity$ResponsePropensity[match(PSUSamplingParametersData$SelectionTable$Stratum, responsePropensity$Stratum)] + + #remove non respondants + PSUSamplingParametersData$SelectionTable <- PSUSamplingParametersData$SelectionTable[!is.na(PSUSamplingParametersData$SelectionTable$SamplingUnitId)] + + #correct normalized sampling weights + weights <- PSUSamplingParametersData$SelectionTable[,list(HHsum=sum(get("HHsamplingWeight")), HTsum=sum(get("HTsamplingWeight"))), by=c("Stratum")] + PSUSamplingParametersData$SelectionTable$HTsamplingWeight <- PSUSamplingParametersData$SelectionTable$HTsamplingWeight / weights$HTsum[match(PSUSamplingParametersData$SelectionTable$Stratum, weights$Stratum)] + PSUSamplingParametersData$SelectionTable$HHsamplingWeight <- PSUSamplingParametersData$SelectionTable$HHsamplingWeight / weights$HHsum[match(PSUSamplingParametersData$SelectionTable$Stratum, weights$Stratum)] + + } + + PSUSamplingParametersData$Assignment$DataRecordsId <- DataRecordsId + return(PSUSamplingParametersData) +} + +AnalyticalPSUEstimate <- function(StoxBioticData, IndividualSamplingParametersData, variables, DomainVariables=character(), IncludeStratumInDomain=FALSE){ + # Estimate totals and means of all variables, and total and mean number in domain, depending on sampling parameters available (only means if only sample weights present) +} + +#' @noRd +AnalyticalPopulationEstimate <- function(StoxBioticData, PSUSamplingParametersData, AnalyticalPSUEstimateData, DomainVariables=character(), MeanOfMeans=F, IncludeStratumInDomain=FALSE){ + + #calculate total and means for all counts and totalvariables in AnalyticalPSUEstimateData. If MeanOfMeans, calculate mean of Means in stead. + + #estimate by stratum + + #add over strata if not stratum is included in domain + +} + +AnalyticalRatioEstimate <- function(AnalyticalPopulationEstimateData, StoxLandingData, DomainVariables){ + + #ratio estimate for total number in domain. Domain variables not in landings are taken to be estimated domain of interest. Additional domain variables are specified in DomainVariables + +} + +#' @noRd +ProbabilisticSuperIndividuals <- function(StoxBioticData, PSUSamplingParametersData, IndividualSamplingParametersData){ + +} diff --git a/R/StoxAnalyticalReportFunction.R b/R/StoxAnalyticalReportFunction.R new file mode 100644 index 00000000..34b8bd54 --- /dev/null +++ b/R/StoxAnalyticalReportFunction.R @@ -0,0 +1,2 @@ +#' @noRd +ReportAnalyticalCatchAtAge <- function(){} \ No newline at end of file diff --git a/R/StoxDataTypes.R b/R/StoxDataTypes.R index c6ca8d0b..0f0231b6 100644 --- a/R/StoxDataTypes.R +++ b/R/StoxDataTypes.R @@ -16,6 +16,257 @@ is.Date <- function(date){ return(FALSE) } +#' PSU Sampling Design Parameters +#' +#' Sampling parameters for selection of Primary Sampling Units +#' +#' @details +#' Encodes information about the selection of Primary Sampling Units in multi-stage sampling, used in analytical design based estimation. +#' Information is encoded in three tables. +#' +#' The SampleTable encodes information about the sample of sampling units: +#' \describe{ +#' \item{Stratum}{Mandatory, chr: Identifies the stratum the sample is taken from. Treat unstratified sample as single-stratum sampling (provide only one stratum.} +#' \item{N}{Optional, num: The total number of PSUs in Stratum (total available for selection, not total selected)} +#' \item{n}{Optional, num: The number of PSUs selected from the Stratum} +#' \item{SelectionMethod}{Mandatory, chr: 'Poission', 'FSWR' or 'FSWOR'. The manner of selection for use in bootstrap or inference of inclusionProbabilities, selectionProbabilites, co-inclusion probabilities or co-selection probabilities.} +#' \item{FrameDescription}{Optional, chr: Free text field describing the sampling frame.} +#' } +#' +#' The SelectionTable encodes information abut the selection of sampling units for sampling: +#' \describe{ +#' \item{Stratum}{Mandatory, chr: Identifies the stratum the PSU is taken from.} +#' \item{Order}{Optional, num: Identifes the order of seleciton. May be necessary for inference when selections are not independent (e.g. FSWOR)} +#' \item{SamplingUnitId}{Optional, chr: Identifes PSU. NA encodes non-response} +#' \item{InclusionProbability}{Optional, num: The inclusion probability of the PSU} +#' \item{HTsamplingWeight}{Optional, num: The normalized Horvitz-Thompson sampling weight of the PSU} +#' \item{SelectionProbability}{Optional, num: The selection probability of the PSU} +#' \item{HHsamplingWeight}{Optional, num: The normalized Hansen-Hurwitz sampling weight of the PSU} +#' \item{SelectionDescription}{Optional, chr: Free text description of the PSU.} +#' } +#' +#' The StratificationVariables table encodes information about which columns in the sampleTable are stratification variables (if any): +#' \describe{ +#' \item{Stratum}{Mandatory, chr: Identifies the stratum. In addition the Stratum is identified by the combination of all other columns on this table.} +#' \item{...}{Mandatory if present (may not contain NAs), chr: Additional columns in the sampleTable that are stratification variables.} +#' } +#' +#' The Assignment encodes which identifier in sample records (e.g. \code{\link[RstoxData]{StoxBioticData}}) correspond to the SamplingUnitId +#' \describe{ +#' \item{DataRecordsId}{Optional, character. The identifier in data records that correspond to SamplingUnitId} +#' } +#' This field is optional, since SamplingParameters may be subject to processing before they are assigned to data records. +#' +#' Optional columns may be NA. +#' +#' The selection methods available for 'SelectionMethod' are explained here: +#' \describe{ +#' \item{Poission}{Poission sampling. Selection is performed randomly without replacement, and each selection is performed individually. Sample size is not fixed, and 'n' represents the expected sample size.} +#' \item{FSWR}{Fixed sample size with replacement. A random selection of a fixed sample size 'n' is chosen with replacement} +#' \item{FSWOR}{Fixed sample size without replacement. A random selection of a fixed sample size 'n' is chosen without replacement. Order of selection could be specified in the 'selectionTable'} +#' } +#' +#' The SelectionProbability is defined as: The probability of selecting the sampling unit when it was selected from the population. +#' The HHsamplingWeight: The normalized sampling weight, or the fraction of the stratum represented by the sampled unit when estimating with the Hansen-Hurwitz strategy: 1 / (SelectionProbability*Q) , where Q is the sum of the reciprocal of the SelectionProbabilites for the sampled units. For equal probability sampling with replacement, this is simply 1/n, where n i sample size. +#' The InclusionProbability is defined as: The probability of the sampling unit being included in the sample. +#' The HTsamplingWeight: The normalized sampling weight, or the fraction of the stratum represented by the sample when estimating with the Horvitz-Thompson strategy: 1 / (InclusionProbability*P), where P is the sum of the reciprocal of the InclusionProbabilites for the sampled units. For equal probability sampling without replacement, this is simply 1/n, where n is sample size. +#' +#' @name PSUSamplingParametersData +#' @concept Data types +#' @concept Analytical estimation +#' +NULL + +#' Check if table is correctly formatted PSU Sampling Parameters Data +#' @param table \code{\link[RstoxFDA]{PSUSamplingParametersData}} +#' @return validity +#' @concept Data types +#' @noRd +is.PSUSamplingParametersData <- function(PSUSamplingParametersData){ + if (!is.list(PSUSamplingParametersData)){ + return(FALSE) + } + if (!all(sapply(PSUSamplingParametersData, data.table::is.data.table))){ + return(FALSE) + } + if (!all(c("SampleTable", "SelectionTable", "StratificationVariables", "Assignment") %in% names(PSUSamplingParametersData))){ + return(FALSE) + } + if (nrow(PSUSamplingParametersData$Assignment)>1){ + return(FALSE) + } + if (!("DataRecordsId" %in% names(PSUSamplingParametersData$Assignment))){ + return(FALSE) + } + if (!all(c("Stratum", "N", "n", "SelectionMethod", "FrameDescription") %in% names(PSUSamplingParametersData$SampleTable))){ + return(FALSE) + } + if (!all(c("Stratum", "Order", "SamplingUnitId", "InclusionProbability", "SelectionProbability", "HHsamplingWeight", "SelectionDescription") %in% names(PSUSamplingParametersData$SelectionTable))){ + return(FALSE) + } + if (!all(c("Stratum") %in% names(PSUSamplingParametersData$StratificationVariables))){ + return(FALSE) + } + if (any(duplicated(PSUSamplingParametersData$SampleTable$Stratum))){ + return(FALSE) + } + #test that mandatory fields are not NA. + if (any(is.na(PSUSamplingParametersData$SampleTable$Stratum))){ + return(FALSE) + } + if (any(is.na(PSUSamplingParametersData$SampleTable$SelectionMethod))){ + return(FALSE) + } + if (any(is.na(PSUSamplingParametersData$SelectionTable$Stratum))){ + return(FALSE) + } + if (any(is.na(PSUSamplingParametersData$StratificationVariables$Stratum))){ + return(FALSE) + } + for (n in names(PSUSamplingParametersData$StratificationVariables)){ + if (any(is.na(PSUSamplingParametersData$StratificationVariables[[n]]))){ + return(FALSE) + } + } + + if (ncol(PSUSamplingParametersData$StratificationVariables) > 1){ + stratificationVariableStrings <- apply(PSUSamplingParametersData$StratificationVariables[,.SD, .SDcol=names(PSUSamplingParametersData$StratificationVariables[names(PSUSamplingParametersData$StratificationVariables)!="Stratum"])], 1, paste, collapse="/") + duplicatedStrata <- PSUSamplingParametersData$StratificationVariables$Stratum[duplicated(stratificationVariableStrings)] + + if (length(duplicatedStrata)>0){ + return(FALSE) + } + } + return(TRUE) +} + +#' Individual Sub-Sampling Design Parameters +#' +#' Sampling parameters for selection of a sub-sample of individuals +#' +#' @details +#' Encodes information about the selection of a sub-sample of observations from individuals, used in analytical design based estimation. +#' A sub-sample is simply a sample of a sample. This data type is intended to represent the final stage of sampling in multi-stage sampling, +#' and therefor has a reference to the Sample it was taken from ('SampleId'). Apart from that there is no principal difference from single +#' stage sampling. All stratification is specified within the sample identifed by 'SampleId', and all sampling probabilites are specified within strata. +#' +#' The SampleTable encodes information about the sample of sampling units: +#' \describe{ +#' \item{SampleId}{Mandatory, chr: Identifies the sample the sub-sample is taken from.} +#' \item{Stratum}{Mandatory, chr: Identifies the within-sample stratum the sub-sample is taken from. Treat unstratified sample as single-stratum sampling (provide only one stratum.} +#' \item{N}{Optional, num: The total number of individuals in Stratum. For unstratified sampling, the total number of individuals in the sample the sub-sample is taken from.} +#' \item{n}{Optional, num: The number of individuals selected from the Stratum} +#' \item{SelectionMethod}{Mandatory, chr: 'Poission', 'FSWR' or 'FSWOR'. The manner of selection for use in bootstrap or inference of inclusionProbabilities, selectionProbabilites, co-inclusion probabilities or co-selection probabilities.} +#' \item{SampleDescription}{Optional, chr: Free text field describing the sample that is subsampled.} +#' } +#' +#' The SelectionTable encodes information abut the selection of sampling units for sampling: +#' \describe{ +#' \item{SampleId}{Mandatory, chr: Identifies the sample the sub-sample is taken from.} +#' \item{Stratum}{Mandatory, chr: Identifies the within sample-stratum the individual is taken from.} +#' \item{Order}{Optional, num: Identifes the order of seleciton. May be necessary for inference when selections are not independent (e.g. FSWOR)} +#' \item{IndividualId}{Optional, chr: Identifes individual. NA encodes non-response / observation failure} +#' \item{InclusionProbability}{Optional, num: The inclusion probability of the individual} +#' \item{HTsamplingWeight}{Optional, num: The normalized Horvitz-Thompson sampling weight of the individual} +#' \item{SelectionProbability}{Optional, num: The selection probability of the individual} +#' \item{HHsamplingWeight}{Optional, num: The normalized Hansen-Hurwitz sampling weight of the individual} +#' \item{SelectionDescription}{Optional, chr: Free text description of sampling unit.} +#' } +#' +#' The StratificationVariables table encodes information about which columns in the sampleTable are stratification variables (if any): +#' \describe{ +#' \item{SampleId}{Mandatory, chr: Identifies the sample the stratification applies to} +#' \item{Stratum}{Mandatory, chr: Identifies the within-sample stratum. In addition the Stratum is identified by the combination of all other columns on this table.} +#' \item{...}{Mandatory if present (may not contain NAs), chr: Additional columns in the sampleTable that are stratification variables.} +#' } +#' +#' Optional columns may be NA. +#' +#' The selection methods available for 'SelectionMethod' are explained here: +#' \describe{ +#' \item{Poission}{Poission sampling. Selection is performed randomly without replacement, and each selection is performed individually. Sample size is not fixed, and 'n' represents the expected sample size.} +#' \item{FSWR}{Fixed sample size with replacement. A random selection of a fixed sample size 'n' is chosen with replacement} +#' \item{FSWOR}{Fixed sample size without replacement. A random selection of a fixed sample size 'n' is chosen without replacement. Order of selection should be specified in the 'selectionTable'} +#' } +#' +#' The SelectionProbability is defined as: The probability of selecting the sampling unit when it was selected from the population. +#' The HHsamplingWeight: The normalized sampling weight, or the fraction of the stratum represented by the sampled unit when estimating with the Hansen-Hurwitz strategy: 1 / (SelectionProbability*Q) , where Q is the sum of the reciprocal of the SelectionProbabilites for the sampled units. For equal probability sampling with replacement, this is simply 1/n, where n i sample size. +#' The InclusionProbability is defined as: The probability of the sampling unit being included in the sample. +#' The HTsamplingWeight: The normalized sampling weight, or the fraction of the stratum represented by the sample when estimating with the Horvitz-Thompson strategy: 1 / (InclusionProbability*P), where P is the sum of the reciprocal of the InclusionProbabilites for the sampled units. For equal probability sampling without replacement, this is simply 1/n, where n is sample size. +#' +#' @name IndividualSamplingParametersData +#' @concept Data types +#' @concept Analytical estimation +#' +NULL + +#' Check if table is correctly formatted Individual Sampling Parameters Data +#' @param table \code{\link[RstoxFDA]{IndividualSamplingParametersData}} +#' @return validity +#' @concept Data types +#' @noRd +is.IndividualSamplingParametersData <- function(IndividualSamplingParametersData){ + if (!is.list(IndividualSamplingParametersData)){ + return(FALSE) + } + if (!all(sapply(IndividualSamplingParametersData, data.table::is.data.table))){ + return(FALSE) + } + if (!all(c("SampleTable", "SelectionTable", "StratificationVariables") %in% names(IndividualSamplingParametersData))){ + return(FALSE) + } + if (!all(c("SampleId", "Stratum", "N", "n", "SelectionMethod", "SampleDescription") %in% names(IndividualSamplingParametersData$SampleTable))){ + return(FALSE) + } + if (!all(c("SampleId", "Stratum", "Order", "IndividualId", "InclusionProbability", "HTsamplingWeight", "SelectionProbability", "HHsamplingWeight", "SelectionDescription") %in% names(IndividualSamplingParametersData$SelectionTable))){ + return(FALSE) + } + if (!all(c("Stratum") %in% names(IndividualSamplingParametersData$StratificationVariables))){ + return(FALSE) + } + if (any(duplicated(paste(IndividualSamplingParametersData$SampleTable$Stratum, IndividualSamplingParametersData$SampleTable$SampleId)))){ + return(FALSE) + } + #test that mandatory fields are not NA. + if (any(is.na(IndividualSamplingParametersData$SampleTable$Stratum))){ + return(FALSE) + } + if (any(is.na(IndividualSamplingParametersData$SampleTable$SampleId))){ + return(FALSE) + } + if (any(is.na(IndividualSamplingParametersData$SampleTable$SelectionMethod))){ + return(FALSE) + } + if (any(is.na(IndividualSamplingParametersData$SelectionTable$Stratum))){ + return(FALSE) + } + if (any(is.na(IndividualSamplingParametersData$SelectionTable$SampleId))){ + return(FALSE) + } + if (any(is.na(IndividualSamplingParametersData$StratificationVariables$Stratum))){ + return(FALSE) + } + if (any(is.na(IndividualSamplingParametersData$StratificationVariables$SampleId))){ + return(FALSE) + } + for (n in names(IndividualSamplingParametersData$StratificationVariables)){ + if (any(is.na(IndividualSamplingParametersData$StratificationVariables[[n]]))){ + return(FALSE) + } + } + + if (ncol(IndividualSamplingParametersData$StratificationVariables) > 2){ + stratificationVariableStrings <- apply(IndividualSamplingParametersData$StratificationVariables[,.SD, .SDcol=names(IndividualSamplingParametersData$StratificationVariables)[!(names(IndividualSamplingParametersData$StratificationVariables) %in% c("Stratum"))]], 1, paste, collapse="/") + duplicatedStrata <- IndividualSamplingParametersData$StratificationVariables$Stratum[duplicated(stratificationVariableStrings)] + + if (length(duplicatedStrata)>0){ + return(FALSE) + } + } + return(TRUE) + +} + #' Trip Partition #' diff --git a/R/auxfunctions.R b/R/auxfunctions.R index 23b01409..66a27c58 100644 --- a/R/auxfunctions.R +++ b/R/auxfunctions.R @@ -130,4 +130,14 @@ deprecationWarning <- function(functionName, deprecationTime, message=NULL){ warningstring <- paste(warningstring, message) } stoxWarning(warningstring) +} + +#' Construct truncated vector of strings with aal elements more than maxsize replaced by a single character "..." +#' @noRd +truncateStringVector <- function(missing, maxsize=5){ + + if (length(missing)>maxsize){ + missing <- c(missing[1:maxsize], "...") + } + return(missing) } \ No newline at end of file diff --git a/README.md b/README.md index d1e8c0ad..1a7e915a 100644 --- a/README.md +++ b/README.md @@ -28,11 +28,10 @@ The preferred way to communicate problems is by raising an issue on the RstoxFDA When reporting bugs, please report the versions your were using of R, RstoxFDA, and operating system. Please also report any error messages, and if possible include instructions for how to reproduce the problem. ## Reca -Reca is a library for estimating total catch at age from commerical catches. RstoxFDA contains functions for adapting data to Reca, running estimates, and plotting or tabulating results. These functions are availble in the StoX user interface. In addition some functions are provided for adapting Reca to other data formats than just the ones supported by Stox. +Reca is a library for estimating total catch at age from commerical catches. RstoxFDA contains functions for adapting data to Reca, running estimates, and plotting or tabulating results. These functions are available in the StoX user interface. In addition some functions are provided for adapting Reca to other data formats than just the ones supported by Stox. RstoxFDA development is otherwise independent of development of Reca, and Reca is only a suggested dependency. That means that RstoxFDA may be available for platforms (operating systems or R-versions) where Reca is not available, and functions dependent on Reca will not work in these cases. Reca is primarily available at: https://github.com/NorskRegnesentral/Reca. - One may also consider installing from the fork at https://github.com/StoXProject/reca or from the StoX repository at https://stoxproject.github.io/repo/, but these resources should be considered experimental, and they are not backed by a maintenance policy: ```r @@ -63,4 +62,11 @@ Currently the latest release/pre-release of RstoxFDA is being tested for the fol * R 4.0 (mac, linux, windows) * R 4.1 (mac, linux, windows) * R 4.2 (mac, linux, windows) -* R 4.3 (windows) +* R 4.3 (mac, linux, windows) + +and is tested with Reca for the following R versions: + +* R 4.0 (mac, linux, windows) +* R 4.1 (mac, linux, windows) +* R 4.2 (mac, linux, windows) +* R 4.3 (linux, windows) \ No newline at end of file diff --git a/data/CatchLotteryExample.rda b/data/CatchLotteryExample.rda new file mode 100644 index 00000000..6784c26c Binary files /dev/null and b/data/CatchLotteryExample.rda differ diff --git a/data/CatchLotterySamplingExample.rda b/data/CatchLotterySamplingExample.rda new file mode 100644 index 00000000..6bfb261d Binary files /dev/null and b/data/CatchLotterySamplingExample.rda differ diff --git a/inst/dataPrepScripts/prepDesignParameters.R b/inst/dataPrepScripts/prepDesignParameters.R new file mode 100644 index 00000000..c289988b --- /dev/null +++ b/inst/dataPrepScripts/prepDesignParameters.R @@ -0,0 +1,129 @@ +# +# Prepares example for catch lotteri design parameters +# + +#' Reads catch lottery parameters from file as exported by lottery system Pr oct 2023. +#' @param filename path to file with lottery parameters +#' @return ~\code{\link[data.table]{data.table}} with colmns: +#' \describe{ +#' \item{aar}{year (integer)} +#' \item{RC}{Radio call signal of vessel (character)} +#' \item{SQ}{serialnumber for message (integer) identifies message given year, vessel and message recipient} +#' \item{TM}{ERS message type (character). HIA means departure message sent to IMR catch lottery (determines target species and therefor inclusion in lottery). HIF means catch message sent to IMR catch lottery, also encoding wheter sample is requested, along with lottery parameters} +#' \item{BD}{Date for start of fishing operation in UTC: YYYYMMDD} +#' \item{BT}{Tome for start of fishing operation in UTC: HHMM} +#' \item{Svar}{Code for whether sample is requested. Code 641 means that a sample is requested. 642 means sample is not requested. 643 means no (more) samples will be requested from this trip (until ned departure message).} +#' \item{i.prob}{Inclusion probability used in sample selection. Assigned also to samples not selected} +#' \item{lotteri}{Identifier for lottery, sampling frame, all inclusion probabilities are conditioned only on catch being in lottery} +#' \item{HIF.stratum}{Any stratification used in setting sampling parameters. Stratification is already factored into inclusion probabilities, some other column that goes into inclusion prob calculation depends on HIF.stratum (e.g. kapasitet)} +#' \item{kvote}{quota / expected total catch, used in calculation of inclusion probabilities} +#' \item{kapasitet}{sampling capacity / expected number of samples, used in calculation of inclusion probabilities} +#' \item{lotteri.kg}{reported catch in kg that was used in calculation of inclusion probability} +#' +#' The fields RC, SQ, TM, BC and BT are defined in the ERS regulation (https://lovdata.no/dokument/SF/forskrift/2009-12-21-1743). +#' } +#' @noRd +parseLotteryFile <- function(filename){ + lotteriparams <- data.table::fread(filename, sep = "\t", dec=".", header = T, colClasses = c("integer", "character", "integer", "character", "character", "character", "character", "numeric", "character", "character", "numeric", "numeric", "numeric")) + return(lotteriparams) +} + +#' Prepares PSU design parameters from catch lottery data and associates them with serial number. +#' @description +#' Vessels are not always compliant about time-zone usage. Catches are therefore associated with the closest lottery message that does not exceed a difference of 'maxDiff' hours. +#' @param lotteryParams lottery parameters as read by parseLotteryFile +#' @param StoxBiotic ~\code{\link[RstoxData]{StoxBioticData}} with hauls sampled by catch lottery +#' @param platformCodes table relating platform codes to other identifiers. As downloaded from the platform table in NMD reference. Contains columns: "Platformnumber", "syscode", "sysname", "Value", "Description", "Deprecated", "Valid from", "Valid to, "New code" +#' @param maxDiff the highest acceptable difference in time (hours) between StoxBiotic station time and lottery BD and BT time. +#' @noRd +prepDesignParamFile <- function(lotteryParams, StoxBiotic, platformCodes, maxDiff=2){ + #partition lottery messages into set where sample was requested and other set + lotteryParamsFiltered <- lotteryParams + lotteryParamsFiltered$lotteryMessage <- paste(lotteryParamsFiltered$RC, lotteryParamsFiltered$BD, lotteryParamsFiltered$BT) + lotteryParamsFiltered$lotteryDateTime <- as.POSIXct(paste(lotteryParamsFiltered$BD,lotteryParamsFiltered$BT, sep=""), format="%Y%m%d%H%M", tz="UTC") + sampleRequested <- lotteryParamsFiltered[lotteryParamsFiltered$Svar=="641",] + sampleNotRequested <- lotteryParamsFiltered[lotteryParamsFiltered$Svar=="642",] + sampleNotRequestedTrip <- lotteryParamsFiltered[lotteryParamsFiltered$Svar=="631",] + + stationTable <- merge(StoxBiotic$Station, StoxBiotic$Haul) + + # annotate biotic data with the RC that was valid at the time of catch. + ITU <- data.table::data.table(platformCodes[platformCodes$sysname == "ITU Call Sign",]) + ITU$RC <- ITU$Value + ITU <- ITU[,.SD,.SDcol=c("Platformnumber", "RC", "Valid to")] + ITU <- merge(ITU, stationTable[,.SD, .SDcol=c("CatchPlatform", "DateTime")], by.x="Platformnumber", by.y="CatchPlatform") + ITU <- ITU[ITU$`Valid to`>=ITU$DateTime,] + ITU <- ITU[order(ITU$`Valid to`, decreasing = T),] + ITU <- ITU[!duplicated(paste(ITU$Platformnumber, ITU$DateTime)),] + + stationTable <- merge(stationTable, ITU[,.SD, .SDcol=c("Platformnumber", "RC", "DateTime")], by.x=c("CatchPlatform", "DateTime"), by.y=c("Platformnumber", "DateTime"), all.x=T) + + if (any(is.na(stationTable$RC))){ + stop("Could not find ITU call signal for all platforms, for the given dates.") + } + + stationTable <- merge(stationTable, sampleRequested, by="RC", all.y=T) + stationTable$timeDiff <- abs(difftime(stationTable$DateTime, stationTable$lotteryDateTime, unit="h")) + stationTable <- stationTable[is.na(stationTable$timeDiff) | stationTable$timeDiff<=maxDiff,] + stationTable <- stationTable[order(stationTable$timeDiff, decreasing=F),] + stationTable <- stationTable[is.na(stationTable$timeDiff) | !duplicated(stationTable$lotteryMessage),] + + if (!all(StoxBiotic$Haul$HaulKey %in% stationTable$HaulKey)){ + missing <- StoxBiotic$Haul[!(StoxBiotic$Haul$HaulKey %in% stationTable$HaulKey),] + warning(paste(nrow(missing), " samples are not requested from lottery and omitted from Design Parameters table.")) + } + + stationTable$description <- paste(stationTable$lotteri, stationTable$HIF.stratum, stationTable$lotteryMessage, sep="/") + stationTable$SelectionProbability <- stationTable$lotteri.kg/(stationTable$kvoteT*1000) + stationTable$HHsamplingWeight <- 1 / (stationTable$SelectionProbability * sum(1/stationTable$SelectionProbability)) + selectionTable <- stationTable[,.SD, .SDcol=c("HIF.stratum", "HaulKey", "i.prob", "SelectionProbability", "HHsamplingWeight", "kapasitet", "description")] + names(selectionTable) <- c("Stratum", "SamplingUnitId", "InclusionProbability", "SelectionProbability", "HHsamplingWeight", "n", "SelectionDescription") + if (length(unique(stationTable$kapasitet))!=1){ + selectionTable$SelectionProbability <- as.numeric(NA) + stationTable$HHsamplingWeight <- as.numeric(NA) + } + selectionTable$Order <- as.numeric(NA) + selectionTable$HTsamplingWeight <- 1 / (selectionTable$InclusionProbability * sum(1/selectionTable$InclusionProbability)) + selectionTable <- selectionTable[, .SD, .SDcol=c("Stratum", "Order", "SamplingUnitId", "InclusionProbability", "HTsamplingWeight", "SelectionProbability", "HHsamplingWeight", "SelectionDescription")] + selectionTable$SelectionDescription <- as.character(NA) #remove vessel identifying descriptions + + stopifnot(length(unique(stationTable$HIF.stratum))==1) + sampleTable <- data.table::data.table(Stratum = stationTable$HIF.stratum[[1]], N = sum(!is.na(lotteryParams$i.prob) & lotteryParams$i.prob>0)) + sampleTable$n <- sum(lotteryParams$Svar=="641") + sampleTable$SelectionMethod <- "Poisson" + stopifnot(length(unique(stationTable$lotteri))==1) + sampleTable$FrameDescription <- stationTable$lotteri[[1]] + + designTable <- list() + designTable$sampleTable <- sampleTable + designTable$selectionTable <- selectionTable + + return(designTable) +} + +#' Save design table +#' @noRd +saveDesignTable <- function(filename, designTable){ + data.table::fwrite(file = filename, x=merge(designTable$sampleTable, designTable$selectionTable), sep = "\t") +} + +lotteryParams <- parseLotteryFile("~/hi_sync/fiskerisampling/fangstprøvelotteri/lotterifiler/example2022.txt") +lotteryParams <- lotteryParams[lotteryParams$lotteri=="Sild2022" & lotteryParams$HIF.stratum=="Nordsjo",] + +bioData <- RstoxData::StoxBiotic(RstoxData::ReadBiotic("~/bioticsets/lotterieksempel/biotic_cruiseNumber_19-2022-20_Silde-sampling_2023-07-06T22.00.19.567Z.xml")) +platformCodes <- readxl::read_excel("~/codelists/NMDeksempler/platform.xlsx", 2) + +designParams <- prepDesignParamFile(lotteryParams, bioData, platformCodes) +designParamsFile <- "inst/testresources/lotteryParameters/lotteryDesignNSH.txt" +saveDesignTable(designParamsFile, designParams) + +#remove potential vessel identifying information +bioData$Station$CatchPlatform <- as.character(NA) +CatchLotteryExample <- bioData +#fix missing catchfractionnumber +filter <- is.na(CatchLotteryExample$Sample$CatchFractionNumber) +CatchLotteryExample$Sample$CatchFractionNumber[filter] <- CatchLotteryExample$Sample$CatchFractionWeight[filter]*CatchLotteryExample$Sample$SampleNumber[filter] / CatchLotteryExample$Sample$SampleWeight[filter] +usethis::use_data(CatchLotteryExample, overwrite = T) + +CatchLotterySamplingExample <- RstoxFDA::DefinePSUSamplingParameters(NULL, "ResourceFile", designParamsFile) +usethis::use_data(CatchLotterySamplingExample, overwrite = T) \ No newline at end of file diff --git a/inst/dataPrepScripts/prepHH_HT_comp.R b/inst/dataPrepScripts/prepHH_HT_comp.R new file mode 100644 index 00000000..2a81c837 --- /dev/null +++ b/inst/dataPrepScripts/prepHH_HT_comp.R @@ -0,0 +1,92 @@ +# +# Contains some quality assurance checks on lottery parameters. Package resources are not prepared here, but in prepDesignParameters.R +# + + +#' Reads catch lottery parameters from file as exported by lottery system Pr oct 2023. +#' @param filename path to file with lottery parameters +#' @return ~\code{\link[data.table]{data.table}} with colmns: +#' \describe{ +#' \item{aar}{year (integer)} +#' \item{RC}{Radio call signal of vessel (character)} +#' \item{SQ}{serialnumber for message (integer) identifies message given year, vessel and message recipient} +#' \item{TM}{ERS message type (character). HIA means departure message sent to IMR catch lottery (determines target species and therefor inclusion in lottery). HIF means catch message sent to IMR catch lottery, also encoding wheter sample is requested, along with lottery parameters} +#' \item{BD}{Date for start of fishing operation in UTC: YYYYMMDD} +#' \item{BT}{Tome for start of fishing operation in UTC: HHMM} +#' \item{Svar}{Code for whether sample is requested. Code 641 means that a sample is requested. 642 means sample is not requested. 643 means no (more) samples will be requested from this trip (until ned departure message).} +#' \item{i.prob}{Inclusion probability used in sample selection. Assigned also to samples not selected} +#' \item{lotteri}{Identifier for lottery, sampling frame, all inclusion probabilities are conditioned only on catch being in lottery} +#' \item{HIF.stratum}{Any stratification used in setting sampling parameters. Stratification is already factored into inclusion probabilities, some other column that goes into inclusion prob calculation depends on HIF.stratum (e.g. kapasitet)} +#' \item{kvote}{quota / expected total catch, used in calculation of inclusion probabilities} +#' \item{kapasitet}{sampling capacity / expected number of samples, used in calculation of inclusion probabilities} +#' \item{lotteri.kg}{reported catch in kg that was used in calculation of inclusion probability} +#' +#' The fields RC, SQ, TM, BC and BT are defined in the ERS regulation (https://lovdata.no/dokument/SF/forskrift/2009-12-21-1743). +#' } +#' @noRd +parseLotteryFile <- function(filename){ + lotteriparams <- data.table::fread(filename, sep = "\t", dec=".", header = T, colClasses = c("integer", "character", "integer", "character", "character", "character", "character", "numeric", "character", "character", "numeric", "numeric", "numeric")) + return(lotteriparams) +} + +est_total <- function(lotteryParams, lottery, stratum){ + + lott <- lotteryParams[lotteryParams$lotteri==lottery & lotteryParams$HIF.stratum==stratum] + stopifnot(length(unique(lott$kvoteT))==1) + + kapasitet <- NA + if (length(unique(lott$kapasitet))==1){ + kapasitet <- lott$kapasitet[1] + } + + totalFrame <- sum(lott$lotteri.kg[!is.na(lott$i.prob) & lott$i.prob>0]) + kvote <- lott$kvoteT[1]*1000 + sample <- lott[lott$Svar=="641",] + dekning <- sum(lott$lotteri.kg[!is.na(lott$i.prob)& lott$i.prob>0]) / kvote + HTtot <- sum(sample$lotteri.kg/sample$i.prob) + HHtot <- mean(sample$lotteri.kg/(sample$lotteri.kg/kvote))*(sum(lott$lotteri.kg, na.rm=T)/kvote) + + result <- data.table::data.table(lotteri=lottery, stratum=stratum, totalFrame=totalFrame, kvote=kvote, HTtot=HTtot, kapasitet=kapasitet, n=nrow(sample), RelErrHTFrame=(HTtot-totalFrame)/totalFrame) + return(result) +} + +lotteryStats <- function(lotteryParams){ + results <- NULL + for (lottery in unique(lotteryParams$lotteri)){ + for (stratum in unique(lotteryParams$HIF.stratum[lotteryParams$lotteri==lottery])){ + stats <- est_total(lotteryParams, lottery, stratum) + results <- rbind(stats, results) + } + } + results <- results[order(results$n, decreasing=T),] + return(results) +} + +simulate <- function(lotteryParams, lottery, stratum, iterations=1000){ + frame <- lotteryParams[lotteryParams$lotteri==lottery & lotteryParams$HIF.stratum==stratum & !is.na(lotteryParams$i.prob) & lotteryParams$i.prob>0, ] + + tab <- NULL + for (it in 1:iterations){ + for (i in 1:nrow(frame)){ + p <- frame$i.prob[i] + if (sample(c(T,F), 1, replace = TRUE,prob=c(p,1-p))){ + frame$Svar[i] <- "641" + } + else{ + frame$Svar[i] <- "642" + } + } + res <- est_total(frame, lottery, stratum) + tab <- rbind(tab, res) + } + + tab$iteration <- 1:iterations + return(tab) +} + +lotteryParams <- parseLotteryFile("~/hi_sync/fiskerisampling/fangstprøvelotteri/lotterifiler/example2022.txt") +lotteryStats <- lotteryStats(lotteryParams) + +#check if the perfomranse for Norsjo is extreme: +nssim<-simulate(lotteryParams, "Sild2022", "Nordsjo") +print(paste("Percentile Sild2002, Nordsjo: ", sum(nssim$RelErrHTFrame<(-.191))*100/nrow(nssim), "%")) diff --git a/inst/testresources/lotteryParameters/lotteryDesignNSH.txt b/inst/testresources/lotteryParameters/lotteryDesignNSH.txt new file mode 100644 index 00000000..28faae08 --- /dev/null +++ b/inst/testresources/lotteryParameters/lotteryDesignNSH.txt @@ -0,0 +1,65 @@ +Stratum N n SelectionMethod FrameDescription Order SamplingUnitId InclusionProbability HTsamplingWeight SelectionProbability HHsamplingWeight SelectionDescription +Nordsjo 790 71 Poisson Sild2022 38401 0.213196915139625 0.00894691694298864 0.00217741935483871 0.00849720338523697 +Nordsjo 790 71 Poisson Sild2022 38433 0.247412405326388 0.00770961783318542 0.00258064516129032 0.00716951535629369 +Nordsjo 790 71 Poisson Sild2022 38440 0.1700984890555 0.0112138273705256 0.00169354838709677 0.010924975781019 +Nordsjo 790 71 Poisson Sild2022 38445 0.108975250145035 0.0175035624118044 0.00104838709677419 0.0176480378001076 +Nordsjo 790 71 Poisson Sild2022 38438 0.641175307024414 0.00297493535911075 0.0092741935483871 0.00199499557740346 +Nordsjo 790 71 Poisson Sild2022 38441 0.233906828422691 0.00815476446377476 0.00241935483870968 0.00764748304671327 +Nordsjo 790 71 Poisson Sild2022 38448 0.267229615158194 0.00713788810842111 0.00282258064516129 0.00655498546861138 +Nordsjo 790 71 Poisson Sild2022 38403 0.0767615148687669 0.0248491069452791 0.000725806451612903 0.0254916101557109 +Nordsjo 790 71 Poisson Sild2022 38435 0.143125007547176 0.0133271964483698 0.00140322580645161 0.0131853155977815 +Nordsjo 790 71 Poisson Sild2022 38436 0.147680162555427 0.012916122648089 0.00145161290322581 0.0127458050778555 +Nordsjo 790 71 Poisson Sild2022 38443 0.0182839435244132 0.104324052943433 0.000167741935483871 0.110300236250672 +Nordsjo 790 71 Poisson Sild2022 38446 0.267229615158194 0.00713788810842111 0.00282258064516129 0.00655498546861138 +Nordsjo 790 71 Poisson Sild2022 38447 0.247412405326388 0.00770961783318542 0.00258064516129032 0.00716951535629369 +Nordsjo 790 71 Poisson Sild2022 38402 0.108975250145035 0.0175035624118044 0.00104838709677419 0.0176480378001076 +Nordsjo 790 71 Poisson Sild2022 38434 0.220161129326957 0.00866390492312064 0.00225806451612903 0.00819373183576422 +Nordsjo 790 71 Poisson Sild2022 38442 0.068529393639208 0.0278341160042645 0.000645161290322581 0.0286780614251748 +Nordsjo 790 71 Poisson Sild2022 38444 0.147680162555427 0.012916122648089 0.00145161290322581 0.0127458050778555 +Nordsjo 790 71 Poisson Sild2022 38437 0.147680162555427 0.012916122648089 0.00145161290322581 0.0127458050778555 +Nordsjo 790 71 Poisson Sild2022 38439 0.311481476435603 0.0061238154964569 0.00338709677419355 0.00546248789050948 +Nordsjo 790 71 Poisson Sild2022 38431 0.0889747087440887 0.0214381718038762 0.000846774193548387 0.0218499515620379 +Nordsjo 790 71 Poisson Sild2022 38415 0.136247006433322 0.0139999779972348 0.00133064516129032 0.0139045146303878 +Nordsjo 790 71 Poisson Sild2022 38406 0.097027756531011 0.0196588601081999 0.00092741935483871 0.0199499557740346 +Nordsjo 790 71 Poisson Sild2022 38417 0.101027788104015 0.0188804993957877 0.000967741935483871 0.0191187076167832 +Nordsjo 790 71 Poisson Sild2022 38419 0.0849215381190344 0.0224613818179075 0.000806451612903226 0.0229424491401398 +Nordsjo 790 71 Poisson Sild2022 38408 0.184718409696934 0.0103262858065158 0.00185483870967742 0.00997497788701731 +Nordsjo 790 71 Poisson Sild2022 38412 0.216686691440281 0.00880282531233035 0.00221774193548387 0.00834270877823266 +Nordsjo 790 71 Poisson Sild2022 38422 0.0518463272604551 0.0367905537970575 0.000483870967741936 0.0382374152335664 +Nordsjo 790 71 Poisson Sild2022 38432 0.162691408611336 0.0117243750517427 0.00161290322580645 0.0114712245700699 +Nordsjo 790 71 Poisson Sild2022 38429 0.338519898362406 0.00563469119978754 0.00375 0.0049338600301376 +Nordsjo 790 71 Poisson Sild2022 38404 0.162691408611336 0.0117243750517427 0.00161290322580645 0.0114712245700699 +Nordsjo 790 71 Poisson Sild2022 38413 0.097027756531011 0.0196588601081999 0.00092741935483871 0.0199499557740346 +Nordsjo 790 71 Poisson Sild2022 38414 0.240689370659604 0.00792496605491253 0.0025 0.00740079004520639 +Nordsjo 790 71 Poisson Sild2022 38405 0.267229615158194 0.00713788810842111 0.00282258064516129 0.00655498546861138 +Nordsjo 790 71 Poisson Sild2022 38411 0.458713264018625 0.00415827324360556 0.00556451612903226 0.00332499262900577 +Nordsjo 790 71 Poisson Sild2022 38416 0.116853084910627 0.0163235321832924 0.00112903225806452 0.0163874636715284 +Nordsjo 790 71 Poisson Sild2022 38427 0.589628784636336 0.00323501013172564 0.00806451612903226 0.00229424491401398 +Nordsjo 790 71 Poisson Sild2022 38409 0.162691408611336 0.0117243750517427 0.00161290322580645 0.0114712245700699 +Nordsjo 790 71 Poisson Sild2022 38424 0.347296770955766 0.00549229146878123 0.00387096774193548 0.0047796769041958 +Nordsjo 790 71 Poisson Sild2022 38421 0.116853084910627 0.0163235321832924 0.00112903225806452 0.0163874636715284 +Nordsjo 790 71 Poisson Sild2022 38430 0.116853084910627 0.0163235321832924 0.00112903225806452 0.0163874636715284 +Nordsjo 790 71 Poisson Sild2022 38426 0.233906828422691 0.00815476446377476 0.00241935483870968 0.00764748304671327 +Nordsjo 790 71 Poisson Sild2022 38407 0.068529393639208 0.0278341160042645 0.000645161290322581 0.0286780614251748 +Nordsjo 790 71 Poisson Sild2022 38418 0.229685923270579 0.00830462339656995 0.00236952419354839 0.00780830816726504 +Nordsjo 790 71 Poisson Sild2022 38428 0.143125007547176 0.0133271964483698 0.00140322580645161 0.0131853155977815 +Nordsjo 790 71 Poisson Sild2022 38420 0.0425447856855458 0.044834051024581 0.000395161290322581 0.0468213247757956 +Nordsjo 790 71 Poisson Sild2022 38423 0.463520590962966 0.00411514640221889 0.00564516129032258 0.00327749273430569 +Nordsjo 790 71 Poisson Sild2022 38410 0.267034025829916 0.00714311626141064 0.00282016129032258 0.00656060884762363 +Nordsjo 790 71 Poisson Sild2022 38425 0.0930100890906527 0.0205080449971025 0.000887096774193548 0.0208567719455817 +Nordsjo 790 71 Poisson Sild2022 0.199083079634323 0.00958120145498676 0.00201612903225806 0.00917697965605593 +Nordsjo 790 71 Poisson Sild2022 0.1700984890555 0.0112138273705256 0.00169354838709677 0.010924975781019 +Nordsjo 790 71 Poisson Sild2022 0.1700984890555 0.0112138273705256 0.00169354838709677 0.010924975781019 +Nordsjo 790 71 Poisson Sild2022 0.174511532419619 0.0109302523782158 0.00174193548387097 0.0106215042315462 +Nordsjo 790 71 Poisson Sild2022 0.202634883401628 0.00941326123239596 0.00205645161290323 0.0089970388784862 +Nordsjo 790 71 Poisson Sild2022 0.068529393639208 0.0278341160042645 0.000645161290322581 0.0286780614251748 +Nordsjo 790 71 Poisson Sild2022 0.132402285228299 0.0144065118586634 0.00129032258064516 0.0143390307125874 +Nordsjo 790 71 Poisson Sild2022 0.101027788104015 0.0188804993957877 0.000967741935483871 0.0191187076167832 +Nordsjo 790 71 Poisson Sild2022 0.0808504995083754 0.0235923723892148 0.000766129032258065 0.0241499464633051 +Nordsjo 790 71 Poisson Sild2022 0.140074844132902 0.0136173993557747 0.00137096774193548 0.0134955583177293 +Nordsjo 790 71 Poisson Sild2022 0.0552064585338686 0.0345513032879191 0.000516129032258065 0.0358475767814685 +Nordsjo 790 71 Poisson Sild2022 0.112922833036567 0.0168916687702827 0.00108870967741935 0.0169944067704739 +Nordsjo 790 71 Poisson Sild2022 0.386753608156008 0.00493196456873441 0.00443548387096774 0.00417135438911633 +Nordsjo 790 71 Poisson Sild2022 0.413480846839946 0.00461316432631274 0.00483870967741936 0.00382374152335664 +Nordsjo 790 71 Poisson Sild2022 0.247412405326388 0.00770961783318542 0.00258064516129032 0.00716951535629369 +Nordsjo 790 71 Poisson Sild2022 0.0518463272604551 0.0367905537970575 0.000483870967741936 0.0382374152335664 diff --git a/inst/tinytest/test-ConvergenceAnalysis.R b/inst/tinytest/test-ConvergenceAnalysis.R new file mode 100644 index 00000000..0f9d5d5e --- /dev/null +++ b/inst/tinytest/test-ConvergenceAnalysis.R @@ -0,0 +1,45 @@ +# ECA tests are only run if Reca is installed. +if (nchar(system.file(package="Reca"))>0){ +#context("Test StoxReportFunctions: ReportRecaParameterStatistics") +StoxLandingFile <- system.file("testresources","StoxLandingData.rds", package="RstoxFDA") +StoxLandingData <- readRDS(StoxLandingFile) +StoxBioticFile <- system.file("testresources","StoxBioticData.rds", package="RstoxFDA") +StoxBioticData <- readRDS(StoxBioticFile) +StoxBioticData$Individual <- StoxBioticData$Individual[StoxBioticData$Individual$IndividualAge<4,] +StoxBioticData$Haul$Gear <- StoxLandingData$Landing$Gear[c(1:20, 1:20, 1:5)] +StoxBioticData$Station$Area <- StoxLandingData$Landing$Area[c(1:20, 1:20, 1:5)] +prep <- RstoxFDA::PrepareRecaEstimate(StoxBioticData, StoxLandingData, FixedEffects = c(), RandomEffects = c("Gear", "Area"), CellEffect = c("All"), MinAge = 2, MaxAge = 3) + +fpath1 <- RstoxFDA:::makeTempDirReca("chain1") +fpath2 <- RstoxFDA:::makeTempDirReca("chain2") +fpath3 <- RstoxFDA:::makeTempDirReca("chain3") + +paramOut1 <- RstoxFDA::ParameterizeRecaModels(prep, 10, 50, 1, ResultDirectory = fpath1) +paramOut2 <- RstoxFDA::ParameterizeRecaModels(prep, 10, 50, 1, ResultDirectory = fpath2) + +paramSummary <- RstoxFDA::ReportRecaParameterStatistics(paramOut1) +paramSummary <- RstoxFDA::ReportRecaParameterStatistics(paramOut2, paramSummary, AppendReport = TRUE) +expect_true(RstoxFDA::is.ParameterizationSummaryData(paramSummary)) + +RstoxFDA:::removeTempDirReca(fpath1) +RstoxFDA:::removeTempDirReca(fpath2) +RstoxFDA:::removeTempDirReca(fpath3) + +convergence <- RstoxFDA::ReportParameterConvergence(paramSummary) + +expect_true(RstoxFDA::is.ParameterConvergenceData(convergence)) +expect_true(nrow(convergence$ConvergenceReport) < 433) +expect_true(nrow(convergence$ConvergenceReport) > 0) + +#construct three identical chains, should signal convergence +paramSummary <- RstoxFDA::ReportRecaParameterStatistics(paramOut1) +paramOut1$GlobalParameters$GlobalParameters$resultdir="B" +paramSummary <- RstoxFDA::ReportRecaParameterStatistics(paramOut1, paramSummary, AppendReport = T) +paramOut1$GlobalParameters$GlobalParameters$resultdir="C" +paramSummary <- RstoxFDA::ReportRecaParameterStatistics(paramOut1, paramSummary, AppendReport = T) + +#context("Check Gelman-Rubin for equal chains") +convergence <- RstoxFDA::ReportParameterConvergence(paramSummary, Tolerance = 0) +expect_equal(nrow(convergence$ConvergenceReport), 433) +expect_true(all(abs(convergence$ConvergenceReport$GelmanRubinR-1)<.1)) +} diff --git a/inst/tinytest/test-RecaFormatChecks.R b/inst/tinytest/test-RecaFormatChecks.R index b3c09efb..3421f2d9 100644 --- a/inst/tinytest/test-RecaFormatChecks.R +++ b/inst/tinytest/test-RecaFormatChecks.R @@ -1,3 +1,8 @@ +# ECA tests are not run for platforms where Reca is not available from StoX repositories. +# ECA tests are only run if Reca is installed. + +if (nchar(system.file(package="Reca"))>0){ + StoxLandingFile <- system.file("testresources","StoxLandingData.rds", package="RstoxFDA") StoxLandingData <- readRDS(StoxLandingFile) StoxBioticFile <- system.file("testresources","StoxBioticData.rds", package="RstoxFDA") @@ -145,4 +150,4 @@ errorPrep$GlobalParameters$delta.age <- .01 errorPrep$GlobalParameters$lgamodel <- "non-linear" expect_error(RstoxFDA:::sanitizeRecaInput(GlobalParameters=errorPrep$GlobalParameters, AgeLength=errorPrep$AgeLength, WeightLength=errorPrep$WeightLength, stage="parameterize"), "Some required global parameters are NA: thin") - +} diff --git a/inst/tinytest/test-RecaWrap.R b/inst/tinytest/test-RecaWrap.R index c9283512..8ad5129f 100644 --- a/inst/tinytest/test-RecaWrap.R +++ b/inst/tinytest/test-RecaWrap.R @@ -1,4 +1,7 @@ +# ECA tests are only run if Reca is installed. +if (nchar(system.file(package="Reca"))>0){ + fishdata <- data.table::as.data.table(readRDS(system.file(package = "RstoxFDA", "testresources", "fishdata.rda"))) landings <- data.table::as.data.table(readRDS(system.file(package = "RstoxFDA", "testresources", "landings.rda"))) @@ -237,3 +240,5 @@ expect_true(!is.null(recaObj$AgeLength$CovariateMatrix$constant)) expect_true(!is.null(recaObj$WeightLength$CovariateMatrix$constant)) expect_true("constant" %in% rownames(recaObj$AgeLength$info)) expect_true("constant" %in% rownames(recaObj$WeightLength$info)) + +} diff --git a/inst/tinytest/test-StoxAnalysisFunctions.R b/inst/tinytest/test-StoxAnalysisFunctions.R index ff4787a6..eb7cbad4 100644 --- a/inst/tinytest/test-StoxAnalysisFunctions.R +++ b/inst/tinytest/test-StoxAnalysisFunctions.R @@ -1,357 +1,362 @@ -#context("Test ParameterizeRecaModels cache") -StoxBioticFile <- system.file("testresources","StoxBioticData.rds", package="RstoxFDA") -StoxBioticData <- readRDS(StoxBioticFile) - -StoxLandingFile <- system.file("testresources","StoxLandingData.rds", package="RstoxFDA") -StoxLandingData <- readRDS(StoxLandingFile) - -StoxBioticDataWDupl <- StoxBioticData -StoxBioticDataWDupl$Station <- rbind(StoxBioticDataWDupl$Station, StoxBioticDataWDupl$Station) -expect_error(RstoxFDA:::PrepareRecaEstimate(StoxBioticDataWDupl, StoxLandingData, FixedEffects = c(), RandomEffects = c()), "Malformed StoxBioticData.") - -prep <- RstoxFDA:::PrepareRecaEstimate(StoxBioticData, StoxLandingData, FixedEffects = c(), RandomEffects = c()) - -#test wrong groupingvariables -prep <- RstoxFDA:::PrepareRecaEstimate(StoxBioticData, StoxLandingData, FixedEffects = c(), RandomEffects = c()) -fpath <- RstoxFDA:::makeTempDirReca() -paramOut <- RstoxFDA:::ParameterizeRecaModels(prep, 10, 50, 1, fpath, Seed=99) -expect_error(pred <- RstoxFDA::RunRecaModels(paramOut, StoxLandingData,GroupingVariables = c("")), "All 'GroupingVariables' must be column in 'StoxLandingData', the following are not: ") -RstoxFDA:::removeTempDirReca(fpath) - -#test non-linear setting -prep <- RstoxFDA:::PrepareRecaEstimate(StoxBioticData, StoxLandingData, FixedEffects = c(), RandomEffects = c()) -fpath <- RstoxFDA:::makeTempDirReca() -paramOut <- RstoxFDA:::ParameterizeRecaModels(prep, 10, 50, 1, fpath, Seed=99, Lgamodel = "log-linear") -paramOut <- RstoxFDA:::ParameterizeRecaModels(prep, 10, 50, 1, fpath, Seed=99, Lgamodel = "non-linear") -prednl <- RstoxFDA::RunRecaModels(paramOut, StoxLandingData) -RstoxFDA:::removeTempDirReca(fpath) - -fpath <- RstoxFDA:::makeTempDirReca() -# check that seed works -paramOut <- RstoxFDA:::ParameterizeRecaModels(prep, 10, 50, 1, fpath, Seed=99) -paramOut2 <- RstoxFDA:::ParameterizeRecaModels(prep, 10, 50, 1, fpath, Seed = paramOut$GlobalParameters$GlobalParameters$seed) -paramOut3 <- RstoxFDA:::ParameterizeRecaModels(prep, 10, 50, 1, fpath, Seed = paramOut$GlobalParameters$GlobalParameters$seed+1) - -expect_equal(paramOut2$FitProportionAtAge, paramOut$FitProportionAtAge) -expect_equal(paramOut2$FitLengthGivenAge, paramOut$FitLengthGivenAge) -expect_equal(paramOut2$FitWeightGivenLength, paramOut$FitWeightGivenLength) -expect_true(!all(paramOut$FitWeightGivenLength$fish$tau_Intercept == paramOut3$FitWeightGivenLength$fish$tau_Intercept)) - - -# check that cache works -paramOut <- RstoxFDA:::ParameterizeRecaModels(prep, 10, 50, 1, fpath, Seed=155) -expect_warning(paramOut2 <- RstoxFDA:::ParameterizeRecaModels(prep, 10, 50, 1, fpath, Seed=155, UseCache=T), "Using cached data for ParameterizeRecaModels") -expect_true(identical(paramOut, paramOut2)) -# check that cache fails when arguments are changed -expect_error(RstoxFDA:::ParameterizeRecaModels(prep, 10, 51, 1, fpath, Seed=155, UseCache=T), "Arguments or data are not identical to cached run. Re-run with UseCache=FALSE.") -# check that cache fails when data are changed -prep2 <- prep -prep2$AgeLength$DataMatrix$lengthCM[1]<-5 -expect_error(RstoxFDA:::ParameterizeRecaModels(prep2, 10, 50, 1, fpath, Seed=155, UseCache=T), "Arguments or data are not identical to cached run. Re-run with UseCache=FALSE.") -# check that failed runs didnt touch cache files -expect_warning(paramOut3 <- RstoxFDA:::ParameterizeRecaModels(prep, 10, 50, 1, fpath, Seed=155, UseCache=T), "Using cached data for ParameterizeRecaModels") -expect_true(identical(paramOut, paramOut2)) -# check that new run overwrites cahce files -paramOut4 <- RstoxFDA:::ParameterizeRecaModels(prep, 10, 52, 1, fpath, Seed=156) -expect_warning(paramOut5 <- RstoxFDA:::ParameterizeRecaModels(prep, 10, 52, 1, fpath, Seed=156, UseCache=T), "Using cached data for ParameterizeRecaModels") -expect_true(identical(paramOut4, paramOut5)) -RstoxFDA:::removeTempDirReca(fpath) - -fpath <- RstoxFDA:::makeTempDirReca() -# check that halts with error when no cache is found -expect_error(RstoxFDA:::ParameterizeRecaModels(prep, 10, 52, 1, fpath, Seed=156, UseCache=T), "No cached input found. Re-run with UseCache=FALSE.") -RstoxFDA:::removeTempDirReca(fpath) - -#context("PrepRecaEstimate: Missing values warnings") -StoxBioticFile <- system.file("testresources","StoxBioticData.rds", package="RstoxFDA") -StoxBioticData <- readRDS(StoxBioticFile) -StoxLandingFile <- system.file("testresources","StoxLandingData.rds", package="RstoxFDA") -StoxLandingData <- readRDS(StoxLandingFile) - -StoxBioticData$Cruise$Cruise[1] <- NA -expect_warning(expect_error(RstoxFDA:::PrepareRecaEstimate(StoxBioticData, StoxLandingData, FixedEffects = c(), RandomEffects = c("Cruise")))) -StoxBioticData <- readRDS(StoxBioticFile) -StoxBioticData$Station$CatchPlatform[1] <- NA -expect_warning(expect_error(RstoxFDA:::PrepareRecaEstimate(StoxBioticData, StoxLandingData, FixedEffects = c(), RandomEffects = c("CatchPlatform")))) -StoxBioticData <- readRDS(StoxBioticFile) -StoxBioticData$Haul$Gear[1] <- NA -expect_warning(expect_error(RstoxFDA:::PrepareRecaEstimate(StoxBioticData, StoxLandingData, FixedEffects = c("Gear"), RandomEffects = c()))) -StoxBioticData <- readRDS(StoxBioticFile) -StoxBioticData$Sample$CatchFractionNumber[1] <- NA -expect_warning(expect_error(RstoxFDA:::PrepareRecaEstimate(StoxBioticData, StoxLandingData, FixedEffects = c("CatchFractionNumber"), RandomEffects = c()))) -StoxBioticData <- readRDS(StoxBioticFile) -StoxBioticData$Individual$IndividualTotalLength[1] <- NA -expect_warning(expect_error(RstoxFDA:::PrepareRecaEstimate(StoxBioticData, StoxLandingData, FixedEffects = c(), RandomEffects = c()))) -StoxBioticData <- readRDS(StoxBioticFile) -StoxBioticData$Station$DateTime[1] <- NA -expect_warning(expect_error(RstoxFDA:::PrepareRecaEstimate(StoxBioticData, StoxLandingData, FixedEffects = c(), RandomEffects = c()),"Cannot proceed with missing values for Reca-effects")) - -#context("PrepRecaEstimate: Missing cell warnings") -StoxBioticFile <- system.file("testresources","StoxBioticData.rds", package="RstoxFDA") -StoxBioticData <- readRDS(StoxBioticFile) -StoxLandingFile <- system.file("testresources","StoxLandingData.rds", package="RstoxFDA") -StoxLandingData <- readRDS(StoxLandingFile) -StoxBioticData$Station$Area <- NA -StoxBioticData$Station$Area <- c(StoxLandingData$Landing$Area[1:20], StoxLandingData$Landing$Area[1:20], StoxLandingData$Landing$Area[1:5]) -expect_warning(expect_error(RstoxFDA:::PrepareRecaEstimate(StoxBioticData, StoxLandingData, FixedEffects = c("Gear", "Area"), RandomEffects = c()))) - -#context("PrepRecaEstimate: StockSplitting") -manual <- RstoxFDA:::DefineStockSplittingParameters(DefinitionMethod = "FunctionParameters", - StockNameCC="S1", StockNameS="S2", ProbabilityType1As1=.8, - ProbabilityType1As5=.2, ProbabilityType2As2=.6, - ProbabilityType2As4=.4, ProbabilityType4As2=.4, - ProbabilityType4As4=.6, ProbabilityType5As1=.2, - ProbabilityType5As5=.8) -StoxBioticFile <- system.file("testresources","StoxBioticData.rds", package="RstoxFDA") -StoxBioticData <- readRDS(StoxBioticFile) -StoxBioticData$Individual$otolithtype <- c(rep(c(1,5), 1045), c(1,5,1)) - -StoxLandingFile <- system.file("testresources","StoxLandingData.rds", package="RstoxFDA") -StoxLandingData <- readRDS(StoxLandingFile) - -prep <- RstoxFDA:::PrepareRecaEstimate(StoxBioticData, StoxLandingData, FixedEffects = c(), RandomEffects = c(), UseStockSplitting=T, UseStockSplittingError=T, StockSplittingParameters=manual) -expect_true(prep$GlobalParameters$GlobalParameters$CC) -expect_true(prep$GlobalParameters$GlobalParameters$CCerror) -expect_true(RstoxFDA:::is.StockSplittingParameters(prep$AgeLength$StockSplittingParameters)) -expect_true(is.null(prep$AgeLength$CCerrorList)) -fpath <- RstoxFDA:::makeTempDirReca() -#make sure it works with trailing "/" on path -pathWtrailing <- paste0(fpath, "/") -param <- RstoxFDA:::ParameterizeRecaModels(prep, 100, 400, ResultDirectory = pathWtrailing, Seed = 100) - -pathWsubDir <- file.path(fpath, "subdir") -param <- RstoxFDA:::ParameterizeRecaModels(prep, 100, 400, ResultDirectory = pathWsubDir, Seed = 100) - -#context("check that age group names are set correct for stock splitting") -expect_equal(sum(is.na(param$FitProportionAtAge$constant$Age)), 0) -expect_equal(param$FitProportionAtAge$constant$Age[1], "S1 2") - - -#context("Check that back conversion to eca objects works fine with stock splitting") -ecafit <- RstoxFDA:::stox2recaFit(param) - -result <- RstoxFDA:::RunRecaModels(param, StoxLandingData = StoxLandingData) - -expect_true("Stock" %in% result$GroupingVariables$GroupingVariables) -resultAgg <- RstoxFDA:::RunRecaModels(param, StoxLandingData = StoxLandingData, GroupingVariables = c("Gear")) -expect_true(all(c("Stock", "Gear") %in% resultAgg$GroupingVariables$GroupingVariables)) - -RstoxFDA:::removeTempDirReca(fpath) -expect_true(RstoxFDA:::is.StockSplittingParameters(param$AgeLength$StockSplittingParameters)) -expect_true(is.null(prep$AgeLength$CCerrorList)) -expect_equal(param$AgeLength$StockSplittingParameters, manual) -expect_true(RstoxFDA::is.RecaCatchAtAge(result)) -expect_true("Stock" %in% names(result$CatchAtAge)) -expect_true("Stock" %in% names(result$MeanLength)) -expect_true("Stock" %in% names(result$MeanWeight)) - -#stock splitting w warning -StoxBioticData$Individual$otolithtype[1] <- 9 -expect_warning(RstoxFDA:::PrepareRecaEstimate(StoxBioticData, StoxLandingData, FixedEffects = c(), RandomEffects = c(), UseStockSplitting=T, UseStockSplittingError=T, StockSplittingParameters=manual), "StoX: Some aged fish does not have Otolithtype set, or have it set to an unrecognized value. This may slow down Stox processing of Reca results.") -StoxBioticData$Individual$IndividualAge[1] <- NA -expect_silent(RstoxFDA:::PrepareRecaEstimate(StoxBioticData, StoxLandingData, FixedEffects = c(), RandomEffects = c(), UseStockSplitting=T, UseStockSplittingError=T, StockSplittingParameters=manual)) - -#context("PrepRecaEstimate: AgerrorMatrix") -ageerorfile <- system.file("testresources","AgeErrorHirstEtAl2012.txt", package="RstoxFDA") -ageerror <- RstoxFDA::DefineAgeErrorMatrix(FileName = ageerorfile) -expect_true(RstoxFDA::is.AgeErrorMatrix(ageerror)) -StoxBioticFile <- system.file("testresources","StoxBioticData.rds", package="RstoxFDA") -StoxBioticData <- readRDS(StoxBioticFile) - -StoxLandingFile <- system.file("testresources","StoxLandingData.rds", package="RstoxFDA") -StoxLandingData <- readRDS(StoxLandingFile) - -prep <- RstoxFDA:::PrepareRecaEstimate(StoxBioticData, StoxLandingData, FixedEffects = c(), RandomEffects = c(), UseAgingError = T, AgeErrorMatrix = ageerror, MinAge = 0, MaxAge = 14) -expect_true(!is.null(prep$AgeLength$AgeErrorMatrix)) -expect_warning(est <- RstoxFDA::RunRecaEstimate(prep, 10, 50, Seed = 99)) - -#context("PrepareRecaEstimate: configuration tests") -StoxBioticFile <- system.file("testresources","StoxBioticData.rds", package="RstoxFDA") -StoxBioticData <- readRDS(StoxBioticFile) - -StoxLandingFile <- system.file("testresources","StoxLandingData.rds", package="RstoxFDA") -StoxLandingData <- readRDS(StoxLandingFile) - -StoxLandingData$Landing$NewConst <- 1 -StoxBioticData$Station$NewConst <- 1 - - -expect_error(RstoxFDA:::PrepareRecaEstimate(StoxBioticData, StoxLandingData, FixedEffects = c("NewConst"), RandomEffects = c()), "Only one level for categorical covariate NewConst") -expect_error(RstoxFDA:::PrepareRecaEstimate(StoxBioticData, StoxLandingData, FixedEffects = c(), RandomEffects = c("NewConst")), "Only one level for categorical covariate NewConst") - -StoxBioticData$Station$Area <- StoxLandingData$Landing$Area[c(7,8,13,4,3,4,11,20,4,5,6,20,4,12,3,3,10,4,1,20,11,5,11,5,15,8,14,7,10,6,13,16,11,14,19,20,2,19,11,16,15,5,11,11,9)] -expect_error(RstoxFDA:::PrepareRecaEstimate(StoxBioticData, StoxLandingData, FixedEffects = c("Area"), RandomEffects = c("Area")), "Some random effects are also specified as fixed effects: Area") -expect_error(RstoxFDA:::PrepareRecaEstimate(StoxBioticData, StoxLandingData, FixedEffects = c(), RandomEffects = c("Area"), CarEffect = "Area"), "UseCarEffect is False, while the parameter 'CarEffect' is given") -expect_error(RstoxFDA:::PrepareRecaEstimate(StoxBioticData, StoxLandingData, FixedEffects = c(), RandomEffects = c("Area"), CarEffect = "Area", UseCarEffect = T, CarNeighbours = list()), "The CAR effect Area is also specified as fixed effect or random effect") - -#check CAR value cehcks -carfile <- system.file("testresources","mainarea_neighbour_correct_codes.txt", package="RstoxFDA") -car <- RstoxFDA::DefineCarNeighbours(NULL, FileName = carfile) -expect_true(RstoxFDA::is.CarNeighbours(car)) -car$Neighbours[9] <- paste(car$Neighbours[9], "30", sep=",") -car$Neighbours[29] <- paste(car$Neighbours[29], "08", sep=",") -prepCar <- RstoxFDA:::PrepareRecaEstimate(StoxBioticData, StoxLandingData, FixedEffects = c(), RandomEffects = c(), CarEffect = "Area", UseCarEffect = T, CarNeighbours = car) - -fpath <- RstoxFDA:::makeTempDirReca() -paramOut <- RstoxFDA:::ParameterizeRecaModels(prepCar, 10, 50, 1, fpath, Seed=42) -result <- RstoxFDA:::RunRecaModels(paramOut, StoxLandingData) -RstoxFDA:::removeTempDirReca(fpath) -expect_true(RstoxFDA::is.RecaParameterData(paramOut)) -expect_true(RstoxFDA::is.RecaCatchAtAge(result)) - - -#context("ParameterizeRecaModels: simple case") -StoxBioticFile <- system.file("testresources","StoxBioticData.rds", package="RstoxFDA") -StoxBioticData <- readRDS(StoxBioticFile) - -StoxLandingFile <- system.file("testresources","StoxLandingData.rds", package="RstoxFDA") -StoxLandingData <- readRDS(StoxLandingFile) - -prep <- RstoxFDA:::PrepareRecaEstimate(StoxBioticData, StoxLandingData, FixedEffects = c(), RandomEffects = c()) - -fpath <- RstoxFDA:::makeTempDirReca() -paramOut <- RstoxFDA:::ParameterizeRecaModels(prep, 10, 50, 1, fpath, Seed=43) - -expect_true(c("FitLengthGivenAge") %in% names(paramOut)) -expect_equal(length(paramOut$FitLengthGivenAge), 4) -expect_true(RstoxFDA::is.RecaParameterData((paramOut))) - -#context("test-StoxAnalysisFunctions: RunRecaModels") -results <- RstoxFDA:::RunRecaModels(paramOut, StoxLandingData) -expect_true("Age" %in% names(results$CatchAtAge)) -expect_true(RstoxFDA::is.RecaCatchAtAge(results)) - -#context("test-StoxAnalysisFunctions: RunRecaModels with GroupingVariables") -results <- RstoxFDA:::RunRecaModels(paramOut, StoxLandingData, GroupingVariables = c("Area", "Usage")) -expect_equal(length(unique(paste(results$CatchAtAge$Area, results$CatchAtAge$Usage))), length(unique(paste(StoxLandingData$Landing$Area, StoxLandingData$Landing$Usage)))) -expect_true(RstoxFDA::is.RecaCatchAtAge(results)) -expect_warning(RstoxFDA:::RunRecaModels(paramOut, StoxLandingData, GroupingVariables = c("Area", "Usage"), CollapseLength = F), "StoX: Producing estimates for all length groups in combination with age and several 'GroupingVariables'. This may exhaust memory, consider the option 'CollapseLength'") -RstoxFDA:::removeTempDirReca(fpath) - -#context("test-StoxAnalysisFunctions: RunRecaModels with random effects in landings") - -StoxLandingFile <- system.file("testresources","StoxLandingData.rds", package="RstoxFDA") -StoxLandingData <- readRDS(StoxLandingFile) - -StoxBioticFile <- system.file("testresources","StoxBioticData.rds", package="RstoxFDA") -StoxBioticData <- readRDS(StoxBioticFile) -StoxBioticData$Haul$Gear <- StoxLandingData$Landing$Gear[sample.int(20,45, replace=T)] - -fpath <- RstoxFDA:::makeTempDirReca() - -prep <- RstoxFDA:::PrepareRecaEstimate(StoxBioticData, StoxLandingData, FixedEffects = c(), RandomEffects = c("Gear")) - -#check that seed works -paramOut <- RstoxFDA:::ParameterizeRecaModels(prep, 50, 50, 1, fpath, Seed=111) -seed <- sample.int(.Machine$integer.max, 1) -results <- RstoxFDA:::RunRecaModels(paramOut, StoxLandingData) -paramOut2 <- RstoxFDA:::ParameterizeRecaModels(prep, 50, 50, 1, fpath, Seed=paramOut$GlobalParameters$GlobalParameters$seed) -results2 <- RstoxFDA:::RunRecaModels(paramOut2, StoxLandingData) -paramOut3 <- RstoxFDA:::ParameterizeRecaModels(prep, 50, 50, 1, fpath, Seed=paramOut$GlobalParameters$GlobalParameters$seed+1) -results3 <- RstoxFDA:::RunRecaModels(paramOut3, StoxLandingData) -expect_equal(results2, results) -expect_true(!all(results$CatchAtAge == results3$CatchAtAge)) -#/ seed - -paramOut <- RstoxFDA:::ParameterizeRecaModels(prep, 50, 50, 1, fpath, Seed=100) -results <- RstoxFDA:::RunRecaModels(paramOut, StoxLandingData) - -expect_true("Gear" %in% names(paramOut$Landings$AgeLengthCov)) -expect_true("Age" %in% names(results$CatchAtAge)) -expect_true(RstoxFDA::is.RecaCatchAtAge(results)) - -paramOut <- RstoxFDA:::ParameterizeRecaModels(prep, 10, 50, 1, fpath, Seed=44) -results <- RstoxFDA:::RunRecaModels(paramOut, StoxLandingData, GroupingVariables = "Gear") -expect_true("Gear" %in% names(paramOut$Landings$AgeLengthCov)) -expect_true("Age" %in% names(results$CatchAtAge)) -expect_true(RstoxFDA::is.RecaCatchAtAge(results)) - -#context("RunRecaModels: Test collapse Length") -resultsWlength <- RstoxFDA:::RunRecaModels(paramOut, StoxLandingData, GroupingVariables = "Gear", CollapseLength = F) -expect_true("Age" %in% names(resultsWlength$CatchAtAge)) -expect_true(RstoxFDA::is.RecaCatchAtAge(resultsWlength)) -expect_equal(nrow(results$CatchAtAge)*2*results$CatchAtAge$Length[1], nrow(resultsWlength$CatchAtAge)) -expect_equal(length(unique(results$CatchAtAge$Length)),1) -expect_true(length(unique(resultsWlength$CatchAtAge$Length))> 1) - -#context("RunRecaModels: Test collapse Length wo Aggregation") -results <- RstoxFDA:::RunRecaModels(paramOut, StoxLandingData) -expect_true("Age" %in% names(results$CatchAtAge)) -expect_true(RstoxFDA::is.RecaCatchAtAge(results)) - -resultsWlength <- RstoxFDA:::RunRecaModels(paramOut, StoxLandingData, CollapseLength = F) -expect_true("Age" %in% names(resultsWlength$CatchAtAge)) -expect_true(RstoxFDA::is.RecaCatchAtAge(resultsWlength)) -expect_equal(nrow(results$CatchAtAge)*2*results$CatchAtAge$Length[1], nrow(resultsWlength$CatchAtAge)) -expect_equal(length(unique(results$CatchAtAge$Length)),1) -expect_true(length(unique(resultsWlength$CatchAtAge$Length))> 1) - - -#context("test-StoxAnalysisFunctions: PrepareRecaEstimate missing arguments") -expect_error(RstoxFDA:::ParameterizeRecaModels(prep, 10, 50, 1, ResultDirectory = NULL), "Argument 'ResultDirectory' must be provided.") - -RstoxFDA:::removeTempDirReca(fpath) - -#context("test-StoxAnalysisFunctions: PrepareRecaEstimate simple case") -StoxBioticFile <- system.file("testresources","StoxBioticData.rds", package="RstoxFDA") -StoxBioticData <- readRDS(StoxBioticFile) - -StoxLandingFile <- system.file("testresources","StoxLandingData.rds", package="RstoxFDA") -StoxLandingData <- readRDS(StoxLandingFile) - -prep <- RstoxFDA:::PrepareRecaEstimate(StoxBioticData, StoxLandingData, FixedEffects = c(), RandomEffects = c()) -expect_equal(length(prep$CovariateMaps$CovariateMaps_randomEffects_AgeLength_catchSample$values), length(unique(StoxBioticData$Individual$HaulKey))) - -prep <- RstoxFDA:::PrepareRecaEstimate(StoxBioticData, StoxLandingData, FixedEffects = c(), RandomEffects = c(), MinAge=1, MaxAge=30) - -#context("test-StoxAnalysisFunctions: RunRecaEstimate simple case") -expect_warning(result <- RstoxFDA::RunRecaEstimate(prep, 10, 50, Thin=1, Seed = 42)) -expect_true(all(c("input", "fit", "prediction", "covariateMaps") %in% names(result))) -expect_equal(dim(result$prediction$TotalCount)[3], 10) - - -#context("test-StoxAnalysisFunctions: PrepareRecaEstimate, missing sample dates") -StoxBioticData$Station$DateTime[1] <- NA -expect_error(suppressWarnings(RstoxFDA:::PrepareRecaEstimate(StoxBioticData, StoxLandingData, FixedEffects = c(), RandomEffects = c()))) - -#context("test-StoxAnalysisFunctions: PrepareRecaEstimate, stratified samples (nFish), missing CatchFractionNumber") -StoxBioticDataDelp <- readRDS(system.file("testresources","StoxBioticDelpr.rds", package="RstoxFDA")) -expect_error(suppressWarnings(RstoxFDA:::PrepareRecaEstimate(StoxBioticDataDelp, StoxLandingData, FixedEffects = c(), RandomEffects = c()))) - -#context("test-StoxAnalysisFunctions: PrepareRecaEstimate, stratified samples (nFish)") -StoxBioticDataDelp$Sample$CatchFractionNumber[2] <- 3000 -prep <- RstoxFDA:::PrepareRecaEstimate(StoxBioticDataDelp, StoxLandingData, FixedEffects = c(), RandomEffects = c()) - -##context("test-StoxAnalysisFunctions: RunRecaEstimate, stratified samples (nFish)") -#to few iterations to converge consistently. removing test -#est <- RunRecaEstimate(prep, 10, 200, 0) - -#context("test-StoxAnalysisFunctions: PrepareRecaEstimate with with random effect Area") -StoxBioticFile <- system.file("testresources","StoxBioticData.rds", package="RstoxFDA") -StoxBioticData <- readRDS(StoxBioticFile) - -StoxLandingFile <- system.file("testresources","StoxLandingData.rds", package="RstoxFDA") -StoxLandingData <- readRDS(StoxLandingFile) - -StoxBioticData$Station$Area <- c(rep(StoxLandingData$Landing$Area[10], 20), rep(StoxLandingData$Landing$Area[20], 25)) -StoxBioticData$Station$GG <- c(rep(StoxLandingData$Landing$Gear[10], 20), rep(StoxLandingData$Landing$Gear[20], 25)) -StoxLandingData$Landing$GG <- StoxLandingData$Landing$Gear - -prep <- RstoxFDA:::PrepareRecaEstimate(StoxBioticData, StoxLandingData, FixedEffects = c(), RandomEffects = c("Area")) -expect_true("Area" %in% names(prep$Landings$AgeLengthCov)) - -#context("test-StoxAnalysisFunctions: PrepareRecaEstimate cellEffect") -prepCell <- RstoxFDA:::PrepareRecaEstimate(StoxBioticData, StoxLandingData, FixedEffects = c(), RandomEffects = c("Area", "GG"), CellEffect = "All") -expect_equal(prepCell$AgeLength$info$interaction[prepCell$AgeLength$info$covariate=="Area"], 1) -expect_equal(prepCell$AgeLength$info$interaction[prepCell$AgeLength$info$covariate=="GG"], 1) - -fpath <- RstoxFDA:::makeTempDirReca() -paramOut <- RstoxFDA:::ParameterizeRecaModels(prepCell, 10, 50, 1, fpath, Seed = 451) -expect_true("cell" %in% names(paramOut$FitProportionAtAge)) - -RstoxFDA:::removeTempDirReca(fpath) - -#context("test-StoxAnalysisFunctions: RunRecaEstimate with random effect Area") -expect_warning(est <- RstoxFDA::RunRecaEstimate(prep, 10, 100, 0, Seed = 112)) -expect_true("Area" %in% names(est$fit$ProportionAtAge$Intercept$cov)) +# Tests StoX analysis functions that interfaces Reca. +# ECA tests are only run if Reca is installed. + +if (nchar(system.file(package="Reca"))>0){ + #context("Test ParameterizeRecaModels cache") + StoxBioticFile <- system.file("testresources","StoxBioticData.rds", package="RstoxFDA") + StoxBioticData <- readRDS(StoxBioticFile) + + StoxLandingFile <- system.file("testresources","StoxLandingData.rds", package="RstoxFDA") + StoxLandingData <- readRDS(StoxLandingFile) + + StoxBioticDataWDupl <- StoxBioticData + StoxBioticDataWDupl$Station <- rbind(StoxBioticDataWDupl$Station, StoxBioticDataWDupl$Station) + expect_error(RstoxFDA:::PrepareRecaEstimate(StoxBioticDataWDupl, StoxLandingData, FixedEffects = c(), RandomEffects = c()), "Malformed StoxBioticData.") + + prep <- RstoxFDA:::PrepareRecaEstimate(StoxBioticData, StoxLandingData, FixedEffects = c(), RandomEffects = c()) + + #test wrong groupingvariables + prep <- RstoxFDA:::PrepareRecaEstimate(StoxBioticData, StoxLandingData, FixedEffects = c(), RandomEffects = c()) + fpath <- RstoxFDA:::makeTempDirReca() + paramOut <- RstoxFDA:::ParameterizeRecaModels(prep, 10, 50, 1, fpath, Seed=99) + expect_error(pred <- RstoxFDA::RunRecaModels(paramOut, StoxLandingData,GroupingVariables = c("")), "All 'GroupingVariables' must be column in 'StoxLandingData', the following are not: ") + RstoxFDA:::removeTempDirReca(fpath) + + #test non-linear setting + prep <- RstoxFDA:::PrepareRecaEstimate(StoxBioticData, StoxLandingData, FixedEffects = c(), RandomEffects = c()) + fpath <- RstoxFDA:::makeTempDirReca() + paramOut <- RstoxFDA:::ParameterizeRecaModels(prep, 10, 50, 1, fpath, Seed=99, Lgamodel = "log-linear") + paramOut <- RstoxFDA:::ParameterizeRecaModels(prep, 10, 50, 1, fpath, Seed=99, Lgamodel = "non-linear") + prednl <- RstoxFDA::RunRecaModels(paramOut, StoxLandingData) + RstoxFDA:::removeTempDirReca(fpath) + + fpath <- RstoxFDA:::makeTempDirReca() + # check that seed works + paramOut <- RstoxFDA:::ParameterizeRecaModels(prep, 10, 50, 1, fpath, Seed=99) + paramOut2 <- RstoxFDA:::ParameterizeRecaModels(prep, 10, 50, 1, fpath, Seed = paramOut$GlobalParameters$GlobalParameters$seed) + paramOut3 <- RstoxFDA:::ParameterizeRecaModels(prep, 10, 50, 1, fpath, Seed = paramOut$GlobalParameters$GlobalParameters$seed+1) + + expect_equal(paramOut2$FitProportionAtAge, paramOut$FitProportionAtAge) + expect_equal(paramOut2$FitLengthGivenAge, paramOut$FitLengthGivenAge) + expect_equal(paramOut2$FitWeightGivenLength, paramOut$FitWeightGivenLength) + expect_true(!all(paramOut$FitWeightGivenLength$fish$tau_Intercept == paramOut3$FitWeightGivenLength$fish$tau_Intercept)) + + + # check that cache works + paramOut <- RstoxFDA:::ParameterizeRecaModels(prep, 10, 50, 1, fpath, Seed=155) + expect_warning(paramOut2 <- RstoxFDA:::ParameterizeRecaModels(prep, 10, 50, 1, fpath, Seed=155, UseCache=T), "Using cached data for ParameterizeRecaModels") + expect_true(identical(paramOut, paramOut2)) + # check that cache fails when arguments are changed + expect_error(RstoxFDA:::ParameterizeRecaModels(prep, 10, 51, 1, fpath, Seed=155, UseCache=T), "Arguments or data are not identical to cached run. Re-run with UseCache=FALSE.") + # check that cache fails when data are changed + prep2 <- prep + prep2$AgeLength$DataMatrix$lengthCM[1]<-5 + expect_error(RstoxFDA:::ParameterizeRecaModels(prep2, 10, 50, 1, fpath, Seed=155, UseCache=T), "Arguments or data are not identical to cached run. Re-run with UseCache=FALSE.") + # check that failed runs didnt touch cache files + expect_warning(paramOut3 <- RstoxFDA:::ParameterizeRecaModels(prep, 10, 50, 1, fpath, Seed=155, UseCache=T), "Using cached data for ParameterizeRecaModels") + expect_true(identical(paramOut, paramOut2)) + # check that new run overwrites cahce files + paramOut4 <- RstoxFDA:::ParameterizeRecaModels(prep, 10, 52, 1, fpath, Seed=156) + expect_warning(paramOut5 <- RstoxFDA:::ParameterizeRecaModels(prep, 10, 52, 1, fpath, Seed=156, UseCache=T), "Using cached data for ParameterizeRecaModels") + expect_true(identical(paramOut4, paramOut5)) + RstoxFDA:::removeTempDirReca(fpath) + + fpath <- RstoxFDA:::makeTempDirReca() + # check that halts with error when no cache is found + expect_error(RstoxFDA:::ParameterizeRecaModels(prep, 10, 52, 1, fpath, Seed=156, UseCache=T), "No cached input found. Re-run with UseCache=FALSE.") + RstoxFDA:::removeTempDirReca(fpath) + + #context("PrepRecaEstimate: Missing values warnings") + StoxBioticFile <- system.file("testresources","StoxBioticData.rds", package="RstoxFDA") + StoxBioticData <- readRDS(StoxBioticFile) + StoxLandingFile <- system.file("testresources","StoxLandingData.rds", package="RstoxFDA") + StoxLandingData <- readRDS(StoxLandingFile) + + StoxBioticData$Cruise$Cruise[1] <- NA + expect_warning(expect_error(RstoxFDA:::PrepareRecaEstimate(StoxBioticData, StoxLandingData, FixedEffects = c(), RandomEffects = c("Cruise")))) + StoxBioticData <- readRDS(StoxBioticFile) + StoxBioticData$Station$CatchPlatform[1] <- NA + expect_warning(expect_error(RstoxFDA:::PrepareRecaEstimate(StoxBioticData, StoxLandingData, FixedEffects = c(), RandomEffects = c("CatchPlatform")))) + StoxBioticData <- readRDS(StoxBioticFile) + StoxBioticData$Haul$Gear[1] <- NA + expect_warning(expect_error(RstoxFDA:::PrepareRecaEstimate(StoxBioticData, StoxLandingData, FixedEffects = c("Gear"), RandomEffects = c()))) + StoxBioticData <- readRDS(StoxBioticFile) + StoxBioticData$Sample$CatchFractionNumber[1] <- NA + expect_warning(expect_error(RstoxFDA:::PrepareRecaEstimate(StoxBioticData, StoxLandingData, FixedEffects = c("CatchFractionNumber"), RandomEffects = c()))) + StoxBioticData <- readRDS(StoxBioticFile) + StoxBioticData$Individual$IndividualTotalLength[1] <- NA + expect_warning(expect_error(RstoxFDA:::PrepareRecaEstimate(StoxBioticData, StoxLandingData, FixedEffects = c(), RandomEffects = c()))) + StoxBioticData <- readRDS(StoxBioticFile) + StoxBioticData$Station$DateTime[1] <- NA + expect_warning(expect_error(RstoxFDA:::PrepareRecaEstimate(StoxBioticData, StoxLandingData, FixedEffects = c(), RandomEffects = c()),"Cannot proceed with missing values for Reca-effects")) + + #context("PrepRecaEstimate: Missing cell warnings") + StoxBioticFile <- system.file("testresources","StoxBioticData.rds", package="RstoxFDA") + StoxBioticData <- readRDS(StoxBioticFile) + StoxLandingFile <- system.file("testresources","StoxLandingData.rds", package="RstoxFDA") + StoxLandingData <- readRDS(StoxLandingFile) + StoxBioticData$Station$Area <- NA + StoxBioticData$Station$Area <- c(StoxLandingData$Landing$Area[1:20], StoxLandingData$Landing$Area[1:20], StoxLandingData$Landing$Area[1:5]) + expect_warning(expect_error(RstoxFDA:::PrepareRecaEstimate(StoxBioticData, StoxLandingData, FixedEffects = c("Gear", "Area"), RandomEffects = c()))) + + #context("PrepRecaEstimate: StockSplitting") + manual <- RstoxFDA:::DefineStockSplittingParameters(DefinitionMethod = "FunctionParameters", + StockNameCC="S1", StockNameS="S2", ProbabilityType1As1=.8, + ProbabilityType1As5=.2, ProbabilityType2As2=.6, + ProbabilityType2As4=.4, ProbabilityType4As2=.4, + ProbabilityType4As4=.6, ProbabilityType5As1=.2, + ProbabilityType5As5=.8) + StoxBioticFile <- system.file("testresources","StoxBioticData.rds", package="RstoxFDA") + StoxBioticData <- readRDS(StoxBioticFile) + StoxBioticData$Individual$otolithtype <- c(rep(c(1,5), 1045), c(1,5,1)) + + StoxLandingFile <- system.file("testresources","StoxLandingData.rds", package="RstoxFDA") + StoxLandingData <- readRDS(StoxLandingFile) + + prep <- RstoxFDA:::PrepareRecaEstimate(StoxBioticData, StoxLandingData, FixedEffects = c(), RandomEffects = c(), UseStockSplitting=T, UseStockSplittingError=T, StockSplittingParameters=manual) + expect_true(prep$GlobalParameters$GlobalParameters$CC) + expect_true(prep$GlobalParameters$GlobalParameters$CCerror) + expect_true(RstoxFDA:::is.StockSplittingParameters(prep$AgeLength$StockSplittingParameters)) + expect_true(is.null(prep$AgeLength$CCerrorList)) + fpath <- RstoxFDA:::makeTempDirReca() + #make sure it works with trailing "/" on path + pathWtrailing <- paste0(fpath, "/") + param <- RstoxFDA:::ParameterizeRecaModels(prep, 100, 400, ResultDirectory = pathWtrailing, Seed = 100) + + pathWsubDir <- file.path(fpath, "subdir") + param <- RstoxFDA:::ParameterizeRecaModels(prep, 100, 400, ResultDirectory = pathWsubDir, Seed = 100) + + #context("check that age group names are set correct for stock splitting") + expect_equal(sum(is.na(param$FitProportionAtAge$constant$Age)), 0) + expect_equal(param$FitProportionAtAge$constant$Age[1], "S1 2") + + + #context("Check that back conversion to eca objects works fine with stock splitting") + ecafit <- RstoxFDA:::stox2recaFit(param) + + result <- RstoxFDA:::RunRecaModels(param, StoxLandingData = StoxLandingData) + + expect_true("Stock" %in% result$GroupingVariables$GroupingVariables) + resultAgg <- RstoxFDA:::RunRecaModels(param, StoxLandingData = StoxLandingData, GroupingVariables = c("Gear")) + expect_true(all(c("Stock", "Gear") %in% resultAgg$GroupingVariables$GroupingVariables)) + + RstoxFDA:::removeTempDirReca(fpath) + expect_true(RstoxFDA:::is.StockSplittingParameters(param$AgeLength$StockSplittingParameters)) + expect_true(is.null(prep$AgeLength$CCerrorList)) + expect_equal(param$AgeLength$StockSplittingParameters, manual) + expect_true(RstoxFDA::is.RecaCatchAtAge(result)) + expect_true("Stock" %in% names(result$CatchAtAge)) + expect_true("Stock" %in% names(result$MeanLength)) + expect_true("Stock" %in% names(result$MeanWeight)) + + #stock splitting w warning + StoxBioticData$Individual$otolithtype[1] <- 9 + expect_warning(RstoxFDA:::PrepareRecaEstimate(StoxBioticData, StoxLandingData, FixedEffects = c(), RandomEffects = c(), UseStockSplitting=T, UseStockSplittingError=T, StockSplittingParameters=manual), "StoX: Some aged fish does not have Otolithtype set, or have it set to an unrecognized value. This may slow down Stox processing of Reca results.") + StoxBioticData$Individual$IndividualAge[1] <- NA + expect_silent(RstoxFDA:::PrepareRecaEstimate(StoxBioticData, StoxLandingData, FixedEffects = c(), RandomEffects = c(), UseStockSplitting=T, UseStockSplittingError=T, StockSplittingParameters=manual)) + + #context("PrepRecaEstimate: AgerrorMatrix") + ageerorfile <- system.file("testresources","AgeErrorHirstEtAl2012.txt", package="RstoxFDA") + ageerror <- RstoxFDA::DefineAgeErrorMatrix(FileName = ageerorfile) + expect_true(RstoxFDA::is.AgeErrorMatrix(ageerror)) + StoxBioticFile <- system.file("testresources","StoxBioticData.rds", package="RstoxFDA") + StoxBioticData <- readRDS(StoxBioticFile) + + StoxLandingFile <- system.file("testresources","StoxLandingData.rds", package="RstoxFDA") + StoxLandingData <- readRDS(StoxLandingFile) + + prep <- RstoxFDA:::PrepareRecaEstimate(StoxBioticData, StoxLandingData, FixedEffects = c(), RandomEffects = c(), UseAgingError = T, AgeErrorMatrix = ageerror, MinAge = 0, MaxAge = 14) + expect_true(!is.null(prep$AgeLength$AgeErrorMatrix)) + expect_warning(est <- RstoxFDA::RunRecaEstimate(prep, 10, 50, Seed = 99)) + + #context("PrepareRecaEstimate: configuration tests") + StoxBioticFile <- system.file("testresources","StoxBioticData.rds", package="RstoxFDA") + StoxBioticData <- readRDS(StoxBioticFile) + + StoxLandingFile <- system.file("testresources","StoxLandingData.rds", package="RstoxFDA") + StoxLandingData <- readRDS(StoxLandingFile) + + StoxLandingData$Landing$NewConst <- 1 + StoxBioticData$Station$NewConst <- 1 + + + expect_error(RstoxFDA:::PrepareRecaEstimate(StoxBioticData, StoxLandingData, FixedEffects = c("NewConst"), RandomEffects = c()), "Only one level for categorical covariate NewConst") + expect_error(RstoxFDA:::PrepareRecaEstimate(StoxBioticData, StoxLandingData, FixedEffects = c(), RandomEffects = c("NewConst")), "Only one level for categorical covariate NewConst") + + StoxBioticData$Station$Area <- StoxLandingData$Landing$Area[c(7,8,13,4,3,4,11,20,4,5,6,20,4,12,3,3,10,4,1,20,11,5,11,5,15,8,14,7,10,6,13,16,11,14,19,20,2,19,11,16,15,5,11,11,9)] + expect_error(RstoxFDA:::PrepareRecaEstimate(StoxBioticData, StoxLandingData, FixedEffects = c("Area"), RandomEffects = c("Area")), "Some random effects are also specified as fixed effects: Area") + expect_error(RstoxFDA:::PrepareRecaEstimate(StoxBioticData, StoxLandingData, FixedEffects = c(), RandomEffects = c("Area"), CarEffect = "Area"), "UseCarEffect is False, while the parameter 'CarEffect' is given") + expect_error(RstoxFDA:::PrepareRecaEstimate(StoxBioticData, StoxLandingData, FixedEffects = c(), RandomEffects = c("Area"), CarEffect = "Area", UseCarEffect = T, CarNeighbours = list()), "The CAR effect Area is also specified as fixed effect or random effect") + + #check CAR value cehcks + carfile <- system.file("testresources","mainarea_neighbour_correct_codes.txt", package="RstoxFDA") + car <- RstoxFDA::DefineCarNeighbours(NULL, FileName = carfile) + expect_true(RstoxFDA::is.CarNeighbours(car)) + car$Neighbours[9] <- paste(car$Neighbours[9], "30", sep=",") + car$Neighbours[29] <- paste(car$Neighbours[29], "08", sep=",") + prepCar <- RstoxFDA:::PrepareRecaEstimate(StoxBioticData, StoxLandingData, FixedEffects = c(), RandomEffects = c(), CarEffect = "Area", UseCarEffect = T, CarNeighbours = car) + + fpath <- RstoxFDA:::makeTempDirReca() + paramOut <- RstoxFDA:::ParameterizeRecaModels(prepCar, 10, 50, 1, fpath, Seed=42) + result <- RstoxFDA:::RunRecaModels(paramOut, StoxLandingData) + RstoxFDA:::removeTempDirReca(fpath) + expect_true(RstoxFDA::is.RecaParameterData(paramOut)) + expect_true(RstoxFDA::is.RecaCatchAtAge(result)) + + + #context("ParameterizeRecaModels: simple case") + StoxBioticFile <- system.file("testresources","StoxBioticData.rds", package="RstoxFDA") + StoxBioticData <- readRDS(StoxBioticFile) + + StoxLandingFile <- system.file("testresources","StoxLandingData.rds", package="RstoxFDA") + StoxLandingData <- readRDS(StoxLandingFile) + + prep <- RstoxFDA:::PrepareRecaEstimate(StoxBioticData, StoxLandingData, FixedEffects = c(), RandomEffects = c()) + + fpath <- RstoxFDA:::makeTempDirReca() + paramOut <- RstoxFDA:::ParameterizeRecaModels(prep, 10, 50, 1, fpath, Seed=43) + + expect_true(c("FitLengthGivenAge") %in% names(paramOut)) + expect_equal(length(paramOut$FitLengthGivenAge), 4) + expect_true(RstoxFDA::is.RecaParameterData((paramOut))) + + #context("test-StoxAnalysisFunctions: RunRecaModels") + results <- RstoxFDA:::RunRecaModels(paramOut, StoxLandingData) + expect_true("Age" %in% names(results$CatchAtAge)) + expect_true(RstoxFDA::is.RecaCatchAtAge(results)) + + #context("test-StoxAnalysisFunctions: RunRecaModels with GroupingVariables") + results <- RstoxFDA:::RunRecaModels(paramOut, StoxLandingData, GroupingVariables = c("Area", "Usage")) + expect_equal(length(unique(paste(results$CatchAtAge$Area, results$CatchAtAge$Usage))), length(unique(paste(StoxLandingData$Landing$Area, StoxLandingData$Landing$Usage)))) + expect_true(RstoxFDA::is.RecaCatchAtAge(results)) + expect_warning(RstoxFDA:::RunRecaModels(paramOut, StoxLandingData, GroupingVariables = c("Area", "Usage"), CollapseLength = F), "StoX: Producing estimates for all length groups in combination with age and several 'GroupingVariables'. This may exhaust memory, consider the option 'CollapseLength'") + RstoxFDA:::removeTempDirReca(fpath) + + #context("test-StoxAnalysisFunctions: RunRecaModels with random effects in landings") + + StoxLandingFile <- system.file("testresources","StoxLandingData.rds", package="RstoxFDA") + StoxLandingData <- readRDS(StoxLandingFile) + + StoxBioticFile <- system.file("testresources","StoxBioticData.rds", package="RstoxFDA") + StoxBioticData <- readRDS(StoxBioticFile) + StoxBioticData$Haul$Gear <- StoxLandingData$Landing$Gear[sample.int(20,45, replace=T)] + + fpath <- RstoxFDA:::makeTempDirReca() + + prep <- RstoxFDA:::PrepareRecaEstimate(StoxBioticData, StoxLandingData, FixedEffects = c(), RandomEffects = c("Gear")) + + #check that seed works + paramOut <- RstoxFDA:::ParameterizeRecaModels(prep, 50, 50, 1, fpath, Seed=111) + seed <- sample.int(.Machine$integer.max, 1) + results <- RstoxFDA:::RunRecaModels(paramOut, StoxLandingData) + paramOut2 <- RstoxFDA:::ParameterizeRecaModels(prep, 50, 50, 1, fpath, Seed=paramOut$GlobalParameters$GlobalParameters$seed) + results2 <- RstoxFDA:::RunRecaModels(paramOut2, StoxLandingData) + paramOut3 <- RstoxFDA:::ParameterizeRecaModels(prep, 50, 50, 1, fpath, Seed=paramOut$GlobalParameters$GlobalParameters$seed+1) + results3 <- RstoxFDA:::RunRecaModels(paramOut3, StoxLandingData) + expect_equal(results2, results) + expect_true(!all(results$CatchAtAge == results3$CatchAtAge)) + #/ seed + + paramOut <- RstoxFDA:::ParameterizeRecaModels(prep, 50, 50, 1, fpath, Seed=100) + results <- RstoxFDA:::RunRecaModels(paramOut, StoxLandingData) + + expect_true("Gear" %in% names(paramOut$Landings$AgeLengthCov)) + expect_true("Age" %in% names(results$CatchAtAge)) + expect_true(RstoxFDA::is.RecaCatchAtAge(results)) + + paramOut <- RstoxFDA:::ParameterizeRecaModels(prep, 10, 50, 1, fpath, Seed=44) + results <- RstoxFDA:::RunRecaModels(paramOut, StoxLandingData, GroupingVariables = "Gear") + expect_true("Gear" %in% names(paramOut$Landings$AgeLengthCov)) + expect_true("Age" %in% names(results$CatchAtAge)) + expect_true(RstoxFDA::is.RecaCatchAtAge(results)) + + #context("RunRecaModels: Test collapse Length") + resultsWlength <- RstoxFDA:::RunRecaModels(paramOut, StoxLandingData, GroupingVariables = "Gear", CollapseLength = F) + expect_true("Age" %in% names(resultsWlength$CatchAtAge)) + expect_true(RstoxFDA::is.RecaCatchAtAge(resultsWlength)) + expect_equal(nrow(results$CatchAtAge)*2*results$CatchAtAge$Length[1], nrow(resultsWlength$CatchAtAge)) + expect_equal(length(unique(results$CatchAtAge$Length)),1) + expect_true(length(unique(resultsWlength$CatchAtAge$Length))> 1) + + #context("RunRecaModels: Test collapse Length wo Aggregation") + results <- RstoxFDA:::RunRecaModels(paramOut, StoxLandingData) + expect_true("Age" %in% names(results$CatchAtAge)) + expect_true(RstoxFDA::is.RecaCatchAtAge(results)) + + resultsWlength <- RstoxFDA:::RunRecaModels(paramOut, StoxLandingData, CollapseLength = F) + expect_true("Age" %in% names(resultsWlength$CatchAtAge)) + expect_true(RstoxFDA::is.RecaCatchAtAge(resultsWlength)) + expect_equal(nrow(results$CatchAtAge)*2*results$CatchAtAge$Length[1], nrow(resultsWlength$CatchAtAge)) + expect_equal(length(unique(results$CatchAtAge$Length)),1) + expect_true(length(unique(resultsWlength$CatchAtAge$Length))> 1) + + + #context("test-StoxAnalysisFunctions: PrepareRecaEstimate missing arguments") + expect_error(RstoxFDA:::ParameterizeRecaModels(prep, 10, 50, 1, ResultDirectory = NULL), "Argument 'ResultDirectory' must be provided.") + + RstoxFDA:::removeTempDirReca(fpath) + + #context("test-StoxAnalysisFunctions: PrepareRecaEstimate simple case") + StoxBioticFile <- system.file("testresources","StoxBioticData.rds", package="RstoxFDA") + StoxBioticData <- readRDS(StoxBioticFile) + + StoxLandingFile <- system.file("testresources","StoxLandingData.rds", package="RstoxFDA") + StoxLandingData <- readRDS(StoxLandingFile) + + prep <- RstoxFDA:::PrepareRecaEstimate(StoxBioticData, StoxLandingData, FixedEffects = c(), RandomEffects = c()) + expect_equal(length(prep$CovariateMaps$CovariateMaps_randomEffects_AgeLength_catchSample$values), length(unique(StoxBioticData$Individual$HaulKey))) + + prep <- RstoxFDA:::PrepareRecaEstimate(StoxBioticData, StoxLandingData, FixedEffects = c(), RandomEffects = c(), MinAge=1, MaxAge=30) + + #context("test-StoxAnalysisFunctions: RunRecaEstimate simple case") + expect_warning(result <- RstoxFDA::RunRecaEstimate(prep, 10, 50, Thin=1, Seed = 42)) + expect_true(all(c("input", "fit", "prediction", "covariateMaps") %in% names(result))) + expect_equal(dim(result$prediction$TotalCount)[3], 10) + + + #context("test-StoxAnalysisFunctions: PrepareRecaEstimate, missing sample dates") + StoxBioticData$Station$DateTime[1] <- NA + expect_error(suppressWarnings(RstoxFDA:::PrepareRecaEstimate(StoxBioticData, StoxLandingData, FixedEffects = c(), RandomEffects = c()))) + + #context("test-StoxAnalysisFunctions: PrepareRecaEstimate, stratified samples (nFish), missing CatchFractionNumber") + StoxBioticDataDelp <- readRDS(system.file("testresources","StoxBioticDelpr.rds", package="RstoxFDA")) + expect_error(suppressWarnings(RstoxFDA:::PrepareRecaEstimate(StoxBioticDataDelp, StoxLandingData, FixedEffects = c(), RandomEffects = c()))) + + #context("test-StoxAnalysisFunctions: PrepareRecaEstimate, stratified samples (nFish)") + StoxBioticDataDelp$Sample$CatchFractionNumber[2] <- 3000 + prep <- RstoxFDA:::PrepareRecaEstimate(StoxBioticDataDelp, StoxLandingData, FixedEffects = c(), RandomEffects = c()) + + ##context("test-StoxAnalysisFunctions: RunRecaEstimate, stratified samples (nFish)") + #to few iterations to converge consistently. removing test + #est <- RunRecaEstimate(prep, 10, 200, 0) + + #context("test-StoxAnalysisFunctions: PrepareRecaEstimate with with random effect Area") + StoxBioticFile <- system.file("testresources","StoxBioticData.rds", package="RstoxFDA") + StoxBioticData <- readRDS(StoxBioticFile) + + StoxLandingFile <- system.file("testresources","StoxLandingData.rds", package="RstoxFDA") + StoxLandingData <- readRDS(StoxLandingFile) + + StoxBioticData$Station$Area <- c(rep(StoxLandingData$Landing$Area[10], 20), rep(StoxLandingData$Landing$Area[20], 25)) + StoxBioticData$Station$GG <- c(rep(StoxLandingData$Landing$Gear[10], 20), rep(StoxLandingData$Landing$Gear[20], 25)) + StoxLandingData$Landing$GG <- StoxLandingData$Landing$Gear + + prep <- RstoxFDA:::PrepareRecaEstimate(StoxBioticData, StoxLandingData, FixedEffects = c(), RandomEffects = c("Area")) + expect_true("Area" %in% names(prep$Landings$AgeLengthCov)) + + #context("test-StoxAnalysisFunctions: PrepareRecaEstimate cellEffect") + prepCell <- RstoxFDA:::PrepareRecaEstimate(StoxBioticData, StoxLandingData, FixedEffects = c(), RandomEffects = c("Area", "GG"), CellEffect = "All") + expect_equal(prepCell$AgeLength$info$interaction[prepCell$AgeLength$info$covariate=="Area"], 1) + expect_equal(prepCell$AgeLength$info$interaction[prepCell$AgeLength$info$covariate=="GG"], 1) + + fpath <- RstoxFDA:::makeTempDirReca() + paramOut <- RstoxFDA:::ParameterizeRecaModels(prepCell, 10, 50, 1, fpath, Seed = 451) + expect_true("cell" %in% names(paramOut$FitProportionAtAge)) + + RstoxFDA:::removeTempDirReca(fpath) + + #context("test-StoxAnalysisFunctions: RunRecaEstimate with random effect Area") + expect_warning(est <- RstoxFDA::RunRecaEstimate(prep, 10, 100, 0, Seed = 112)) + expect_true("Area" %in% names(est$fit$ProportionAtAge$Intercept$cov)) +} diff --git a/inst/tinytest/test-StoxAnalyticalBaselineFunctions.R b/inst/tinytest/test-StoxAnalyticalBaselineFunctions.R new file mode 100644 index 00000000..55e93bce --- /dev/null +++ b/inst/tinytest/test-StoxAnalyticalBaselineFunctions.R @@ -0,0 +1,73 @@ +designParamsFile <- system.file("testresources", "lotteryParameters", "lotteryDesignNSH.txt", package="RstoxFDA") + +#regular read: +designParams <- RstoxFDA:::DefinePSUSamplingParameters(NULL, "ResourceFile", designParamsFile) +expect_true(RstoxFDA:::is.PSUSamplingParametersData(designParams)) +expect_equal(nrow(designParams$SelectionTable), 64) +expect_equal(nrow(designParams$SampleTable), 1) +expect_equal(ncol(designParams$StratificationVariables), 1) +expect_equal(nrow(designParams$StratificationVariables), 1) +expect_equal(sum(designParams$SelectionTable$HTsamplingWeight), 1) +expect_equal(sum(designParams$SelectionTable$HHsamplingWeight), 1) + +# test assignment to data +expect_error(RstoxFDA:::AssignPSUSamplingParameters(designParams, RstoxFDA::CatchLotteryExample, "MissingAtRandom")) +designParamsCorrected <- RstoxFDA:::AssignPSUSamplingParameters(designParams, RstoxFDA::CatchLotteryExample, "HaulKey", "MissingAtRandom") +expect_equal(sum(designParamsCorrected$SelectionTable$HTsamplingWeight),1) +expect_equal(sum(designParamsCorrected$SelectionTable$HHsamplingWeight),1) +#HT should be approximately the same after non-response correction +expect_true(abs((sum(1/designParamsCorrected$SelectionTable$InclusionProbability)-sum(1/designParams$SelectionTable$InclusionProbability))/sum(1/designParamsCorrected$SelectionTable$InclusionProbability))<0.1) +#HH should be approximately the same after non-response correction +expect_true(abs((mean(1/designParamsCorrected$SelectionTable$InclusionProbability)-mean(1/designParams$SelectionTable$InclusionProbability))/sum(1/designParamsCorrected$SelectionTable$InclusionProbability))<0.1) + +#define from data +suppressWarnings(StoxBioticData <- RstoxData::StoxBiotic(RstoxData::ReadBiotic(system.file("testresources", "biotic_v3_example.xml", package="RstoxFDA")))) +designParamsSB <- RstoxFDA:::DefinePSUSamplingParameters(NULL, "AdHocStoxBiotic", StoxBioticData=StoxBioticData, SamplingUnitId = "Individual", StratificationColumns = c("SpeciesCategoryKey")) +expect_true(RstoxFDA:::is.PSUSamplingParametersData(designParamsSB)) +#compare names of output with stratification variables to output without + +expect_true(all(names(designParamsSB$SampleTable) == names(designParams$SampleTable))) +expect_true(all(names(designParamsSB$SelectionTable) == names(designParams$SelectionTable))) +expect_equal(nrow(designParamsSB$SelectionTable), 75) +expect_equal(nrow(designParamsSB$SampleTable), 2) +expect_equal(ncol(designParamsSB$StratificationVariables), 2) +expect_equal(nrow(designParamsSB$StratificationVariables), 2) + +# +# Prepare dataset with sub-sampled parameters +# +ds <- RstoxFDA::StoxBioticDataExample +ds$Individual$IndividualAge[rep(c(TRUE,FALSE), nrow(ds$Individual)/2)] <- as.numeric(NA) +ds$Individual$IndividualRoundWeight[rep(c(TRUE,FALSE), nrow(ds$Individual)/2)] <- as.numeric(NA) +ds$Sample$CatchFractionNumber[is.na(ds$Sample$CatchFractionNumber)] <- 1000 + +#Define Individual design, SRS +expect_error(RstoxFDA:::DefineIndividualSamplingParameters(NULL, ds, "SRS")) +srs <- RstoxFDA:::DefineIndividualSamplingParameters(NULL, ds, "SRS", c("IndividualAge", "IndividualTotalLength", "IndividualRoundWeight")) +expect_true(RstoxFDA:::is.IndividualSamplingParametersData(srs)) +weights <- srs$SelectionTable[,list(meanN=sum(HTsamplingWeight)), by=c("Stratum", "SampleId")] +expect_true(all(abs(weights$meanN-1) < 1e-6)) +#Define Individual design, Length stratified +expect_error(RstoxFDA:::DefineIndividualSamplingParameters(NULL, ds, "LengthStratified", c("IndividualAge", "IndividualTotalLength", "IndividualRoundWeight"), LengthInterval = 5), "'IndividualTotalLength' may not be among the variables in 'Parameters' for length-stratified sampling.") +ls<-RstoxFDA:::DefineIndividualSamplingParameters(NULL, ds, "LengthStratified", c("IndividualAge", "IndividualRoundWeight"), LengthInterval = 5) +expect_true(RstoxFDA:::is.IndividualSamplingParametersData(ls)) +weights <- ls$SelectionTable[,list(meanN=sum(HTsamplingWeight)), by=c("Stratum", "SampleId")] +expect_true(all(abs(weights$meanN-1) < 1e-6)) + +#Define Individual design, stratified, setting strata by length as in Length stratified +bioStrat <- ds +bioStrat$Individual$LStrat <- as.character(cut(bioStrat$Individual$IndividualTotalLength, seq(0,max(bioStrat$Individual$IndividualTotalLength)+5,5), right = F)) +ss<-RstoxFDA:::DefineIndividualSamplingParameters(NULL, bioStrat, "Stratified", c("IndividualAge", "IndividualRoundWeight"), StratificationColumns = c("LStrat")) +expect_true(RstoxFDA:::is.IndividualSamplingParametersData(ss)) +weights <- ss$SelectionTable[,list(meanN=sum(HTsamplingWeight)), by=c("Stratum", "SampleId")] +expect_true(all(abs(weights$meanN-1) < 1e-6)) + +#check that length stratified and stratified is consistent. +expect_equal(ss$SelectionTable$InclusionProbability[[4]], ls$SelectionTable$InclusionProbability[[4]]) +expect_true(srs$SelectionTable$InclusionProbability[[4]] != ls$SelectionTable$InclusionProbability[[4]]) +expect_equal(nrow(ss$SampleTable), nrow(ls$SampleTable)) + +#test estimate with HansenHurwitzDomainEstimate +data <- RstoxFDA::CatchLotteryExample +indSampling <- RstoxFDA:::DefineIndividualSamplingParameters(NULL, data, "SRS", c("IndividualAge")) +#domainEst <- HansenHurwitzDomainEstimate() \ No newline at end of file diff --git a/inst/tinytest/test-StoxDataTypes.R b/inst/tinytest/test-StoxDataTypes.R index 69cc6864..8739128a 100644 --- a/inst/tinytest/test-StoxDataTypes.R +++ b/inst/tinytest/test-StoxDataTypes.R @@ -11,5 +11,8 @@ expect_true(RstoxFDA:::is.POSIXct(as.POSIXct(Sys.Date()))) expect_true(RstoxFDA:::is.RecaPrediction(RstoxFDA::recaPrediction)) #context("Test is.RecaResult") +# ECA tests are only run if Reca is installed. +if (nchar(system.file(package="Reca"))>0){ suppressWarnings(ex<-RstoxFDA::RunRecaEstimate(RstoxFDA::recaDataExample, 100,100)) expect_true(RstoxFDA:::is.RecaResult(ex)) +} \ No newline at end of file diff --git a/inst/tinytest/test-StoxReportFunctions.R b/inst/tinytest/test-StoxReportFunctions.R index c5727f20..a3e27183 100644 --- a/inst/tinytest/test-StoxReportFunctions.R +++ b/inst/tinytest/test-StoxReportFunctions.R @@ -10,50 +10,6 @@ expect_true(!any(is.na(decomp$MeanWeightByAge$SD))) expect_true(!any(is.na(decomp$MeanWeightByAge$Low))) expect_true(!any(is.na(decomp$MeanWeightByAge$High))) -#context("Test StoxReportFunctions: ReportRecaParameterStatistics") -StoxLandingFile <- system.file("testresources","StoxLandingData.rds", package="RstoxFDA") -StoxLandingData <- readRDS(StoxLandingFile) -StoxBioticFile <- system.file("testresources","StoxBioticData.rds", package="RstoxFDA") -StoxBioticData <- readRDS(StoxBioticFile) -StoxBioticData$Individual <- StoxBioticData$Individual[StoxBioticData$Individual$IndividualAge<4,] -StoxBioticData$Haul$Gear <- StoxLandingData$Landing$Gear[c(1:20, 1:20, 1:5)] -StoxBioticData$Station$Area <- StoxLandingData$Landing$Area[c(1:20, 1:20, 1:5)] -prep <- RstoxFDA::PrepareRecaEstimate(StoxBioticData, StoxLandingData, FixedEffects = c(), RandomEffects = c("Gear", "Area"), CellEffect = c("All"), MinAge = 2, MaxAge = 3) - -fpath1 <- RstoxFDA:::makeTempDirReca("chain1") -fpath2 <- RstoxFDA:::makeTempDirReca("chain2") -fpath3 <- RstoxFDA:::makeTempDirReca("chain3") - -paramOut1 <- RstoxFDA::ParameterizeRecaModels(prep, 10, 50, 1, ResultDirectory = fpath1) -paramOut2 <- RstoxFDA::ParameterizeRecaModels(prep, 10, 50, 1, ResultDirectory = fpath2) - -paramSummary <- RstoxFDA::ReportRecaParameterStatistics(paramOut1) -paramSummary <- RstoxFDA::ReportRecaParameterStatistics(paramOut2, paramSummary, AppendReport = TRUE) -expect_true(RstoxFDA::is.ParameterizationSummaryData(paramSummary)) - -RstoxFDA:::removeTempDirReca(fpath1) -RstoxFDA:::removeTempDirReca(fpath2) -RstoxFDA:::removeTempDirReca(fpath3) - -convergence <- RstoxFDA::ReportParameterConvergence(paramSummary) - -expect_true(RstoxFDA::is.ParameterConvergenceData(convergence)) -expect_true(nrow(convergence$ConvergenceReport) < 433) -expect_true(nrow(convergence$ConvergenceReport) > 0) - -#construct three identical chains, should signal convergence -paramSummary <- RstoxFDA::ReportRecaParameterStatistics(paramOut1) -paramOut1$GlobalParameters$GlobalParameters$resultdir="B" -paramSummary <- RstoxFDA::ReportRecaParameterStatistics(paramOut1, paramSummary, AppendReport = T) -paramOut1$GlobalParameters$GlobalParameters$resultdir="C" -paramSummary <- RstoxFDA::ReportRecaParameterStatistics(paramOut1, paramSummary, AppendReport = T) - -#context("Check Gelman-Rubin for equal chains") -convergence <- RstoxFDA::ReportParameterConvergence(paramSummary, Tolerance = 0) -expect_equal(nrow(convergence$ConvergenceReport), 433) -expect_true(all(abs(convergence$ConvergenceReport$GelmanRubinR-1)<.1)) - - #context("Test StoxReportFunctions: ReportRecaCatchStatistics") predictiondatafile <- system.file("testresources","stocksplitpred.rds", package="RstoxFDA") catchAtAgeFlat <- readRDS(system.file("testresources", "recaPredictionFlat.rds", package="RstoxFDA")) @@ -139,7 +95,7 @@ expect_true(sum(gReport$FisheriesSampling$Catches, na.rm=T) == sum(qgReport$Fish SamplingReport <- RstoxFDA::ReportFdaSampling(StoxBioticData, StoxLandingData, GroupingVariables = c("Quarter")) expect_true(abs(sum(StoxBioticData$Sample$CatchFractionWeight, na.rm=T) - sum(SamplingReport$FisheriesSampling$WeightOfSampledCatches)) / sum(SamplingReport$FisheriesSampling$WeightOfSampledCatches) < .01) expect_true(abs(sum(StoxLandingData$Landing$RoundWeight, na.rm=T) - sum(SamplingReport$FisheriesSampling$LandedRoundWeight)) / sum(SamplingReport$FisheriesSampling$LandedRoundWeight) < .01) -expect_true(RstoxFDA::is.ReportFdaSamplingData(SamplingReport)) +expect_true(RstoxFDA:::is.ReportFdaSamplingData(SamplingReport)) expect_true(all(!is.na(SamplingReport$FisheriesSampling$LandedRoundWeight))) expect_equal(RstoxData::getUnit(SamplingReport$FisheriesSampling$WeightOfSampledCatches), "mass-kg") expect_true(is.na(RstoxData::getUnit(SamplingReport$FisheriesSampling$Catches))) @@ -160,7 +116,7 @@ expect_true(all(SamplingReportRounded$FisheriesSampling$WeightOfSampledCatches ! # test one with NAs SamplingReportSV <- RstoxFDA::ReportFdaSampling(StoxBioticData, StoxLandingData, GroupingVariables = c("Quarter"), Unit="kiloton", Decimals = 6, SamplingVariables = c("IndividualSex")) -expect_true(is.ReportFdaSamplingData(SamplingReportSV)) +expect_true(RstoxFDA:::is.ReportFdaSamplingData(SamplingReportSV)) expect_true("SamplingVariables" %in% names(SamplingReportSV)) expect_true("IndividualSex" %in% names(SamplingReportSV$FisheriesSampling)) expect_equal(sum(is.na(SamplingReportSV$FisheriesSampling$IndividualSex)),2) @@ -209,8 +165,8 @@ catchAtAgeDecomp <- readRDS(system.file("testresources", "recaPredictionDecomp.r catchAtAgeReportDecomp <- RstoxFDA::ReportRecaCatchAtAge(catchAtAgeDecomp) catchAtAgeReportFlat <- RstoxFDA::ReportRecaCatchAtAge(catchAtAgeFlat) -expect_true(RstoxFDA::is.ReportFdaData(catchAtAgeReportDecomp)) -expect_true(RstoxFDA::is.ReportFdaData(catchAtAgeReportFlat)) +expect_true(RstoxFDA:::is.ReportFdaData(catchAtAgeReportDecomp)) +expect_true(RstoxFDA:::is.ReportFdaData(catchAtAgeReportFlat)) diff <- sum(catchAtAgeReportFlat$NbyAge$CatchAtAge) - sum(catchAtAgeReportDecomp$NbyAge$CatchAtAge) reldiff <- abs(diff/sum(catchAtAgeReportFlat$NbyAge$CatchAtAge)) @@ -294,7 +250,7 @@ expect_equal(catchAtAgeReportMi$NbyAge$SD[1:3]*1e6, catchAtAgeReportFlatPlusGr$N # Report Mean weight MeanWeightReportDecomp <- RstoxFDA::ReportRecaWeightAtAge(catchAtAgeDecomp, Decimals = 4, Unit = "kg") -expect_true(RstoxFDA::is.ReportFdaData(MeanWeightReportDecomp)) +expect_true(RstoxFDA:::is.ReportFdaData(MeanWeightReportDecomp)) expect_equal(RstoxData::getUnit(MeanWeightReportDecomp$MeanWeightByAge$MeanIndividualWeight), "mass-kg") MeanWeightReportDecimal <- RstoxFDA::ReportRecaWeightAtAge(catchAtAgeDecomp, Decimal=4) @@ -333,7 +289,7 @@ expect_true(all(MeanWeightReportDecompPlusGr$MeanWeightByAge$MeanIndividualWeigh # Report Mean length MeanLengthReportDecomp <- RstoxFDA::ReportRecaLengthAtAge(catchAtAgeDecomp, Unit="cm", Decimals=1) -expect_true(RstoxFDA::is.ReportFdaData(MeanLengthReportDecomp)) +expect_true(RstoxFDA:::is.ReportFdaData(MeanLengthReportDecomp)) expect_true(!all(nchar(as.character(MeanLengthReportDecomp$MeanLengthByAge$MeanIndividualLength[MeanLengthReportDecomp$MeanLengthByAge$MeanIndividualLength>0]))>5)) expect_equal(RstoxData::getUnit(MeanLengthReportDecomp$MeanLengthByAge$MeanIndividualLength), "length-cm") @@ -381,7 +337,7 @@ catchAtAgeReportDecompPlusGr <- RstoxFDA::ReportRecaCatchAtAge(catchAtAgeDecomp, MeanWeightReportDecompPlusGr <- RstoxFDA::ReportRecaWeightAtAge(catchAtAgeDecomp, PlusGroup=5, Decimals = 6, Threshold = 1000) expect_true(sum(is.na(MeanWeightReportDecompPlusGr$MeanWeightByAge$MeanIndividualWeight))>1) sopTabNa <- RstoxFDA::ReportFdaSOP(catchAtAgeReportDecompPlusGr, MeanWeightReportDecompPlusGr, StoxLandingData, GroupingVariables = c("Gear", "Area")) -expect_true(RstoxFDA::is.ReportFdaSOP(sopTabNa)) +expect_true(RstoxFDA:::is.ReportFdaSOP(sopTabNa)) sopTabNa <- sopTabNa$SopReport expect_true(any(sopTabNa$Difference<0)) @@ -391,7 +347,7 @@ catchAtAgeReportDecompPlusGr <- RstoxFDA::ReportRecaCatchAtAge(catchAtAgeDecomp, MeanWeightReportDecompPlusGr <- RstoxFDA::ReportRecaWeightAtAge(catchAtAgeDecomp, PlusGroup=5, Decimals = 6) sopTab <- RstoxFDA::ReportFdaSOP(catchAtAgeReportDecompPlusGr, MeanWeightReportDecompPlusGr, StoxLandingData, GroupingVariables = c("Gear", "Area")) sopTabKi <- RstoxFDA::ReportFdaSOP(catchAtAgeReportDecompPlusGrKi, MeanWeightReportDecompPlusGr, StoxLandingData, GroupingVariables = c("Gear", "Area"), UnitFraction = "%") -expect_true(RstoxFDA::is.ReportFdaSOP(sopTab)) +expect_true(RstoxFDA:::is.ReportFdaSOP(sopTab)) sopTab <- sopTab$SopReport sopTabKi <- sopTabKi$SopReport expect_true(all(abs(sopTab$RelativeDifference) < 0.02)) diff --git a/inst/tinytest/test-ecaOuputputConversion.R b/inst/tinytest/test-ecaOuputputConversion.R index 63a57132..98fcf1c7 100644 --- a/inst/tinytest/test-ecaOuputputConversion.R +++ b/inst/tinytest/test-ecaOuputputConversion.R @@ -1,3 +1,8 @@ + +# ECA tests are only run if Reca is installed. + +if (nchar(system.file(package="Reca"))>0){ + #context("test-StoxAnalysisFunctions: tests RecaResult conversion") ecaResult <- readRDS(system.file("testresources","ecaResult.rds", package="RstoxFDA")) @@ -66,7 +71,9 @@ prep <- RstoxFDA:::PrepareRecaEstimate(StoxBioticData, StoxLandingData, FixedEff fpath <- RstoxFDA:::makeTempDirReca() RecaData <- RstoxFDA::convertRecaData(prep, nSamples = 10, burnin = 50, thin=1, resultdir = fpath, delta.age = .001, fitfile = "fit", seed = 42, lgamodel = "log-linear") -sanitizeRecaInput(RecaData$AgeLength, RecaData$WeightLength, RecaData$Landings, RecaData$GlobalParameters, stage="parameterize") -est<-Reca::eca.estimate(RecaData$AgeLength, RecaData$WeightLength, RecaData$Landings, RecaData$GlobalParameters) +RstoxFDA::sanitizeRecaInput(RecaData$AgeLength, RecaData$WeightLength, RecaData$Landings, RecaData$GlobalParameters, stage="parameterize") + +est<-RstoxFDA:::eca.estimate(RecaData$AgeLength, RecaData$WeightLength, RecaData$Landings, RecaData$GlobalParameters) RstoxFDA:::removeTempDirReca(fpath) +} diff --git a/inst/tinytest/test-lengthGroupCollapse.R b/inst/tinytest/test-lengthGroupCollapse.R index 46005730..104fd4cf 100644 --- a/inst/tinytest/test-lengthGroupCollapse.R +++ b/inst/tinytest/test-lengthGroupCollapse.R @@ -1,3 +1,7 @@ +# ECA tests are only run if Reca is installed. + +if (nchar(system.file(package="Reca"))>0){ + StoxLandingFile <- system.file("testresources","StoxLandingData.rds", package="RstoxFDA") StoxLandingData <- readRDS(StoxLandingFile) @@ -41,3 +45,5 @@ reportCAALwoLength <- RstoxFDA::ReportRecaCatchAtLengthAndAge(results) expect_equal(nrow(reportCAALwLength$NbyLength), 2158) expect_equal(nrow(reportCAALwoLength$NbyLength), 26) expect_equal(sum(reportCAALwoLength$NbyLength$CatchAtLength), sum(reportCAALwLength$NbyLength$CatchAtLength)) + +} diff --git a/man/CatchLotteryExample.Rd b/man/CatchLotteryExample.Rd new file mode 100644 index 00000000..1050927f --- /dev/null +++ b/man/CatchLotteryExample.Rd @@ -0,0 +1,26 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/CatchLotteryExample-datadoc.R +\docType{data} +\name{CatchLotteryExample} +\alias{CatchLotteryExample} +\title{Data from the Norwegian catch lottery sampling program.} +\format{ +\code{\link[RstoxData]{StoxBioticData}} +} +\usage{ +data(CatchLotteryExample) +} +\description{ +Example of data formatted as \code{\link[RstoxData]{StoxBioticData}} +Hauls are primary sampling units, selected by Poission sampling with selection probabilities proportional to the catch size. +The data contain North Sea herring samples from catch lottery sampling in 2022. +} +\examples{ + RstoxFDA::plotArea(RstoxFDA::CatchLotteryExample$Station, + areaDef=RstoxFDA::mainareaFdir2018, + latCol = "Latitude", + lonCol = "Longitude", + areaLabels = TRUE) +} +\concept{Analytical estimation} +\keyword{datasets} diff --git a/man/CatchLotterySamplingExample.Rd b/man/CatchLotterySamplingExample.Rd new file mode 100644 index 00000000..70e67ede --- /dev/null +++ b/man/CatchLotterySamplingExample.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/CatchLotterySamplingExample-datadoc.R +\docType{data} +\name{CatchLotterySamplingExample} +\alias{CatchLotterySamplingExample} +\title{Sampling parameters from the Norwegian catch lottery sampling program.} +\format{ +\code{\link[RstoxData]{StoxBioticData}} +} +\usage{ +data(CatchLotterySamplingExample) +} +\description{ +Example of data formatted as \code{\link[RstoxFDA]{PSUSamplingParametersData}} +Hauls are primary sampling units, selected by Poission sampling with selection probabilities proportional to the catch size. +The data contain sampling parameters for North Sea herring samples from catch lottery sampling in 2022. +} +\details{ +The corresponding samples are provided in \code{\link[RstoxFDA]{CatchLotteryExample}} +} +\examples{ + #all selected PSU that where actuall sampled are provided in CatchLotteryExample + sum(!is.na(CatchLotterySamplingExample$SelectionTable$SamplingUnitId)) + sum(CatchLotterySamplingExample$SelectionTable$SamplingUnitId \%in\% + CatchLotteryExample$Haul$HaulKey) +} +\concept{Analytical estimation} +\keyword{datasets} diff --git a/man/IndividualSamplingParametersData.Rd b/man/IndividualSamplingParametersData.Rd new file mode 100644 index 00000000..f5a2084f --- /dev/null +++ b/man/IndividualSamplingParametersData.Rd @@ -0,0 +1,60 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/StoxDataTypes.R +\name{IndividualSamplingParametersData} +\alias{IndividualSamplingParametersData} +\title{Individual Sub-Sampling Design Parameters} +\description{ +Sampling parameters for selection of a sub-sample of individuals +} +\details{ +Encodes information about the selection of a sub-sample of observations from individuals, used in analytical design based estimation. + A sub-sample is simply a sample of a sample. This data type is intended to represent the final stage of sampling in multi-stage sampling, + and therefor has a reference to the Sample it was taken from ('SampleId'). Apart from that there is no principal difference from single + stage sampling. All stratification is specified within the sample identifed by 'SampleId', and all sampling probabilites are specified within strata. + + The SampleTable encodes information about the sample of sampling units: + \describe{ + \item{SampleId}{Mandatory, chr: Identifies the sample the sub-sample is taken from.} + \item{Stratum}{Mandatory, chr: Identifies the within-sample stratum the sub-sample is taken from. Treat unstratified sample as single-stratum sampling (provide only one stratum.} + \item{N}{Optional, num: The total number of individuals in Stratum. For unstratified sampling, the total number of individuals in the sample the sub-sample is taken from.} + \item{n}{Optional, num: The number of individuals selected from the Stratum} + \item{SelectionMethod}{Mandatory, chr: 'Poission', 'FSWR' or 'FSWOR'. The manner of selection for use in bootstrap or inference of inclusionProbabilities, selectionProbabilites, co-inclusion probabilities or co-selection probabilities.} + \item{SampleDescription}{Optional, chr: Free text field describing the sample that is subsampled.} + } + + The SelectionTable encodes information abut the selection of sampling units for sampling: + \describe{ + \item{SampleId}{Mandatory, chr: Identifies the sample the sub-sample is taken from.} + \item{Stratum}{Mandatory, chr: Identifies the within sample-stratum the individual is taken from.} + \item{Order}{Optional, num: Identifes the order of seleciton. May be necessary for inference when selections are not independent (e.g. FSWOR)} + \item{IndividualId}{Optional, chr: Identifes individual. NA encodes non-response / observation failure} + \item{InclusionProbability}{Optional, num: The inclusion probability of the individual} + \item{HTsamplingWeight}{Optional, num: The normalized Horvitz-Thompson sampling weight of the individual} + \item{SelectionProbability}{Optional, num: The selection probability of the individual} + \item{HHsamplingWeight}{Optional, num: The normalized Hansen-Hurwitz sampling weight of the individual} + \item{SelectionDescription}{Optional, chr: Free text description of sampling unit.} + } + + The StratificationVariables table encodes information about which columns in the sampleTable are stratification variables (if any): + \describe{ + \item{SampleId}{Mandatory, chr: Identifies the sample the stratification applies to} + \item{Stratum}{Mandatory, chr: Identifies the within-sample stratum. In addition the Stratum is identified by the combination of all other columns on this table.} + \item{...}{Mandatory if present (may not contain NAs), chr: Additional columns in the sampleTable that are stratification variables.} + } + +Optional columns may be NA. + +The selection methods available for 'SelectionMethod' are explained here: +\describe{ + \item{Poission}{Poission sampling. Selection is performed randomly without replacement, and each selection is performed individually. Sample size is not fixed, and 'n' represents the expected sample size.} + \item{FSWR}{Fixed sample size with replacement. A random selection of a fixed sample size 'n' is chosen with replacement} + \item{FSWOR}{Fixed sample size without replacement. A random selection of a fixed sample size 'n' is chosen without replacement. Order of selection should be specified in the 'selectionTable'} +} + +The SelectionProbability is defined as: The probability of selecting the sampling unit when it was selected from the population. +The HHsamplingWeight: The normalized sampling weight, or the fraction of the stratum represented by the sampled unit when estimating with the Hansen-Hurwitz strategy: 1 / (SelectionProbability*Q) , where Q is the sum of the reciprocal of the SelectionProbabilites for the sampled units. For equal probability sampling with replacement, this is simply 1/n, where n i sample size. +The InclusionProbability is defined as: The probability of the sampling unit being included in the sample. +The HTsamplingWeight: The normalized sampling weight, or the fraction of the stratum represented by the sample when estimating with the Horvitz-Thompson strategy: 1 / (InclusionProbability*P), where P is the sum of the reciprocal of the InclusionProbabilites for the sampled units. For equal probability sampling without replacement, this is simply 1/n, where n is sample size. +} +\concept{Analytical estimation} +\concept{Data types} diff --git a/man/PSUSamplingParametersData.Rd b/man/PSUSamplingParametersData.Rd new file mode 100644 index 00000000..15e6db95 --- /dev/null +++ b/man/PSUSamplingParametersData.Rd @@ -0,0 +1,61 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/StoxDataTypes.R +\name{PSUSamplingParametersData} +\alias{PSUSamplingParametersData} +\title{PSU Sampling Design Parameters} +\description{ +Sampling parameters for selection of Primary Sampling Units +} +\details{ +Encodes information about the selection of Primary Sampling Units in multi-stage sampling, used in analytical design based estimation. + Information is encoded in three tables. + + The SampleTable encodes information about the sample of sampling units: + \describe{ + \item{Stratum}{Mandatory, chr: Identifies the stratum the sample is taken from. Treat unstratified sample as single-stratum sampling (provide only one stratum.} + \item{N}{Optional, num: The total number of PSUs in Stratum (total available for selection, not total selected)} + \item{n}{Optional, num: The number of PSUs selected from the Stratum} + \item{SelectionMethod}{Mandatory, chr: 'Poission', 'FSWR' or 'FSWOR'. The manner of selection for use in bootstrap or inference of inclusionProbabilities, selectionProbabilites, co-inclusion probabilities or co-selection probabilities.} + \item{FrameDescription}{Optional, chr: Free text field describing the sampling frame.} + } + + The SelectionTable encodes information abut the selection of sampling units for sampling: + \describe{ + \item{Stratum}{Mandatory, chr: Identifies the stratum the PSU is taken from.} + \item{Order}{Optional, num: Identifes the order of seleciton. May be necessary for inference when selections are not independent (e.g. FSWOR)} + \item{SamplingUnitId}{Optional, chr: Identifes PSU. NA encodes non-response} + \item{InclusionProbability}{Optional, num: The inclusion probability of the PSU} + \item{HTsamplingWeight}{Optional, num: The normalized Horvitz-Thompson sampling weight of the PSU} + \item{SelectionProbability}{Optional, num: The selection probability of the PSU} + \item{HHsamplingWeight}{Optional, num: The normalized Hansen-Hurwitz sampling weight of the PSU} + \item{SelectionDescription}{Optional, chr: Free text description of the PSU.} + } + + The StratificationVariables table encodes information about which columns in the sampleTable are stratification variables (if any): + \describe{ + \item{Stratum}{Mandatory, chr: Identifies the stratum. In addition the Stratum is identified by the combination of all other columns on this table.} + \item{...}{Mandatory if present (may not contain NAs), chr: Additional columns in the sampleTable that are stratification variables.} + } + + The Assignment encodes which identifier in sample records (e.g. \code{\link[RstoxData]{StoxBioticData}}) correspond to the SamplingUnitId + \describe{ + \item{DataRecordsId}{Optional, character. The identifier in data records that correspond to SamplingUnitId} + } + This field is optional, since SamplingParameters may be subject to processing before they are assigned to data records. + +Optional columns may be NA. + +The selection methods available for 'SelectionMethod' are explained here: +\describe{ + \item{Poission}{Poission sampling. Selection is performed randomly without replacement, and each selection is performed individually. Sample size is not fixed, and 'n' represents the expected sample size.} + \item{FSWR}{Fixed sample size with replacement. A random selection of a fixed sample size 'n' is chosen with replacement} + \item{FSWOR}{Fixed sample size without replacement. A random selection of a fixed sample size 'n' is chosen without replacement. Order of selection could be specified in the 'selectionTable'} +} + +The SelectionProbability is defined as: The probability of selecting the sampling unit when it was selected from the population. +The HHsamplingWeight: The normalized sampling weight, or the fraction of the stratum represented by the sampled unit when estimating with the Hansen-Hurwitz strategy: 1 / (SelectionProbability*Q) , where Q is the sum of the reciprocal of the SelectionProbabilites for the sampled units. For equal probability sampling with replacement, this is simply 1/n, where n i sample size. +The InclusionProbability is defined as: The probability of the sampling unit being included in the sample. +The HTsamplingWeight: The normalized sampling weight, or the fraction of the stratum represented by the sample when estimating with the Horvitz-Thompson strategy: 1 / (InclusionProbability*P), where P is the sum of the reciprocal of the InclusionProbabilites for the sampled units. For equal probability sampling without replacement, this is simply 1/n, where n is sample size. +} +\concept{Analytical estimation} +\concept{Data types} diff --git a/tests/testAgainstStoxPrep.R b/tests/testAgainstStoxPrep.R index d1e8febb..4205d8cc 100644 --- a/tests/testAgainstStoxPrep.R +++ b/tests/testAgainstStoxPrep.R @@ -5,6 +5,11 @@ library(data.table) # Prepare data based on StoXexport in "old" prepECA and compare results # +# ECA tests are only run if Reca is installed. + +if (nchar(system.file(package="Reca"))>0){ + + stoxRobj <- readRDS(system.file(package = "RstoxFDA", "testresources", "oldstoxprepreca")) samples <- data.table(catchId = as.character(stoxRobj$StoxExport$biotic$serialnumber), @@ -38,3 +43,4 @@ tabStox <- RstoxFDA::makeResultTableRECA(stoxRecaResults$prediction) #RstoxFDA::plotCatchAtAge(prepRecaResults$prediction, title="RecaPrep results") #RstoxFDA::plotCatchAtAge(stoxRecaResults$prediction, title="StoxPrep results") +}