# NM2_MRS_import_v2.r
# V2 uses 'metabolite' rather than 'analyte'

# Excel data set uses empty cells for implied content:
# the following utility fills in spaces sequentially using
# a 'previous value if missing' rule.

# fill.empty: forward-fill missing values in the NON-NUMERIC columns of a
# data frame.
#   x   data frame (any list of equal-length columns works)
# Returns a plain data.frame with the same columns. Within each non-numeric
# column every NA is replaced by the nearest preceding value; a leading NA
# has nothing to copy from and stays NA. Numeric columns are passed through
# unchanged.
# NOTE: as.data.frame() is called with its defaults, so its usual handling
# of column names applies to the result.
fill.empty <- function(x) {
  fill1 <- function(x1) {
    if (!is.numeric(x1)) {
      # seq_along(x1)[-1] is empty for columns of length 0 or 1. The former
      # 2:length(x1) counted backwards in that case and either appended a
      # spurious element (turning a one-row sheet into two rows) or failed.
      for (i in seq_along(x1)[-1]) {
        if (is.na(x1[i])) x1[i] <- x1[i - 1]
      }
    }
    x1
  }
  as.data.frame(lapply(x, fill1))
}

# Data sets are complete with headers and no surplus information,
# so defaults are sufficient for raw data input.
# Subject field should be factor, but automatic entry reads it as numeric;
# it is converted explicitly because its location in the data set varies,
# making specified column types impractical.
# Pulse "Data" column is spectral intensity (see 2020-10-16_LR-SLRE.pdf)

# import.mrs: read one sheet, convert Subject to a factor, forward-fill the
# non-numeric columns and optionally rename the last column.
#   path      file path handed to read_xls()
#   last.col  new name for the final column, or NULL to keep it as read
# Subject is converted BEFORE fill.empty() is applied: fill.empty() skips
# numeric columns, so any blank Subject cells would otherwise stay missing.
import.mrs <- function(path, last.col = NULL) {
  sheet <- within(read_xls(path), Subject <- factor(Subject))
  sheet <- fill.empty(sheet)
  if (!is.null(last.col)) {
    names(sheet)[ncol(sheet)] <- last.col
  }
  sheet
}

concentrations.pp <- import.mrs(params$concentrations.pp)
pulse.pp <- import.mrs(params$pulse.pp, last.col = "Intensity")

concentrations.nm <- import.mrs(params$concentrations.nm)
pulse.nm <- import.mrs(params$pulse.nm, last.col = "Intensity")

# Session field includes session and replicate number. In addition,
# the subjects were repositioned between replicates in session 1 to
# allow estimations of between-location variance.
# The following code recodes Session to separate Session ID, Position (A, B)
# and (although unnecessary for stats) Replicate within Session

# decode.session: split a combined "<session>_<replicate>" Session code.
#   x   data frame with a Session column holding codes such as "1_2"
# Returns x with
#   Session    factor, the text before the first underscore
#   Replicate  factor, the text after the last underscore
#   Position   factor, "B" for session "1" replicate "2", otherwise "A"
# and with all non-numeric columns placed before the numeric ones.
# A code without an underscore is copied unchanged into both Session and
# Replicate.
decode.session <- function(x) {
  runID <- as.character(x$Session)
  # Anchored patterns give the same result as the former single-character
  # patterns for codes like "1_2", and also handle longer IDs ("1_10",
  # "10_2"), which the former patterns garbled.
  x$Session <- factor(sub("_.*$", "", runID))
  x$Replicate <- factor(sub("^.*_", "", runID))
  x$Position <- factor(
    ifelse(x$Session == "1" & x$Replicate == "2", "B", "A")
  )
  # Arrange for 'tidy' column ordering (categorical first)
  which.num <- vapply(x, is.numeric, logical(1))
  cbind(x[!which.num], x[which.num])
}

concentrations.nm <- decode.session(concentrations.nm)
pulse.nm <- decode.session(pulse.nm)
concentrations.pp <- decode.session(concentrations.pp)
pulse.pp <- decode.session(pulse.pp)

# Finally, zero concentrations should be treated as missing values -
# mark as NA

# drop.zero: set every value below a small tolerance to NA in the numeric
# columns of a data frame.
#   x   data frame
# Returns a plain data.frame; non-numeric columns are passed through.
# NOTE(review): the test is x1 < tol, so negative values are blanked as
# well as zeros - confirm this is intended for the pulse intensities.
drop.zero <- function(x) {
  dz1 <- function(x1, tol = 1e-6) {
    if (is.numeric(x1)) {
      # Assigning NA in place keeps the column numeric even when every
      # value is below tol. The former ifelse(x1 < tol, NA, x1) returned a
      # logical vector in that case, so an all-zero column silently dropped
      # out of the numeric (metabolite) columns. which() skips values that
      # are already NA.
      x1[which(x1 < tol)] <- NA
    }
    x1
  }
  as.data.frame(lapply(x, dz1))
}

concentrations.nm <- drop.zero(concentrations.nm)
pulse.nm <- drop.zero(pulse.nm)
concentrations.pp <- drop.zero(concentrations.pp)
pulse.pp <- drop.zero(pulse.pp)

# Retain a list of metabolite column names (common to both concentration
# data sets): every numeric column of the concentration table.
metabolite.cols <- which(vapply(concentrations.pp, is.numeric, logical(1)))
metabolites <- names(concentrations.pp)[metabolite.cols]