# NM2_MRS_import_v2.r

# V2 uses 'metabolite' rather than 'analyte'

# Excel data set uses empty cells for implied content:
# The following utility fills in spaces sequentially using
# a 'previous value if missing' rule.

fill.empty <- function(x) {
  # Takes a data frame and fills in missing NON-NUMERIC values
  # with the preceding value.
  #
  # Args:
  #   x: a data frame (or other list of columns).
  #
  # Returns:
  #   A data.frame rebuilt from the processed columns. Numeric columns are
  #   passed through untouched. A missing value in the first position of a
  #   column has no predecessor and is therefore left as NA.
  fill1 <- function(x1) {
    if (!is.numeric(x1)) {
      # seq_along(x1)[-1] is empty for columns with fewer than two elements,
      # whereas 2:length(x1) would count backwards (2, 1) and either grow
      # the column or fail on a zero-length replacement.
      for (i in seq_along(x1)[-1]) {
        # Filling proceeds top-down, so runs of consecutive NAs all inherit
        # the last non-missing value above them.
        if (is.na(x1[i])) x1[i] <- x1[i - 1]
      }
    }
    x1
  }

  as.data.frame(lapply(x, fill1))
}

# Data sets are complete with headers and no surplus information,
# so defaults are sufficient for raw data input.
# Subject field should be factor, but automatic entry
# reads as numeric; it is converted explicitly rather than via specified
# column types, as its location in the data set varies.

# Pulse "Data" column is spectral intensity (see 2020-10-16_LR-SLRE.pdf)

concentrations.pp <- within(read_xls(params$concentrations.pp),
                            Subject <- factor(Subject))
concentrations.pp <- fill.empty(concentrations.pp)

pulse.pp <- within(read_xls(params$pulse.pp),
                   Subject <- factor(Subject))
pulse.pp <- fill.empty(pulse.pp)
# The last column of the pulse data is renamed to "Intensity"
names(pulse.pp)[ncol(pulse.pp)] <- "Intensity"


concentrations.nm <- within(read_xls(params$concentrations.nm),
                            Subject <- factor(Subject))
concentrations.nm <- fill.empty(concentrations.nm)

pulse.nm <- within(read_xls(params$pulse.nm),
                   Subject <- factor(Subject))
pulse.nm <- fill.empty(pulse.nm)
# The last column of the pulse data is renamed to "Intensity"
names(pulse.nm)[ncol(pulse.nm)] <- "Intensity"

# Session field includes session and replicate number. In addition,
# the subjects were repositioned between replicates in session 1 to
# allow estimations of between-location variance.
# The following code recodes Session to separate Session ID, Position (A, B)
# and (although unnecessary for stats) Replicate within Session

decode.session <- function(x) {
  # Splits the combined Session code of a data frame into separate factors.
  #
  # Args:
  #   x: a data frame with a Session column holding codes of the form
  #      "<session>_<replicate>".
  #
  # Returns:
  #   A data frame in which Session holds the session part, and new factor
  #   columns Replicate and Position have been added. Position is "B" for
  #   replicate "2" of session "1" and "A" otherwise. Non-numeric columns
  #   are placed before numeric ones.
  #
  # NOTE(review): the patterns match exactly one character either side of
  # the underscore, so this presumably relies on single-character session
  # and replicate codes - confirm against the data.
  runID <- as.character(x$Session)
  x$Session <- factor(gsub("(.)_.", "\\1", runID))
  x$Replicate <- factor(gsub("._(.)", "\\1", runID))
  x$Position <- factor(
    ifelse(x$Session == "1" & x$Replicate == "2", "B", "A")
  )
  # Arrange for 'tidy' column ordering (categorical first)
  which.num <- vapply(x, is.numeric, logical(1))
  cbind(x[!which.num], x[which.num])
}

concentrations.nm <- decode.session(concentrations.nm)
pulse.nm <- decode.session(pulse.nm)
concentrations.pp <- decode.session(concentrations.pp)
pulse.pp <- decode.session(pulse.pp)

# Finally, zero concentrations should be treated as missing values -
# mark as NA
drop.zero <- function(x) {
  # Replaces values below a small tolerance with NA in every numeric column.
  #
  # Args:
  #   x: a data frame (or other list of columns).
  #
  # Returns:
  #   A data.frame rebuilt from the processed columns. Non-numeric columns
  #   are passed through untouched. Note that the comparison is 'x1 < tol',
  #   so negative values are marked as NA as well as zeros.
  dz1 <- function(x1, tol = 1e-6) {
    if (is.numeric(x1)) {
      # Assign in place rather than via ifelse(): ifelse() returns a logical
      # vector when every value is below tol, which would make the column
      # non-numeric. which() also drops comparisons that are already NA.
      x1[which(x1 < tol)] <- NA
    }
    x1
  }

  as.data.frame(lapply(x, dz1))
}

concentrations.nm <- drop.zero(concentrations.nm)
pulse.nm <- drop.zero(pulse.nm)
concentrations.pp <- drop.zero(concentrations.pp)
pulse.pp <- drop.zero(pulse.pp)

# Retain a list of metabolite column names (common to both concentration data sets)

metabolite.cols <- which(vapply(concentrations.pp, is.numeric, logical(1)))
metabolites <- names(concentrations.pp)[metabolite.cols]