From 4aafd29d98535eeb5771ea000963126182f44930 Mon Sep 17 00:00:00 2001
From: Layla Riemann <layla.riemann@ptb.de>
Date: Mon, 2 Aug 2021 09:38:45 +0000
Subject: [PATCH] R-code to obtain REML analysis

---
 NM2_MRS_import_v2.r | 92 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 92 insertions(+)
 create mode 100644 NM2_MRS_import_v2.r

diff --git a/NM2_MRS_import_v2.r b/NM2_MRS_import_v2.r
new file mode 100644
index 0000000..4e2a8fe
--- /dev/null
+++ b/NM2_MRS_import_v2.r
@@ -0,0 +1,92 @@
+# NM2_MRS_import_v2.r
+
+# V2 uses 'metabolite' rather than 'analyte'
+
+# Excel data set uses empty cells for implied content: 
+# The following utility fills in spaces sequentially using
+# a 'previous value if missing' rule.
+
+fill.empty <- function(x) {
+  # Takes a data frame; in each NON-NUMERIC column replaces NA with the
+  # preceding value (a leading NA stays NA). Returns a data.frame.
+  fill1 <- function(x1) {
+    if (is.numeric(x1)) return(x1)
+    # seq_along()[-1] is empty for 0/1 rows; 2:length(x1) counted down there
+    for (i in seq_along(x1)[-1]) if (is.na(x1[i])) x1[i] <- x1[i - 1]
+    x1
+  }
+  
+  as.data.frame(lapply(x, fill1))
+}
+
+# Data sets are complete with headers and no surplus information,
+# so defaults are sufficient for raw data input
+# The Subject field should be a factor, but the automatic import reads it
+# as numeric. It is converted explicitly because its column position varies
+# between data sets, which makes specifying column types impractical.
+
+# Pulse "Data" column is spectral intensity (see 2020-10-16_LR-SLRE.pdf)
+
+# Import the four workbooks identically: read the sheet, convert Subject
+# to a factor, then fill the implied (blank) cells. Both read_xls() and
+# params must already be available; neither is defined in this script.
+concentrations.pp <- fill.empty(
+  within(read_xls(params$concentrations.pp), Subject <- factor(Subject)))
+
+# For the pulse sheets the last column is additionally renamed "Intensity"
+pulse.pp <- fill.empty(
+  within(read_xls(params$pulse.pp), Subject <- factor(Subject)))
+names(pulse.pp)[ncol(pulse.pp)] <- "Intensity"
+
+
+concentrations.nm <- fill.empty(
+  within(read_xls(params$concentrations.nm), Subject <- factor(Subject)))
+
+pulse.nm <- fill.empty(
+  within(read_xls(params$pulse.nm), Subject <- factor(Subject)))
+names(pulse.nm)[ncol(pulse.nm)] <- "Intensity"
+
+# Session encodes session and replicate number as "<session>_<replicate>".
+# Subjects were repositioned between replicates in session 1 to allow
+# estimation of between-location variance. decode.session() recodes Session
+# into Session ID, Replicate (unnecessary for stats) and Position (A, B).
+
+decode.session <- function(x) {
+  runID <- as.character(x$Session)
+  # Split at the underscore so multi-character IDs ("10_2") decode correctly
+  x$Session <- factor(sub("_.*$", "", runID))
+  x$Replicate <- factor(sub("^[^_]*_", "", runID))
+  x$Position <- factor(ifelse(x$Session == "1" & x$Replicate == "2", "B", "A"))
+  # 'Tidy' column ordering: non-numeric columns first, numeric ones after
+  which.num <- vapply(x, is.numeric, logical(1))
+  cbind(x[!which.num], x[which.num])
+}
+# Decode the Session field in each of the four data sets
+concentrations.pp <- decode.session(concentrations.pp)
+pulse.pp <- decode.session(pulse.pp)
+concentrations.nm <- decode.session(concentrations.nm)
+pulse.nm <- decode.session(pulse.nm)
+
+# Finally, zero concentrations should be treated as missing values:
+# every numeric entry below tol (negatives included) is marked as NA
+drop.zero <- function(x) {
+  dz1 <- function(x1, tol = 1e-6) {
+    if (is.numeric(x1)) {
+      # Assign in place, not ifelse(): ifelse() returns a logical vector for
+      # an all-zero or empty column, which is.numeric() checks then miss
+      x1[!is.na(x1) & x1 < tol] <- NA
+    }
+    x1
+  }
+  as.data.frame(lapply(x, dz1))
+}
+# Mark numeric entries below the drop.zero() tolerance as NA in all data sets
+concentrations.pp <- drop.zero(concentrations.pp)
+pulse.pp <- drop.zero(pulse.pp)
+concentrations.nm <- drop.zero(concentrations.nm)
+pulse.nm <- drop.zero(pulse.nm)
+
+# Keep the positions and names of the numeric (metabolite) columns, taken
+# from concentrations.pp; they are common to both concentration data sets
+metabolite.cols <- which(vapply(concentrations.pp, is.numeric, logical(1)))
+metabolites <- names(concentrations.pp)[metabolite.cols]
\ No newline at end of file
-- 
GitLab