############################################################ #################### 60 Characters Wide #################### # Script file for `Loosening the CLAMP' # tested in R version 2.1.1, 2.0.0 # see also Elevation.Rscript.txt and 'AGU poster', 2005 ############################################################ # Walton A. Green # Department of Geology and Geophysics # Yale University # P. O. Box 208109, Yale Station # New Haven, Connecticut 06520 # walton.green@yale.edu ############################################################ # Code intended for free redistributuion under copyleft # GNU version 2 or later ############################################################ #Preprocessing data #Read in the raw files Wolfe173ALL <- read.table("/Users/wag/Archive/Active/Ecophenetics and Comp. Index/CLAMP Database/Wolfe173ALL.txt", sep = '\t', header = TRUE, colClasses = 'character') JacobsALL <- read.table("/Users/wag/Archive/Active/Ecophenetics and Comp. Index/CLAMP Database/JacobsALL.txt", sep = '\t', header = TRUE, colClasses = 'character') GregoryALL <- read.table("/Users/wag/Archive/Active/Ecophenetics and Comp. Index/CLAMP Database/GregoryALL.txt", sep = '\t', header = TRUE, colClasses = 'character') KowalskiALL <- read.table("/Users/wag/Archive/Active/Ecophenetics and Comp. Index/CLAMP Database/KowalskiALL.txt", sep = '\t', header = TRUE, colClasses = 'character') # Abbreviations for the CLAMP variables CLAMP.vars.abbrev31 <- c('Lobd', 'Entr', 'TReg', 'TCls', 'TRnd', 'TAcu', 'TCmp', 'ZNan', 'ZLe1', 'ZLe2', 'ZMi1', 'ZMi2', 'ZMi3', 'ZMe1', 'ZMe2', 'ZMe3', 'AEmg', 'ARnd', 'AAcu', 'AAtn', 'BCor', 'BRnd', 'BAcu', 'Rlt1', 'Rb12', 'Rb23', 'Rb34', 'Rgt4', 'SObo', 'SElp', 'SOvt') # Make the CLAMP codes numeric in the Wolfe data set Wolfe173CLAMP <- Wolfe173ALL[,1:31] for(i in 1:31){ Wolfe173CLAMP[,i] <- as.numeric(Wolfe173CLAMP[,i]) } # Reorder and make the CLAMP codes numeric in the Jacobs # data set JacobsCLAMP29 <- JacobsALL[,c(3, 2, 4, 8, 6, 7, 5, 24:30, 47, 10, 12, 11, 15, 13, 14, 19:23, 17, 18, 16)] for(i in 1:29){ JacobsCLAMP29[,i] <- 100 * as.numeric(JacobsCLAMP29[,i]) } JacobsCLAMP <- cbind(JacobsCLAMP29[,c(1:7)], rep(NA, 30), JacobsCLAMP29[,c(8:14)], rep(NA, 30), JacobsCLAMP29[,c(15:29)]) # Make the CLAMP codes numeric in the Gregory data set GregoryCLAMP <- GregoryALL[,20:50] for(i in 1:31){ GregoryCLAMP[,i] <- as.numeric(GregoryCLAMP[,i]) } # Make the CLAMP codes numeric in the Kowalski data set KowalskiCLAMP <- KowalskiALL[,1:31] for(i in 1:31){ KowalskiCLAMP[,i] <- 100 * as.numeric(KowalskiCLAMP[,i]) } # Give all the names the same abbreviations names (Wolfe173CLAMP) <- CLAMP.vars.abbrev31 names (JacobsCLAMP29) <- CLAMP.vars.abbrev31[c(1:7, 9:15, 17:31)] names (JacobsCLAMP) <- CLAMP.vars.abbrev31 names (GregoryCLAMP) <- CLAMP.vars.abbrev31 names (KowalskiCLAMP) <- CLAMP.vars.abbrev31 # Add row names to the Jacobs data row.names (JacobsCLAMP29) <- c('Mpanga Forest, UG', 'Combretaceous Woodland, UG', 'Arabuko-Sokoke Natural Reserve, KE', 'Mbololo Teita, KE', 'Mramagambo, UG', 'Bakundu, CAM', 'Tree Savanna, UG', 'Mt. Kenya, Meru, KE', 'Magamba Forest, TZ', 'Nidrowa, Okomu, NI', 'Budongo, UG', 'Lupa, TZ', 'Kibwezi, KE', 'Albizia Woodland, UG', 'Lengwe, MAL', 'Mwaniliana, Lowland, TZ', 'Usambara, Submontane, TZ', 'Kora, KE', 'St. Florais, CAR', 'Buganda Thicket, UG', 'Yangambi, CO', 'Madabu, CO', 'Kade, CO', 'Lobe, GAB', 'Kahuzi, CO', 'Mole, GH', 'Kaniama, CO', 'Wamba, CO', 'Manyara, TZ', 'Koitoboss, Elgon, KE') row.names (JacobsCLAMP) <- row.names (JacobsCLAMP29) # Probably unnecessary row.names (KowalskiCLAMP) <- row.names(KowalskiALL) # Put together the CLAMP and MAT data ALLCLAMP <- rbind (Wolfe173CLAMP, JacobsCLAMP, GregoryCLAMP, KowalskiCLAMP) ALLMAT <- as.numeric(c(Wolfe173ALL[,33], JacobsALL[,32], GregoryALL[,13], KowalskiALL[,36])) # Read in the supplementary data from Wolfe 1993 Wolfe1993 <- read.table("/Users/wag/Archive/Active/Ecophenetics and Comp. Index/CLAMP Database/Wolfe1993Data.txt", sep = '\t', header = TRUE, colClasses = 'character') # Create factor to indicate which study each flora came from Study <- as.factor(c(rep (1, 173), rep (2, 30), rep (3, 12), rep (4,30))) levels(Study) <- c('Wolfe173', 'Jacobs', 'Gregory', 'Kowalski') WolfeComp <- data.frame(P = Wolfe173CLAMP$Entr, MAT = as.numeric(Wolfe173ALL$MAT), n = as.numeric(Wolfe1993$n)) row.names(WolfeComp) <- row.names(Wolfe173ALL) #pdf(file = 'Phist.pdf', family = 'Times', width = 5, # height = 5) hist(WolfeComp$P, main = 'Histogram of Percentage Entire(P)') #dev.off() #pdf(file = 'MAThist.pdf', family = 'Times', width = 5, # height = 5) hist(WolfeComp$MAT, main = 'Histogram of Mean Annual Temperature (MAT)') #dev.off() #pdf(file = 'Transformations.pdf', family = 'Times', # width = 8, height = 8) par(mfrow = c(2,3)) qqnorm(ALLCLAMP$Entr, main = 'P untransformed') qqnorm(sqrt(ALLCLAMP$Entr), main = 'Square Root P') qqnorm(asin(ALLCLAMP$Entr/100), main = 'Arcsine P') qqnorm(ALLMAT, main = 'MAT untransformed') qqnorm(sqrt(ALLMAT), main = 'Square Root MAT') qqnorm(sqrt(ALLMAT), main = 'Log MAT') #dev.off() #pdf(file = 'Ntable.pdf', family = 'Times', width = 5, # height = 5) plot(table(WolfeComp$n), xlab = 'Number of Species in the Flora', ylab = 'Number of Floras with Indicated Number of species') #dev.off() # show variable names and abbreviations cat (paste(CLAMP.vars31, ' & ', CLAMP.vars.abbrev31, ' & ', sep = ''), sep = '\n') # predicting MAT from P (leaf margin analysis) nindicator <- as.numeric(!is.na (WolfeComp$n)) nabe <- 100 * ((WolfeComp$P / 100) * (1 - (WolfeComp$P / 100)) / sqrt(WolfeComp$n)) WolfeComp <- cbind (WolfeComp, nindicator) Index <- as.factor(c(rep (1, 173), rep (2, 30), rep (3, 12), rep (4,30))) # possible outliers/influential points ALLCLAMP[c(168, 172, 173, 198),] # Hotelling's T^2 Test ALL.manova <- manova (as.matrix(ALLCLAMP) ~ Index) summary (ALL.manova, test = 'Hotelling') rm (ALL.manova) ############################################################ # Code for Hotelling's T^2 test modified from Peter B. # Mandeville (mandevip@148.224.17.2) in R-help archive: # Mon Nov 16 1998 - 15:05:01 CET as modified from # Steve Selvin # 1998 Modern Applied Biostatistical Methods Using S-Plus # Monographs in Epidemiology and Biostatistics Volume 28 # Oxford University Press, pages 54-57 hotelling <- function(d1,d2){ k <- ncol(d1) n1 <- nrow(d1) n2 <- nrow(d2) xbar1 <- apply(d1,2,mean) xbar2 <- apply(d2,2,mean) dbar <- xbar2-xbar1 v <- ((n1-1)*var(d1)+(n2-1)*var(d2))/(n1+n2-2) t2 <- n1*n2*dbar%*%solve(v)%*%dbar/(n1+n2) f <- (n1+n2-k-1)*t2/((n1+n2-2)*k) cat("F:",f,"\n") cat("PROBABILITY:",1-pf(f,k,n1+n2-k-1),"\n") } ############################################################ hotelling(ALLCLAMP[1:173,c(-8, -16)], ALLCLAMP[174:203,c(-8, -16)]) hotelling(ALLCLAMP[1:173,], ALLCLAMP[204:215,]) hotelling(ALLCLAMP[1:173,], ALLCLAMP[216:245,]) hotelling(ALLCLAMP[174:203,c(-8, -16)], ALLCLAMP[204:215,c(-8, -16)]) hotelling(ALLCLAMP[174:203,c(-8, -16)], ALLCLAMP[216:245,c(-8, -16)]) hotelling(ALLCLAMP[204:215,], ALLCLAMP[216:245,]) # Regression.... summary (ALL.lm <- lm (ALLMAT ~ ALLCLAMP$Entr)) summary (ALL.CLAMP.lm <- lm (ALLMAT[1:173] ~ ALLCLAMP$Entr[1:173])) summary (ALL.Jacobs.lm <- lm (ALLMAT[174:203] ~ ALLCLAMP$Entr[174:203])) summary (ALL.Gregory.lm <- lm (ALLMAT[204:215] ~ ALLCLAMP$Entr[204:215])) summary (ALL.Kowalski.lm <- lm (ALLMAT[216:245] ~ ALLCLAMP$Entr[216:245])) # new start: Aug 29, 2005-- ALLn <- as.numeric(c(WolfeComp$n, JacobsALL[,43], GregoryALL[,1], KowalskiALL[,41])) # plotting regressions plot.default (ALLCLAMP[,2], ALLMAT, pch = '.', col = as.numeric(Index), xlab = 'Percentage of Untoothed leaves (P)', ylab = 'Celsius Mean Annual Temperature (MAT)', main = 'Comparison of Studies') abline(ALL.CLAMP.lm$coef, col = 1, lwd = 3) abline(ALL.Jacobs.lm$coef, col = 2, lwd = 3) abline(ALL.Gregory.lm$coef, col = 3, lwd = 3) abline(ALL.Kowalski.lm$coef, col = 4, lwd = 3) ALL.All.lm <- lm (ALLMAT ~ ALLCLAMP$Entr) abline(ALL.All.lm$coef, lty = 2) sigma <- sqrt((ALLCLAMP[,2]/100) * (1 - (ALLCLAMP[,2]/100)) / ALLn) segments (ALLCLAMP[,2], ALLMAT, ALLCLAMP[,2] + 100 * sigma, ALLMAT, col = as.numeric(Index), lwd = 0.3) segments (ALLCLAMP[,2], ALLMAT, ALLCLAMP[,2] - 100 * sigma, ALLMAT, col = as.numeric(Index), lwd = 0.3) symbols(ALLCLAMP[,2], ALLMAT, circles = ALLn/max(na.omit(ALLn)), inches = FALSE, add = TRUE, bg = as.numeric(Index), fg = as.numeric(Index)) abline(v = min(ALLCLAMP[,2][Index==1]), col = 1, lwd = 0.5) abline(v = max(ALLCLAMP[,2][Index==1]), col = 1, lwd = 0.5) abline(v = min(ALLCLAMP[,2][Index==2]), col = 2, lwd = 0.5) abline(v = max(ALLCLAMP[,2][Index==2]), col = 2, lwd = 0.5) abline(v = min(ALLCLAMP[,2][Index==3]), col = 3, lwd = 0.5) abline(v = max(ALLCLAMP[,2][Index==3]), col = 3, lwd = 0.5) abline(v = min(ALLCLAMP[,2][Index==4]), col = 4, lwd = 0.5) abline(v = max(ALLCLAMP[,2][Index==4]), col = 4, lwd = 0.5) abline(h = min(ALLMAT[Index==1]), col = 1, lwd = 0.5) abline(h = max(ALLMAT[Index==1]), col = 1, lwd = 0.5) abline(h = min(ALLMAT[Index==2]), col = 2, lwd = 0.5) abline(h = max(ALLMAT[Index==2]), col = 2, lwd = 0.5) abline(h = min(ALLMAT[Index==3]), col = 3, lwd = 0.5) abline(h = max(ALLMAT[Index==3]), col = 3, lwd = 0.5) abline(h = min(ALLMAT[Index==4]), col = 4, lwd = 0.5) abline(h = max(ALLMAT[Index==4]), col = 4, lwd = 0.5) # horiz. ranges segments (min(ALLCLAMP[,2][Index==1]), -3, max(ALLCLAMP[,2][Index==1]), -3, lwd = 2, col = 1) segments (min(ALLCLAMP[,2][Index==2]), -2, max(ALLCLAMP[,2][Index==2]), -2, lwd = 2, col = 2) segments (min(ALLCLAMP[,2][Index==3]), -1, max(ALLCLAMP[,2][Index==3]), -1, lwd = 2, col = 3) segments (min(ALLCLAMP[,2][Index==4]), 0, max(ALLCLAMP[,2][Index==4]), 0, lwd = 2, col = 4) # vert. ranges segments (-1, min(ALLMAT[Index==1]), -1, max(ALLMAT[Index==1]), lwd = 2, col = 1) segments (0, min(ALLMAT[Index==2]), 0, max(ALLMAT[Index==2]), lwd = 2, col = 2) segments (1, min(ALLMAT[Index==3]), 1, max(ALLMAT[Index==3]), lwd = 2, col = 3) segments (2, min(ALLMAT[Index==4]), 2, max(ALLMAT[Index==4]), lwd = 2, col = 4) hist (ALLMAT, breaks = 30) hist (ALLCLAMP[,2], breaks = 30) # hca ############################################################ # THIS LINE DOWNLOADS A HACKED FUNCTION FOR PLOTTING # DENDROGRAMS WITH COLORED TWIGS FROM THE INDICATED URL source('http://www.stat.yale.edu/~wag6/R-Yale/WAGfcts/my.dend.plot') ############################################################ ALL.hclust <- hclust (dist(ALLCLAMP), method = 'complete') par(mfcol= c(2,1)) my.dend.plot(as.dendrogram(ALL.hclust), twig.col = as.numeric(Study)[ALL.hclust$order], lab.col = as.numeric(Study)[ALL.hclust$order], leaflab = 'none', main = 'Complete Linkage Dendrogram') ALL.hclust <- hclust (dist(ALLCLAMP), method = 'single') my.dend.plot(as.dendrogram(ALL.hclust), twig.col = as.numeric(Study)[ALL.hclust$order], lab.col = as.numeric(Study)[ALL.hclust$order], leaflab = 'none', main = 'Single Linkage Dendrogram') # pca/cca library(vegan) pc29 <- princomp(as.matrix(ALLCLAMP[,c(-8, -16)])) pc31 <- princomp(as.matrix(ALLCLAMP[c(1:173,204:245),])) cc29 <- cca(ALLCLAMP[,c(-8,-16)], ALLMAT) cc31 <- cca(ALLCLAMP[c(1:173,204:245),], ALLMAT[c(1:173,204:245)]) cbind(pc29$loadings[,1], pc29$loadings[,2]) cbind(pc31$loadings[,1], pc31$loadings[,2]) pdf(height = 7, width = 7) par(mfrow= c(2,2)) plot(pc31$scores[,1], pc31$scores[,2], col = as.numeric(Index[c(1:173,204:245)]), sub = '31-variable data set, Jacobs data omitted', xlab = 'PC1', ylab = 'PC2', pch = 2) plot(pc29$scores[,1], pc29$scores[,2], col = as.numeric(Index), sub = '29-variable data set, all studies', xlab = 'PC1', ylab = 'PC2', pch = 2) plot(cc31, type = 'none', sub = '31-variable data set, Jacobs data omitted') points(cc31, col = as.numeric(Index[c(1:173,204:245)]), pch = 2) plot(cc29, type = 'none', sub = '29-variable data set, all studies') points(cc29, col = as.numeric(Index), pch = 2) dev.off() #pdf(height = 6, width = 6, family = 'Times') pairs(cbind(P = ALLCLAMP$Entr, CCA1 = scores(cc29)$sites[,1], CA1 = scores(cc29)$sites[,2], MAT = ALLMAT, PC1 = pc29$scores[,1], PC2 = pc29$scores[,2]), col = as.numeric(Study), pch = 2, cex = 0.6) #dev.off() ############################################################ ############################################################ # NOTA BENE: USE DEVICE DRIVERS TO REDUCE FILE SIZE ############################################################ ############################################################ # very big file (275 Mb) if cut and pasted from graphics # window; otherwise only 3 MB pdf(file = '/Users/wag/Desktop/all.pdf', family = 'Times', width = 7, height = 7) #postscript(file = '/Users/wag/Desktop/all.pdf', # family = 'Times', width = 7, height = 7) pairstats(cbind(ALLCLAMP, ALLMAT), gap = 0, pch = '.', col = as.numeric(Index)) dev.off() # selection pdf(file = '/Users/wag/Desktop/selected.pdf', family = 'Times', width = 7, height = 7) pairstats(cbind(ALLCLAMP[,c(1,2,9,20,21,24,25)], ALLMAT), gap = 0, pch = '.', col = as.numeric(Index)) dev.off() # teeth pairstats(cbind(ALLCLAMP[,1:7], ALLMAT), gap = 0, pch = '.', col = as.numeric(Index)) # sizes pairstats(cbind(ALLCLAMP[,8:16], ALLMAT), gap = 0, pch = '.', col = as.numeric(Index)) # base, apex pairstats(cbind(ALLCLAMP[,17:23], ALLMAT), gap = 0, pch = '.', col = as.numeric(Index)) # aspect ratio, shape pairstats(cbind(ALLCLAMP[,24:31], ALLMAT), gap = 0, pch = '.', col = as.numeric(Index)) summary (ALL.lm <- lm (ALLMAT ~ ALLCLAMP$Lobd + ALLCLAMP$Entr + ALLCLAMP$ZLe1 + ALLCLAMP$AAtn + ALLCLAMP$BCor + ALLCLAMP$Rlt1 + ALLCLAMP$Rb12)) summary (ALL.lm <- lm (ALLMAT ~ ALLCLAMP$Lobd + ALLCLAMP$Entr + ALLCLAMP$ZLe1 + ALLCLAMP$AAtn + ALLCLAMP$BCor + ALLCLAMP$Rlt1 + ALLCLAMP$Rb12 + Index))