#script file for reanalysis of Kirk Johnson's thesis data by CIC b <- read.delim('Hobson:Users:wag:Archive:Active:CITrends:PaleobioRev:Reanal.ofJohnsonsDiss.:JohnsonThesisData.txt', colClasses = 'character') dim (b) names(b) b[1:10,] hc1 <- table(b$CIC[b$Comments == '' & b$HC1 == '1']) hc2 <- table(b$CIC[b$Comments == '' & b$HC2 == '1']) hc3 <- table(b$CIC[b$Comments == '' & b$HC3 == '1']) fu1 <- table(b$CIC[b$Comments == '' & b$FU1 == '1']) sort(unique (b$CIC[b$Comments == ''])) CICs <- c(100:155, 160:164, 170:172, 180:186, 190, 200, 210:220, 230:238, 240, 300, 350, 400, 500:509, 600, 700, 710, 800, 900, 910, 920, 930, 940, 950, 990) KJframe <- data.frame(matrix(0, nrow = 4, ncol = 118)) names(KJframe) <- CICs row.names(KJframe) <- c('hc1', 'hc2', 'hc3', 'fu1') l <- list (hc1, hc2, hc3, fu1) for (i in 1:4){ for (j in CICs){ foo <- as.numeric(l[[i]][(names(l[[i]]) == paste (j))]) if (length (foo) != 0){ KJframe[i, names(KJframe) == paste(j)] <- foo } } } pdf(file = 'JohnsonProfiles.pdf', family = 'Times', width = 7, height = 4) plot.default(as.numeric(KJframe[1,]), ylim = c(0, 40), type = 'l', lty = 3, main = 'Comparison of Johnsons Boundary Section Biozones', xlab = 'CICs', ylab = 'Frequency', axes = FALSE) axis(1, at = 1:118, labels = CICs, cex.axis = 0.6, las = 2) axis(2, at = c(1, 5, 11, 15, 21, 25, 31, 35), labels = c(1, 5, 1, 5, 1, 5, 1, 5), cex.axis = 0.8, las = 1, lwd = 0.5) axis(2, at = c(0, 10, 20, 30), tck = 1, col.axis = 'white', cex.axis = 0.8, las = 1, lwd = 0.5) lines(as.numeric(KJframe[2,]) + 10, lty = 3) lines(as.numeric(KJframe[3,]) + 20, lty = 3) lines(as.numeric(KJframe[4,]) + 30) text(rep(56, 4), c(5, 15, 25, 35), labels = c('HC1', 'HC2', 'HC3', 'FU1')) dev.off() KJframe.jitter <- matrix(jitter(as.numeric(as.matrix(KJframe))), nrow = 4) names(KJframe.jitter) <- names(KJframe) row.names(KJframe.jitter) <- row.names(KJframe) pairstats(t(KJframe.jitter[4:1,])) KJframe2 <- KJframe[,(colSums(KJframe) != 0)] KJframe2.jitter <- matrix(jitter(as.numeric(as.matrix(KJframe2))), nrow = 4) names(KJframe2.jitter) <- names(KJframe2) row.names(KJframe2.jitter) <- row.names(KJframe2) pairstats(t(KJframe2.jitter[4:1,])) pdf(file = 'JohnsonPairs.pdf', family = 'Times', width = 5, height = 5) KJframe3 <- KJframe[,(colSums(KJframe) > 1)] KJframe3.jitter <- matrix(jitter(as.numeric(as.matrix(KJframe3))), nrow = 4) names(KJframe3.jitter) <- names(KJframe3) row.names(KJframe3.jitter) <- row.names(KJframe3) pairstats(t(KJframe3.jitter[4:1,])) dev.off() KJframe4 <- KJframe KJframe4[KJframe4 == 0] <- NA KJframe4 <- KJframe4[,!is.na(colSums(KJframe4))] KJframe4.jitter <- matrix(jitter(as.numeric(as.matrix(KJframe4))), nrow = 4) names(KJframe4.jitter) <- names(KJframe4) row.names(KJframe4.jitter) <- row.names(KJframe4) pairstats(t(KJframe4.jitter[4:1,])) library(cluster) foo <- rbind(as.matrix(KJframe[,1:56]), as.matrix(CICdata3[c(1, 25, 40, 86, 100), 7:62])) plot(hclust(dist(foo))) cluscomp <- function(x, perpage = 15, pre = c('none', 'sqrt', 'twowaynorm'), metric = c('euclidean'), method = c('complete', 'average')){ #pre <- c('none', 'log', 'sqrt', 'rownorm', 'colnorm', 'twowaynorm') #metric <- c("euclidean", "maximum", "manhattan", "canberra", "binary") #method <- c("ward", "single", "complete", "average", "mcquitty", "median", "centroid") # pretreatment(6): none, log, sqrt, rownorm, colnorm, twowaynorm # dist(5): # # hclust(7): # agnes(5): # diana(2): nplots <- length(pre) * length(metric) * length(method) par(mfrow = c(3, 5)) plots <- 0 for(m.pre in pre){ if(m.pre == 'none'){ y <- x }else if(m.pre == 'log'){ y <- log(x) }else if(m.pre == 'sqrt'){ y <- sqrt(x) }else if(m.pre == 'rownorm'){ y <- normalize(x) }else if(m.pre == 'colnorm'){ warning('colnorm not implemented') }else if(m.pre == 'twowaynorm'){ warning('twowaynorm not implemented') } for(m.dist in metric){ for(m.hclust in method){ plot(hclust(dist(y, method = m.dist), method = m.hclust)) plots <- plots + 1 if(plots >= perpage){ return() } } for(m.agnes in c("average", "single", "complete", "ward", "weighted")){ plot(agnes(dist(y, method = m.dist), diss = TRUE, method = m.agnes), which.plots = 2) plots <- plots + 1 if(plots >= perpage){ return() } } for(m.diana in c("euclidian", "manhattan")){ plot(diana(dist(y, method = m.dist), diss = TRUE, metric = m.diana), which.plots = 2) plots <- plots + 1 if(plots >= perpage){ return() } } } } } # End of function profiles <- function(x){ par(mfrow = c((nrow(x) + 2), 1), mar = c(0,3,0,15)) for(i in 1:(nrow(x) - 1)){ plot(as.numeric(x[i,]), type = 'l', xaxt = 'n', yaxt = 'n') axis(2, at = (max(x[i,]) / 2), labels = round(max(x[i,])), las = 1, tick = FALSE) mtext(paste(' ', row.names(x)[i]), side = 4, las = 1, adj = 0, cex = 0.75) } plot(as.numeric(x[nrow(x),]), type = 'l', xaxt = 'n', yaxt = 'n') axis(2, at = max(x[nrow(x),] / 2), labels = round(max(x[nrow(x),])), las = 1, tick = FALSE) mtext(paste(' ', row.names(x)[nrow(x)]), side = 4, las = 1, adj = 0, cex = 0.75) axis(1, at = 1:56, labels = names(x), las = 2) } # End of function pdf(file = 'Profiles1.pdf', family = 'Times', width = 7, height = 10) profiles(foo[1:50,]) dev.off() pdf(file = 'Profiles2.pdf', family = 'Times', width = 7, height = 10) profiles(foo[51:100,]) dev.off() pdf(file = 'Profiles3.pdf', family = 'Times', width = 7, height = 10) profiles(foo[101:150,]) dev.off() pdf(file = 'Heat.pdf', family = 'Times', width = 7, height = 7) heatmap(foo) dev.off() foo <- rbind(as.matrix(KJframe[,1:56]), as.matrix(CICdata3[c(1:4, (1:20 *7)), 7:62])) pdf(file = 'Oneof7Clus.pdf', family = 'Times', width = 7, height = 6) plot(hclust(dist(normalize(foo)))) dev.off() foo <- rbind(as.matrix(KJframe[,1:56]), as.matrix(CICdata3[c(1:4, ((1:20 *7) + 2)), 7:62])) pdf(file = 'Oneof7Clus2.pdf', family = 'Times', width = 7, height = 6) plot(hclust(dist(normalize(foo)))) dev.off() rm (b, hc1, hc2, hc3, fu1, i, j, KJframe, KJframe.jitter, KJframe2, KJframe2.jitter, KJframe3, KJframe3.jitter, KJframe4, KJframe4.jitter)