# 09-03-13: new start processing the Little data.... #################### 60 Characters Wide #################### ########## PRELIMINARIES.... # shapefiles at: # http://esp.cr.usgs.gov/data/atlas/little/ # zipped shapefile URL looks like: # http://esp.cr.usgs.gov/data/atlas/little/abieamab.zip DATE <- date() print(paste('Running the Little Data processing script on', DATE)) REMOTE <- TRUE NEW <- TRUE DRAW <- FALSE PATH <- '/Users/wag/Archive/Active/LittleCont/09-03-13workingdir/' INDEX_URL <- 'http://esp.cr.usgs.gov/data/atlas/little/' if(REMOTE){ cat('checking index page...\n') sp.page <- scan(INDEX_URL, sep = '\n', what = '') start.data <- grep('Latin Name', sp.page) + 1 #beginning of old stuff # sp.data <- sp.page[start.data:length(sp.page)] # sp.data <- gsub('<[^<>]*>', 'XxX', sp.page, # perl = TRUE, useBytes = TRUE) # sp.tableA <- sp.page[seq(2,1358, by = 2)] # sp.tableA <- matrix(unlist(strsplit(sp.tableA, # split = 'XxX')), ncol = 17, byrow = TRUE) # sp.tableA <- sp.tableA[,c(12,15,17)] # sp.tableB <- sp.page[seq(3,1359, by = 2)] # sp.tableB <- matrix(unlist(strsplit(sp.tableB, # split = 'XxX')), ncol = 4, byrow = TRUE) # sp.tableB <- sp.tableB[,3] # sp.table <- cbind(sp.tableA, sp.tableB) # colnames(sp.table) <- c('ScientificName', 'CommonName', # 'Reference', 'MapPlate') # genera <- unlist(strsplit(sp.table[,1], # ' '))[seq(1, nrow(sp.table) * 2 - 1, by = 2)] # spp <- unlist(strsplit(sp.table[,1], # ' '))[seq(2, nrow(sp.table) * 2, by = 2)] # sp.codes <- tolower(paste(substr(genera, 1, 4), # substr(spp, 1, 4), sep = '')) # sp.codes[30] <- 'acersacr' # sp.codes[91] <- 'canoholo' # sp.codes[399] <- 'pinumtez' # sp.codes[417] <- 'pinustrb' # sp.codes[574] <- 'salilaso' # sp.table <- cbind(sp.table, genus = genera, sp = spp, # sp.code = sp.codes) # Write the species table out.... # print(paste('there are', nrow(sp.table), # 'species at the website')) # write(sp.table, file = paste(PATH, 'sp.list.txt', sep = '')) #end of old stuff sp.codes <- sp.page[start.data:length(sp.page)] sp.codes <- gsub('^[^"]*', '', sp.codes, perl = TRUE, useBytes = TRUE) sp.codes <- gsub('.pdf.*$', '', sp.codes, perl = TRUE, useBytes = TRUE) sp.codes <- gsub('\"', '', sp.codes, perl = TRUE, useBytes = TRUE) sp.codes <- sp.codes[!sp.codes == ''] sp.codes <- sp.codes[1:(length(sp.codes)-2)] if(length(sp.codes) != length(unique(sp.codes))){ warning('there are non-unique 8-letter species codes') } cat(paste('there are', length(sp.codes), 'species listed on the index page\n')) # Read in a hand-corrected species table.... # sp.table <- read.table(paste(PATH, '09-02-10LittleSpp.csv', # sep = ''), sep = '\t', header = TRUE) # there are 679 spp. as of 09-03-13 # Download zipped shape files (with index page) oldwd <- getwd() setwd(PATH) if(!'zipped_shpfiles' %in% dir()){ system('mkdir zipped_shpfiles') } cat('downloading index page\n') system(paste('/sw/bin/wget -N ', INDEX_URL, sep = '')) setwd(paste(PATH, 'zipped_shpfiles', sep = '')) for(i in 1:length(sp.codes)){ filename <- paste(sp.codes[i], '.zip', sep = '') if(!filename %in% dir()){ cat(paste('downloading zipped shapefile ', sp.codes[i], '...\n', sep = '')) try(system(paste('/sw/bin/wget ', INDEX_URL, filename, sep = ''))) } } zippedfiles <- dir() setwd(oldwd) }else{ zippedfiles <- dir(paste(PATH, 'zipped_shpfiles', sep = '')) } cat(paste('there are', length(zippedfiles), 'zipped shape files in the right place\n')) system('touch processed.txt') processed <- scan(paste(PATH, 'processed.txt', sep = ''), what = 'character') cat(paste('according to the log file,', length(processed), 'have been processed\n')) if(length(sp.codes) != length(zippedfiles)) stop('wrong number of zipped files for number of species') sp.list <- sp.codes # A character vector of the species codes to process #sp.list <- scan(paste(PATH, 'sp.list.txt', sep = ''), what = '') #print(paste('there are', length(sp.list), # 'entries in the saved species list')) #################### 60 Characters Wide #################### # Global variables # Must all be round degrees LATMIN <- 15 LATMAX <- 75 LONGMIN <- -170 LONGMAX <- -30 # Determines the grain (resolution) of the grid (in degrees) GRAIN <- 0.5 # The points at which to construct species lists xs <- seq(from = LONGMIN, to = LONGMAX, by = GRAIN) xs <- xs + GRAIN / 2 ys <- seq(from = LATMIN, to = LATMAX, by = GRAIN) ys <- ys + GRAIN / 2 xs <- xs[-length(xs)] ys <- ys[-length(ys)] #################### 60 Characters Wide #################### # Subroutines vectorize <- function(x,y){ x.v <- rep(x, length(y)) y.v <- as.numeric(matrix(y, nrow = length(x), ncol = length(y), byrow = TRUE)) return(data.frame(x = x.v, y = y.v)) } isEven <- function(x){ return(as.logical(x %% 2 == 0)) } #################### 60 Characters Wide #################### # Main # The points in the form of coordinate vectors xypts <- vectorize(xs, ys) # A list of the species lists for the point grid (to be # wrapped up into a matrix) sp.lists.list <- vector(mode = 'list', length = length(xs) * length(ys)) # Read in the shapefiles....If necessary, install packages # 'sp', 'shapefiles', and 'gstat' including dependencies: # SparseM, Matrix, spam, fields, RColorBrewer library(sp) library(shapefiles) library(gstat) setwd(PATH) # Unzip zipped shapefiles #for(i in 1:length(sp.list)){ #for(i in 1:3){ # if(sp.list[i] %in% dir(paste(PATH, 'shpfiles', sep = ''))) break() # setwd('./shpfiles') # system(paste('/usr/bin/unzip zipped_shpfiles/', # sp.list[i], '.zip -d ', sp.list[i], sep = '')) # setwd('..') #} #if(length(dir(paste(PATH, 'shapefiles', # sep = ''))) != length(sp.list)) # stop('wrong number of shapefiles') #if(length(dir(paste(PATH, # 'esp.cr.usgs.gov/data/atlas/little/', # sep = ''))) != length(sp.list)) # stop('wrong number of zipped shapefiles') # This line is to test the script without it taking all night.... #sp.list <- sp.list[1:3] #################### 60 Characters Wide #################### # Loop through species for(i in 1:length(sp.list)){ if(sp.list[i] %in% processed){ cat(paste('skipping ', sp.list[i], ', which has already been processed\n', sep = '')) next } cat(paste(i, ' processing species ', sp.list[i], ' ', date(), '\n', sep = '')) # Unzip zipped shapefile system(paste('/usr/bin/unzip zipped_shpfiles/', sp.list[i], '.zip -d zipped_shpfiles/', sp.list[i], sep = '')) # Read shapefile into R shpfile <- read.shapefile(paste(PATH, 'zipped_shpfiles/', sp.list[i], '/', sp.list[i], sep = '')) # Remove unzipped shapefile system(paste('rm -rf ', PATH, 'zipped_shpfiles/', sp.list[i], sep = '')) #names(shpfile) #names(shpfile$shp) #names(shpfile$shp$shp[[1]]) #################### 60 Characters Wide #################### # Loop though polygons in shapefile sp.matrix <- matrix(0, nrow = length(ys), ncol = length(xs)) for(j in 1:length(shpfile$shp$shp)){ if(shpfile$shp$shp[[j]]$num.parts > 1){ first.part.pts <- 1:shpfile$shp$shp[[j]]$parts[2] verts <- shpfile$shp$shp[[j]]$points[first.part.pts,] }else{ verts <- shpfile$shp$shp[[j]]$points } if(DRAW){ if(j == 1){ plot.default(verts, pch = '.', xlim = c(shpfile$shp$header$xmin, shpfile$shp$header$xmax), ylim = c(shpfile$shp$header$ymin, shpfile$shp$header$ymax), type = 'n') plot.default(verts, pch = '.', xlim = c(LONGMIN, LONGMAX), ylim = c(LATMIN, LATMAX), type = 'n') } polygon(verts, col = NA) } sp.matrix <- sp.matrix + matrix(point.in.polygon( point.x = xypts$x, point.y = xypts$y, pol.x = verts$X, pol.y = verts$Y), nrow = length(ys), ncol = length(xs)) } if(DRAW) points(xypts, col = as.numeric(sp.matrix) + 1, pch = '+') # The following loop adds the sp. code to sp.lists.list for(k in 1:length(sp.lists.list)){ if(as.vector(sp.matrix)[k]){ sp.lists.list[[k]] <- c(sp.lists.list[[k]], sp.list[i]) } } # Done looping through polygons save(sp.lists.list, file = paste(PATH, 'sp.lists.list.temp', sep = '')) sink(file = paste(PATH, 'processed.txt', sep = ''), append = TRUE) cat(sp.list[i]) cat('\n') sink() } # End loop through species #################### 60 Characters Wide #################### # Create a matrix of the species lists.... # Note that the t() is needed so that lat. and long. are # correctly associated with columns and rows of matrix sp.lists.mat <- t(matrix(sp.lists.list, nrow = length(ys), ncol = length(xs), byrow = TRUE)) # Create a matrix of the total species counts sp.count.mat <- matrix(0, nrow = length(xs), ncol = length(ys)) for(i in 1:length(xs)){ for(j in 1:length(ys)){ sp.count.mat[i,j] <- length(sp.lists.mat[[i,j]]) } } # Saving what has been done so far... dput(sp.lists.mat, file = 'sp.lists.mat.Rdput') write.table(sp.count.mat, file = paste(PATH, 'sp.count.mat.asc', sep = '')) save(sp.lists.mat, file = paste(PATH, 'sp.lists.mat.Rdata', sep = '')) setwd(oldwd) # And restoring from the saved version... setwd(PATH) sp.lists.mat <- dget(paste(PATH, 'sp.lists.mat.Rdput', sep = '')) sp.count.mat <- as.matrix(read.table(paste(PATH, 'sp.count.mat.asc', sep = ''))) #load(file = paste(PATH, 'sp.lists.mat.Rdata', sep = '')) #sp.lists.list <- as.list(sp.lists.mat) # A simple alpha diversity plot... library(lattice) levelplot(sp.count.mat, main = 'Alpha diversity of woody N. Amer. endemic spp. on 0.5 deg. raster', xlab = 'West longitude', ylab = 'North latitude', row.values = (seq(LONGMIN, LONGMAX, by = 0.5) + 0.25)[-1], column.values = (seq(LATMIN, LATMAX, by = 0.5) + 0.25)[-1], col.regions = hsv(h = 1, s = seq(0,1,length.out = 50), v = 1)) #image(sp.count.mat, main = 'Alpha diversity of woody N. Amer. endemic spp. on 0.5 deg. raster', xlab = 'West longitude', ylab = 'North latitude', # xaxt = 'n', yaxt = 'n', # col = hsv(h = 1, s = seq(0,1,length.out = 50), v = 1)) #axis(1, at = seq(from = 0, to = 1, # length.out = length(seq(from = LONGMIN, # to = LONGMAX, by = 20))), # labels = seq(from = LONGMIN, to = LONGMAX, by = 20)) #axis(2, at = seq(from = 0, to = 1, # length.out = length(seq(from = LATMIN, # to = LATMAX, by = 20))), # labels = seq(from = LATMIN, to = LATMAX, by = 20)) #################### 60 Characters Wide #################### # Beta diversity sp.char.mat <- matrix(lapply(sp.lists.mat, function(x) paste(x, collapse = ',')), nrow = nrow(sp.count.mat), ncol = ncol(sp.count.mat)) beta9.mat <- sp.count.mat for(i in 2:(nrow(sp.count.mat)-1)){ for(j in 2:(ncol(sp.count.mat)-1)){ lcl.sp.list <- unlist(sp.char.mat[(i-1):(i+1),(j-1):(j+1)]) lcl.sp.list <- paste(lcl.sp.list, collapse = ',') lcl.sp.list <- unlist(strsplit(lcl.sp.list, split = ',')) beta9.mat[i,j] <- length(unique(lcl.sp.list)) } } levelplot(beta9.mat, main = 'Beta diversity in 3 by 3 neighborhood', xlab = 'West longitude', ylab = 'North latitude', row.values = (seq(LONGMIN, LONGMAX, by = 0.5) + 0.25)[-1], column.values = (seq(LATMIN, LATMAX, by = 0.5) + 0.25)[-1], col.regions = hsv(h = 0.67, s = seq(0,1,length.out = 50), v = 1)) beta25.mat <- sp.count.mat for(i in 3:(nrow(sp.count.mat)-2)){ for(j in 3:(ncol(sp.count.mat)-2)){ lcl.sp.list <- unlist(sp.char.mat[(i-2):(i+2),(j-2):(j+2)]) lcl.sp.list <- paste(lcl.sp.list, collapse = ',') lcl.sp.list <- unlist(strsplit(lcl.sp.list, split = ',')) beta25.mat[i,j] <- length(unique(lcl.sp.list)) } } levelplot(beta25.mat, main = 'Beta diversity in 5 by 5 neighborhood', xlab = 'West longitude', ylab = 'North latitude', row.values = (seq(LONGMIN, LONGMAX, by = 0.5) + 0.25)[-1], column.values = (seq(LATMIN, LATMAX, by = 0.5) + 0.25)[-1], col.regions = hsv(h = 0.67, s = seq(0,1,length.out = 50), v = 1)) beta49.mat <- sp.count.mat for(i in 4:(nrow(sp.count.mat)-3)){ for(j in 4:(ncol(sp.count.mat)-3)){ lcl.sp.list <- unlist(sp.char.mat[(i-3):(i+3),(j-3):(j+3)]) lcl.sp.list <- paste(lcl.sp.list, collapse = ',') lcl.sp.list <- unlist(strsplit(lcl.sp.list, split = ',')) beta49.mat[i,j] <- length(unique(lcl.sp.list)) } } levelplot(beta49.mat, main = 'Beta diversity in 7 by 7 neighborhood', xlab = 'West longitude', ylab = 'North latitude', row.values = (seq(LONGMIN, LONGMAX, by = 0.5) + 0.25)[-1], column.values = (seq(LATMIN, LATMAX, by = 0.5) + 0.25)[-1], col.regions = hsv(h = 0.67, s = seq(0,1,length.out = 50), v = 1)) beta81.mat <- sp.count.mat for(i in 5:(nrow(sp.count.mat)-4)){ for(j in 5:(ncol(sp.count.mat)-4)){ lcl.sp.list <- unlist(sp.char.mat[(i-4):(i+4),(j-4):(j+4)]) lcl.sp.list <- paste(lcl.sp.list, collapse = ',') lcl.sp.list <- unlist(strsplit(lcl.sp.list, split = ',')) beta81.mat[i,j] <- length(unique(lcl.sp.list)) } } levelplot(beta81.mat, main = 'Beta diversity in 9 by 9 neighborhood', xlab = 'West longitude', ylab = 'North latitude', row.values = (seq(LONGMIN, LONGMAX, by = 0.5) + 0.25)[-1], column.values = (seq(LATMIN, LATMAX, by = 0.5) + 0.25)[-1], col.regions = hsv(h = 0.67, s = seq(0,1,length.out = 50), v = 1)) # for 3.5 degree squares.... beta7 <- log(beta49.mat / sp.count.mat) #beta7 <- (sp.count.mat / beta49.mat) #beta7 <- (beta49.mat - sp.count.mat) levelplot(beta7, main = 'Beta diversity in 3.5 deg. squares (log units)', xlab = 'West longitude', ylab = 'North latitude', row.values = (seq(LONGMIN, LONGMAX, by = 0.5) + 0.25)[-1], column.values = (seq(LATMIN, LATMAX, by = 0.5) + 0.25)[-1], col.regions = hsv(h = 0.67, s = seq(0,1,length.out = 50), v = 1)) # for 0.5 degree squares.... beta3 <- log(beta9.mat / sp.count.mat) levelplot(beta3, main = 'Beta diversity in 1.5 deg. squares (log units)', xlab = 'West longitude', ylab = 'North latitude', row.values = (seq(LONGMIN, LONGMAX, by = 0.5) + 0.25)[-1], column.values = (seq(LATMIN, LATMAX, by = 0.5) + 0.25)[-1], col.regions = hsv(h = 0.67, s = seq(0,1,length.out = 50), v = 1)) #################### 60 Characters Wide #################### # Spp.-area curves # for center point sp.count.mat[140, 60] gammas <- rep(0, 30) for(i in 1:30){ thisgamma <- unlist(sp.char.mat[(140-i):(140+i),(60-i):(60+i)]) thisgamma <- paste(thisgamma, collapse = ',') thisgamma <- unlist(strsplit(thisgamma, split = ',')) gammas[i] <- length(unique(thisgamma)) } date() sp.count.mat[20, 90] gammas2 <- rep(0, 19) for(i in 1:19){ thisgamma <- unlist(sp.char.mat[(20-i):(20+i),(90-i):(90+i)]) thisgamma <- paste(thisgamma, collapse = ',') thisgamma <- unlist(strsplit(thisgamma, split = ',')) gammas2[i] <- length(unique(thisgamma)) } date() plot((1:length(gammas))^2, gammas, type = 'l', main = 'Species-area curve', xlab = 'Area in square degrees', ylab = 'Number of species', xlim = c(0, 9540000 / 3600), ylim = c(0,680)) text((length(gammas))^2, gammas[length(gammas)], '(140,60)', adj = 0) lines((1:length(gammas2))^2, gammas2) text((length(gammas2))^2, gammas2[length(gammas2)], '(20,90)', adj = 0) abline(h = 679, lty = 2) text(200, 660, 'Total spp. = 679') abline(v = 9540000 / 3600, lty = 2) text(2600, 120, 'Total area = 2650 sq. deg.', srt = 90) mtext('square-included areas')