# R script to process Little data
#################### 60 Characters Wide ####################
########## PRELIMINARIES....

PATH <- '/Users/wag/Archive/Active/LittleData/'

# All round degrees
LATMIN <- 20 # should be 15
LATMAX <- 80 # should be 75
LONGMIN <- -180 # should be -170
LONGMAX <- -40 # should be -30

# Determines the grain (resolution) of the grid (in degrees)
GRAIN <- 0.5

# The points at which to construct species lists
xs <- seq(from = LONGMIN, to = LONGMAX, by = GRAIN)
xs <- xs + GRAIN / 2
ys <- seq(from = LATMIN, to = LATMAX, by = GRAIN)
ys <- ys + GRAIN / 2
xs <- xs[-length(xs)]
ys <- ys[-length(ys)]

vectorize <- function(x,y){
  x.v <- rep(x, length(y))
  y.v <- as.numeric(matrix(y, nrow = length(x),
                            ncol = length(y), byrow = TRUE))
  return(data.frame(x = x.v, y = y.v))
}

# The points in the form of coordinate vectors
xypts <- vectorize(xs, ys)

# A character vector of the species codes to process
sp.list <- scan(paste(PATH, 'LittleShapeFiles/sp.codes.txt',
                                       sep = ''), what = '')
#sp.list <- sp.list[1:3]

# A list of the species lists for the point grid (to be
#  wrapped up into a matrix)
sp.lists.list <- vector(mode = 'list',
                           length = length(xs) * length(ys))

#################### 60 Characters Wide ####################
########## READ IN THE SHAPEFILES....

# If necessary, install packages 'sp', 'shapefiles', and 'gstat'

library(sp)
library(shapefiles)
library(gstat)

PATHh <- paste(PATH, 'LittleShapeFiles/climchange.cr.usgs.gov/data/atlas/little/', sep = '')

#################### 60 Characters Wide ####################
# Loop through species
for(i in 1:length(sp.list)){
  print(paste(i, '\tprocessing species ', sp.list[i], '\t',
                                          date(), sep = ''))
  shpfile <- read.shapefile(paste(PATHh, sp.list[i], '/',
                                      sp.list[i], sep = ''))

#names(shpfile)
#names(shpfile$shp)
#names(shpfile$shp$shp[[1]])

#################### 60 Characters Wide ####################
# Loop though polygons in shapefile
  sp.matrix <- matrix(0, nrow = length(ys),
                         ncol = length(xs))
  for(j in 1:length(shpfile$shp$shp)){
    if(shpfile$shp$shp[[j]]$num.parts > 1){
      first.part.pts <- 1:shpfile$shp$shp[[j]]$parts[2]
      verts <- shpfile$shp$shp[[j]]$points[first.part.pts,]
    }else{
      verts <- shpfile$shp$shp[[j]]$points
    }
    if(j == 1){
#      plot.default(verts, pch = '.',
#          xlim = c(shpfile$shp$header$xmin,
#                                 shpfile$shp$header$xmax),
#          ylim = c(shpfile$shp$header$ymin,
#                                 shpfile$shp$header$ymax),
#                   type = 'n')
#      plot.default(verts, pch = '.',
#          xlim = c(LONGMIN, LONGMAX),
#          ylim = c(LATMIN, LATMAX),
#                   type = 'n')
    }
#    polygon(verts, col = NA)
    sp.matrix <- sp.matrix + matrix(point.in.polygon(
                                     point.x = xypts$x,
                                     point.y = xypts$y,
                                     pol.x = verts$X,
                                     pol.y = verts$Y),
                    nrow = length(ys), ncol = length(xs))
  }
#points(xypts, col = as.numeric(sp.matrix) + 1, pch = '+')
  for(k in 1:length(sp.lists.list)){
    if(as.vector(sp.matrix)[k]){
      sp.lists.list[[k]] <-
                           c(sp.lists.list[[k]], sp.list[i])
    }
  } # End loop through polygons
  save(sp.lists.list, file = paste(PATH,
                            'sp.lists.list.temp', sep = ''))
} # End loop through species
#################### 60 Characters Wide ####################

# Create a matrix of the species lists....
#  Note that the t() is needed so that lat. and long. are
#  correctly associated with columns and rows of matrix
sp.lists.mat <- t(matrix(sp.lists.list, nrow = length(ys),
                            ncol = length(xs), byrow = TRUE))

# Create a matrix of the total species counts
sp.count.mat <- matrix(0, nrow = length(xs),
                          ncol = length(ys))
for(i in 1:length(xs)){
  for(j in 1:length(ys)){
    sp.count.mat[i,j] <- length(sp.lists.mat[[i,j]])
  }
}

# Saving what has been done so far....
save(sp.lists.mat, file = paste(PATH,
                            'sp.lists.mat.Rdata', sep = ''))
write.table(sp.count.mat, file = paste(PATH,
                              'sp.count.mat.asc', sep = ''))

# And restoring from the saved version....
# Note that the following three lines are a short-cut
#  that saves the ~20 hour processing in the beginning
#  of LittleData.Rscript.txt
sp.count.mat <- read.table(paste(PATH, 'sp.count.mat.asc',
                                                  sep = ''))
load(file = paste(PATH, 'sp.lists.mat.Rdata', sep = ''))
sp.lists.list <- as.list(sp.lists.mat)

# Now have to break up the sp. lists by
#  angiosperm/gymnosperm, etc.

#Index of conifer species
#c(1:7, 100:101, 153:165, 167:169, 200:207, 209:268,
#  406:409, 411, 413:416)

# Class is a factor the length of the number of species
#  that has level 'a' for angiosperms, 'c' for conifers
Class <- rep('a', length(sp.list))
Class[c(1:7, 100:101, 153:165, 167:169, 200:207, 209:268,
                              406:409, 411, 413:416)] <- 'c'

angio.lists.list <- sp.lists.list
gymno.lists.list <- sp.lists.list

for(i in 1:length(sp.lists.list)){
  if(is.null(sp.lists.list[[i]])){
    next()
  }
  angio.lists.list[[i]] <- as.character(na.omit(sp.list[Class == 'a'][charmatch(sp.lists.list[[i]], sp.list[Class == 'a'])]))

  gymno.lists.list[[i]] <- as.character(na.omit(sp.list[Class == 'c'][charmatch(sp.lists.list[[i]], sp.list[Class == 'c'])]))
}

# Create a matrix of the angiosperm species lists
angio.lists.mat <- matrix(angio.lists.list, nrow = length(xs),
                            ncol = length(ys), byrow = FALSE)

# Create a matrix of the gymnosperm species lists
gymno.lists.mat <- matrix(gymno.lists.list, nrow = length(xs),
                            ncol = length(ys), byrow = FALSE)

# Create a matrix of the angiosperm species counts
angio.count.mat <- matrix(0, nrow = length(xs),
                          ncol = length(ys))
for(i in 1:length(xs)){
  for(j in 1:length(ys)){
    angio.count.mat[i,j] <- length(angio.lists.mat[[i,j]])
  }
}

# Create a matrix of the gymnosperm species counts
gymno.count.mat <- matrix(0, nrow = length(xs),
                          ncol = length(ys))
for(i in 1:length(xs)){
  for(j in 1:length(ys)){
    gymno.count.mat[i,j] <- length(gymno.lists.mat[[i,j]])
  }
}

image(angio.count.mat)
image(gymno.count.mat)

# A matrix of the angiosperm:gymnosperm ratio
a2g.mat <- angio.count.mat/gymno.count.mat
image(a2g.mat)

# A matrix of the gymnosperm:angiosperm ratio
g2a.mat <- gymno.count.mat / angio.count.mat
image(g2a.mat)

#################### 60 Characters Wide ####################
# More elaborate plots...

sp.count.mat[sp.count.mat == 0] <- NA

min.diversity <- min(sp.count.mat, na.rm = TRUE)
max.diversity <- max(sp.count.mat, na.rm = TRUE)

angio.count.mat[angio.count.mat == 0] <- NA
gymno.count.mat[gymno.count.mat == 0] <- NA

# Plot of diversity
levs = 5
image(xs, ys, sp.count.mat, col = hsv(1,seq(from = 0.1, to = 1,
  length.out = levs),1), ylab = '(North) Latitude',
  xlab = '(West) Longitude', breaks = c(0, 1, 10, 50, 100,
  115), main = 'Tree Species Diversity')
points(rep(-160, levs), seq(from = 40, to = 30,
  length.out = levs), col = hsv(1,seq(from = 0.1, to = 1,
  length.out = levs)), pch = 20, cex = 2)
text(rep(-157, levs), seq(from = 40, to = 30,
  length.out = levs), labels = c('1', '2-10', '11-50',
  '51-100', '>100'), cex = 0.7)

# Plot of diversity using lattice/grid
library(grid)
library(lattice)

levelplot(sp.count.mat, ylab = 'Row', xlab = 'Column',
  col.regions = c('white', 'palegreen', 'green', 'green3', 'green4'),
  at = c(0,1.5,9.5,49.5,99.5,116),
  colorkey = list(space = 'bottom',
    at = c(0,2,10,50,100,116),
    labels = as.character(c(0,2,10,50,100,116))))
#grid.xaxis(at = seq(LONGMIN, LONGMAX, by = 10))
#grid.yaxis(at = seq(LATMIN, LATMAX, by = 10))

(North) Latitude
(West) Longitude

# How do you set the tick mark size?
#  trellis.par.get()$axis.components$top$tck
#  trellis.par.set()

#levs <- 12
#image(xs, ys, map, col = hsv(1,seq(from = 0.1, to = 1,
#  length.out = levs),1), ylab = 'Latitude',
#  xlab = '(West) Longitude')
#points(rep(-160, levs), seq(from = 40, to = 30,
#  length.out = levs), col = hsv(1,seq(from = 0.1, to = 1,
#  length.out = levs)), pch = 20, cex = 2)
#text(rep(-157, levs), seq(from = 40, to = 30,
#  length.out = levs), labels = round(seq(from = 0.1,
#  to = 1, length.out = levs) * max(map, na.rm = TRUE)),
#  cex = 0.7)

############################################################
#################### 60 Characters Wide ####################
############################################################
########## READ IN THE MORPHLOGICAL DATA....

# Read in the morphological data file
mdata <- read.csv(paste(PATH, 'Little.asc', sep = ''),
        sep = '\t', header = TRUE, colClasses = 'character')
mdata <- t(mdata)
mvar.names <- mdata[1,]
mdata <- as.data.frame(mdata[-1,])
names(mdata) <- mvar.names

# Convert columns of mdata to numeric or character
#  variables as appropriate
for(i in c(1:32)){
  mdata[,i] <- as.numeric(as.character(mdata[,i]))
}
for(i in c(33:35)){
  mdata[,i] <- as.character(mdata[,i])
}

# Create the species codes used to label the sp.lists.mat
#  matrix by concatenating the first four letters of the
#  lowercase genus and species names.
sp.code <- paste(
  substr(matrix(unlist(strsplit(tolower(rownames(mdata)),
    split = '.', fixed = TRUE)), ncol = 2, byrow = TRUE)[,1],
  1, 4),
  substr(matrix(unlist(strsplit(tolower(rownames(mdata)),
    split = '.', fixed = TRUE)), ncol = 2, byrow = TRUE)[,2],
  1, 4), sep = '')
mdata <- cbind (I(sp.code), mdata)

# Produce a matrix of the P values (percentage entire
#  angiosperm leaves
P.mat <- matrix(0, nrow = length(xs),
                          ncol = length(ys))
for(i in 1:length(xs)){
  for(j in 1:length(ys)){
    match.index <- match(angio.lists.mat[[i,j]], mdata$sp.code)
    P.mat[i,j] <- mean(mdata$NO_TEETH[na.omit(match.index)])
  }
}

# ...which is only relevant when there are more than 10
#  angiosperm species
P.mat10plus <- round(P.mat, digits = 2)
P.mat10plus[angio.count.mat < 10] <- NA
P.mat20plus <- round(P.mat, digits = 2)
P.mat20plus[angio.count.mat < 20] <- NA
P.mat30plus <- round(P.mat, digits = 2)
P.mat30plus[angio.count.mat < 30] <- NA

############################################################
# Produce a matrix of leaf sizes---not yet working

Z.weights <- rep(1, 10)

Z.mat <- matrix(0, nrow = length(xs),
                          ncol = length(ys))
for(i in 1:length(xs)){
  for(j in 1:length(ys)){
    if(is.null(angio.lists.mat[[i,j]])) next()
    cat(i, j)
    match.index <- match(angio.lists.mat[[i,j]],
                                              mdata$sp.code)
    weighted.mat <- Z.weights %*%
                 as.matrix(mdata[na.omit(match.index),9:18])
    #Z.mat[i,j] <- rowSums(weighted.mat) / 10
  }
}

# Produce a map of P (Percentage Entire)
levs = 5
image(xs, ys, P.mat, col = hsv(1,seq(from = 0.1, to = 1,
  length.out = levs),1), ylab = '(North) Latitude',
  xlab = '(West) Longitude', breaks = c(0, 0.2, 0.4, 0.6, 0.8,
  1), main = 'Percentage Entire Leaves')
points(rep(-160, levs), seq(from = 40, to = 30,
  length.out = levs), col = hsv(1,seq(from = 0.1, to = 1,
  length.out = levs)), pch = 20, cex = 2)
text(rep(-157, levs), seq(from = 40, to = 30,
  length.out = levs), labels = c('0-0.2', '0.2-0.4', '0.4-0.6',
  '0.6-0.8', '0.8-1'), cex = 0.7)

# ...using lattice/grid

# New Haven, Conn. 	41 	19 	72 	55
points(-72, 42)
sp.lists.mat[45,216]
P.mat[45,216]

# END SCRIPT
############################################################
#################### 60 Characters Wide ####################
############################################################

# EARLIER ABORTIVE ATTEMPT TO DO THIS VIA THE MAPTOOLS
#  PACKAGE

# The output matrix of species lists: a matrix of character
#   vectors assign with sp.lists.matrix[[i,j]] <- ???
#sp.lists.matrix <- matrix(data = vector(mode = 'list'),
#                      nrow = length(ys), ncol = length(xs))

########## READ IN THE SHAPEFILES....
# if necessary, install maptools and shapefiles

library(maptools)

?readShapeLines

sp.list <- 'abieamab'

sp.paths <- paste(PATH, 'LittleShapeFiles/climchange.cr.usgs.gov/data/atlas/little/', sp.list, '/', sp.list, '.shp', sep = '')

shpfiles <- vector(mode = 'list', length = length(sp.paths))
for(i in length(sp.paths)){
  shpfiles[[i]] <- readShapeLines(sp.paths)
}

########## CONVERT THE SHAPEFILES TO POLYGONS

polys <- vector(mode = 'list', length = length(sp.list))
for(i in 1:length(attributes(shpfile2)$lines)){

  xverts[[i]] <- 
  yverts[[i]] <- 

  lines(x = slot(slot(attributes(shpfile[1])$lines[[i]],
    'Lines')[[1]], 'coords')[,1],
    y = slot(slot(attributes(shpfile[1])$lines[[i]],
    'Lines')[[1]], 'coords')[,2], col = 'red')
}

########## CHECK IF EACH POINT IS IN EACH POLYGON
# if necessary, install sp and gstat

library(sp)
library(gstat)

?point.in.polygon

for(k in 1:length(sp.list)){
  newsp <- matrix(data = '', nrow = length(ys), ncol = length(xs))
  newsp[point.in.polygon(point.x = xs, point.y = ys,
    pol.x = xverts[[k]], pol.y = yverts[[k]]) > 0] <- sp.list[k]
  sp.lists.matrix[[i,j]] <- c(sp.lists.matrix[[i,j]], newsp[i,j])
}