#Set of functions to import eBird dataset for geographic range and sampling calculations
#Written by James Boyle May 20th, 2016

#############Sequence of functions for pre-processing eBird dataset of Boyle 2017###################
#install.packages("GeoRange")
#library(GeoRange)

##Load in eBird csv file, first 50000 lines
##First read in just a few lines from the file to look through the structure
#bird<-read.delim(file.choose(),nrows=5,sep=",")
##Identify the class of each column and then assign them before loading in the file
#classes <- sapply(bird, class)
#nClasses<-length(classes)
##Reassign all classes to character for ease of processing
#classes[1:nClasses]<-"character"
##Reassign latitude and longitude class to numeric for further analyses
#classes[3:4]<-"numeric"
#bird<-read.delim(file.choose(),colClasses = classes,sep=",",nrows=50000)
##Load in IUCN Red List file
#red<-read.csv(file.choose())
##Create list of red list status of bird species
#redBinom<-RedListBinomen(red)

##############Sequence of functions to perform eBird analyses in Boyle 2017#########################################
##Calculate the measures of geographic range for all bird species and find their associated Red List status
#birdGeo<-EBirdGeo(redBinom,bird)
##Calculate the skewness and coefficient of variation of the distribution of geographic range measures
##Have to exclude IUCN status column
#birdSkewCV<-GeoPerformance_SkewCV(birdGeo[,1:8])
##Perform a rarefaction analysis of the eBird dataset to calculate 6 measures of geographic range at various sample sizes
#eBirdRareTest<-GeoRarefaction_MultiTaxa(nLocCut=50,bird,TaxaStart=3,iter=20,steps=c(1,500,400,300,200,100,50,25,10),replacePts=TRUE)
##PEE calculations for birds
#PEETesteBird_AllTaxa<-PEE_MultiTaxa(eBirdRareTest)
##Run pairwise t-tests and wilcox tests between IUCN categories for each geographic range measure
##First reduce dataset to birds seen in at least three locations to avoid errors
#TrimBird<-birdGeo[which(birdGeo[,2]>2),]
#WTtestBird_NObs<-PairWiseGeoTest(BirdGeo=TrimBird,Measure=1)
#WTtestBird_NLocs<-PairWiseGeoTest(BirdGeo=TrimBird,Measure=2)
#WTtestBird_MST<-PairWiseGeoTest(BirdGeo=TrimBird,Measure=3)
#WTtestBird_CH<-PairWiseGeoTest(BirdGeo=TrimBird,Measure=4)
#WTtestBird_MPD<-PairWiseGeoTest(BirdGeo=TrimBird,Measure=5)
#WTtestBird_LatRg<-PairWiseGeoTest(BirdGeo=TrimBird,Measure=6)
#WTtestBird_LongRg<-PairWiseGeoTest(BirdGeo=TrimBird,Measure=7)
#WTtestBird_CellCount<-PairWiseGeoTest(BirdGeo=TrimBird,Measure=8)

#################################################FUNCTIONS####################################################

#Function to create full binomen of IUCN red list sheet as a new column
#Used for EBirdGeo to tie conservation status to geo. range measures
#
#RedList: data frame with (at least) the columns "Genus" and "Species"
#Returns: RedList with a trailing column "Binomen" holding "<Genus>_<Species>"
#         (an existing "Binomen" column is overwritten rather than duplicated)
RedListBinomen <- function(RedList) {
  missing_cols <- setdiff(c("Genus", "Species"), names(RedList))
  if (length(missing_cols) > 0) {
    stop(
      "RedList is missing required column(s): ",
      paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }

  #as.character() so factor columns are pasted by label
  genus <- as.character(RedList[["Genus"]])
  species <- as.character(RedList[["Species"]])

  #paste0() turns zero-length input into "_", so a sheet with no rows is
  #handled explicitly to keep the new column at zero length
  if (length(genus) == 0L) {
    binomen <- character(0)
  } else {
    binomen <- paste0(genus, "_", species)
  }

  RedList[["Binomen"]] <- binomen
  RedList
}
#redBinom<-RedListBinomen(red)

############################################################################################################################
#Function to tabulate number of occurrences/locations for each species in dataset
#and to calculate the geographic range measures of each species
#
#RedList: output of RedListBinomen(); the columns Binomen, Red.List.status and
#         Red.List.criteria are read
#EBird: data frame with LONGITUDE and LATITUDE columns; taxa records start at
#       column 20, one column per taxon, where a non-zero entry counts as an
#       observation of that taxon in that row
#cellsize: passed on to CellCount_v2()
#Returns: data frame with one row per taxon (row names = taxon column names) and
#         the columns NObs, NLocs, MST, CH, GCD, LatRg, LongRg, CellCount,
#         RedListStatus, RedListCriteria. Taxa without a Red List entry get
#         status "NE" and criteria ""
#NOTE(review): MSTDist, CHullAreaEarth, LatRg, LongRg and CellCount_v2 are not
#defined in this file - presumably provided by the GeoRange package; confirm
EBirdGeo <- function(RedList, EBird, cellsize = 5) {
  #Taxa records start at column 20
  first_taxon_col <- 20L
  n_cols <- ncol(EBird)
  if (is.null(n_cols) || n_cols < first_taxon_col) {
    stop("EBird must contain taxon columns starting at column 20", call. = FALSE)
  }
  taxon_cols <- first_taxon_col:n_cols
  TNames <- names(EBird)[taxon_cols]

  EBird_mat <- data.frame(
    matrix(NA, nrow = length(taxon_cols), ncol = 10),
    row.names = TNames
  )

  #Loop for geographic range calculations
  for (i in taxon_cols) {
    row <- i - first_taxon_col + 1L
    print("#############")
    print(i)

    #Rows in which the taxon was recorded (NA entries are dropped by which())
    taxPos <- which(EBird[[i]] != 0)
    EBird_mat[row, 1] <- length(taxPos)

    if (length(taxPos) == 0) {
      #Never observed: zero locations, all range measures stay NA
      EBird_mat[row, 2] <- 0
    } else {
      longs <- EBird$LONGITUDE[taxPos]
      lats <- EBird$LATITUDE[taxPos]

      #Minimum spanning tree; also supplies the location count and the
      #pairwise distance matrix used for GCD below
      MSTCalc <- MSTDist(longs, lats)
      EBird_mat[row, 2] <- length(MSTCalc$Latitude)
      EBird_mat[row, 3] <- MSTCalc$MST_km

      #Convex hull area
      EBird_mat[row, 4] <- CHullAreaEarth(longs, lats)

      #GCD: largest entry of the MST distance matrix, needs >1 observation
      if (length(taxPos) > 1) {
        EBird_mat[row, 5] <- max(MSTCalc$MST_DistMat, na.rm = TRUE)
      } else {
        EBird_mat[row, 5] <- NA
      }

      #Latitudinal and longitudinal range
      LatRgCalc <- LatRg(lats)
      EBird_mat[row, 6] <- LatRgCalc$KmSpan
      LongRgCalc <- LongRg(longs)
      EBird_mat[row, 7] <- LongRgCalc$KmSpan

      #Number of occupied grid cells
      CellCountCalc <- CellCount_v2(longs, lats, cellsize = cellsize)
      EBird_mat[row, 8] <- CellCountCalc$NumCellsOcc
    }

    #Look up the Red List entry of this taxon by its binomen. match() returns
    #the first hit only, so a binomen listed more than once in RedList no
    #longer breaks the single-cell assignment below
    RedListPos <- match(TNames[row], RedList$Binomen)
    if (!is.na(RedListPos)) {
      EBird_mat[row, 9] <- as.character(RedList$Red.List.status[RedListPos])
      EBird_mat[row, 10] <- as.character(RedList$Red.List.criteria[RedListPos])
    } else {
      EBird_mat[row, 9] <- "NE"
      EBird_mat[row, 10] <- ""
    }
  }

  names(EBird_mat) <- c(
    "NObs", "NLocs", "MST", "CH", "GCD", "LatRg", "LongRg", "CellCount",
    "RedListStatus", "RedListCriteria"
  )
  EBird_mat
}
#test_EBirdGeo<-EBirdGeo(RedList=redBinom,EBird=bird,cellsize=5)

##############################################################################
#Function to create a pairwise matrix of p-values for t-test and wilcox test
#between IUCN categories of each geographic range measure
#
#BirdGeo: output of EBirdGeo() (needs the RedListStatus column)
#Cats: IUCN categories to compare; used as row names of the result
#Measure: column of BirdGeo that is compared
#         1=NObs,2=NLocs,3=MST,4=CH,5=GCD,6=LatRg,7=LongRg,8=CellCount
#Returns: length(Cats) x length(Cats) data frame of p-values; cells above the
#         diagonal hold t-test p-values, cells below the diagonal hold wilcox
#         test p-values, the diagonal is NA. Pairs in which a category has too
#         few non-missing values for the test (t-test: fewer than 2, wilcox
#         test: none) are left NA instead of stopping the whole run
#NOTE(review): the branch condition in this file was garbled to `if(ij)`, which
#cannot run. The i<j t-test branch is reconstructed from the description above;
#the i>j wilcox branch is as found. Confirm against the published script
PairWiseGeoTest <- function(BirdGeo, Cats = c("LC", "NT", "VU", "EN", "CR"), Measure = 2) {
  nCat <- length(Cats)
  GeoPairTest_mat <- data.frame(
    matrix(NA, nrow = nCat, ncol = nCat),
    row.names = Cats
  )

  #Collect the non-missing values of the measure once per category
  status <- as.character(BirdGeo$RedListStatus)
  values <- BirdGeo[[Measure]]
  groups <- lapply(Cats, function(status_code) {
    in_cat <- values[which(status == status_code)]
    in_cat[!is.na(in_cat)]
  })

  for (i in seq_len(nCat)) {
    ICat <- groups[[i]]
    for (j in seq_len(nCat)) {
      JCat <- groups[[j]]
      if (i < j) {
        if (length(ICat) >= 2 && length(JCat) >= 2) {
          TTest <- stats::t.test(ICat, JCat)
          GeoPairTest_mat[i, j] <- TTest$p.value
        }
      } else if (i > j) {
        if (length(ICat) >= 1 && length(JCat) >= 1) {
          WTest <- stats::wilcox.test(JCat, ICat)
          GeoPairTest_mat[i, j] <- WTest$p.value
        }
      }
    }
  }
  GeoPairTest_mat
}
#WTtestBird_MST<-PairWiseGeoTest(BirdGeo=TrimBird,Measure=3)