#1) Install relevant libraries -- only needed once per computer
# Packages that are already installed are skipped, so re-running the whole
# script does not download and reinstall everything each time.
# NOTE(review): rgdal appears to have been retired from CRAN (2023); if
# install.packages() reports it as unavailable, confirm whether this script
# still needs it on your R version.
required_packages <- c("raster", "randomForest", "sp", "rgdal", "caret")
missing_packages <- required_packages[
  !vapply(required_packages, requireNamespace, logical(1), quietly = TRUE)
]
if (length(missing_packages) > 0) {
  install.packages(missing_packages)
}


#2) Load necessary libraries   ## need to do this every time.  
library(raster)
library(randomForest)
library(sp)
library(rgdal)
library(caret)


#3) Set the working directory
# Here the RF folder lives on the F drive. If your folder is stored
# somewhere else, edit the path on the next line.
setwd("F:/RF")


#4) Create a raster object
# The image file is opened as a multi-layer raster stack.
inraster <- raster::stack("rasters/Alfred.pix")


#5) Read the training and validation data
# Both CSV files contain the class labels (ClassName) together with the
# Easting (POINT_X) and Northing (POINT_Y) of every point.
# read.csv() already assumes a header row and a comma separator.
Training <- read.csv("TRAINING.csv")
Validation <- read.csv("TESTING.csv")

### To keep things simple, steps 6 and 7 have been removed.
## They are only needed to display the spatial location of the training data
## or to perform a spatial extraction, neither of which is necessary here.


#8) Select which variables to use in your model
# Column numbers 7 - 119 represent all of the channels.
# names(Training) will show you the names and corresponding numbers of the
# columns; use ListOfVariables.xlsx to determine what subset of variables you
# want to use in the classification.
Selection <- 7:119

# Predictor_Data contains only the variables selected above. drop = FALSE
# keeps it a data frame (with its column name) even when a single column is
# selected, so predictor names still match at prediction time.
Predictor_Data <- Training[, Selection, drop = FALSE]

# Since we are doing classification, the response variable is the class at
# each point. It has to be a factor: read.csv() returns text columns as
# character in R >= 4.0, and randomForest() only grows a classification
# forest when y is a factor.
stopifnot("ClassName" %in% names(Training))
Training_Response <- as.factor(Training$ClassName)

#9) Create the forest (kept in memory as r_tree)
r_tree <- randomForest(
  data.frame(Predictor_Data),
  y = Training_Response,
  ntree = 1000,
  importance = TRUE
)


#10) See the Out of Bag Confusion Matrix
# print() is called explicitly so the summary is also shown when the script
# is run with source(), which does not auto-print top-level values.
print(r_tree)


#11) Print the variable importance
# type = 1 gives the Mean Decrease in Accuracy (MDA);
# type = 2 gives the Gini Index;
# leaving type blank gives you the per-class importances (MDA).
imp <- importance(r_tree, type = 1)
# imp <- importance(r_tree, type = 2)
# imp <- importance(r_tree)
print(imp)
  

#12) Classify the independent validation data
Validation_Predictions <- predict(r_tree, data.frame(Validation))


#13) Generate a confusion matrix from the independent validation data
# Predictions and reference labels are placed on one shared set of class
# levels so the table is always square with rows and columns in the same
# order; a class missing from one side would otherwise shift the diagonal.
# The label column is converted from text explicitly because read.csv()
# returns character columns (not factors) in R >= 4.0.
observed_labels <- as.character(Validation$ClassName)
class_levels <- levels(Validation_Predictions)
unseen_labels <- setdiff(unique(observed_labels), c(class_levels, NA))
if (length(unseen_labels) > 0) {
  warning(
    "Validation classes not present in the training data: ",
    paste(unseen_labels, collapse = ", "),
    call. = FALSE
  )
  class_levels <- c(class_levels, unseen_labels)
}
Validation_Response <- factor(observed_labels, levels = class_levels)
confusionMatrix <- table(
  Validation_Predictions = factor(Validation_Predictions, levels = class_levels),
  Validation_Response = Validation_Response
)
print(confusionMatrix)


#14) Calculate overall accuracy, user's and producer's accuracy and kappa statistic
# Counts are converted to doubles so the products in the kappa formula cannot
# overflow integer arithmetic on large validation sets.
n_obs <- as.numeric(sum(confusionMatrix)) # points with both a prediction and a label
n_classes <- length(class_levels)         # number of classes
correct <- as.numeric(diag(confusionMatrix))
row_totals <- rowSums(confusionMatrix)    # points predicted as each class
col_totals <- colSums(confusionMatrix)    # reference points in each class
overallAccuracy <- sum(correct) / n_obs

# user's accuracy = correct / row total; producer's = correct / column total.
# A class with a zero total yields NaN for that entry.
classAccuracy <- rbind(
  users = correct / row_totals,
  producers = correct / col_totals
)
colnames(classAccuracy) <- class_levels

rowColSumProdSum <- sum(col_totals * row_totals)
# NOTE: this variable masks base::kappa() for the rest of the session; the
# name is kept so later code that reads `kappa` keeps working.
kappa <- (n_obs * sum(correct) - rowColSumProdSum) / (n_obs^2 - rowColSumProdSum)

print(classAccuracy)
print(overallAccuracy)
print(kappa)


#15) Classify the whole raster
# Pick a new file name every time you want to export another classification.
OutputRaster <- "rasters/output_classification.tif"
predictions <- predict(
  inraster,
  r_tree,
  filename = OutputRaster,
  format = "GTiff",
  progress = "text",
  type = "response"
)









