# 1) Install relevant libraries
# install.packages() takes ALL package names as a single character vector in
# its first argument; passing them as separate arguments makes the second one
# the install location (`lib`) and the third one the repository (`repos`).
# Only packages that are not installed yet are downloaded, so re-running the
# script does not reinstall everything.
# NOTE(review): rgdal appears to have been archived from CRAN (Oct 2023) -
# confirm it can still be installed for your R version.
required_packages <- c("xlsx", "raster", "randomForest", "sp", "rgdal", "caret")
missing_packages <- setdiff(required_packages, rownames(installed.packages()))
if (length(missing_packages) > 0) {
  install.packages(missing_packages)
}

# 2) Load necessary libraries
# library() stops with an error if a package is unavailable, so a failed
# install is detected here rather than further down the script.
library(xlsx)
library(raster)
library(randomForest)
library(sp)
library(rgdal)
library(caret)
# 3) Set the working directory
# Here the RF folder lives on the F: drive. If your folder is stored in a
# different location, edit the path below; every relative path used later
# in this script is resolved against it.
setwd("F:/RF")

# 4) Create a raster object
# raster::stack() opens the image file as a multi-layer raster stack.
inraster <- raster::stack("rasters/Alfred.pix")

# 5) Read the training and validation data
# Both datasets are csv files containing class labels (ClassName)
# and Easting (POINT_X) and Northing (POINT_Y) information.
Training <- read.csv("TRAINING.csv", header = TRUE, sep = ",")
Validation <- read.csv("TESTING.csv", header = TRUE, sep = ",")
# 8) Select which variables to use in your model
# Column numbers 7 - 119 represent all of the channels.
# names(Training) will show you the names and corresponding numbers of the
# columns; use ListOfVariables.xlsx to determine what subsets of variables
# you want to use in classification.
Selection <- 7:119

# Predictor_Data contains only the variables selected above. drop = FALSE
# keeps it a data frame even when Selection names a single column.
Predictor_Data <- Training[, Selection, drop = FALSE]

# The response must be a factor for randomForest() to grow a classification
# forest. Since R 4.0 read.csv() returns text columns as character vectors,
# so the conversion is done explicitly here.
Training_Response <- as.factor(Training$ClassName)
print(head(Predictor_Data))

# 9) Create the forest (kept in memory as r_tree; nothing is written to disk)
# importance = TRUE is required for the permutation importance used in step 11.
r_tree <- randomForest(
  x = Predictor_Data,
  y = Training_Response,
  ntree = 1000,
  importance = TRUE
)

# 10) See the Out of Bag Confusion Matrix
print(r_tree)

# 11) Print the variable importance (Mean Decrease in Accuracy;
# for Gini Index type = 2;
# leaving type blank gives you the per-class importances (MDA))
imp <- importance(r_tree, type = 1)
# imp <- importance(r_tree, type = 2)
# imp <- importance(r_tree)
print(imp)
# 12) Classify the independent validation data
Validation_Predictions <- predict(r_tree, newdata = Validation)

# 13) Generate a confusion matrix from the independent validation data
Validation_Response <- as.factor(Validation$ClassName)

# Cross-tabulate both factors on one shared set of levels so the matrix is
# always square with rows and columns in the same class order, even when a
# class is missing from either the predictions or the validation labels.
# Rows are predicted classes, columns are reference classes.
class_levels <- union(
  levels(Validation_Predictions),
  levels(Validation_Response)
)
confusionMatrix <- table(
  Validation_Predictions = factor(Validation_Predictions, levels = class_levels),
  Validation_Response = factor(Validation_Response, levels = class_levels)
)
print(confusionMatrix)
# 14) Calculate overall accuracy, user's and producer's accuracy and kappa statistic
# The formulas below assume a square confusion matrix whose rows (predicted)
# and columns (reference) list the same classes in the same order.
stopifnot(
  nrow(confusionMatrix) == ncol(confusionMatrix),
  identical(rownames(confusionMatrix), colnames(confusionMatrix))
)

n_obs <- length(Validation_Response) # number of observations in validation set

# Take the class names from the confusion matrix itself. Validation$ClassName
# is a character vector under R >= 4.0, so levels() on it returns NULL.
class_names <- colnames(confusionMatrix)
n_classes <- length(class_names) # number of classes

correct <- diag(confusionMatrix)         # correctly classified count per class
row_totals <- rowSums(confusionMatrix)   # samples predicted as each class
col_totals <- colSums(confusionMatrix)   # reference samples of each class

overallAccuracy <- sum(correct) / n_obs

# User's accuracy = correct / predicted total (row);
# producer's accuracy = correct / reference total (column).
# A class with a zero total yields NaN.
classAccuracy <- rbind(
  users = correct / row_totals,
  producers = correct / col_totals
)
colnames(classAccuracy) <- class_names

# Cohen's kappa: agreement corrected for the agreement expected by chance,
# which is derived from the products of matching row and column totals.
rowColSumProdSum <- sum(col_totals * row_totals)
kappa <- (n_obs * sum(correct) - rowColSumProdSum) / (n_obs^2 - rowColSumProdSum)

print(classAccuracy)
print(overallAccuracy)
print(kappa)
# 15) Classify the whole raster
# The classified map is written to OutputRaster as a GeoTIFF while a text
# progress bar is shown. The prediction is run once only: a second call with
# the same filename stops because the file already exists (add
# overwrite = TRUE to the call if you want to replace an earlier result).
OutputRaster <- "rasters/output_classification.tif"
predictions <- predict(
  inraster,
  r_tree,
  filename = OutputRaster,
  format = "GTiff",
  progress = "text",
  type = "response"
)
