
import RF_functions_20190712 as rff
import os
import sys
import json
import re
import numpy as np
import pandas as pd
import time
from collections import OrderedDict
from datetime import datetime

####################################
## Console display tuning only -- not essential to the analysis.
# pandas options: https://pandas.pydata.org/pandas-docs/stable/user_guide/options.html#available-options
# The current value of an option can be inspected with, e.g.:
#     pd.get_option('display.width')
pd.set_option('display.width',200)
pd.set_option('display.max_columns',20)
####################################

print("\nScript started at:  "+datetime.now().ctime()+"\n")

### SET WORKING DIR
# NOTE: hard-coded, machine-specific Windows path.  The relative paths used
# further down ('Input\...', 'Output_py/...') are resolved against it.
WORK_DIR = r'Y:\IGARSS\RF_Var_Reduction_Alfred\PV_TP'
os.chdir(WORK_DIR)
### View the current working dir
# A bare os.getcwd() expression only shows its value in an interactive
# console; print it so the directory is also visible when run as a script.
print("Working directory:  "+os.getcwd())

# get a copy of the documentation dictionary
doc = rff.documentation()

# get settings dictionary, with all settings as default
# - settings are stored in two sub-dictionaries:
#     'data' (data settings)   and   'rf' (random forest model settings)
sett = rff.prep_sett()

# view the current state of the settings
rff.print_sett(sett)

# documentation for individual settings can be accessed by passing the setting name
# as a key to the documentation dictionary.  For example:
# (print() with a single argument behaves the same on Python 2 and Python 3,
#  whereas the old `print x` statement form is a SyntaxError on Python 3)
print(doc['load_from_json'])
print(doc['inRefCSV'])
print(doc['FN_classnum'])
print(doc['n_estimators'])

# a text file representing all of the documentation can be generated with:
doc = rff.documentation(outTextFile='Output_py/RandomForestFunctions_documentation.txt')

# Override the default settings for this run.
# Each (setting name, value) pair is written into the matching sub-dictionary
# of `sett`, one at a time, in the order listed.
_data_overrides = (
    ('inRefCSV',               r"Input\Training_Validation_Unbalanced.csv"),
    ('FN_pointID',             'FID'),
    ('FN_classnum',            'ClassID'),
    ('FN_classlab',            'Class'),
    ('FN_xy',                  ('POINT_X', 'POINT_Y')),
    ('inRastPath',             r"Input\Alfred_2013_2014.pix"),
    ('consol_outCSV_dir',      'Output_py'),
    ('consol_outCSV_basename', '_consol_'),
    ('impute_strategy',        None),
)
for _name, _value in _data_overrides:
    sett['data'][_name] = _value

_rf_overrides = (
    ('n_estimators', 1000),
    ('oob_score',    True),
)
for _name, _value in _rf_overrides:
    sett['rf'][_name] = _value

# show the settings again now that the overrides are in place
rff.print_sett(sett)


# Optional: record where each settings sub-dictionary should be written,
# then save a copy of the current settings.
sett['data']['save_to_json'] = 'Output_py/_myDataSettings.json'
sett['rf']['save_to_json'] = 'Output_py/_myRandomForestSettings.json'
rff.save_sett(sett)

# Read and prepare the input data according to the settings.
# (for details: print(doc['a_initialDataPrep']))
refdat, rast, chaninfo = rff.a_initialDataPrep(sett)


# Select 15 variables using point biserial correlation.
# (for details: print(doc['pointBiserialCorr']))
pbc = rff.pointBiserialCorr(
    refdat,
    n=15,
    saveToCSV=True,
    outCSV_dir='Output_py',
)


# Build the reduced (REDUC) reference data from the selected channels.
# This is done before `refdat` is reassigned by the imputation step below.
top_channels = pbc['top_vars']['channel_numbers']
refdat_REDUC = rff.subsetVariables(refdat, channels=top_channels)


# Impute data -- first the full variable set, then the reduced one.
# (for details: print(doc['b_imputeDataAndSplitIndValidSet']))
# NOTE(review): the full and reduced sets go through two separate calls; if
# the split performed inside is randomized, the two sets may not share the
# same validation points -- confirm against the rff implementation.
refdat, impdat = rff.b_imputeDataAndSplitIndValidSet(
    refdat, sett, verbose=True)

refdat_REDUC, impdat_REDUC = rff.b_imputeDataAndSplitIndValidSet(
    refdat_REDUC, sett, verbose=False)

# Iterations.
# Results are collected per iteration number (1..N_ITERATIONS), separately for
# the full variable set and for the reduced (REDUC) variable set.
rf_dict, valid_dict = {}, {}
rf_dict_REDUC, valid_dict_REDUC = {}, {}

N_ITERATIONS = 10
for iteration in range(1, N_ITERATIONS + 1):
    print(str(iteration))

    # CSV copies of the reference / imputed reference data are requested on
    # the first iteration only; on every other iteration the settings are None.
    if iteration == 1:
        full_csvs = ('Output_py/refdat_iter1.csv',
                     'Output_py/impdat_iter1.csv')
        reduc_csvs = ('Output_py/refdat_REDUC_iter1.csv',
                      'Output_py/impdat_REDUC_iter1.csv')
    else:
        full_csvs = reduc_csvs = (None, None)

    # prep random forest -- full variable set
    # (for details: print(doc['c_prepRandomForest']))
    sett['data']['saveDataCSV'], sett['data']['saveImputedDataCSV'] = full_csvs
    refdat_predict, impdat_predict, rf_dict[iteration] = \
        rff.c_prepRandomForest(refdat, impdat, sett)

    # prep random forest -- reduced variable set
    sett['data']['saveDataCSV'], sett['data']['saveImputedDataCSV'] = reduc_csvs
    refdat_predict_REDUC, impdat_predict_REDUC, rf_dict_REDUC[iteration] = \
        rff.c_prepRandomForest(refdat_REDUC, impdat_REDUC, sett)

    # independent validation -- full set first, then reduced set
    # (for details: print(doc['d_independentValidation']))
    valid_dict[iteration] = rff.d_independentValidation(refdat_predict, sett)
    valid_dict_REDUC[iteration] = rff.d_independentValidation(
        refdat_predict_REDUC, sett)

# Consolidate results across iterations.
# (for details: print(doc['e_consolidateArossIterations']))
# The refdat_predict objects passed here are the ones left over from the
# final loop iteration.
consol = rff.e_consolidateArossIterations(
    refdat_predict, sett, valid=valid_dict, rf=rf_dict)

# switch the output basename before consolidating the reduced-set results
sett['data']['consol_outCSV_basename'] = '_consol_REDUC_'
consol_REDUC = rff.e_consolidateArossIterations(
    refdat_predict_REDUC, sett, valid=valid_dict_REDUC, rf=rf_dict_REDUC)

# View consolidated output.
# Bare expressions are only echoed in an interactive console; print them so
# the results are also shown when the file is run as a script.
print(consol.keys()) # the keys of the consol dictionary
print(consol['accStats'])

print(consol_REDUC.keys()) # the keys of the consol_REDUC dictionary
print(consol_REDUC['accStats'])


print("\nScript completed at:  "+datetime.now().ctime()+"\n")

