### Jackknife (leave-one-out) evaluation of an SVM classifier on the 50_50
### dataset:
###   1. read the prepared data (column 1 = class label, remaining columns =
###      features),
###   2. choose gamma/cost by ten-fold cross-validation,
###   3. refit the SVM once per protein with that protein left out and predict
###      the left-out protein,
###   4. write the confusion matrix, per-class statistics, error rate and the
###      R workspace to the results directory.

library(seqinr)
library(protr)
library(party) # never actually used
library(BioSeqClass)
library(randomForest)
library(lme4)
library(caret)
library(adabag)
library(tree)
library(rpart)
library(DMwR)
library(e1071)

### "Refresh" seed - not really necessary:
set.seed(0)

### Input and output locations (built once so every path below stays in sync):
project_dir <- "C:/Users/jdmunyon/Desktop/Dropbox/Senior_Project/data_and_code"
results_dir <- file.path(project_dir, "results", "50_50")

### Read in ready-to-go data:
DATA <- read.csv(file.path(project_dir, "50_50.csv"),
                 header = TRUE, row.names = 1)

### The first column is the class label. Since R 4.0 read.csv() no longer turns
### strings into factors, so coerce explicitly: svm() only performs
### classification for a factor response, and confusionMatrix() needs factors.
DATA[, 1] <- as.factor(DATA[, 1])

### Set a seed for reproducibility (since random sampling is coming up within
### the cross-validation):
set.seed(69)

### Determine optimal SVM parameters using ten-fold cross-validation.
### NOTE(review): the commonly cited libsvm grid is cost in 2^-5..2^15 and
### gamma in 2^-15..2^3; the two ranges below are the other way round. Left
### unchanged so results stay comparable - confirm this is intended.
tune <- tune.svm(x = DATA[, -1, drop = FALSE],
                 y = DATA[, 1],
                 gamma = 2^(seq(from = -5, to = 15, by = 2)),
                 cost = 2^(seq(from = -15, to = 3, by = 2)),
                 tunecontrol = tune.control(sampling = "cross", cross = 10))

### Save the optimal parameters (named vector with entries "gamma" and "cost"):
parameters <- unlist(tune$best.parameters)

### Save a list of numbers, from 1 to the number of proteins in the dataset
### (seq_len() stays empty for an empty dataset, unlike 1:nrow()):
nums <- seq_len(nrow(DATA))

### Create one SVM model per protein (3002 according to the original note),
### each one using all but one left-out protein.
### Then predict the left-out protein's location
### (this is jackknife validation):
svms <- lapply(nums, function(z) {
  fit <- svm(x = DATA[-z, -1, drop = FALSE],
             y = DATA[-z, 1],
             cost = parameters[["cost"]],
             gamma = parameters[["gamma"]])
  predict(object = fit, newdata = DATA[z, -1, drop = FALSE])
})

### Unlist svms (turn R object from a list to a vector):
svms <- unlist(svms)

### Save actual locations of full dataset:
ACTUAL <- DATA[, 1]

### Calculate error rate of classification:
error_svms <- 1 - sum(svms == ACTUAL) / nrow(DATA)

### Create confusion matrix and performance statistics for each location:
confusion_svms <- confusionMatrix(data = svms, reference = ACTUAL)

### Save SVM results to machine
write.csv(as.data.frame.matrix(confusion_svms$table),
          file = file.path(results_dir, "svms_confusion.csv"))
write.csv(as.data.frame.matrix(confusion_svms$byClass),
          file = file.path(results_dir, "svms_stats.csv"))
### The overall error rate is appended to the per-class statistics file using
### write.table() defaults, i.e. space-separated with its own header line; R
### warns about appending column names. Format kept as-is for compatibility.
write.table(error_svms,
            file = file.path(results_dir, "svms_stats.csv"),
            append = TRUE)

### Save R workspace
save.image(file.path(results_dir, "SVM_results_from_lab.RData"))