### Evaluate a per-class mixture of two classifiers (CDA and SVM) over a grid of
### class-size thresholds, keep the best-scoring threshold(s), and write the
### confusion matrix, per-class statistics and per-class MCC values to disk.

library(caret)

### Directory that holds the saved workspace and every result file for this split:
results_dir <- "C:/Users/jdmunyon/Desktop/Dropbox/Senior_Project/data_and_code/results/50_50"

### Restore the workspace. The code below reads `nobs`, `ACTUAL`, `CDA` and
### `svms` from it.
### NOTE(review): assumes `nobs` is a named vector of class sizes, `ACTUAL` is a
### factor of true labels, and `CDA` / `svms` hold one predicted label per
### observation - confirm against the script that created MIXTURE.RData.
load(file.path(results_dir, "MIXTURE.RData"))

### Only need to consider numbers one less than each number of classes,
### and also each number of classes (so can see the increase/decrease each increment):
nums <- c(nobs - 1, nobs)

### Sort by increasing:
nums <- nums[order(nums)]

### Index of every candidate threshold (kept because it is part of the saved workspace):
new_nums <- seq_along(nums)

### Obtain one prediction vector per threshold z:
###   - use the CDA prediction when the observation's class has at most z members
###     AND that class is not one of the six that CDA fails on;
###   - otherwise use the SVM prediction, so there is a chance of good predictions.
### NOTE(review): the rule is keyed on the TRUE label (`ACTUAL`), exactly as in
### the original loop - confirm this is the intended experimental design.
### `local()` keeps the helper vectors out of the workspace that is saved below.
predictions <- local({
  n_obs <- length(ACTUAL)
  class_levels <- levels(ACTUAL)

  ### Position of each observation's class within `nobs`:
  class_idx <- match(as.character(ACTUAL), names(nobs))
  if (anyNA(class_idx)) {
    stop("Every value of ACTUAL must match a name in nobs.", call. = FALSE)
  }

  ### Size of each observation's class (as.vector() drops names/dims):
  class_size <- as.vector(nobs)[class_idx]

  ### Classes (by position in `nobs`) on which CDA fails; never use CDA there:
  cda_eligible <- !(class_idx %in% c(5, 10, 11, 12, 13, 15))

  cda_labels <- as.character(CDA)[seq_len(n_obs)]
  svm_labels <- as.character(svms)[seq_len(n_obs)]

  lapply(unname(nums), function(z) {
    use_cda <- cda_eligible & (class_size <= z)
    chosen <- svm_labels
    chosen[use_cda] <- cda_labels[use_cda]

    ### Fail loudly instead of silently turning unknown labels into NA:
    unknown <- setdiff(chosen[!is.na(chosen)], class_levels)
    if (length(unknown) > 0) {
      stop(
        "Predicted labels not found in levels(ACTUAL): ",
        paste(unknown, collapse = ", "),
        call. = FALSE
      )
    }

    ### Passing the levels explicitly guarantees every class is a level even
    ### when it is never predicted (replaces the "fake first 16 rows" trick).
    factor(chosen, levels = class_levels)
  })
})

### Save confusion matrices and stats:
confusions <- lapply(predictions, function(p) {
  confusionMatrix(data = p, reference = ACTUAL)
})

### Save classification accuracies as a plain numeric vector:
accuracies <- vapply(
  confusions,
  function(cm) cm$overall[["Accuracy"]],
  numeric(1)
)

### Give appropriate names (the threshold each accuracy belongs to):
names(accuracies) <- as.character(nums)

### Scatterplot of nums and accuracies:
plot(nums, accuracies)

### Get indices of best (should be two of them):
indices <- which(accuracies == max(accuracies))
if (length(indices) != 2) {
  warning(
    "Expected exactly two best thresholds but found ", length(indices),
    "; 'average' and 'best_confusions' use the first two entries.",
    call. = FALSE
  )
}

### Best values and classification accuracies:
best <- accuracies[indices]

### Take average of the two best thresholds:
average <- mean(as.numeric(names(best)[1:2]))

### Save best confusion matrices and stats (are both the same):
best_confusions <- list(confusions[[indices[1]]], confusions[[indices[2]]])

### Save workspace to machine:
save.image(file.path(results_dir, "MIXTURE.RData"))

### Save results to machine:
write.csv(
  as.data.frame.matrix(best_confusions[[1]]$table),
  file = file.path(results_dir, "mixture_confusion.csv")
)
write.csv(
  as.data.frame.matrix(round(best_confusions[[1]]$byClass, digits = 2)),
  file = file.path(results_dir, "mixture_stats_round.csv")
)

### Best confusion matrix, previously calculated and stored
### (this confusion matrix has rows as predictions and columns as actuals):
conf_mat <- as.matrix(as.data.frame.matrix(best_confusions[[1]]$table))

### Work in doubles: the product of four counts overflows integer arithmetic.
storage.mode(conf_mat) <- "double"

### Matthews correlation coefficient for each location (one-vs-rest):
MCC <- local({
  tp <- diag(conf_mat)            ### true positives
  fp <- rowSums(conf_mat) - tp    ### false positives
  fn <- colSums(conf_mat) - tp    ### false negatives
  tn <- sum(conf_mat) - tp - fp - fn  ### true negatives

  numer <- (tp * tn) - (fp * fn)
  denom <- sqrt((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn))

  ### A zero denominator means one of the margins is empty, which also forces
  ### the numerator to zero; report MCC = 0 for that class.
  mcc <- numeric(length(tp))
  defined <- denom > 0
  mcc[defined] <- numer[defined] / denom[defined]
  mcc
})

### Rounding, setting as column vector:
MCC <- round(matrix(MCC, ncol = 1), digits = 2)

### Set appropriate column name:
colnames(MCC) <- "MCC"

### Save results to machine (to be appended to pre-existing statistics by class table):
write.table(
  MCC,
  file = file.path(results_dir, "mixture_MCCs.csv"),
  sep = ",",
  row.names = FALSE,
  col.names = TRUE
)

rm(conf_mat, MCC)