# NOTE(review): everything below is stored on ONE physical line that begins
# with '#'. In awk a '#' starts a comment that runs to the end of the line, so
# as stored the whole program is a comment and no rule executes. The line
# breaks have to be restored before this script can run.
#
# NOTE(review): several spans are not valid awk and look truncated, e.g.
#   "for(i=1;imax[i])", "for(i=1;i= mini) && (x = mini) && (x prob[bestclass]){"
# Presumably the text between '<' and '>' characters was stripped when the
# file was extracted (min/max initialisation, range calculation, binning and
# the per-class probability code are not visible) -- TODO confirm against the
# original script; do not reconstruct these parts by guesswork.
#
# What the surviving text shows:
#   BEGIN  - sets records = 0 and bins = 10.
#   main   - counts records, remembers NF of the first record in numfields,
#            and stores every field as data[records,i].
#   END    - train = int(0.8*records) is printed; later code compares
#            data[r,numfields] (the last field, i.e. the actual label) with
#            classes[bestclass] to count correct predictions, and reports
#            accuracy as 100*correct/(records-train).
#          - position is the index in classes[] of the ACTUAL label
#            data[r,numfields] (the inline comment says "guessed class");
#            confused[position,bestclass] is incremented, so rows are actual
#            classes and columns are predicted classes, matching the printout.
#          - position stays 0 when the label is not found in classes[]; that
#            count goes to confused[0,bestclass], which the 1..nclasses print
#            loops never display.
#          - target is assigned but not used in the visible text.
# this script assumes that the data is space separated, and that # all fields except the last are numeric BEGIN {records = 0; bins =10;} { records++; if(records==1) {numfields = NF;} for(i=1;i<=NF;i++) {data[records,i] = $i;} } END { train = int(0.8*records); print train; ##### initialise max and mins for(i=1;imax[i]) {max[i] = data[r,i];} } } #### calculate ranges for(i=1;i= mini) && (x = mini) && (x prob[bestclass]){ bestclass=c; } } if(data[r,numfields]==classes[bestclass]){ correct++; } ##### CHANGE #find the position of the guessed class in classes position = 0; target = data[r,numfields]; for(i=1;i<=nclasses;i++){ if(classes[i] == data[r,numfields]){ position = i; break; } } # CHANGE #use position along with the index of the best class to increment a portion of the confusion matrix. confused[position,bestclass]++; } printf("overall accuracy on test set is %g per cent\n", 100*correct/(records- train)); ####### CHANGE -- Display the values of the class as well, makes the matrix harder to read ####### but also makes it easier to understand the results in the long term. printf("\nconfusion matrix\n predicted class\n"); for(c=1;c<=nclasses;c++) { printf("actual class %d (value=%s): ",c,classes[c]); for(d=1;d<=nclasses;d++) {printf(" %d ", confused[c,d]);} printf("\n"); } }