00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013 package de.picana.math;
00014
00015 import weka.core.*;
00016
00017
00024 public class Stats {
00025
00026
00032 public static double[] getMean(Instances set) {
00033
00034 double[] mean = new double[set.numAttributes()];
00035
00036 for (int att=0; att < set.numAttributes(); att++)
00037 mean[att] = set.meanOrMode(att);
00038
00039 return mean;
00040 }
00041
00047 public static double getEmpVar(Instances set) {
00048
00049 int num_attributes = set.numAttributes();
00050 double[] mean = getMean(set);
00051
00052 double emp_var = 0.0;
00053 double xi_x = 0.0;
00054
00055 for (int i=0; i < set.numInstances(); i++) {
00056 for (int att=0; att < num_attributes; att++) {
00057 xi_x = set.instance(i).value(att) - mean[att];
00058 emp_var += xi_x * xi_x;
00059 }
00060 }
00061
00062 return emp_var;
00063 }
00064
00070 public static double getSST(Instances set) {
00071 return getEmpVar(set) / set.numInstances();
00072 }
00073
00080 public static double getSSB(Instances set, Instances[] clusters) {
00081
00082 double[] mean = getMean(set);
00083 double ssb = 0.0;
00084 double yi_y = 0.0;
00085 double acc_freq = 0.0;
00086
00087 for (int cl=0; cl < clusters.length; cl++) {
00088 if (clusters[cl] != null) {
00089 acc_freq += clusters[cl].numInstances();
00090 double[] mean_cl = getMean(clusters[cl]);
00091 for (int att=0; att < mean_cl.length; att++) {
00092 yi_y = mean_cl[att] - mean[att];
00093 ssb += clusters[cl].numInstances() * (yi_y * yi_y);
00094 }
00095 }
00096 }
00097
00098 return ssb / acc_freq;
00099 }
00100
00106 public static double getSSW(Instances[] clusters) {
00107
00108 double ssw = 0.0;
00109 double yi_y = 0.0;
00110 double acc_freq = 0.0;
00111
00112 for (int cl=0; cl < clusters.length; cl++) {
00113 if (clusters[cl] != null) {
00114 acc_freq += clusters[cl].numInstances();
00115 double[] mean_cl = getMean(clusters[cl]);
00116 for (int i=0; i < clusters[cl].numInstances(); i++) {
00117 for (int att=0; att < mean_cl.length; att++) {
00118 yi_y = clusters[cl].instance(i).value(att) - mean_cl[att];
00119 ssw += yi_y * yi_y;
00120 }
00121 }
00122 }
00123 }
00124
00125 return ssw / acc_freq;
00126 }
00127 }