Hauptseite   Packages   Klassenhierarchie   Übersicht   Auflistung der Dateien   Datenstruktur-Elemente  

Stats.java

gehe zur Dokumentation dieser Datei
00001 /*
00002  * $Source: /shared/cvsroot/diplom/app/src/java/de/picana/math/Stats.java,v $
00003  * $Author: mstolpe $
00004  * $Date: 2003/04/22 09:51:29 $
00005  * $Revision: 1.3 $
00006  * $Release$
00007  *
00008  * Created on 21. November, 19:28
00009  *
00010  * Copyright 2002 by Marco Stolpe
00011  */
00012 
00013 package de.picana.math;
00014 
00015 import weka.core.*;
00016 
00017 
00024 public class Stats {
00025 
00026     
00032     public static double[] getMean(Instances set) {
00033         
00034         double[] mean = new double[set.numAttributes()];
00035 
00036         for (int att=0; att < set.numAttributes(); att++)
00037             mean[att] = set.meanOrMode(att);
00038         
00039         return mean;
00040     }
00041     
00047     public static double getEmpVar(Instances set) {
00048 
00049         int num_attributes = set.numAttributes();
00050         double[] mean = getMean(set);
00051         
00052         double emp_var = 0.0;
00053         double xi_x = 0.0;
00054         
00055         for (int i=0; i < set.numInstances(); i++) {
00056             for (int att=0; att < num_attributes; att++) {
00057                 xi_x = set.instance(i).value(att) - mean[att];
00058                 emp_var += xi_x * xi_x;
00059             }
00060         }
00061         
00062         return emp_var;
00063     }
00064     
00070     public static double getSST(Instances set) {
00071         return getEmpVar(set) / set.numInstances();    
00072     }
00073     
00080     public static double getSSB(Instances set, Instances[] clusters) {
00081         
00082         double[] mean = getMean(set);
00083         double ssb = 0.0;
00084         double yi_y = 0.0;
00085         double acc_freq = 0.0;
00086         
00087         for (int cl=0; cl < clusters.length; cl++) {
00088             if (clusters[cl] != null) {
00089                 acc_freq += clusters[cl].numInstances();
00090                 double[] mean_cl = getMean(clusters[cl]);
00091                 for (int att=0; att < mean_cl.length; att++) {
00092                     yi_y = mean_cl[att] - mean[att];
00093                     ssb += clusters[cl].numInstances() * (yi_y * yi_y);
00094                 }
00095             }
00096         }
00097     
00098         return ssb / acc_freq;
00099     }
00100     
00106     public static double getSSW(Instances[] clusters) {
00107         
00108         double ssw = 0.0;
00109         double yi_y = 0.0;
00110         double acc_freq = 0.0;
00111         
00112         for (int cl=0; cl < clusters.length; cl++) {
00113             if (clusters[cl] != null) {
00114                 acc_freq += clusters[cl].numInstances();
00115                 double[] mean_cl = getMean(clusters[cl]);
00116                 for (int i=0; i < clusters[cl].numInstances(); i++) {
00117                     for (int att=0; att < mean_cl.length; att++) {
00118                         yi_y = clusters[cl].instance(i).value(att) - mean_cl[att];
00119                         ssw += yi_y * yi_y;
00120                     }
00121                 }
00122             }
00123         }
00124     
00125         return ssw / acc_freq;
00126     }
00127 }

Erzeugt am Tue Apr 22 11:22:56 2003 für Picana von doxygen1.2.18