Hauptseite   Packages   Klassenhierarchie   Übersicht   Auflistung der Dateien   Datenstruktur-Elemente  

Distance.java

gehe zur Dokumentation dieser Datei
00001 /*
00002  * $Source: /shared/cvsroot/diplom/app/src/java/de/picana/math/Distance.java,v $
00003  * $Author: mstolpe $
00004  * $Date: 2003/04/22 09:51:29 $
00005  * $Revision: 1.7 $
00006  * $Release$
00007  *
00008  * Created on 18. November, 23:47
00009  *
00010  * Copyright 2002 by Marco Stolpe
00011  */
00012 
00013 package de.picana.math;
00014 
00015 import weka.core.*;
00016 
00017 import java.util.*;
00018 
00019 
00026 public class Distance {
00027 
00037     public static double getMMD(Instances[] clusters, double[] min_dists, int[] min_cls) {
00038         
00039         double avg_min_dist = 0.0;
00040         
00041         for (int cl1=0; cl1 < clusters.length; cl1++) {
00042             if (clusters[cl1] != null) {
00043                 double[] mean_cl1 = Stats.getMean(clusters[cl1]);
00044                 double min_dist = Double.MAX_VALUE;
00045                 int min_cl = -1;
00046                 
00047                 for (int cl2=0; cl2 < clusters.length; cl2++) {
00048                     if (clusters[cl2] != null) {
00049                         double[] mean_cl2 = Stats.getMean(clusters[cl2]);
00050                         double dist = euklidian(mean_cl1, mean_cl2);
00051                         if ((cl1 != cl2) && (dist < min_dist)) {
00052                             min_dist = dist;
00053                             min_cl = cl2;
00054                         }
00055                     }
00056                 }
00057                 avg_min_dist += min_dist;
00058                 min_dists[cl1] = min_dist;
00059                 min_cls[cl1] = min_cl;
00060             }
00061         }
00062         
00063         return avg_min_dist / clusters.length;
00064     }
00065     
00066     
00079     public static double getGMMD(Instances[] clusters, double a,
00080                                  double[] min_dists, int[] min_cls,
00081                                  double[] stats) {
00082         
00083         double gmmd_di = 0.0;
00084         double gini = 0.0;
00085         
00086         double[] ordered_min_dists = new double[clusters.length];
00087         int[] unordered_min_cls = new int[clusters.length];
00088         getMMD(clusters, ordered_min_dists, unordered_min_cls);
00089         
00090         Arrays.sort(ordered_min_dists);
00091         
00092         double weight_lower = 0.0;
00093         double q = (double)clusters.length;
00094         double D1 = (1-a) / ((q+1)/2 - 1);
00095         double D2 = (a-1) / (q - (q+1)/2);
00096         
00097         for (int j=1; j <= q; j++) {
00098                     
00099             if ((double)j <= (q/2)) 
00100                 weight_lower += D1 * j + a - D1;
00101             else
00102                 weight_lower += D2 * j + 1 - ((q+1)/2) * D2;
00103         }
00104         
00105         double[] weights = new double[(int)q];
00106         
00107         for (int i=1; i <= q; i++) {
00108             
00109             if ((double)i <= (q/2))
00110                 weights[i-1] = (D1 * i + a - D1) / weight_lower;
00111             else
00112                 weights[i-1] = (D2 * i + 1 - ((q+1)/2) * D2) / weight_lower;
00113         }
00114         
00115         for (int i=1; i <= q; i++) {
00116             gmmd_di += weights[i-1] * ordered_min_dists[i-1];    
00117         }
00118         
00119         stats[0] = gmmd_di;
00120         
00121         double sum_di = 0.0;
00122         
00123         for (int i=1; i <= q; i++)
00124             sum_di += ordered_min_dists[i-1];
00125         
00126         for (int i=1; i <= q; i++) {
00127             double temp1 = ((double)(i-1))/q + ((double)i)/q;
00128             double temp2 = ordered_min_dists[i-1] / sum_di;
00129             gini += temp1 * temp2;
00130         }
00131         
00132         gini = gini - 1;
00133         
00134         stats[1] = gini;
00135         
00136         return gmmd_di / Math.sqrt(gini);
00137     }
00138     
00145     public static double euklidian(double[] a1, double[] a2) {
00146         double dist = 0.0;
00147         for (int i=0; i < a1.length; i++)
00148             dist += (a1[i] - a2[i]) * (a1[i] - a2[i]);
00149         return Math.sqrt(dist);
00150     }
00151     
00158     public static double euklidian(double[] a1, Instance inst) {
00159         double dist = 0.0;
00160         for (int i=0; i < a1.length; i++)
00161             dist += (a1[i] - inst.value(i)) * (a1[i] - inst.value(i));
00162         return Math.sqrt(dist);
00163     }
00164     
00175     public static double weighted_euklidian(double[] a1, int freq1,
00176                                             double[] a2, int freq2,
00177                                             double rho, int size) {
00178                                                 
00179         double lambda = rho * ((double)freq1 * (double)freq2) /
00180             ((double)size * (double)size) + (1.0 - rho);
00181         return Math.pow(lambda,rho) * euklidian(a1, a2);
00182     }   
00183     
00193     public static double weighted_euklidian(double[] a1, int freq1,
00194                                             double[] a2, double rho, int size) {
00195                                                 
00196         double lambda = rho * ((double)freq1 / (double)size) + (1.0 - rho);
00197         return Math.pow(lambda,rho) * euklidian(a1, a2);
00198     }   
00199 }

Erzeugt am Tue Apr 22 11:22:55 2003 für Picana von doxygen1.2.18