00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013 package de.picana.math;
00014
00015 import weka.core.*;
00016
00017 import java.util.*;
00018
00019
00026 public class Distance {
00027
00037 public static double getMMD(Instances[] clusters, double[] min_dists, int[] min_cls) {
00038
00039 double avg_min_dist = 0.0;
00040
00041 for (int cl1=0; cl1 < clusters.length; cl1++) {
00042 if (clusters[cl1] != null) {
00043 double[] mean_cl1 = Stats.getMean(clusters[cl1]);
00044 double min_dist = Double.MAX_VALUE;
00045 int min_cl = -1;
00046
00047 for (int cl2=0; cl2 < clusters.length; cl2++) {
00048 if (clusters[cl2] != null) {
00049 double[] mean_cl2 = Stats.getMean(clusters[cl2]);
00050 double dist = euklidian(mean_cl1, mean_cl2);
00051 if ((cl1 != cl2) && (dist < min_dist)) {
00052 min_dist = dist;
00053 min_cl = cl2;
00054 }
00055 }
00056 }
00057 avg_min_dist += min_dist;
00058 min_dists[cl1] = min_dist;
00059 min_cls[cl1] = min_cl;
00060 }
00061 }
00062
00063 return avg_min_dist / clusters.length;
00064 }
00065
00066
00079 public static double getGMMD(Instances[] clusters, double a,
00080 double[] min_dists, int[] min_cls,
00081 double[] stats) {
00082
00083 double gmmd_di = 0.0;
00084 double gini = 0.0;
00085
00086 double[] ordered_min_dists = new double[clusters.length];
00087 int[] unordered_min_cls = new int[clusters.length];
00088 getMMD(clusters, ordered_min_dists, unordered_min_cls);
00089
00090 Arrays.sort(ordered_min_dists);
00091
00092 double weight_lower = 0.0;
00093 double q = (double)clusters.length;
00094 double D1 = (1-a) / ((q+1)/2 - 1);
00095 double D2 = (a-1) / (q - (q+1)/2);
00096
00097 for (int j=1; j <= q; j++) {
00098
00099 if ((double)j <= (q/2))
00100 weight_lower += D1 * j + a - D1;
00101 else
00102 weight_lower += D2 * j + 1 - ((q+1)/2) * D2;
00103 }
00104
00105 double[] weights = new double[(int)q];
00106
00107 for (int i=1; i <= q; i++) {
00108
00109 if ((double)i <= (q/2))
00110 weights[i-1] = (D1 * i + a - D1) / weight_lower;
00111 else
00112 weights[i-1] = (D2 * i + 1 - ((q+1)/2) * D2) / weight_lower;
00113 }
00114
00115 for (int i=1; i <= q; i++) {
00116 gmmd_di += weights[i-1] * ordered_min_dists[i-1];
00117 }
00118
00119 stats[0] = gmmd_di;
00120
00121 double sum_di = 0.0;
00122
00123 for (int i=1; i <= q; i++)
00124 sum_di += ordered_min_dists[i-1];
00125
00126 for (int i=1; i <= q; i++) {
00127 double temp1 = ((double)(i-1))/q + ((double)i)/q;
00128 double temp2 = ordered_min_dists[i-1] / sum_di;
00129 gini += temp1 * temp2;
00130 }
00131
00132 gini = gini - 1;
00133
00134 stats[1] = gini;
00135
00136 return gmmd_di / Math.sqrt(gini);
00137 }
00138
00145 public static double euklidian(double[] a1, double[] a2) {
00146 double dist = 0.0;
00147 for (int i=0; i < a1.length; i++)
00148 dist += (a1[i] - a2[i]) * (a1[i] - a2[i]);
00149 return Math.sqrt(dist);
00150 }
00151
00158 public static double euklidian(double[] a1, Instance inst) {
00159 double dist = 0.0;
00160 for (int i=0; i < a1.length; i++)
00161 dist += (a1[i] - inst.value(i)) * (a1[i] - inst.value(i));
00162 return Math.sqrt(dist);
00163 }
00164
00175 public static double weighted_euklidian(double[] a1, int freq1,
00176 double[] a2, int freq2,
00177 double rho, int size) {
00178
00179 double lambda = rho * ((double)freq1 * (double)freq2) /
00180 ((double)size * (double)size) + (1.0 - rho);
00181 return Math.pow(lambda,rho) * euklidian(a1, a2);
00182 }
00183
00193 public static double weighted_euklidian(double[] a1, int freq1,
00194 double[] a2, double rho, int size) {
00195
00196 double lambda = rho * ((double)freq1 / (double)size) + (1.0 - rho);
00197 return Math.pow(lambda,rho) * euklidian(a1, a2);
00198 }
00199 }