Hauptseite   Packages   Klassenhierarchie   Übersicht   Auflistung der Dateien   Datenstruktur-Elemente  

AML.java

gehe zur Dokumentation dieser Datei
00001 /*
00002  * $Source: /shared/cvsroot/diplom/app/src/java/de/picana/clusterer/AML.java,v $
00003  * $Author: mstolpe $
00004  * $Date: 2003/04/22 09:51:27 $
00005  * $Revision: 1.3 $
00006  * $Release$ 
00007  *
00008  * Created on 5. März 2003, 21:50
00009  *
00010  * Copyright 2002 by Marco Stolpe
00011  */
00012 
00013 package de.picana.clusterer;
00014 
00015 import de.picana.control.*;
00016 import de.picana.logging.*;
00017 import de.picana.math.*;
00018 
00019 import java.io.*;
00020 import java.util.*;
00021 
00022 import weka.core.*;
00023 
00024 
00031 public class AML extends GenericML {
00032     
00033     private double rho;
00034     private boolean pruning = true;
00035  
00036 
00038     public AML() {
00039     }
00040     
00041     
00042     public void init(ParameterSet params, Logger logger) {
00043         
00044         super.init(params, logger);
00045         
00046         rho = 0.0;
00047         try {
00048             rho = Double.parseDouble((String)params.getParameter("rho"));
00049         } catch (NumberFormatException nfe) {}   
00050     }
00051 
00052     
00053     protected void buildFirst() {
00054         
00055         statwriter.println("algo_clusterer_name:AML");
00056         statwriter.println("algo_clusterer_rho:" + rho);
00057                 
00058         int i, j;
00059         MLVector vec, vec_i, vec_j;
00060         HashMap max_index = new HashMap();
00061         
00062         double max_dist = 0.0;
00063         double dist = 0.0;
00064         
00065         for (i=0; i < freq_table.size(); i++) {
00066             
00067             vec_i = (MLVector)freq_table.get(i);
00068                
00069             for (j=0; j < i; j++) {
00070         
00071                 vec_j = (MLVector)freq_table.get(j);
00072         
00073                 dist = Distance.weighted_euklidian(
00074                        vec_i.value, vec_i.freq, vec_j.value, vec_j.freq,
00075                        rho, training_set.numInstances());
00076                 
00077                 //dist = Distance.euklidian(vec_i.value, vec_j.value);
00078                 
00079                 if (dist > max_dist) {
00080                     max_index.clear();
00081                     max_dist = dist;
00082             
00083                     max_index.put(new IntegerPair(i, j), new Integer(1));
00084                     logger.info(LOGSRC, "Found new maximum distance " + max_dist);
00085                    
00086                 } else if (dist == max_dist) {
00087                     max_index.put(new IntegerPair(i, j), new Integer(1));
00088                 }
00089             }
00090         }
00091         
00092         Iterator keys = max_index.keySet().iterator();
00093         while (keys.hasNext()) {
00094             IntegerPair pair = (IntegerPair)keys.next();
00095             MLVector vec_a = (MLVector)freq_table.get(pair.a);
00096             MLVector vec_b = (MLVector)freq_table.get(pair.b);
00097             logger.debug(LOGSRC, "(" + pair.a + ") " + vec_a.toString() + " - " +
00098                          "(" + pair.b + ") " + vec_b.toString() + " = " + max_dist); 
00099         }
00100 
00101         IntegerPair pair = (IntegerPair)getRandomElement(max_index.keySet());
00102         MLVector vec_a = (MLVector)freq_table.get(pair.a);
00103         MLVector vec_b = (MLVector)freq_table.get(pair.b);
00104         centroids.add(vec_a);
00105         logger.info(LOGSRC, "centroid[0] = " + vec_a.toString());
00106         centroids.add(vec_b);
00107         logger.info(LOGSRC, "centroid[1] = " + vec_b.toString());
00108     }
00109     
00110 
00111     protected void buildRest() {
00112         
00113         int i, j, k;
00114         MLVector vec;
00115         MLVector vec_a;
00116         MLVector vec_b;
00117         List max_index = new ArrayList();
00118         
00119         double max_dist;
00120         double min_dist;
00121         double act_dist;
00122                 
00123         for (i=0; i < num_clusters-2; i++) {
00124         
00125             max_dist = 0.0;
00126             max_index.clear();
00127             
00128             for (j=0; j < freq_table.size(); j++) {
00129             
00130                 vec_a = (MLVector)freq_table.get(j);
00131                
00132                 min_dist = Double.MAX_VALUE;
00133                 
00134                 for (k=0; k < centroids.size(); k++) {
00135                     
00136                     vec_b = (MLVector)centroids.get(k);
00137                     
00138                     act_dist = Distance.weighted_euklidian(
00139                        vec_a.value, vec_a.freq, vec_b.value,
00140                        rho, training_set.numInstances());
00141                     
00142                     //act_dist = Distance.euklidian(vec_a.value, vec_b.value);
00143                     
00144                     if (act_dist < min_dist)
00145                         min_dist = act_dist;
00146                 }
00147                 
00148                 if (min_dist > max_dist) {
00149                     
00150                     max_index.clear();
00151                     max_index.add(new Integer(j));
00152                     max_dist = min_dist;
00153                     
00154                 } else if (min_dist == max_dist) {
00155 
00156                     max_index.add(new Integer(j));
00157                 }                
00158             }
00159             
00160             Integer index = (Integer)getRandomElement(max_index);
00161             vec = (MLVector)freq_table.get(index.intValue());
00162             centroids.add(vec);
00163             logger.info(LOGSRC, "centroid[" + (i+2) + "] = " + vec.toString());
00164         }
00165     }
00166 }

Erzeugt am Tue Apr 22 11:22:55 2003 für Picana von doxygen1.2.18