1dinvga.cpp

Go to the documentation of this file.
00001 /*!
00002  * \file
00003  * The program 1dinvga is the core joint inversion program for MT, receiver functions
00004  * and surface wave data. It reads in its settings from the configuration file <CODE> 1dinvga.conf </CODE>, described
00005  * below, and produces several log files and the final data, model(s) and plot file(s) for each
00006  * inverted dataset.
00007  *
00008  <H2> General Configuration in 1dinvga.conf </H2>
00009  The options in <CODE> 1dinvga.conf </CODE> are as follows:
00010  <H3> General program options </H3>
00011  <TABLE>
00012  <TR> <TD> <B>Name</B> </TD> <TD><B> Default</B></TD> <TD><B>Description</B></TD>
00013  <TR> <TD>verbose</TD> <TD> false </TD> <TD> Print detailed information about all models used in the invbersion, only useful for debugging</TD></TR>
00014  <TR> <TD>outputbase</TD> <TD> </TD> <TD> The start of the name of all logfiles. This can also be specified on the command lin.e</TD> </TR>
00015  <TR> <TD>threads</TD><TD> 1 </TD> <TD>If the program has been compiled with openmp support, this specifies the number of simultaneous threads. </TD></TR>
00016  </TABLE>
00017 
00018  <H3> Genetic algorithm options </H3>
00019  <TABLE>
00020  <TR> <TD> <B>Name</B> </TD> <TD><B> Default</B></TD> <TD><B>Description</B></TD>
00021  <TR> <TD>gatype</TD> <TD> </TD> <TD> Can be either <CODE>anneal</CODE> or <CODE> pareto</CODE>. If set to <CODE>anneal</CODE> we use an annealing type GA that minimizes
00022  a weighted sum of the objective functions with the weights specified below. For this type of GA we also have to specify inittemp and coolingratio.
00023  If set to <CODE> pareto</CODE>, we use NSGA-II as described by Deb et al. (2002). In this case inittemp and coolingratio are ignored. </TD></TR>
00024  <TR> <TD>inittemp</TD><TD> </TD> <TD>(For GA type <CODE>anneal</CODE>)The initial temperature in the cooling schedule. High values mean that there will be no big differences in
00025  probability between well fitting models and bad fitting models. Low values make those differences much more severe. If set to negative values the temperature
00026  will be determined from the average misfit values in the first iteration.   </TD> </TR>
00027  <TR> <TD>annealinggeneration</TD><TD> </TD><TD>(For GA type <CODE>anneal</CODE>) The generation number when the initial tempature is divided by coolingratio to enforce stronger
00028  discrimination between good and bad fitting models.</TD></TR>
00029  <TR> <TD>coolingratio</TD><<TD> </TD><TD>(For GA type <CODE>anneal</CODE>) The value by which inittemp is divided at generation <CODE>annealinggeneration</CODE>
00030  Higher values mean stronger discrimination.</TD>/TR>
00031  <TR> <TD>popsize</TD><TD> </TD><TD>The population size for the genetic algorithm. Typical values 50...1000 </TD></TR>
00032  <TR> <TD>generations</TD><<TD> </TD><TD>The number of generations for the GA to run.Typical values 50...500.  </TD>/TR>
00033  <TR> <TD>mutationprob</TD><TD> </TD><TD>The probability for a mutation within one member, typically 0.1...0.2. The probability for mutation of
00034  individual bits is calculated from this number depending on the length of the encoding specified below. </TD></TR>
00035  <TR> <TD>crossoverprob</TD><TD> </TD><TD>The probability for two model to perform crossover. Typically 0.4...0.7. </TD>/TR>
00036  <TR> <TD>elitist</TD><TD> </TD><TD>Do we want to keep the best model(s)  from the last generation (true), or fully depend
00037  on crossover and mutation with the risk of loosing good models (false).  </TD></TR>
00038  </TABLE>
00039  <H3> Data information</H3>
00040 
00041  <TABLE>
00042  <TR> <TD> <B>Name</B> </TD> <TD><B> Default</B></TD> <TD><B>Description</B></TD>
00043  <TR> <TD>recinfofile</TD><TD> </TD><TD>The file containing information about the receiver function data. This file is decribed below. </TD></TR>
00044  <TR> <TD>mtinputdata</TD><TD> </TD><TD>The name of the file containing the MT impedances </TD></TR>
00045  <TR> <TD>dispdata</TD><TD> </TD><TD>The name of the file containing the Rayleigh wave dispersion data</TD></TR>
00046  </TABLE>
00047 
00048  <H3> Error floor information </H3>
00049  <TABLE>
00050  <TR> <TD> <B>Name</B> </TD> <TD><B> Default</B></TD> <TD><B>Description</B></TD>
00051  <TR> <TD>tensorerror</TD> <TD> 0.02</TD> <TD>Error floor for MT impedances, if selected below </TD></TR>
00052  <TR> <TD>reserror  </TD> <TD>0.02 </TD> <TD>Error floor for MT apparent resistivities, if selected below </TD></TR>
00053  <TR> <TD>phaseerror </TD> <TD>0.02 </TD><TD>Error floor for MT phase, if selected below </TD></TR>
00054  <TR> <TD>surferror </TD><TD>0.02 </TD><TD>Error floor for Rayleigh wave dispersion data </TD></TR>
00055  </TABLE>
00056 
00057  <H3> Global receiver function parameters </H3>
00058  <TABLE>
00059  <TR> <TD> <B>Name</B> </TD> <TD><B> Default</B></TD> <TD><B>Description</B></TD>
00060  <TR> <TD>poisson</TD><TD> </TD><TD> Poissons ratio, we calculate Vp from Vs using this ratio</TD></TR>
00061  <TR> <TD>normrec</TD><TD> true </TD><TD> Are we inverting receiver functions that have been normalized to a maximum amplitude of 1 ?</TD></TR>
00062  <TR> <TD>starttime</TD><TD> </TD><TD>Start of the fit window in seconds. Any data in the receiver function before this time is ignored. </TD></TR>
00063  <TR> <TD>endtime</TD><TD> </TD><TD>End of the fit window in seconds. Any data in the receiver function after this time is ignored. </TD></TR>
00064  <TR> <TD>recmethod</TD><TD> </TD><TD>Method used to calculate the measured data. Can be <CODE>specdiv</CODE> for spectral division,
00065  or <CODE>iterdecon</CODE> for iterative deconvolution. </TD></TR>
00066  </TABLE>
00067 
00068  <H3> MT data parameters </H3>
00069  <TABLE>
00070  <TR> <TD> <B>Name</B> </TD> <TD><B> Default</B></TD> <TD><B>Description</B></TD>
00071  <TR> <TD>mode</TD><TD> </TD><TD>Which component of the tensor to fit. Can be <CODE>xy</CODE> or <CODE>yx</CODE>. </TD></TR>
00072  <TR> <TD>mtfit</TD><TD> </TD><TD>Which aspect of the MT data to fit. Possible values are shown in the table below.  </TD></TR>
00073  </TABLE>
00074 
00075  <H3> Possible mtfit values </H3>
00076  <TABLE>
00077  <TR> <TD> <B>Name</B> </TD>  <TD><B>Description</B></TD> </TR>
00078  <TR> <TD> appres </TD>  <TD>Fit only apparent resistivity of selected mode</TD> </TR>
00079  <TR> <TD> phase </TD>  <TD>Fit only phase of selected mode</TD> </TR>
00080  <TR> <TD> resphase </TD>  <TD>Fit apparent resistivity  and phase of selected mode</TD> </TR>
00081  <TR> <TD> berd </TD>  <TD>Fit Berdichevskiy invariant. This is independent of selected mode</TD> </TR>
00082  <TR> <TD> det</TD>  <TD>Fit determinant of impedance tensor. This is independent of selected mode.</TD> </TR>
00083  </TABLE>
00084 
00085  <H3> Regularization and fit information </H3>
00086  <TABLE>
00087  <TR> <TD> <B>Name</B> </TD> <TD><B> Default</B></TD> <TD><B>Description</B></TD>
00088  <TR> <TD>usevrefmodel </TD><TD> false </TD><TD>Do we want to regularize the seismic parameters by the difference to a reference model (true/false) ? </TD></TR>
00089  <TR> <TD>vrefmodel</TD><TD> </TD><TD>If true above, the name of the reference model file. </TD></TR>
00090  <TR> <TD>mtfitexponent </TD><TD> 2</TD><TD>The exponent for the difference  between measured and calculated data. Higher exponents penalize
00091  any departure from the measured data. This ensures a tighter fit, but also increases the influence of noise.</TD></TR>
00092  <TR> <TD>recfitexponent </TD><TD>2 </TD><TD>Same for receiver function data. </TD></TR>
00093  <TR> <TD>surffitexponent </TD><TD> 2</TD><TD>Same for dispersion data. </TD></TR>
00094  </TABLE>
00095 
00096 
00097  <H3> Parameter encoding </H3>
00098  The number of layers is determined by the number of entries, it has to be equal for all
00099  encoding parameters.
00100  <TABLE>
00101  <TR> <TD> <B>Name</B> </TD> <TD><B> Default</B></TD> <TD><B>Description</B></TD>
00102  <TR> <TD>thickbase</TD><TD> </TD><TD>Minimum thickness for each layer in km </TD></TR>
00103  <TR> <TD>thickstep</TD><TD> </TD><TD>Stepsize for each layer in km </TD></TR>
00104  <TR> <TD>thicksizes</TD><TD> </TD><TD>Number of bits used to encode the thickness of each layer </TD></TR>
00105  <TR> <TD>resbase</TD><TD> </TD><TD>Base 10 logarithm of the minimum resistivity, i.e. 0 corresponds to 1 Ohm.m </TD></TR>
00106  <TR> <TD>resstep</TD><TD> </TD><TD>Stepsize for base 10 logarithm of resistivity  </TD></TR>
00107  <TR> <TD>ressizes</TD><TD> </TD><TD>Number of bits used to encode resistivity </TD></TR>
00108  <TR> <TD>svelbase</TD><TD> </TD><TD>Minimum S-Wave velocity for each layer in km/s </TD></TR>
00109  <TR> <TD>svelstep</TD><TD> </TD><TD>Stepsize for S_Wave velocity for each layer </TD></TR>
00110  <TR> <TD>svelsizes</TD><TD> </TD><TD>Number of bits used to encode Vs </TD></TR>
00111  </TABLE>
00112 
00113  <H3> Weighting of datasets </H3>
00114  <TABLE>
00115  <TR> <TD> <B>Name</B> </TD> <TD><B> Default</B></TD> <TD><B>Description</B></TD>
00116  <TR> <TD>weights</TD> <TD> </TD>  <TD>These are the weights for the different objective functions. The current version needs exactly 5 weights.
00117  They correspond to MT misfit, Receiver function misfit, Surface Wave Misfit, Seismic Regularization and MT regularization, respectively.
00118  If a weight is set to zero the calculation of the misfit is completely disabled and the inversion thus runs faster. For pareto type inversion
00119  the value of weight is irrelevant as long as it is different from zero. For annealing it determines the influence of the corresponding dataset. </TD></TR>
00120  </TABLE>
00121 
00122  <H2> Recinfo file </H2>
00123  The recinfo file contains information about the different receiver functions we are trying to fit. Each
00124  line in the file corresponds to one receiver function and requires the following entries
00125  <TABLE>
00126  <TR> <TD>Ray parameter in s/km </TD>
00127  <TD>\f$ \sigma\f$ for gaussian filter </TD> <TD> Waterlevel </TD> <TD>Relative error </TD> <TD>Shift in s </TD> <TD> Type of data </TD>
00128  <TD>Filename </TD></TR>
00129  </TABLE>
00130  Type of data is a single character. If it is 'a' we are also fitting absolute velocity information as calculated by the
00131  program rfvel and described in Svenningsen, GJI 170, 2007. In this case the following
00132  additional fields are needed
00133  <TABLE>
00134  <TR> <TD>Weight for RF </TD> <TD>Weight for absolute velocities </TD> <TD>Filename for absolute velocities </TD> </TR>
00135  </TABLE>
00136  If data type is any other character we only fit receiver function information and the absolute velocity information cannot be there.
00137  <H2> Other features</H2>
00138  - You can gracefully stop the inversion anytime by creating a file called "abort" in the working directory.
00139  The inversion will stop after the current iteration and will write out the usual information about best models etc.
00140  This is useful when the chosen number of generations was too high and the algorithm has already reached an acceptable solution.
00141  The file is automatically deleted.
00142  */
00143 
00144 #include <iostream>
00145 #include <iomanip>
00146 #include <fstream>
00147 #include <algorithm>
00148 #include <numeric>
00149 #include <sstream>
00150 #include <string>
00151 #include "GA.h"
00152 #include "Iso1DMTObjective.h"
00153 #include "MTStation.h"
00154 #include "C1dInvGaConf.h"
00155 #include "AbsVelRecObjective.h"
00156 #include "Multi1DRecObjective.h"
00157 #include "SeismicDataComp.h"
00158 #include "CombinedRoughness.h"
00159 #include "SeismicModelDiff.h"
00160 #include "ResPkModel.h"
00161 #include "SurfaceWaveObjective.h"
00162 #include "SurfaceWaveData.h"
00163 #include "Adaptors.h"
00164 #include "MTFitSetup.h"
00165 #include <boost/bind.hpp>
00166 #include <boost/shared_ptr.hpp>
00167 #include <boost/filesystem.hpp>
00168 #include <boost/date_time/posix_time/posix_time.hpp>
00169 #include "Util.h"
00170 #include "MTRoughness.h"
00171 #include "SurfaceWaveData.h"
00172 #include "FileAbort.h"
00173 #include "SeisTools.h"
00174 
00175 using namespace std;
00176 using namespace gplib;
00177 
00178 enum tgatype
00179   {
00180   pareto, anneal
00181   };
00182 
00183 typedef boost::shared_ptr<GeneralObjective> pCGeneralObjective;
00184 
00185 
00186 //! Set the annealing GA temperature parameter, we should only call this function if we are SURE that it is an AnnealinGA
00187 void SetupAnnealingGA(boost::shared_ptr<GeneralGA> &GA,
00188     const C1dInvGaConf Configuration)
00189   {
00190     // we get the temperature from the configuration file
00191     double InitTemperature = Configuration.inittemp;
00192     // if it is negative we determine it by performing one iteration
00193     if (Configuration.inittemp < 0)
00194       {
00195         double avgcombfit = 0;
00196         GA->DoIteration(0, false);
00197         //we calculate the average fit across all objective functions
00198         avgcombfit = accumulate(GA->GetAvgFit().begin(), GA->GetAvgFit().end(),
00199             0.0);
00200         // for negative temperatures we calculate the actual temperature from the average fit
00201         InitTemperature = avgcombfit * abs(Configuration.inittemp);
00202       }
00203     AnnealingGA *AnnealGA = dynamic_cast<AnnealingGA*> (GA.get());
00204     //we set the annealing parameters for the GA
00205     AnnealGA->SetParams(InitTemperature, Configuration.annealinggeneration,
00206         Configuration.coolingratio);
00207   }
00208 
00209 //! Use the information stored in recinforfile to setup the multi receiver function objective function
00210 void SetupRecObjective(const string recinfofile,
00211     Multi1DRecObjective &RecObjective, const bool normrec,
00212     RecCalc::trfmethod rfmethod)
00213   {
00214     ifstream recinfo(recinfofile.c_str());
00215     double slowness, sigma, cc, recerror, recweight, absweight;
00216     int shift;
00217     string recinputdata, absinputdata;
00218     char wantabsvel; // do we want absolute velocities as well
00219     //as long as we can read from recinfofile
00220     while (recinfo.good())
00221       {
00222         //assume we do not want absolute velocity information
00223         wantabsvel = 'r';
00224         //read the mandatory values from the file
00225         recinfo >> slowness >> sigma >> cc >> recerror >> shift >> wantabsvel
00226             >> recinputdata;
00227         // if we want absolute velocities we have to read in additional information
00228         if (wantabsvel == 'a')
00229           {
00230             recinfo >> recweight >> absweight >> absinputdata;
00231           }
00232         //if reading all values succeeded
00233         if (recinfo.good())
00234           {
00235             //create a shared pointer to a new receiver function data object
00236             boost::shared_ptr<const SeismicDataComp> RecData(
00237                 new SeismicDataComp(recinputdata, SeismicDataComp::sac));
00238             //if we need absolute velocity information
00239             if (wantabsvel == 'a')
00240               {
00241                 //read in data from the input file
00242                 SurfaceWaveData AbsVelData;
00243                 AbsVelData.ReadAscii(absinputdata);
00244                 // add an objective function that fits receiver functions and absolute velocities
00245                 RecObjective.AddAbsVelFunction(RecData, AbsVelData, shift,
00246                     sigma, cc, slowness, rfmethod, recerror, normrec,
00247                     absweight, recweight);
00248               }
00249             //if we don't want absolute velocity information
00250             else
00251               {
00252                 //add an objective function that only considers standard receiver functions
00253                 RecObjective.AddRecFunction(RecData, shift, sigma, cc,
00254                     slowness, rfmethod, recerror, normrec);
00255               }
00256           }
00257       }
00258   }
00259 
00260 int main(int argc, char *argv[])
00261   {
00262     //we report the total run time at the end so we record when we started
00263     boost::posix_time::ptime starttime =
00264         boost::posix_time::microsec_clock::local_time();
00265     //output some information about the program
00266     string version = "$Id: 1dinvga.cpp 1816 2009-09-07 11:28:35Z mmoorkamp $";
00267     cout << endl << endl;
00268     cout << "Program " << version << endl;
00269     cout << "Genetic algorithm to jointly invert seismic and MT data " << endl;
00270     cout << "look at 1dinvga.conf for configuration and setup " << endl;
00271     cout << "The root name of log-files can be overwritten by using " << endl;
00272     cout << "1dinvga newrootname " << endl;
00273 
00274     C1dInvGaConf Configuration;
00275     MTStation MTData;
00276     SurfaceWaveData RFAbsVel;
00277     SurfaceWaveData DispData;
00278     UniformRNG Random;
00279     Configuration.GetData("1dinvga.conf");
00280 
00281     ttranscribed transcribed;
00282     tgatype GAtype = pareto;
00283 
00284     string outputbase;
00285     // we can specify the start of the names of the log files on the command line or in the configuration file
00286     if (argc > 1)
00287       outputbase = argv[1];
00288     else
00289       outputbase = Configuration.outputbase;
00290     //initialize the streams for the log files
00291     //the ones that are always written out get the filename
00292     ofstream fitstat((outputbase + ".ftst").c_str());
00293     ofstream rankfile((outputbase + ".rank").c_str());
00294     ofstream frontfile((outputbase + ".front").c_str());
00295     //the optional ones are left without a filename
00296     ofstream misfitfile;
00297     ofstream valuefile;
00298     ofstream probfile;
00299     ofstream distancefile;
00300     // in most cases we do not need all of these values
00301     //so we only write them if the user insists
00302     if (Configuration.verbose)
00303       {
00304         misfitfile.open((outputbase + ".msft").c_str());
00305         valuefile.open((outputbase + ".vals").c_str());
00306         probfile.open((outputbase + ".probs").c_str());
00307         distancefile.open((outputbase + ".dist").c_str());
00308       }
00309     //we do some very basic consistency checking
00310     if ((Configuration.thickbase.size() != Configuration.resbase.size())
00311         || (Configuration.thickbase.size() != Configuration.svelbase.size()))
00312       {
00313         cerr << "Configuration of genes is not consistent ! ";
00314         return 100;
00315       }
00316     // we have three different types of inversion parameters
00317     const int nparam = 3;
00318     //determine the sizes of a number of GA structures from the configuration file
00319     const int genes = (Configuration.thickbase.size()
00320         + Configuration.resbase.size() + Configuration.svelbase.size());
00321     const int popsize = Configuration.popsize;
00322     const int maxgen = Configuration.generations;
00323     const int paramlength = genes / nparam;
00324     ttranscribed basevalues(genes);
00325     ttranscribed stepsizes(genes);
00326     tsizev genesizes(genes);
00327 
00328     misfitfile << popsize << " " << maxgen << " " << endl;
00329     valuefile << genes << " " << popsize << " " << maxgen << endl;
00330     //copy the vectors from the congiguration object to the GA
00331     copy(Configuration.resbase.begin(), Configuration.resbase.end(),
00332         basevalues.begin());
00333     copy(Configuration.resstep.begin(), Configuration.resstep.end(),
00334         stepsizes.begin());
00335     copy(Configuration.ressizes.begin(), Configuration.ressizes.end(),
00336         genesizes.begin());
00337     //the GA vectors are just concatenations of the different parameters
00338     copy(Configuration.thickbase.begin(), Configuration.thickbase.end(),
00339         basevalues.begin() + paramlength);
00340     copy(Configuration.thickstep.begin(), Configuration.thickstep.end(),
00341         stepsizes.begin() + paramlength);
00342     copy(Configuration.thicksizes.begin(), Configuration.thicksizes.end(),
00343         genesizes.begin() + paramlength);
00344 
00345     copy(Configuration.svelbase.begin(), Configuration.svelbase.end(),
00346         basevalues.begin() + 2 * paramlength);
00347     copy(Configuration.svelstep.begin(), Configuration.svelstep.end(),
00348         stepsizes.begin() + 2 * paramlength);
00349     copy(Configuration.svelsizes.begin(), Configuration.svelsizes.end(),
00350         genesizes.begin() + 2 * paramlength);
00351 
00352     const int totalgenesize = accumulate(genesizes.begin(), genesizes.end(), 0);
00353     //create the objects for the genetic algorithm
00354     BinaryPopulation Population(popsize, totalgenesize, Random, true);
00355     BinaryTournamentSelect Select(Random, boost::bind(
00356         &GeneralPopulation::GetProbabilities, boost::ref(Population)),
00357         boost::bind(&GeneralPopulation::GetCrowdingDistances, boost::ref(
00358             Population)));
00359     StandardPropagation Propagator(&Select, &Population, &Random);
00360     double indmutationprob = 1.0 - pow(1.0 - Configuration.mutationprob, 1.
00361         / totalgenesize); // we specify the overall probability for mutation in config file
00362     cout << "Individual Mutation set to: " << indmutationprob << endl;
00363     Propagator.SetParams(indmutationprob, Configuration.crossoverprob);
00364     GrayTranscribe Transcribe(basevalues, stepsizes, genesizes);
00365 
00366     //read in data
00367     try
00368       {
00369         boost::shared_ptr<Multi1DRecObjective> RecObjective(
00370             new Multi1DRecObjective());
00371         //we only read the data that has positive weights in the inversion
00372         if (Configuration.weights.at(0) > 0)
00373           {
00374             MTData.GetData(Configuration.mtinputdata);
00375           }
00376         if (Configuration.weights.at(1) > 0)
00377           {
00378             RecCalc::trfmethod rfmethod = RecCalc::specdiv;
00379             if (Configuration.recmethod == "iterdecon")
00380               {
00381                 rfmethod = RecCalc::iterdecon;
00382               }
00383             //use the information in the recinfofile to initialize the RF objective function
00384             SetupRecObjective(Configuration.recinfofile, *RecObjective.get(),
00385                 Configuration.normrec, rfmethod);
00386           }
00387         if (Configuration.weights.at(2) > 0)
00388           {
00389             DispData.ReadFile(Configuration.dispdata);
00390           }
00391         //setup MT Objective function
00392         boost::shared_ptr<Iso1DMTObjective> MTObjective(new Iso1DMTObjective(
00393             MTData));
00394         SetupMTFitParameters(Configuration, *MTObjective);
00395         MTObjective->SetFitExponent(Configuration.mtfitexponent);
00396 
00397         //setup RF objective function
00398         RecObjective->SetPoisson(Configuration.poisson);
00399         RecObjective->SetTimeWindow(Configuration.starttime,
00400             Configuration.endtime);
00401 
00402         //setup surface wave data
00403         boost::shared_ptr<SurfaceWaveObjective> SurfObjective(
00404             new SurfaceWaveObjective(DispData));
00405         SurfObjective->SetFitExponent(Configuration.surffitexponent);
00406         SurfObjective->SetPoisson(Configuration.poisson);
00407         SurfObjective->SetErrorLevel(Configuration.surferror);
00408 
00409         //setup Regularization
00410         boost::shared_ptr<GeneralObjective> RecRegularization;
00411         //if we don't want a reference model regularization
00412         // we regularize by smoothness over resistivity and velocity
00413         if (!Configuration.usevrefmodel)
00414           {
00415             RecRegularization = boost::shared_ptr<GeneralObjective>(
00416                 new CombinedRoughness);
00417           }
00418         //otherwise we regularize with a reference model
00419         else
00420           {
00421             //we check if the file exists
00422             if (!boost::filesystem::exists(Configuration.vrefmodel))
00423               {
00424                 cerr
00425                     << "Cannot find reference model for regularization ! Exiting !"
00426                     << endl;
00427                 return 100;
00428               }
00429             //get the model from the file
00430             ResPkModel VRefModel;
00431             VRefModel.ReadModel(Configuration.vrefmodel);
00432             // and initialize the regularization object
00433             RecRegularization = boost::shared_ptr<GeneralObjective>(
00434                 new SeismicModelDiff(VRefModel));
00435           }
00436         //MT is always regularized by smoothness
00437         //this can create some redundancy and should be changed in the future
00438         boost::shared_ptr<GeneralObjective> MTRegularization(new MTRoughness());
00439         //create the vector of objective functions for the GA
00440         vector<pCGeneralObjective> ObjVector;
00441         ObjVector.push_back(MTObjective);
00442         ObjVector.push_back(RecObjective);
00443         ObjVector.push_back(SurfObjective);
00444         ObjVector.push_back(RecRegularization);
00445         ObjVector.push_back(MTRegularization);
00446 
00447         boost::shared_ptr<GeneralGA> GA;
00448         if (Configuration.gatype == "pareto")
00449           {
00450             GAtype = pareto;
00451             boost::shared_ptr<ParetoGA> Pareto(new ParetoGA(&Propagator,
00452                 &Population, &Transcribe, ObjVector, Configuration.threads));
00453             GA = Pareto;
00454           }
00455         else if (Configuration.gatype == "anneal")
00456           {
00457             GAtype = anneal;
00458             boost::shared_ptr<AnnealingGA> Annealing(new AnnealingGA(
00459                 &Propagator, &Population, &Transcribe, ObjVector,
00460                 Configuration.threads));
00461             GA = Annealing;
00462           }
00463         else
00464           {
00465             cerr << "Invalid GA type in configuration file." << endl;
00466             cerr << " Has to be either 'pareto' or 'anneal' !" << endl;
00467             return 100;
00468           }
00469         GA->SetWeights(Configuration.weights);
00470         //Setting up indices for forward modelling
00471         GeneralGA::tparamindv ParamIndices;
00472         const int nmtparam = 2 * Configuration.resbase.size();
00473         std::vector<int> LocalIndices;
00474         //generate Indices 0 .. nmtparam for MT Objective function
00475         generate_n(back_inserter(LocalIndices), nmtparam, IntSequence(0));
00476         ParamIndices.push_back(LocalIndices);
00477         const int nseisparam = 2 * Configuration.thickbase.size();
00478         LocalIndices.clear();
00479         // generate Indice thickbase .. thickbase+nseisparam for rf and surface wave data
00480         generate_n(back_inserter(LocalIndices), nseisparam, IntSequence(
00481             Configuration.thickbase.size()));
00482         ParamIndices.push_back(LocalIndices);
00483         ParamIndices.push_back(LocalIndices);
00484         const int nmodelparam = 3 * Configuration.thickbase.size();
00485         LocalIndices.clear();
00486         //generate Indices 0 .. nmodelparam for Regularization Objective function
00487         generate_n(back_inserter(LocalIndices), nmodelparam, IntSequence(0));
00488         ParamIndices.push_back(LocalIndices);
00489         ParamIndices.push_back(LocalIndices); // we push back twice, because both Regularization functionals work with all parameters
00490         GA->SetParameterIndices(ParamIndices);
00491         GA->SetElitist(Configuration.elitist);
00492 
00493         if (GAtype == anneal)
00494           {
00495             SetupAnnealingGA(GA, Configuration);
00496             Population.InitPop();
00497           }
00498         //Population.PrintPopulation(cout);
00499         for (int i = 0; i < maxgen; ++i)
00500           {
00501             GA->DoIteration(i, i == (maxgen - 1));
00502             if (Configuration.verbose) // in most cases we do not need all of these values
00503               {
00504                 GA->PrintMisfit(misfitfile);
00505                 GA->PrintTranscribed(valuefile);
00506                 Population.PrintProbabilities(probfile);
00507                 Population.PrintDistances(distancefile);
00508               }
00509             GA->PrintBestMisfit(frontfile);
00510 
00511             fitstat << i << " ";
00512             GA->PrintFitStat(fitstat);
00513             if (WantAbort()) //if we want to exit prematurely
00514               {
00515                 RemoveAbort();
00516                 // set counter to two before the last generation
00517                 // this way we do the last iteration for the GA which might have some special treatment
00518                 // and do everything that follows below to store the models and plots
00519                 i = maxgen - 2;
00520               }
00521           }
00522 
00523         int bestcount = 0;
00524         // at the end we only want to output the unique models
00525         // so we initialize a new UniquePop object that keeps track
00526         UniquePop FinalUnique;
00527         tfitvec bestfit(ObjVector.size());
00528         tpopmember member(Population.GetGenesize());
00529         //Get the indices of the models in the pareto front
00530         vector<int> BestIndices(GA->GetBestModelIndices());
00531         const int noutmodels = GA->GetNBestmodels();
00532         const int nobjective = ObjVector.size();
00533         //We have to translate between the parameters in the GA and the parameters
00534         // for the objective functions
00535         GeneralGA::tparamvector LocalParameters(nobjective);
00536         //we allocate memory for each parameter set and objective function
00537         for (int i = 0; i < nobjective; ++i)
00538           {
00539             LocalParameters.at(i).resize(ParamIndices.at(i).size(), false);
00540           }
00541         // we write the parameters for the unique final models into a file for overview
00542         ofstream finalmodels((outputbase + ".final").c_str());
00543         //for all good models including duplicates
00544         for (int h = 0; h < noutmodels; ++h)
00545           {
00546             //get the genetic string for the current best model
00547             member = row(Population.GetPopulation(), BestIndices.at(h));
00548             //translate into numerical values
00549             transcribed = Transcribe.GetValues(member);
00550             //copy the right portion of the vector for each objective function
00551             GA->SetupParams(transcribed, LocalParameters);
00552             //if we haven't calculated the model before
00553             if (!FinalUnique.Find(transcribed, bestfit))
00554               {
00555                 //write out some information
00556                 cout << "Best: " << h << " Index : " << BestIndices.at(h)
00557                     << " ";
00558                 //recalculate the data, we didn't store this during the inversion
00559                 //we only calculate if we have a weight greater 0
00560                 for (int f = 0; f < nobjective; ++f)
00561                   {
00562                     if (Configuration.weights.at(f) > 0)
00563                       {
00564                         bestfit(f) = ObjVector.at(f)->CalcPerformance(
00565                             LocalParameters.at(f));
00566                       }
00567                     else
00568                       {
00569                         bestfit(f) = 0;
00570                       }
00571                     cout << setw(6) << setfill(' ') << setprecision(4)
00572                         << bestfit(f) << " ";
00573                   }
00574                 // write out the fit information and the index of the model
00575                 //so we can find the right files when looking at tradeoff etc.
00576                 copy(bestfit.begin(), bestfit.end(), ostream_iterator<double> (
00577                     finalmodels, " "));
00578                 cout << " " << h;
00579                 finalmodels << " " << bestcount << endl;
00580                 ostringstream filename;
00581                 filename << "best_" << bestcount;
00582                 //save all models, data and plots if the weight for the dataset
00583                 // is different from zero, we have to treat data and regularization differently
00584                 // so we cannot condense it with the for/if combination above that goes over all objectives
00585                 cout << " Saved as : " << filename.str();
00586                 if (Configuration.weights.at(0) > 0)
00587                   {
00588                     MTObjective->WriteModel(filename.str() + "_mt.mod");
00589                     MTObjective->WritePlot(filename.str() + "_mt.plot");
00590                     MTObjective->WriteData(filename.str());
00591                   }
00592                 if (Configuration.weights.at(1) > 0)
00593                   {
00594                     RecObjective->WriteData(filename.str() + ".rec");
00595                     RecObjective->WriteModel(filename.str() + "_rec.mod");
00596                     RecObjective->WritePlot(filename.str() + "_rec");
00597                   }
00598                 if (Configuration.weights.at(2) > 0)
00599                   {
00600                     SurfObjective->WriteData(filename.str() + ".disp");
00601                     SurfObjective->WriteModel(filename.str() + "_disp.mod");
00602                     SurfObjective->WritePlot(filename.str() + "_disp.plot");
00603                   }
00604                 filename.str("");
00605                 //store the model and fit, so we do not recalculate
00606                 FinalUnique.Insert(bestfit, transcribed);
00607                 bestcount++;
00608                 cout << endl;
00609               }
00610           }
00611         //if we want verbose information
00612         if (Configuration.verbose)
00613           {
00614             //print all the good models in raw form to a file
00615             ofstream bestfile((outputbase + ".best").c_str());
00616             FinalUnique.PrintAll(bestfile);
00617             // and print the unique population members in the whole run
00618             ofstream uniquepop((outputbase + ".unpop").c_str());
00619             GA->PrintUniquePop(uniquepop);
00620           }
00621         //write out some run time information
00622         boost::posix_time::ptime endtime =
00623             boost::posix_time::microsec_clock::local_time();
00624         std::cout << "Total Runtime: " << endtime - starttime << std::endl;
00625       } catch (FatalException &e)
00626       {
00627         cerr << e.what() << endl;
00628         return -1;
00629       }
00630   }