Skip to content
Draft
393 changes: 273 additions & 120 deletions alignment/alignment.cpp

Large diffs are not rendered by default.

92 changes: 45 additions & 47 deletions alignment/alignment.h
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
//
// C++ Interface: alignment
//
// Description:
// Description:
//
//
// Author: BUI Quang Minh, Steffen Klaere, Arndt von Haeseler <[email protected]>, (C) 2008
Expand Down Expand Up @@ -319,21 +319,20 @@ class Alignment : public vector<Pattern>, public CharSet, public StateSpace {
virtual void orderPatternByNumChars(int pat_type);

/**
* un-group site-patterns, i.e., making #sites = #patterns and pattern frequency = 1 for all patterns
* un-group site-patterns, i.e. making #sites = #patterns and pattern frequency = 1 for all patterns
*/
void ungroupSitePattern();


/**
* re-group site-patterns
* re-group site-patterns so that sites within each pattern fall into the same group
* @param groups number of groups
* @param site_group group ID (0, 1, ...ngroups-1; must be continuous) of all sites
*/
void regroupSitePattern(int groups, IntVector &site_group);
void regroupSitePattern(int groups, const IntVector &site_group);


/****************************************************************************
output alignment
output alignment
****************************************************************************/
SeqType detectSequenceType(StrVector &sequences);

Expand Down Expand Up @@ -535,9 +534,11 @@ class Alignment : public vector<Pattern>, public CharSet, public StateSpace {
*/
bool isGapOnlySeq(size_t seq_id);

virtual bool isSuperAlignment() {
return false;
}
bool isSSF() { return !ptn_state_freq.empty(); }

bool isSSR() { return !ptn_rate_scaler.empty(); }

virtual bool isSuperAlignment() { return false; }

/****************************************************************************
alignment general processing
Expand Down Expand Up @@ -916,13 +917,13 @@ class Alignment : public vector<Pattern>, public CharSet, public StateSpace {
vector<uint32_t> pomo_sampled_states;
IntIntMap pomo_sampled_states_index; // indexing, to quickly find if a PoMo-2-state is already present

/* for site-specific state frequency model with Huaichun, Edward, Andrew */
/* site to model ID map */
IntVector site_model;
/** site to state frequency vector */
vector<double*> site_state_freq;
/* for site-specific models with Huaichun, Edward, Andrew */

/** pattern index to state frequency vector map */
vector<double*> ptn_state_freq;

/** pattern index to rate scaler map */
vector<double> ptn_rate_scaler;

/**
* @return true if data type is SEQ_CODON and state is a stop codon
Expand Down Expand Up @@ -985,13 +986,19 @@ class Alignment : public vector<Pattern>, public CharSet, public StateSpace {
void getAppearance(StateType state, StateBitset &state_app);

/**
* read site specific state frequency vectors from a file to create corresponding model
* update site_model and site_state_freq variables for this class
* @param aln input alignment
* @param site_freq_file file name
* @return TRUE if alignment needs to be changed, FALSE otherwise
* read site-specific state frequency vectors or site-specific rate (branch length) scalers
* from a file to create a site-specific model
* @param site_param_file input file name
* @param param_type should be set to "freq" or "rate"
* @return TRUE if alignment patterns need to be changed, FALSE otherwise
*/
bool readSiteStateFreq(const char* site_freq_file);
bool readSiteParamFile(const char* site_param_file, const string &param_type);

/**
* normalize site-specific rates by their mean value
* @return mean rate before normalization
*/
double normalizePtnRateScaler();

// added by TD
/**
Expand Down Expand Up @@ -1024,51 +1031,42 @@ class Alignment : public vector<Pattern>, public CharSet, public StateSpace {
* @return modified (new) alignment
*/
Alignment* removeAndFillUpGappySites();

/**
* special initialization for codon sequences, e.g., setting #states, genetic_code
* @param sequence_type user-defined sequence type
*/
void initCodon(char *gene_code_id);

/**
* set the expected_num_sites (for alisim)
* @param the expected_num_sites
*/
void setExpectedNumSites(int new_expected_num_sites);

/**
Extract Maple file from an alignment file
*/
void extractMapleFile(const std::string& aln_name, const InputType& format);

protected:
/** sequence names */
vector<string> seq_names;

/** expected num_sites */
int expected_num_sites = -1;

/**
sequence names
*/
vector<string> seq_names;

/**
expected num_sites
*/
int expected_num_sites = -1;
/** site to pattern index map */
IntVector site_pattern;

/**
Site to pattern index
*/
IntVector site_pattern;
/** pattern index to first pattern site map */
IntVector pattern_first_site;

/**
hash map from pattern to index in the vector of patterns (the alignment)
*/
PatternIntMap pattern_index;

/**
alisim: caching ntfreq if it has already randomly initialized
*/
double* cache_ntfreq = NULL;
/** hash map from pattern to index in the vector of patterns (the alignment) */
PatternIntMap pattern_index;

/** alisim: caching ntfreq if it has already randomly initialized */
double* cache_ntfreq = NULL;

private:
/**
Expand Down
11 changes: 5 additions & 6 deletions alignment/alignmentpairwise.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,6 @@ double AlignmentPairwise::computeFunction(double value) {
auto sequence2 = tree->getConvertedSequenceByNumber(seq_id2);
auto frequencies = tree->getConvertedSequenceFrequencies();
size_t sequenceLength = tree->getConvertedSequenceLength();

if (site_rate->isSiteSpecificRate()) {
for (int i = 0; i < sequenceLength; i++) {
int state1 = sequence1[i];
Expand All @@ -221,7 +220,7 @@ double AlignmentPairwise::computeFunction(double value) {
if (state1 >= num_states || state2 >= num_states) {
continue;
}
double trans = tree->getModelFactory()->computeTrans(value * site_rate->getPtnRate(i), state1, state2);
double trans = tree->getModel()->computeTrans(value, model->getPtnModelID(i), state1, state2);
lh -= log(trans) * frequencies[i];
}
return lh;
Expand Down Expand Up @@ -358,10 +357,10 @@ void AlignmentPairwise::computeFuncDerv(double value, double &df, double &ddf) {
continue;
}
double freq = frequencies[i];
double rate_val = site_rate->getPtnRate(i);
double rate_val = 1.0;
double rate_sqr = rate_val * rate_val;
double derv1, derv2;
double trans = tree->getModel()->computeTrans(value * rate_val,model->getPtnModelID(i), state1, state2, derv1, derv2);
double trans = tree->getModel()->computeTrans(value * rate_val, model->getPtnModelID(i), state1, state2, derv1, derv2);
double d1 = derv1 / trans;
df -= rate_val * d1 * freq;
ddf -= rate_sqr * (derv2/trans - d1*d1) * freq;
Expand All @@ -376,10 +375,10 @@ void AlignmentPairwise::computeFuncDerv(double value, double &df, double &ddf) {
if (num_states<=state2) {
continue;
}
double rate_val = site_rate->getPtnRate(i);
double rate_val = 1.0;
double rate_sqr = rate_val * rate_val;
double derv1, derv2;
double trans = tree->getModel()->computeTrans(value * rate_val,model->getPtnModelID(i), state1, state2, derv1, derv2);
double trans = tree->getModel()->computeTrans(value * rate_val, model->getPtnModelID(i), state1, state2, derv1, derv2);
double d1 = derv1 / trans;
double freq = tree->aln->at(i).frequency;
df -= rate_val * d1 * freq;
Expand Down
21 changes: 10 additions & 11 deletions alignment/superalignment.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,13 +51,13 @@ SuperAlignment::SuperAlignment() : Alignment() {
SuperAlignment::SuperAlignment(Params &params) : Alignment()
{
readFromParams(params);

init();

// only show Degree of missing data if AliSim is inactive or an input alignment is supplied
if (!(Params::getInstance().alisim_active && !Params::getInstance().alisim_inference_mode))
cout << "Degree of missing data: " << computeMissingData() << endl;

#ifdef _OPENMP
if (params.num_threads > 1) {
if (params.parallel_over_sites) {
Expand All @@ -72,10 +72,9 @@ SuperAlignment::SuperAlignment(Params &params) : Alignment()
cout << " However, parallelisation over sites will have adverse effect on short partitions.";
}
}
cout << endl;
}
#endif
cout << endl;

}

void SuperAlignment::readFromParams(Params &params) {
Expand Down Expand Up @@ -307,9 +306,9 @@ void SuperAlignment::readPartitionRaxml(Params &params) {
input_aln->sequence_type = params.sequence_type;
((SuperAlignment*) input_aln)->init();
}
cout << endl << "Partition file is not in NEXUS format, assuming RAxML-style partition file..." << endl;

cout << "Partition file is not in NEXUS format, assuming RAxML-style partition file..." << endl;

size_t pos = params.model_name.find_first_of("+*");
string rate_type = "";
if (pos != string::npos) rate_type = params.model_name.substr(pos);
Expand Down Expand Up @@ -508,9 +507,9 @@ void SuperAlignment::readPartitionNexus(Params &params) {
if (empty_partition) {
cout << "NOTE: No CharPartition defined, use all CharSets" << endl;
}
cout << endl << "Loading " << sets_block->charsets.size() << " partitions..." << endl;

cout << "Loading " << sets_block->charsets.size() << " partitions..." << endl;

for (it = sets_block->charsets.begin(); it != sets_block->charsets.end(); it++)
if (empty_partition || (*it)->char_partition != "") {
if ((*it)->model_name == "")
Expand Down
Loading