1 #ifndef EXACT_MATCHER_HH
2 #define EXACT_MATCHER_HH
14 #include <unordered_map>
17 #include "ext_rna_data.hh"
19 #include "sparsification_mapper.hh"
20 #include "trace_controller.hh"
24 #include <ViennaRNA/fold_vars.h>
25 #include <ViennaRNA/utils.h>
26 #include <ViennaRNA/PS_dot.h>
27 #include <ViennaRNA/fold.h>
29 PS_rna_plot(
char *
string,
char *structure,
char *file);
31 PS_rna_plot_a(
char *
string,
char *structure,
char *file,
char *pre,
char *post);
33 fold(
const char *sequence,
char *structure);
39 typedef std::vector<unsigned int> intVec;
40 typedef std::pair<unsigned int, unsigned int> intPair;
41 typedef std::pair<intPair, intPair> intPPair;
42 typedef const intPPair *intPPairPTR;
43 typedef std::vector<intPPair>::const_iterator IntPPairCITER;
59 const std::string &seqId_,
60 const intVec &mySinglePattern_)
61 : myId(myId_), seqId(seqId_), pattern(mySinglePattern_){};
120 const std::string &structure_,
125 structure(structure_),
128 std::cerr <<
"Error! PatternPair cannot be constructed due to "
129 "different sizes of SinglePatterns!"
133 size = first.
getPat().size();
192 return outsideBounds;
203 const std::vector<intPPair> &
248 std::string structure;
251 std::vector<intPPair> insideBounds;
252 intPPair outsideBounds;
263 typedef std::multimap<int, SelfValuePTR, std::greater<int> >
265 typedef orderedMapTYPE::const_iterator
267 typedef orderedMapTYPE::iterator
270 typedef patListTYPE::iterator
272 typedef patListTYPE::const_iterator
274 typedef std::unordered_map<std::string, SelfValuePTR>
303 add(
const std::string &
id,
306 const std::string &structure,
388 return minPatternSize;
428 matchedEPMs(myLCSEPM),
429 patterns(myPatterns){};
443 MapToPS(
const std::string &sequenceA,
444 const std::string &sequenceB,
446 const std::string &file1,
447 const std::string &file2);
454 std::pair<SequenceAnnotation, SequenceAnnotation>
460 const std::string &sequenceB,
461 const std::string &outfile);
468 struct HoleCompare2 {
470 operator()(
const intPPairPTR &h1,
const intPPairPTR &h2)
const {
472 if (h1->first.second - h1->first.first - 1 <
473 h2->first.second - h2->first.first - 1) {
477 if (h1->first.second - h1->first.first - 1 ==
478 h2->first.second - h2->first.first - 1) {
479 if ((h1->first.first == h2->first.first) &&
480 (h1->first.second == h2->first.second) &&
481 (h1->second.first == h2->second.first) &&
482 (h1->second.second == h2->second.second)) {
491 typedef std::multimap<intPPairPTR,
494 HoleOrderingMapTYPE2;
495 typedef HoleOrderingMapTYPE2::const_iterator HoleMapCITER2;
500 calculateHoles3(
bool quiet);
502 calculatePatternBoundaries(PatternPair *myPair);
504 calculateTraceback2(
const int i,
508 std::vector<std::vector<int> > holeVec);
514 std::vector<std::vector<int> > &D_h,
518 max3(
int a,
int b,
int c) {
519 int tmp = a > b ? a : b;
520 return (tmp > c ? tmp : c);
525 getStructure(PatternPairMap &myMap,
bool firstSeq,
int length);
528 intvec2str(
const std::vector<unsigned int> &V,
529 const std::string &delim) {
530 std::stringstream oss;
531 copy(V.begin(), V.end(),
532 std::ostream_iterator<unsigned int>(oss, delim.c_str()));
535 if (tmpstr.length() > 0)
536 tmpstr.erase(tmpstr.end() - 1);
541 upperCase(
const std::string &seq) {
543 for (
unsigned int i = 0; i < seq.length(); i++)
544 s += toupper(seq[i]);
548 std::vector<std::vector<std::vector<PatternPairMap::SelfValuePTR> > >
550 HoleOrderingMapTYPE2 holeOrdering2;
551 const Sequence &seqA;
552 const Sequence &seqB;
553 PatternPairMap &matchedEPMs;
554 const PatternPairMap &patterns;
570 typedef std::pair<matidx_t, matidx_t>
572 typedef std::pair<seqpos_t, seqpos_t>
593 sparse_mapperA(sparse_mapperA_),
594 sparse_mapperB(sparse_mapperB_)
601 const SparsificationMapper &
603 return sparse_mapperA;
609 return sparse_mapperB;
628 index_t left_endB = std::numeric_limits<index_t>::max())
const {
650 index_t left_endB = std::numeric_limits<index_t>::max())
const {
679 index_t left_endA = std::numeric_limits<index_t>::max(),
680 index_t left_endB = std::numeric_limits<index_t>::max())
const {
681 bool debug_valid_mat_pos =
false;
683 if (debug_valid_mat_pos)
684 std::cout <<
"first valid mat pos before with tc " << std::endl;
686 seqpos_t i = cur_pos_seq.first;
687 seqpos_t j = cur_pos_seq.second;
689 matidx_t idx_after_max_col;
694 matidx_t col_before =
710 if (debug_valid_mat_pos)
711 std::cout <<
"interval " <<
min_col <<
","
712 << idx_after_max_col << std::endl;
726 assert(idx_after_max_col > 0);
728 matidx_t
max_col = idx_after_max_col - 1;
775 return (pos_diag.first + 1 == seq_pos_to_be_matched.first) &&
776 (pos_diag.second + 1 == seq_pos_to_be_matched.second);
802 return is_valid(seq_pos.first, seq_pos.second);
821 typedef std::vector<PairArcIdx>
840 bool first_insertion;
851 struct compare_el_pat_vec {
860 char el1_struc = el1.
third;
861 char el2_struc = el2.
third;
862 return (el1_pos1 < el2_pos1) ||
863 (el1_pos1 == el2_pos1 && el1_pos2 < el2_pos2) ||
864 (el1_pos1 == el2_pos1 && el1_pos2 == el2_pos2 &&
865 el1_struc < el2_struc);
869 struct compare_el_am_to_do {
874 return (el1.first < el2.first) ||
875 ((el1.first == el2.first) && el1.second < el2.second);
886 first_insertion(true),
922 return first_insertion;
979 first_insertion = first_insertion_;
995 get_am(PairArcIdxVec::size_type idx)
const {
996 assert(idx < am_to_do.size());
997 return am_to_do[idx];
1004 PairArcIdxVec::size_type
1006 return am_to_do.size();
1020 PairArcIdxVec::const_iterator
1022 return am_to_do.begin();
1030 PairArcIdxVec::const_iterator
1032 return am_to_do.end();
1042 assert(idx < pat_vec.size());
1043 return pat_vec[idx];
1050 pat_vec_t::size_type
1052 return pat_vec.size();
1059 pat_vec_t::const_iterator
1061 return pat_vec.begin();
1068 pat_vec_t::const_iterator
1070 return pat_vec.end();
1079 assert(!pat_vec.empty());
1080 return pair_seqpos_t(pat_vec.back().first, pat_vec.back().second);
1094 pat_vec.push_back(
el_pat_vec(posA, posB, c));
1111 pat_vec_t::size_type pos) {
1112 if (pat_vec.size() <= pos) {
1113 pat_vec.push_back(
el_pat_vec(posA, posB, c));
1138 am_to_do.push_back(pair_arc_idx);
1148 am_to_do.pop_back();
1160 sort(pat_vec.begin(), pat_vec.end(), compare_el_pat_vec());
1168 sort(am_to_do.begin(), am_to_do.end(), compare_el_am_to_do());
1178 pat_vec.insert(pat_vec.end(), epm_to_insert.
begin(),
1179 epm_to_insert.
end());
1191 return std::includes(this->
begin(), this->
end(),
1192 epm_to_test.
begin(), epm_to_test.
end(),
1193 compare_el_pat_vec());
1207 epm_to_test.
am_end(), compare_el_am_to_do());
1217 out <<
"_________________________________________________"
1219 out <<
"epm with score " << this->score << std::endl;
1221 for (pat_vec_t::const_iterator it = pat_vec.begin();
1222 it != pat_vec.end(); ++it) {
1223 out << it->first <<
":" << it->second <<
" ";
1227 for (pat_vec_t::const_iterator it = pat_vec.begin();
1228 it != pat_vec.end(); ++it) {
1232 out <<
"am_to_do " << am_to_do << std::endl;
1233 out <<
"tolerance left " << this->max_tol_left << std::endl;
1235 out <<
"score " << score << std::endl;
1236 out <<
"pos " << this->cur_pos.first <<
","
1237 << this->cur_pos.second << std::endl;
1238 out <<
"state " << this->state << std::endl;
1240 out <<
"______________________________________________________"
1264 inline std::ostream &
1279 max3(
const T1 &first,
const T1 &second,
const T1 &third) {
1280 return max(
max(first, second), third);
1293 max4(
const T1 &first,
const T1 &second,
const T1 &third,
const T1 &fourth) {
1294 return max(
max3(first, second, third), fourth);
1335 typedef std::list<EPM>
1337 typedef epm_cont_t::iterator epm_it_t;
1338 typedef std::pair<score_t, epm_cont_t> el_map_am_to_do_t;
1347 typedef std::unordered_map<PairArcIdx, el_map_am_to_do_t>
1382 &sparse_trace_controller;
1410 int difference_to_opt_score;
1415 long int max_number_of_EPMs;
1417 long int cur_number_of_EPMs;
1420 bool inexact_struct_match;
1422 score_t struct_mismatch_score;
1429 pair_seqpos_t pos_of_max;
1439 const Arc pseudo_arcA;
1440 const Arc pseudo_arcB;
1475 D(
const Arc &a,
const Arc &b)
const {
1476 return Dmat(a.
idx(), b.
idx());
1488 D(
const Arc &a,
const Arc &b) {
1489 return Dmat(a.
idx(), b.
idx());
1493 nucleotide_match(seqpos_t pos_seqA, seqpos_t pos_seqB)
const {
1494 assert(pos_seqA >= 1 && pos_seqA <= seqA.
length() &&
1496 pos_seqB <= seqB.
length());
1497 return (seqA[pos_seqA] == seqB[pos_seqB]);
1501 seq_matching(ArcIdx idxA,
1503 matpos_t cur_mat_pos,
1504 pair_seqpos_t cur_seq_pos)
const {
1505 seqpos_t i = cur_seq_pos.first;
1506 seqpos_t j = cur_seq_pos.second;
1508 return sparse_trace_controller.
pos_unpaired(idxA, idxB,
1510 nucleotide_match(i, j);
1522 initialize_gap_matrices();
1559 compute_LGLR(
const Arc &a,
const Arc &b,
bool suboptimal);
1576 compute_matrix_entry(
const Arc &a,
1579 matpos_t mat_pos_diag,
1602 seq_str_matching(
const Arc &a,
1604 matpos_t mat_pos_diag,
1605 pair_seqpos_t seq_pos_to_be_matched,
1623 score_for_seq_match();
1634 score_for_am(
const Arc &a,
const Arc &b)
const;
1649 score_for_stacking(
const Arc &a,
1652 const Arc &inner_b);
1662 add_foundEPM(
EPM &cur_epm,
bool count_EPMs);
1666 if (this->difference_to_opt_score != -1)
1671 if (cur_number_of_EPMs >= max_number_of_EPMs + 1)
1688 find_start_pos_for_tb(
bool suboptimal,
1689 score_t difference_to_opt_score = -1,
1690 bool count_EPMs =
false);
1694 double valid_deviation = 0.8;
1695 return (cur_number_of_EPMs >=
1696 max_number_of_EPMs * valid_deviation &&
1697 cur_number_of_EPMs <= max_number_of_EPMs);
1723 trace_LGLR_heuristic(
const Arc &a,
const Arc &b,
EPM &cur_epm);
1744 trace_seq_str_matching_heuristic(
const Arc &a,
1747 matpos_t &cur_mat_pos,
1748 matpos_t mat_pos_diag,
1749 pair_seqpos_t seq_pos_to_be_matched,
1783 apply_filter(epm_cont_t &found_epms);
1798 trace_LGLR_suboptimal(
const Arc &a,
1801 epm_cont_t &found_epms,
1831 trace_seq_str_matching_subopt(
const Arc &a,
1834 matpos_t mat_pos_diag,
1835 pair_seqpos_t seq_pos_to_be_matched,
1836 const PairArcIdx &am,
1839 epm_cont_t &found_epms,
1840 map_am_to_do_t &map_am_to_do,
1863 check_poss(
const Arc &a,
1868 epm_cont_t &found_epms,
1869 map_am_to_do_t &am_to_do_for_cur_am,
1893 store_new_poss(
const Arc &a,
1899 epm_cont_t &found_epms,
1900 map_am_to_do_t &am_to_do_for_cur_am,
1922 trace_G_suboptimal(
const Arc &a,
1927 epm_cont_t &found_epms,
1928 map_am_to_do_t &map_am_to_do,
1944 is_valid_gap(
const Arc &a,
const Arc &b,
const poss_L_LR &pot_new_poss);
1967 preproc_fill_epm(map_am_to_do_t &am_to_do,
1969 epm_cont_t &found_epms,
1971 score_t min_allowed_score = -1);
2001 fill_epm(
const map_am_to_do_t &map_am_to_do,
2003 std::vector<score_t> &max_tol_left_up_to_pos,
2004 std::vector<const EPM *> &epms_to_insert,
2007 epm_cont_t &found_epms,
2015 print_matrices(
const Arc &a,
2024 validate_epm(
const EPM &epm_to_test)
const;
2030 validate_epm_list(epm_cont_t &found_epms)
const;
2072 score_t difference_to_opt_score_,
2074 long int max_number_of_EPMs_,
2075 bool inexact_struct_match_,
2076 score_t struct_mismatch_score_,
Represents a match of two base pairs (arc match)
Definition: arc_matches.hh:35
const Arc & arcB() const
Definition: arc_matches.hh:72
const Arc & arcA() const
Definition: arc_matches.hh:62
Maintains the relevant arc matches and their scores.
Definition: arc_matches.hh:116
Represents a base pair.
Definition: basepairs.hh:39
size_t right() const
Definition: basepairs.hh:77
size_t idx() const
Definition: basepairs.hh:87
size_t left() const
Definition: basepairs.hh:67
Describes sequence and structure ensemble of an RNA.
Definition: basepairs.hh:108
a class for the representation of exact pattern matches (EPM)
Definition: exact_matcher.hh:809
PairArcIdxVec::const_iterator am_end() const
Definition: exact_matcher.hh:1031
SparseTraceController::pair_seqpos_t pair_seqpos_t
pair of positions in sequence A and B
Definition: exact_matcher.hh:819
void sort_am_to_do()
Definition: exact_matcher.hh:1167
pat_vec_t::const_iterator begin() const
Definition: exact_matcher.hh:1060
SparseTraceController::matpos_t matpos_t
a type for a position in a sparsified matrix
Definition: exact_matcher.hh:816
void add(seqpos_t posA, seqpos_t posB, char c)
Definition: exact_matcher.hh:1093
el_pat_vec pat_vec_at(pat_vec_t::size_type idx) const
Definition: exact_matcher.hh:1041
void print_epm(std::ostream &out, bool verbose) const
Definition: exact_matcher.hh:1216
void store_am(const Arc &a, const Arc &b)
Definition: exact_matcher.hh:1135
bool get_first_insertion() const
returns whether it is the first insertion into the EPM
Definition: exact_matcher.hh:921
score_t get_score() const
returns the score of the EPM
Definition: exact_matcher.hh:897
EPM()
Constructor.
Definition: exact_matcher.hh:881
void set_max_tol_left(score_t tol)
Definition: exact_matcher.hh:969
std::pair< ArcIdx, ArcIdx > PairArcIdx
pair of arc indices
Definition: exact_matcher.hh:820
void sort_patVec()
Definition: exact_matcher.hh:1159
SparsificationMapper::seq_pos_t seqpos_t
a type for a sequence position
Definition: exact_matcher.hh:814
void set_first_insertion(bool first_insertion_)
Definition: exact_matcher.hh:978
bool includes_am(const EPM &epm_to_test) const
Definition: exact_matcher.hh:1205
triple< seqpos_t, seqpos_t, char > el_pat_vec
Definition: exact_matcher.hh:826
void add_am(const Arc &a, const Arc &b)
Definition: exact_matcher.hh:1124
void overwrite(seqpos_t posA, seqpos_t posB, char c, pat_vec_t::size_type pos)
Definition: exact_matcher.hh:1108
bool is_invalid() const
Definition: exact_matcher.hh:929
BasePairs__Arc Arc
arc class of BasePairs
Definition: exact_matcher.hh:811
void set_cur_pos(const matpos_t &cur_pos_)
Definition: exact_matcher.hh:960
const matpos_t & get_cur_pos() const
returns the current matrix position of the EPM
Definition: exact_matcher.hh:909
void clear_am_to_do()
deletes the list am_to_do
Definition: exact_matcher.hh:1011
void set_state(int state_)
Definition: exact_matcher.hh:951
pair_seqpos_t last_matched_pos()
Definition: exact_matcher.hh:1078
pat_vec_t::size_type pat_vec_size() const
Definition: exact_matcher.hh:1051
SparsificationMapper::ArcIdx ArcIdx
arc index
Definition: exact_matcher.hh:817
pat_vec_t::const_iterator end() const
Definition: exact_matcher.hh:1069
const PairArcIdx & get_am(PairArcIdxVec::size_type idx) const
Definition: exact_matcher.hh:995
std::vector< PairArcIdx > PairArcIdxVec
a vector of pairs of arc indices
Definition: exact_matcher.hh:822
virtual ~EPM()
destructor
Definition: exact_matcher.hh:889
int get_state() const
return the current matrix state of the EPM
Definition: exact_matcher.hh:903
void set_invalid()
sets the flag invalid for the EPM
Definition: exact_matcher.hh:984
PairArcIdx next_arcmatch()
Definition: exact_matcher.hh:1146
void insert_epm(const EPM &epm_to_insert)
Definition: exact_matcher.hh:1177
void set_score(score_t score_)
Definition: exact_matcher.hh:942
const score_t & get_max_tol_left() const
returns the maximal tolerance that is left for the EPM
Definition: exact_matcher.hh:915
PairArcIdxVec::size_type number_of_am()
Definition: exact_matcher.hh:1005
std::vector< el_pat_vec > pat_vec_t
type for pattern vector
Definition: exact_matcher.hh:828
bool includes(const EPM &epm_to_test) const
Definition: exact_matcher.hh:1189
PairArcIdxVec::const_iterator am_begin() const
Definition: exact_matcher.hh:1021
Computes exact pattern matchings (EPM) between two RNA sequences.
Definition: exact_matcher.hh:1312
void compute_arcmatch_score()
Definition: exact_matcher.cc:227
ExactMatcher(const Sequence &seqA_, const Sequence &seqB_, const RnaData &rna_dataA_, const RnaData &rna_dataB_, const ArcMatches &arc_matches_, const SparseTraceController &sparse_trace_controller_, PatternPairMap &foundEPMs_, int alpha_1_, int alpha_2_, int alpha_3_, score_t difference_to_opt_score_, score_t min_score_, long int max_number_of_EPMs_, bool inexact_struct_match_, score_t struct_mismatch_score_, bool apply_filter_, bool verbose_)
Constructor.
Definition: exact_matcher.cc:10
void test_arcmatch_score()
for debugging
Definition: exact_matcher.cc:261
void trace_EPMs(bool suboptimal)
computes the traceback and traces all EPMs
Definition: exact_matcher.cc:589
Definition: infty_int.hh:325
computes the best chain of EPMs, the LCS-EPM
Definition: exact_matcher.hh:412
void output_locarna(const std::string &sequenceA, const std::string &sequenceB, const std::string &outfile)
outputs anchor constraints to be used as input for locarna
Definition: exact_matcher.cc:3119
LCSEPM(const Sequence &seqA_, const Sequence &seqB_, const PatternPairMap &myPatterns, PatternPairMap &myLCSEPM)
Definition: exact_matcher.hh:421
virtual ~LCSEPM()
Destructor.
Definition: exact_matcher.cc:2591
void calculateLCSEPM(bool quiet)
calculates the best chain of EPMs, the LCS-EPM
Definition: exact_matcher.cc:2599
std::pair< SequenceAnnotation, SequenceAnnotation > anchor_annotation()
get anchor annotation
Definition: exact_matcher.cc:3032
void output_clustal(const std::string &outfile_name)
writes chain as clustal alignment
Definition: exact_matcher.cc:3146
void MapToPS(const std::string &sequenceA, const std::string &sequenceB, PatternPairMap &myMap, const std::string &file1, const std::string &file2)
output chained EPMs to PS files
Definition: exact_matcher.cc:2945
pos_type length() const
Length of multiple alignment.
Definition: multiple_alignment.hh:561
manage a set of EPMs (PatternPair)
Definition: exact_matcher.hh:258
const patListTYPE & getList() const
Definition: exact_matcher.cc:2530
PatternPair * SelfValuePTR
pointer to PatternPair
Definition: exact_matcher.hh:261
const PatternPair & getPatternPair(const std::string &id) const
gets the PatternPair with the Id id
Definition: exact_matcher.cc:2520
PatternPair selfValueTYPE
PatternPair.
Definition: exact_matcher.hh:260
int getMapBases()
computes the number of mapped bases
Definition: exact_matcher.cc:2549
std::unordered_map< std::string, SelfValuePTR > PatternIdMapTYPE
map type patternId -> pointer to PatternPair
Definition: exact_matcher.hh:275
patListTYPE::const_iterator patListCITER
const iterator for the list of PatternPairs
Definition: exact_matcher.hh:273
orderedMapTYPE::const_iterator orderedMapCITER
const iterator for the map
Definition: exact_matcher.hh:266
virtual ~PatternPairMap()
Destructor.
Definition: exact_matcher.cc:2470
const int getMinPatternSize() const
Definition: exact_matcher.hh:387
PatternPairMap()
Constructor.
Definition: exact_matcher.cc:2463
orderedMapTYPE::iterator orderedMapITER
iterator for the map
Definition: exact_matcher.hh:268
std::multimap< int, SelfValuePTR, std::greater< int > > orderedMapTYPE
ordered map type
Definition: exact_matcher.hh:264
const int size() const
Definition: exact_matcher.cc:2544
orderedMapTYPE & getOrderedMap2()
Definition: exact_matcher.cc:2539
int getMapEPMScore()
computes the score of the list of PatternPairs patternList
Definition: exact_matcher.cc:2558
std::vector< std::unique_ptr< selfValueTYPE > > patListTYPE
list of patternPairs
Definition: exact_matcher.hh:269
const orderedMapTYPE & getOrderedMap() const
Definition: exact_matcher.cc:2534
void makeOrderedMap()
creates the ordered Map
Definition: exact_matcher.cc:2498
patListTYPE::iterator patListITER
iterator for the list of PatternPairs
Definition: exact_matcher.hh:271
void add(const std::string &id, const SinglePattern &first, const SinglePattern &second, const std::string &structure, int score)
adds a PatternPair consisting of two SinglePatterns to the PatternPairMap
Definition: exact_matcher.cc:2474
const SelfValuePTR getPatternPairPTR(const std::string &id) const
gets the pointer to the PatternPair with the Id id
Definition: exact_matcher.cc:2525
manages an EPM; consists of two SinglePatterns, one in each RNA
Definition: exact_matcher.hh:105
const int & getSize() const
Definition: exact_matcher.hh:153
const std::vector< intPPair > & getInsideBounds() const
Definition: exact_matcher.hh:204
void setOutsideBounds(intPPair myPPair)
Definition: exact_matcher.cc:2439
PatternPair(const std::string &myId, const SinglePattern &myFirstPat, const SinglePattern &mySecPat, const std::string &structure_, int &score_)
Constructor.
Definition: exact_matcher.hh:117
const std::string & get_struct() const
Definition: exact_matcher.hh:238
const SinglePattern & getSecPat() const
Definition: exact_matcher.hh:171
const int getScore() const
Definition: exact_matcher.hh:220
const int getEPMScore() const
Definition: exact_matcher.hh:229
void addInsideBounds(intPPair myPPair)
adds the inside Bound myPPair
Definition: exact_matcher.cc:2444
virtual ~PatternPair()
Destructor.
Definition: exact_matcher.hh:137
const std::string & getId() const
Definition: exact_matcher.hh:144
void setEPMScore(int myScore)
Definition: exact_matcher.cc:2449
void resetBounds()
clears the insideBounds
Definition: exact_matcher.cc:2434
const intPPair getOutsideBounds() const
Definition: exact_matcher.hh:191
const SinglePattern & getFirstPat() const
Definition: exact_matcher.hh:162
represents sparsified data of an RNA ensemble
Definition: rna_data.hh:44
"Sequence View" of multiple alignment as array of column vectors
Definition: sequence.hh:17
stores a Pattern in one sequence
Definition: exact_matcher.hh:48
const std::string & getmyId() const
Definition: exact_matcher.hh:73
virtual ~SinglePattern()
Destructor.
Definition: exact_matcher.hh:66
const intVec & getPat() const
Definition: exact_matcher.hh:91
SinglePattern(const std::string &myId_, const std::string &seqId_, const intVec &mySinglePattern_)
constructor
Definition: exact_matcher.hh:58
const std::string & getseqId() const
Definition: exact_matcher.hh:82
combines the TraceController with the Mapper for both sequences
Definition: exact_matcher.hh:560
SparseTraceController(const SparsificationMapper &sparse_mapperA_, const SparsificationMapper &sparse_mapperB_, const TraceController &trace_controller_)
constructor
Definition: exact_matcher.hh:589
matpos_t diag_pos_bef(index_t indexA, index_t indexB, pair_seqpos_t cur_pos_seq, index_t left_endA=std::numeric_limits< index_t >::max(), index_t left_endB=std::numeric_limits< index_t >::max()) const
computes the first valid matrix position before a sequence position considering the trace controller
Definition: exact_matcher.hh:675
pair_seqpos_t pos_in_seq(index_t idxA, index_t idxB, const matpos_t &cur_pos) const
maps the matrix position cur_pos to the corresponding pair of positions in sequence A and B
Definition: exact_matcher.hh:748
const SparsificationMapper & get_sparse_mapperA() const
returns reference to sparsification mapper for sequence A
Definition: exact_matcher.hh:602
bool pos_unpaired(index_t idxA, index_t idxB, matpos_t pos) const
checks whether the matrix position pos can be unpaired in both sequences
Definition: exact_matcher.hh:787
bool matching_wo_gap(index_t idxA, index_t idxB, const matpos_t &idx_pos_diag, pair_seqpos_t seq_pos_to_be_matched) const
checks whether an EPM without a gap in between is possible
Definition: exact_matcher.hh:770
bool is_valid_idx_pos(index_t idxA, index_t idxB, matpos_t mat_pos) const
checks whether a matrix position is valid
Definition: exact_matcher.hh:800
matidx_t min_col_idx(index_t indexA, index_t indexB, matidx_t idx_i, index_t left_endB=std::numeric_limits< index_t >::max()) const
minimal column of trace in a row in the sparsified matrix
Definition: exact_matcher.hh:624
std::pair< matidx_t, matidx_t > matpos_t
a type for a position in a sparsified matrix
Definition: exact_matcher.hh:571
matidx_t idx_after_max_col_idx(index_t indexA, index_t indexB, matidx_t idx_i, index_t left_endB=std::numeric_limits< index_t >::max()) const
index after maximal column of trace in a row in the sparsified matrix
Definition: exact_matcher.hh:646
const SparsificationMapper & get_sparse_mapperB() const
returns reference to sparsification mapper for sequence B
Definition: exact_matcher.hh:608
std::pair< seqpos_t, seqpos_t > pair_seqpos_t
a type for a pair of positions in the sequences
Definition: exact_matcher.hh:573
Represents the mapping for sparsification.
Definition: sparsification_mapper.hh:30
seq_pos_t get_pos_in_seq_new(index_t idx, matidx_t pos) const
Definition: sparsification_mapper.hh:225
std::vector< ArcIdx > ArcIdxVec
vector of arc indices
Definition: sparsification_mapper.hh:34
matidx_t first_valid_mat_pos_before(index_t index, seq_pos_t pos, index_t left_end=std::numeric_limits< index_t >::max()) const
Definition: sparsification_mapper.hh:208
size_t ArcIdx
type of arc index
Definition: sparsification_mapper.hh:33
pos_type seq_pos_t
type for a sequence position
Definition: sparsification_mapper.hh:36
pos_type matidx_t
type for a matrix position
Definition: sparsification_mapper.hh:35
matidx_t idx_after_leq(index_t index, seq_pos_t max_col, index_t left_end=std::numeric_limits< index_t >::max()) const
Definition: sparsification_mapper.hh:344
size_t index_t
type for an index
Definition: sparsification_mapper.hh:38
matidx_t idx_geq(index_t index, seq_pos_t min_col, index_t left_end=std::numeric_limits< index_t >::max()) const
Definition: sparsification_mapper.hh:299
bool pos_unpaired(index_t idx, matidx_t pos) const
Definition: sparsification_mapper.hh:249
Controls the matrix cells valid for traces.
Definition: trace_controller.hh:200
bool is_valid(size_type i, size_type j) const
Is (i,j) a valid cell of the DP matrices (i.e. on some possible trace)?
Definition: trace_controller.hh:329
size_t max_col(size_t i) const
Maximal column of trace in a row.
Definition: trace_controller.hh:135
size_t min_col(size_t i) const
Minimal column of trace in a row.
Definition: trace_controller.hh:123
Represents a 5-tuple.
Definition: tuples.hh:64
Represents a 3-tuple.
Definition: tuples.hh:17
T3 third
third value
Definition: tuples.hh:19
Definition: aligner.cc:15
size_type pos_type
type of a sequence position
Definition: aux.hh:126
std::ostream & operator<<(std::ostream &out, const AlignerRestriction &r)
Definition: aligner_restriction.hh:135
TaintedInftyInt max(const TaintedInftyInt &x, const TaintedInftyInt &y)
Definition: infty_int.hh:567
T1 max4(const T1 &first, const T1 &second, const T1 &third, const T1 &fourth)
Definition: exact_matcher.hh:1293
bool operator<(const EPM &epm1, const EPM &epm2)
Definition: exact_matcher.hh:1254
size_t size_type
general size type
Definition: aux.hh:120
InftyInt infty_score_t
Definition: scoring_fwd.hh:17
long int score_t
type of the locarna score as defined by the class Scoring
Definition: scoring_fwd.hh:13
T1 max3(const T1 &first, const T1 &second, const T1 &third)
Definition: exact_matcher.hh:1279