LocARNA-2.0.0
exact_matcher.hh
1 #ifndef EXACT_MATCHER_HH
2 #define EXACT_MATCHER_HH
3 
4 #ifdef HAVE_CONFIG_H
5 #include <config.h>
6 #endif
7 
8 #include <algorithm>
9 #include <iostream>
10 #include <iterator>
11 #include <list>
12 #include <limits>
13 #include <sstream>
14 #include <unordered_map>
15 
16 #include "aux.hh"
17 #include "ext_rna_data.hh"
18 #include "scoring.hh"
19 #include "sparsification_mapper.hh"
20 #include "trace_controller.hh"
21 #include "tuples.hh"
22 
23 extern "C" {
24 #include <ViennaRNA/fold_vars.h>
25 #include <ViennaRNA/utils.h>
26 #include <ViennaRNA/PS_dot.h>
27 #include <ViennaRNA/fold.h>
28 int
29 PS_rna_plot(char *string, char *structure, char *file);
30 int
31 PS_rna_plot_a(char *string, char *structure, char *file, char *pre, char *post);
32 float
33 fold(const char *sequence, char *structure);
34 }
35 
36 namespace LocARNA {
37 
38  typedef size_t size_type;
39  typedef std::vector<unsigned int> intVec;
40  typedef std::pair<unsigned int, unsigned int> intPair;
41  typedef std::pair<intPair, intPair> intPPair;
42  typedef const intPPair *intPPairPTR;
43  typedef std::vector<intPPair>::const_iterator IntPPairCITER;
44 
48  class SinglePattern {
49  public:
50  SinglePattern(){};
51 
58  SinglePattern(const std::string &myId_,
59  const std::string &seqId_,
60  const intVec &mySinglePattern_)
61  : myId(myId_), seqId(seqId_), pattern(mySinglePattern_){};
62 
66  virtual ~SinglePattern() { pattern.clear(); };
67 
72  const std::string &
73  getmyId() const {
74  return myId;
75  };
76 
81  const std::string &
82  getseqId() const {
83  return seqId;
84  };
85 
90  const intVec &
91  getPat() const {
92  return pattern;
93  };
94 
95  private:
96  std::string myId;
97  std::string seqId;
98  intVec pattern;
99  };
100 
105  class PatternPair {
106  public:
107  PatternPair(){};
108 
117  PatternPair(const std::string &myId,
118  const SinglePattern &myFirstPat,
119  const SinglePattern &mySecPat,
120  const std::string &structure_,
121  int &score_)
122  : id(myId),
123  first(myFirstPat),
124  second(mySecPat),
125  structure(structure_),
126  EPMscore(score_) {
127  if (first.getPat().size() != second.getPat().size()) {
128  std::cerr << "Error! PatternPair cannot be constructed due to "
129  "different sizes of SinglePatterns!"
130  << std::endl;
131  }
132  score = EPMscore;
133  size = first.getPat().size();
134  };
135 
137  virtual ~PatternPair() { insideBounds.clear(); };
138 
143  const std::string &
144  getId() const {
145  return id;
146  };
147 
152  const int &
153  getSize() const {
154  return size;
155  };
156 
161  const SinglePattern &
162  getFirstPat() const {
163  return first;
164  };
165 
170  const SinglePattern &
171  getSecPat() const {
172  return second;
173  };
174 
176  void
177  resetBounds();
178 
183  void
184  setOutsideBounds(intPPair myPPair);
185 
190  const intPPair
192  return outsideBounds;
193  };
194 
196  void
197  addInsideBounds(intPPair myPPair);
198 
203  const std::vector<intPPair> &
204  getInsideBounds() const {
205  return insideBounds;
206  };
207 
212  void
213  setEPMScore(int myScore);
214 
219  const int
220  getScore() const {
221  return score;
222  };
223 
228  const int
229  getEPMScore() const {
230  return EPMscore;
231  };
232 
237  const std::string &
238  get_struct() const {
239  return structure;
240  };
241 
242  private:
243  std::string id;
244  int size;
245  SinglePattern first;
246  SinglePattern second;
247 
248  std::string structure;
249  int score;
250  int EPMscore;
251  std::vector<intPPair> insideBounds;
252  intPPair outsideBounds;
253  };
254 
259  public:
262 
263  typedef std::multimap<int, SelfValuePTR, std::greater<int> >
265  typedef orderedMapTYPE::const_iterator
267  typedef orderedMapTYPE::iterator
269  typedef std::vector<std::unique_ptr<selfValueTYPE>> patListTYPE;
270  typedef patListTYPE::iterator
272  typedef patListTYPE::const_iterator
274  typedef std::unordered_map<std::string, SelfValuePTR>
276 
278  PatternPairMap();
279 
280  // //! Copy Constructor
281  // //! @param myPairMap PatternPairMap
282  // PatternPairMap(const PatternPairMap &myPairMap)
283  // : patternList(myPairMap.patternList),
284  // patternOrderedMap(myPairMap.patternOrderedMap),
285  // idMap(myPairMap.idMap) {
286  // minPatternSize = 100000;
287  // };
288  PatternPairMap(const PatternPairMap &myPairMap) = delete;
289 
291  virtual ~PatternPairMap();
292 
302  void
303  add(const std::string &id,
304  const SinglePattern &first,
305  const SinglePattern &second,
306  const std::string &structure,
307  int score);
308 
313  void
314  add(const SelfValuePTR value);
315 
317  void
318  makeOrderedMap();
319 
320  // //! updates the PatternPairMap from the ordered Map
321  // void
322  // updateFromMap();
323 
329  const PatternPair &
330  getPatternPair(const std::string &id) const;
331 
337  const SelfValuePTR
338  getPatternPairPTR(const std::string &id) const;
339 
344  const patListTYPE &
345  getList() const;
346 
351  const orderedMapTYPE &
352  getOrderedMap() const;
353 
359  getOrderedMap2();
360 
365  const int
366  size() const;
367 
372  int
373  getMapBases();
374 
379  int
380  getMapEPMScore();
381 
386  const int
388  return minPatternSize;
389  };
390 
391  private:
392  patListTYPE patternList;
393  orderedMapTYPE patternOrderedMap;
395  PatternIdMapTYPE idMap;
396  int minPatternSize;
397  };
398 
405  std::ostream &
406  operator<<(std::ostream &out,
407  const PatternPairMap::patListTYPE &pat_pair_map);
408 
412  class LCSEPM {
413  public:
421  LCSEPM(const Sequence &seqA_,
422  const Sequence &seqB_,
423  const PatternPairMap &myPatterns,
424  PatternPairMap &myLCSEPM)
425 
426  : seqA(seqA_),
427  seqB(seqB_),
428  matchedEPMs(myLCSEPM),
429  patterns(myPatterns){};
430 
432  virtual ~LCSEPM();
433 
442  void
443  MapToPS(const std::string &sequenceA,
444  const std::string &sequenceB,
445  PatternPairMap &myMap,
446  const std::string &file1,
447  const std::string &file2);
448 
450  void
451  calculateLCSEPM(bool quiet);
452 
454  std::pair<SequenceAnnotation, SequenceAnnotation>
456 
458  void
459  output_locarna(const std::string &sequenceA,
460  const std::string &sequenceB,
461  const std::string &outfile);
462 
464  void
465  output_clustal(const std::string &outfile_name);
466 
467  private:
468  struct HoleCompare2 {
469  bool
470  operator()(const intPPairPTR &h1, const intPPairPTR &h2) const {
471  // first compare size of holes
472  if (h1->first.second - h1->first.first - 1 <
473  h2->first.second - h2->first.first - 1) {
474  return true;
475  }
476  // compare if holes are identical in both structures
477  if (h1->first.second - h1->first.first - 1 ==
478  h2->first.second - h2->first.first - 1) {
479  if ((h1->first.first == h2->first.first) &&
480  (h1->first.second == h2->first.second) &&
481  (h1->second.first == h2->second.first) &&
482  (h1->second.second == h2->second.second)) {
483  return true;
484  }
485  }
486 
487  return false;
488  }
489  };
490 
491  typedef std::multimap<intPPairPTR,
493  HoleCompare2>
494  HoleOrderingMapTYPE2;
495  typedef HoleOrderingMapTYPE2::const_iterator HoleMapCITER2;
496 
497  void
498  preProcessing();
499  void
500  calculateHoles3(bool quiet);
501  void
502  calculatePatternBoundaries(PatternPair *myPair);
503  void
504  calculateTraceback2(const int i,
505  const int j,
506  const int k,
507  const int l,
508  std::vector<std::vector<int> > holeVec);
509  int
510  D_rec2(const int &i,
511  const int &j,
512  const int &k,
513  const int &l,
514  std::vector<std::vector<int> > &D_h,
515  const bool debug);
516 
517  int
518  max3(int a, int b, int c) {
519  int tmp = a > b ? a : b;
520  return (tmp > c ? tmp : c);
521  };
522 
524  char *
525  getStructure(PatternPairMap &myMap, bool firstSeq, int length);
526 
527  std::string
528  intvec2str(const std::vector<unsigned int> &V,
529  const std::string &delim) {
530  std::stringstream oss;
531  copy(V.begin(), V.end(),
532  std::ostream_iterator<unsigned int>(oss, delim.c_str()));
533  std::string tmpstr;
534  tmpstr = oss.str();
535  if (tmpstr.length() > 0)
536  tmpstr.erase(tmpstr.end() - 1);
537  return tmpstr;
538  }
539 
540  std::string
541  upperCase(const std::string &seq) {
542  std::string s = "";
543  for (unsigned int i = 0; i < seq.length(); i++)
544  s += toupper(seq[i]);
545  return s;
546  }
547 
548  std::vector<std::vector<std::vector<PatternPairMap::SelfValuePTR> > >
549  EPM_Table2;
550  HoleOrderingMapTYPE2 holeOrdering2;
551  const Sequence &seqA;
552  const Sequence &seqB;
553  PatternPairMap &matchedEPMs;
554  const PatternPairMap &patterns;
555  };
556 
561  private:
563  matidx_t;
565  seqpos_t;
567  index_t;
568 
569  public:
570  typedef std::pair<matidx_t, matidx_t>
572  typedef std::pair<seqpos_t, seqpos_t>
574 
575  private:
577  &sparse_mapperA;
579  &sparse_mapperB;
580 
581  public:
590  const SparsificationMapper &sparse_mapperB_,
591  const TraceController &trace_controller_)
592  : TraceController::TraceController(trace_controller_),
593  sparse_mapperA(sparse_mapperA_),
594  sparse_mapperB(sparse_mapperB_)
595 
596  {}
597 
598  virtual ~SparseTraceController(){};
599 
601  const SparsificationMapper &
603  return sparse_mapperA;
604  }
605 
607  const SparsificationMapper &
609  return sparse_mapperB;
610  }
611 
623  matidx_t
625  index_t indexA,
626  index_t indexB,
627  matidx_t idx_i,
628  index_t left_endB = std::numeric_limits<index_t>::max()) const {
629  seqpos_t i = sparse_mapperA.get_pos_in_seq_new(indexA, idx_i);
630  return sparse_mapperB.idx_geq(indexB, min_col(i), left_endB);
631  }
632 
645  matidx_t
647  index_t indexA,
648  index_t indexB,
649  matidx_t idx_i,
650  index_t left_endB = std::numeric_limits<index_t>::max()) const {
651  seqpos_t i = sparse_mapperA.get_pos_in_seq_new(indexA, idx_i);
652  return sparse_mapperB.idx_after_leq(indexB, max_col(i), left_endB);
653  }
654 
674  matpos_t
676  index_t indexA,
677  index_t indexB,
678  pair_seqpos_t cur_pos_seq,
679  index_t left_endA = std::numeric_limits<index_t>::max(),
680  index_t left_endB = std::numeric_limits<index_t>::max()) const {
681  bool debug_valid_mat_pos = false;
682 
683  if (debug_valid_mat_pos)
684  std::cout << "first valid mat pos before with tc " << std::endl;
685 
686  seqpos_t i = cur_pos_seq.first;
687  seqpos_t j = cur_pos_seq.second;
688 
689  matidx_t idx_after_max_col;
690 
691  // find valid matrix position based on the SparsificationMapper
692  matidx_t cur_row =
693  sparse_mapperA.first_valid_mat_pos_before(indexA, i, left_endA);
694  matidx_t col_before =
695  sparse_mapperB.first_valid_mat_pos_before(indexB, j, left_endB);
696 
697  // bool valid_pos_found = false;
698 
699  // find a valid position that is valid also based on the
700  // TraceController
701  // go through the rows and find an interval that includes the column
702  // col_before or lies
703  // before the column col_before
704  for (;; --cur_row) {
705  matidx_t min_col =
706  min_col_idx(indexA, indexB, cur_row, left_endB);
707  idx_after_max_col =
708  idx_after_max_col_idx(indexA, indexB, cur_row, left_endB);
709 
710  if (debug_valid_mat_pos)
711  std::cout << "interval " << min_col << ","
712  << idx_after_max_col << std::endl;
713 
714  // valid interval found
715  if (min_col < idx_after_max_col && min_col <= col_before) {
716  // valid_pos_found=true;
717  break;
718  }
719 
720  if (cur_row == 0) {
721  break;
722  }
723  }
724 
725  // assert(valid_pos_found);
726  assert(idx_after_max_col > 0);
727 
728  matidx_t max_col = idx_after_max_col - 1;
729 
730  // the column of the new position is the col_before or lies before
731  // it
732  matpos_t result = matpos_t(cur_row, std::min(max_col, col_before));
733 
734  assert(is_valid_idx_pos(indexA, indexB, result));
735 
736  return result;
737  }
738 
748  pos_in_seq(index_t idxA,
749  index_t idxB, // const Arc &a, const Arc &b,
750  const matpos_t &cur_pos) const {
751  return pair_seqpos_t(
752  sparse_mapperA.get_pos_in_seq_new(idxA, cur_pos.first),
753  sparse_mapperB.get_pos_in_seq_new(idxB, cur_pos.second));
754  }
755 
769  bool
770  matching_wo_gap(index_t idxA,
771  index_t idxB,
772  const matpos_t &idx_pos_diag,
773  pair_seqpos_t seq_pos_to_be_matched) const {
774  pair_seqpos_t pos_diag = pos_in_seq(idxA, idxB, idx_pos_diag);
775  return (pos_diag.first + 1 == seq_pos_to_be_matched.first) &&
776  (pos_diag.second + 1 == seq_pos_to_be_matched.second);
777  }
778 
786  bool
787  pos_unpaired(index_t idxA, index_t idxB, matpos_t pos) const {
788  return sparse_mapperA.pos_unpaired(idxA, pos.first) &&
789  sparse_mapperB.pos_unpaired(idxB, pos.second);
790  }
791 
799  bool
800  is_valid_idx_pos(index_t idxA, index_t idxB, matpos_t mat_pos) const {
801  pair_seqpos_t seq_pos = pos_in_seq(idxA, idxB, mat_pos);
802  return is_valid(seq_pos.first, seq_pos.second);
803  }
804  };
805 
809  class EPM {
810  public:
811  typedef BasePairs__Arc Arc;
812 
820  typedef std::pair<ArcIdx, ArcIdx> PairArcIdx;
821  typedef std::vector<PairArcIdx>
823 
827 
828  typedef std::vector<el_pat_vec> pat_vec_t;
829 
830  private:
831  pat_vec_t pat_vec;
832 
833  score_t score;
834  int state;
836  matpos_t cur_pos;
838  score_t max_tol_left;
840  bool first_insertion;
842  bool invalid;
846 
847  PairArcIdxVec am_to_do;
849 
851  struct compare_el_pat_vec {
852  public:
853  bool
854  operator()(const EPM::el_pat_vec &el1,
855  const EPM::el_pat_vec &el2) const {
856  seqpos_t el1_pos1 = el1.first;
857  seqpos_t el1_pos2 = el1.second;
858  seqpos_t el2_pos1 = el2.first;
859  seqpos_t el2_pos2 = el2.second;
860  char el1_struc = el1.third;
861  char el2_struc = el2.third;
862  return (el1_pos1 < el2_pos1) ||
863  (el1_pos1 == el2_pos1 && el1_pos2 < el2_pos2) ||
864  (el1_pos1 == el2_pos1 && el1_pos2 == el2_pos2 &&
865  el1_struc < el2_struc);
866  }
867  };
868 
869  struct compare_el_am_to_do {
870  public:
871  bool
872  operator()(const EPM::PairArcIdx &el1,
873  const EPM::PairArcIdx &el2) const {
874  return (el1.first < el2.first) ||
875  ((el1.first == el2.first) && el1.second < el2.second);
876  }
877  };
878 
879  public:
881  EPM()
882  : score(0),
883  state(0),
884  cur_pos(matpos_t(0, 0)),
885  max_tol_left(0),
886  first_insertion(true),
887  invalid(false) {}
888 
889  virtual ~EPM() {}
890 
891  //-----------------------------------------------------------------------
892  // getter methods
893  //-----------------------------------------------------------------------
894 
896  score_t
897  get_score() const {
898  return score;
899  }
900 
902  int
903  get_state() const {
904  return state;
905  }
906 
908  const matpos_t &
909  get_cur_pos() const {
910  return cur_pos;
911  }
912 
914  const score_t &
916  return max_tol_left;
917  }
918 
920  bool
922  return first_insertion;
923  }
924 
928  bool
929  is_invalid() const {
930  return invalid;
931  }
932 
933  //-----------------------------------------------------------------------
934  // setter methods
935  //-----------------------------------------------------------------------
936 
941  void
942  set_score(score_t score_) {
943  score = score_;
944  }
945 
950  void
951  set_state(int state_) {
952  state = state_;
953  }
954 
959  void
960  set_cur_pos(const matpos_t &cur_pos_) {
961  cur_pos = cur_pos_;
962  }
963 
968  void
970  max_tol_left = tol;
971  }
972 
977  void
978  set_first_insertion(bool first_insertion_) {
979  first_insertion = first_insertion_;
980  }
981 
983  void
985  invalid = true;
986  }
987 
994  const PairArcIdx &
995  get_am(PairArcIdxVec::size_type idx) const {
996  assert(idx < am_to_do.size());
997  return am_to_do[idx];
998  }
999 
1004  PairArcIdxVec::size_type
1006  return am_to_do.size();
1007  }
1008 
1010  void
1012  am_to_do.clear();
1013  }
1014 
1020  PairArcIdxVec::const_iterator
1021  am_begin() const {
1022  return am_to_do.begin();
1023  }
1024 
1030  PairArcIdxVec::const_iterator
1031  am_end() const {
1032  return am_to_do.end();
1033  }
1034 
1040  el_pat_vec
1041  pat_vec_at(pat_vec_t::size_type idx) const {
1042  assert(idx < pat_vec.size());
1043  return pat_vec[idx];
1044  }
1045 
1050  pat_vec_t::size_type
1051  pat_vec_size() const {
1052  return pat_vec.size();
1053  }
1054 
1059  pat_vec_t::const_iterator
1060  begin() const {
1061  return pat_vec.begin();
1062  }
1063 
1068  pat_vec_t::const_iterator
1069  end() const {
1070  return pat_vec.end();
1071  }
1072 
1079  assert(!pat_vec.empty());
1080  return pair_seqpos_t(pat_vec.back().first, pat_vec.back().second);
1081  }
1082 
1092  void
1093  add(seqpos_t posA, seqpos_t posB, char c) {
1094  pat_vec.push_back(el_pat_vec(posA, posB, c));
1095  }
1096 
1107  void
1109  seqpos_t posB,
1110  char c,
1111  pat_vec_t::size_type pos) {
1112  if (pat_vec.size() <= pos) {
1113  pat_vec.push_back(el_pat_vec(posA, posB, c));
1114  }
1115  pat_vec.at(pos) = el_pat_vec(posA, posB, c);
1116  }
1117 
1123  void
1124  add_am(const Arc &a, const Arc &b) {
1125  add(a.right(), b.right(), ')');
1126  add(a.left(), b.left(), '(');
1127  }
1128 
1134  void
1135  store_am(const Arc &a, const Arc &b) {
1136  const PairArcIdx &pair_arc_idx = PairArcIdx(a.idx(), b.idx());
1137  // store the pair of arc indices in the am_to_do datastructure
1138  am_to_do.push_back(pair_arc_idx);
1139  }
1140 
1145  PairArcIdx
1147  PairArcIdx arc_idx = am_to_do.back();
1148  am_to_do.pop_back();
1149  return arc_idx;
1150  }
1151 
1158  void
1160  sort(pat_vec.begin(), pat_vec.end(), compare_el_pat_vec());
1161  }
1162 
1166  void
1168  sort(am_to_do.begin(), am_to_do.end(), compare_el_am_to_do());
1169  }
1170 
1176  void
1177  insert_epm(const EPM &epm_to_insert) {
1178  pat_vec.insert(pat_vec.end(), epm_to_insert.begin(),
1179  epm_to_insert.end());
1180  }
1181 
1188  bool
1189  includes(const EPM &epm_to_test) const {
1190  assert(pat_vec_size() >= epm_to_test.pat_vec_size());
1191  return std::includes(this->begin(), this->end(),
1192  epm_to_test.begin(), epm_to_test.end(),
1193  compare_el_pat_vec());
1194  }
1195 
1204  bool
1205  includes_am(const EPM &epm_to_test) const {
1206  return std::includes(am_begin(), am_end(), epm_to_test.am_begin(),
1207  epm_to_test.am_end(), compare_el_am_to_do());
1208  }
1209 
1215  void
1216  print_epm(std::ostream &out, bool verbose) const {
1217  out << "_________________________________________________"
1218  << std::endl;
1219  out << "epm with score " << this->score << std::endl;
1220  out << " ";
1221  for (pat_vec_t::const_iterator it = pat_vec.begin();
1222  it != pat_vec.end(); ++it) {
1223  out << it->first << ":" << it->second << " ";
1224  }
1225  out << std::endl;
1226  out << " ";
1227  for (pat_vec_t::const_iterator it = pat_vec.begin();
1228  it != pat_vec.end(); ++it) {
1229  out << it->third;
1230  }
1231  out << std::endl;
1232  out << "am_to_do " << am_to_do << std::endl;
1233  out << "tolerance left " << this->max_tol_left << std::endl;
1234  if (verbose) {
1235  out << "score " << score << std::endl;
1236  out << "pos " << this->cur_pos.first << ","
1237  << this->cur_pos.second << std::endl;
1238  out << "state " << this->state << std::endl;
1239  }
1240  out << "______________________________________________________"
1241  << std::endl;
1242  }
1243  };
1244 
1253  inline bool
1254  operator<(const EPM &epm1, const EPM &epm2) {
1255  return epm1.get_max_tol_left() > epm2.get_max_tol_left();
1256  }
1257 
1264  inline std::ostream &
1265  operator<<(std::ostream &out, const EPM &epm) {
1266  epm.print_epm(out, false);
1267  return out;
1268  }
1269 
1277  template <class T1>
1278  T1
1279  max3(const T1 &first, const T1 &second, const T1 &third) {
1280  return max(max(first, second), third);
1281  }
1282 
1291  template <class T1>
1292  T1
1293  max4(const T1 &first, const T1 &second, const T1 &third, const T1 &fourth) {
1294  return max(max3(first, second, third), fourth);
1295  }
1296 
1313  typedef BasePairs__Arc Arc;
1314 
1315  typedef SparsificationMapper::ArcIdx ArcIdx;
1317  ArcIdxVec;
1319  matidx_t;
1321  seqpos_t;
1322  typedef SparsificationMapper::index_t index_t;
1327  matpos_t;
1329  pair_seqpos_t;
1330 
1331  typedef EPM::PairArcIdx PairArcIdx;
1332  typedef EPM::PairArcIdxVec
1333  PairArcIdxVec;
1334 
1335  typedef std::list<EPM>
1336  epm_cont_t;
1337  typedef epm_cont_t::iterator epm_it_t;
1338  typedef std::pair<score_t, epm_cont_t> el_map_am_to_do_t;
1344 
1347  typedef std::unordered_map<PairArcIdx, el_map_am_to_do_t>
1348  map_am_to_do_t;
1349 
1350  private:
1354  //<state, max_tol, current matrix position, potential arcMatch, sequence
1355  //position to be matched>
1356  typedef quintuple<int,
1357  infty_score_t,
1358  matpos_t,
1359  PairArcIdx,
1360  pair_seqpos_t>
1361  poss_L_LR;
1362 
1363  // infty_score_t because of the check_poss, change to score_t!!!
1367  poss_in_G; //<state,max_tol,current matrix position>
1368 
1369  const Sequence &seqA;
1370  const Sequence &seqB;
1371 
1372  const RnaData &rna_dataA;
1374  const RnaData &rna_dataB;
1375 
1376  const ArcMatches
1377  &arc_matches;
1378 
1379  const BasePairs &bpsA;
1380  const BasePairs &bpsB;
1381  const SparseTraceController
1382  &sparse_trace_controller;
1384  // (valid positions in the matrix)
1385  const SparsificationMapper
1386  &sparse_mapperA;
1387  const SparsificationMapper
1388  &sparse_mapperB;
1389  PatternPairMap &foundEPMs;
1392 
1393  ScoreMatrix L;
1394  ScoreMatrix G_A;
1398  ScoreMatrix G_AB;
1400  ScoreMatrix
1401  LR;
1402  ScoreMatrix F;
1403  ScoreMatrix Dmat;
1405 
1406  int alpha_1;
1407  int alpha_2;
1408  int alpha_3;
1409 
1410  int difference_to_opt_score;
1413  // worse than the optimal score are considered
1414  int min_score;
1415  long int max_number_of_EPMs;
1417  long int cur_number_of_EPMs;
1419 
1420  bool inexact_struct_match;
1422  score_t struct_mismatch_score;
1424  bool add_filter;
1426 
1427  bool verbose;
1428 
1429  pair_seqpos_t pos_of_max;
1430 
1431  enum {
1432  in_LR,
1433  in_G_A,
1434  in_G_AB,
1435  in_L,
1436  in_F
1437  };
1438 
1439  const Arc pseudo_arcA;
1440  const Arc pseudo_arcB;
1441 
1449  const infty_score_t &
1450  D(const ArcMatch &am) const {
1451  return D(am.arcA(), am.arcB());
1452  }
1453 
1461  infty_score_t &
1462  D(const ArcMatch &am) {
1463  return D(am.arcA(), am.arcB());
1464  }
1465 
1474  const infty_score_t &
1475  D(const Arc &a, const Arc &b) const {
1476  return Dmat(a.idx(), b.idx());
1477  }
1478 
1487  infty_score_t &
1488  D(const Arc &a, const Arc &b) {
1489  return Dmat(a.idx(), b.idx());
1490  }
1491 
1492  bool
1493  nucleotide_match(seqpos_t pos_seqA, seqpos_t pos_seqB) const {
1494  assert(pos_seqA >= 1 && pos_seqA <= seqA.length() &&
1495  pos_seqB >= 1 &&
1496  pos_seqB <= seqB.length()); // seqA and seqB are 1-based
1497  return (seqA[pos_seqA] == seqB[pos_seqB]);
1498  }
1499 
1500  bool
1501  seq_matching(ArcIdx idxA,
1502  ArcIdx idxB,
1503  matpos_t cur_mat_pos,
1504  pair_seqpos_t cur_seq_pos) const {
1505  seqpos_t i = cur_seq_pos.first;
1506  seqpos_t j = cur_seq_pos.second;
1507 
1508  return sparse_trace_controller.pos_unpaired(idxA, idxB,
1509  cur_mat_pos) &&
1510  nucleotide_match(i, j);
1511  }
1512 
1513  // ----------------------------------------
1514  // fill matrices
1515 
1521  void
1522  initialize_gap_matrices();
1523 
1525  void
1526  init_Fmat();
1527 
1539  void
1540  init_mat(ScoreMatrix &mat,
1541  const Arc &a,
1542  const Arc &b,
1543  infty_score_t first_entry,
1544  infty_score_t first_col,
1545  infty_score_t first_row);
1546 
1558  pair_seqpos_t
1559  compute_LGLR(const Arc &a, const Arc &b, bool suboptimal);
1560 
1576  compute_matrix_entry(const Arc &a,
1577  const Arc &b,
1578  matpos_t mat_pos,
1579  matpos_t mat_pos_diag,
1580  bool matrixLR,
1581  bool suboptimal);
1582 
1602  seq_str_matching(const Arc &a,
1603  const Arc &b,
1604  matpos_t mat_pos_diag,
1605  pair_seqpos_t seq_pos_to_be_matched,
1606  score_t add_score,
1607  bool matrixLR,
1608  bool suboptimal);
1609 
1611  void
1612  compute_F();
1613 
1614  // --------------------------------------------
1615  // helper functions
1616 
1622  score_t
1623  score_for_seq_match();
1624 
1634  score_for_am(const Arc &a, const Arc &b) const;
1635 
1648  score_t
1649  score_for_stacking(const Arc &a,
1650  const Arc &b,
1651  const Arc &inner_a,
1652  const Arc &inner_b);
1653 
1661  void
1662  add_foundEPM(EPM &cur_epm, bool count_EPMs);
1663 
1664  bool
1665  check_PPM() {
1666  if (this->difference_to_opt_score != -1)
1667  return true; // when we use the parameter
1668  // difference_to_opt_score,
1669  // we enumerate all EPMs regardless of whether the number extends
1670  // the max_number_of_EPMs
1671  if (cur_number_of_EPMs >= max_number_of_EPMs + 1)
1672  return false;
1673  else
1674  return true;
1675  }
1676 
1687  void
1688  find_start_pos_for_tb(bool suboptimal,
1689  score_t difference_to_opt_score = -1,
1690  bool count_EPMs = false);
1691 
1692  bool
1693  check_num_EPMs() {
1694  double valid_deviation = 0.8;
1695  return (cur_number_of_EPMs >=
1696  max_number_of_EPMs * valid_deviation &&
1697  cur_number_of_EPMs <= max_number_of_EPMs);
1698  }
1699 
1700  // --------------------------------------------
1701  // heuristic traceback
1702 
1711  void
1712  trace_F_heuristic(pos_type i, pos_type j, EPM &cur_epm);
1713 
1722  void
1723  trace_LGLR_heuristic(const Arc &a, const Arc &b, EPM &cur_epm);
1724 
1743  bool
1744  trace_seq_str_matching_heuristic(const Arc &a,
1745  const Arc &b,
1746  int &state,
1747  matpos_t &cur_mat_pos,
1748  matpos_t mat_pos_diag,
1749  pair_seqpos_t seq_pos_to_be_matched,
1750  score_t add_score);
1751 
1752  // --------------------------------------------
1753  // suboptimal traceback
1754 
1766  void
1767  trace_F_suboptimal(pos_type i,
1768  pos_type j,
1769  score_t max_tol,
1770  bool recurse,
1771  bool count_EPMs);
1772 
1782  void
1783  apply_filter(epm_cont_t &found_epms);
1784 
1797  void
1798  trace_LGLR_suboptimal(const Arc &a,
1799  const Arc &b,
1800  score_t max_tol,
1801  epm_cont_t &found_epms,
1802  bool recurse,
1803  bool count_EPMs);
1804 
1830  void
1831  trace_seq_str_matching_subopt(const Arc &a,
1832  const Arc &b,
1833  score_t score_contr,
1834  matpos_t mat_pos_diag,
1835  pair_seqpos_t seq_pos_to_be_matched,
1836  const PairArcIdx &am,
1837  poss_L_LR &poss,
1838  epm_it_t cur_epm,
1839  epm_cont_t &found_epms,
1840  map_am_to_do_t &map_am_to_do,
1841  bool count_EPMs);
1842 
1862  bool
1863  check_poss(const Arc &a,
1864  const Arc &b,
1865  const poss_L_LR &pot_new_poss,
1866  poss_L_LR &poss,
1867  epm_it_t cur_epm,
1868  epm_cont_t &found_epms,
1869  map_am_to_do_t &am_to_do_for_cur_am,
1870  bool count_EPMs);
1871 
1892  void
1893  store_new_poss(const Arc &a,
1894  const Arc &b,
1895  bool last_poss,
1896  const poss_L_LR &new_poss,
1897  poss_L_LR &poss,
1898  epm_it_t cur_epm,
1899  epm_cont_t &found_epms,
1900  map_am_to_do_t &am_to_do_for_cur_am,
1901  bool count_EPMs);
1902 
1921  void
1922  trace_G_suboptimal(const Arc &a,
1923  const Arc &b,
1924  const poss_L_LR &pot_new_poss,
1925  poss_L_LR &poss,
1926  epm_it_t cur_epm,
1927  epm_cont_t &found_epms,
1928  map_am_to_do_t &map_am_to_do,
1929  bool count_EPMs);
1930 
1943  bool
1944  is_valid_gap(const Arc &a, const Arc &b, const poss_L_LR &pot_new_poss);
1945 
1966  void
1967  preproc_fill_epm(map_am_to_do_t &am_to_do,
1968  epm_it_t cur_epm,
1969  epm_cont_t &found_epms,
1970  bool count_EPMs,
1971  score_t min_allowed_score = -1);
1972 
2000  void
2001  fill_epm(const map_am_to_do_t &map_am_to_do,
2002  size_type vec_idx,
2003  std::vector<score_t> &max_tol_left_up_to_pos,
2004  std::vector<const EPM *> &epms_to_insert,
2005  score_t min_score,
2006  epm_it_t cur_epm,
2007  epm_cont_t &found_epms,
2008  bool count_EPMs);
2009 
2010  // --------------------------------------------
2011  // debugging/testing
2012 
2013  // print the matrices in the condensed form
2014  void
2015  print_matrices(const Arc &a,
2016  const Arc &b,
2017  size_type offset_A,
2018  size_type offset_B,
2019  bool suboptimal,
2020  bool add_info);
2021 
2022  // checks whether an epm is valid, i.e. only one gap per arc match etc.
2023  bool
2024  validate_epm(const EPM &epm_to_test) const;
2025 
2026  // checks the validity of the epm list, i.e. that no epm is contained in
2027  // another epm (all
2028  // epms are maximally extended)
2029  bool
2030  validate_epm_list(epm_cont_t &found_epms) const;
2031 
2032  public:
2062  ExactMatcher(const Sequence &seqA_,
2063  const Sequence &seqB_,
2064  const RnaData &rna_dataA_,
2065  const RnaData &rna_dataB_,
2066  const ArcMatches &arc_matches_,
2067  const SparseTraceController &sparse_trace_controller_,
2068  PatternPairMap &foundEPMs_,
2069  int alpha_1_,
2070  int alpha_2_,
2071  int alpha_3_,
2072  score_t difference_to_opt_score_,
2073  score_t min_score_,
2074  long int max_number_of_EPMs_,
2075  bool inexact_struct_match_,
2076  score_t struct_mismatch_score_,
2077  bool apply_filter_,
2078  bool verbose_);
2079 
2080  ~ExactMatcher();
2081 
2084  void
2086 
2088  void
2090 
2097  void
2098  trace_EPMs(bool suboptimal);
2099  };
2100 
2101 } // end namespace
2102 
2103 #endif // EXACT_MATCHER_HH
Represents a match of two base pairs (arc match)
Definition: arc_matches.hh:35
const Arc & arcB() const
Definition: arc_matches.hh:72
const Arc & arcA() const
Definition: arc_matches.hh:62
Maintains the relevant arc matches and their scores.
Definition: arc_matches.hh:116
Represents a base pair.
Definition: basepairs.hh:39
size_t right() const
Definition: basepairs.hh:77
size_t idx() const
Definition: basepairs.hh:87
size_t left() const
Definition: basepairs.hh:67
Describes sequence and structure ensemble of an RNA.
Definition: basepairs.hh:108
a class for the representation of exact pattern matches (EPM)
Definition: exact_matcher.hh:809
PairArcIdxVec::const_iterator am_end() const
Definition: exact_matcher.hh:1031
SparseTraceController::pair_seqpos_t pair_seqpos_t
pair of positions in sequence A and B
Definition: exact_matcher.hh:819
void sort_am_to_do()
Definition: exact_matcher.hh:1167
pat_vec_t::const_iterator begin() const
Definition: exact_matcher.hh:1060
SparseTraceController::matpos_t matpos_t
a type for a position in a sparsified matrix
Definition: exact_matcher.hh:816
void add(seqpos_t posA, seqpos_t posB, char c)
Definition: exact_matcher.hh:1093
el_pat_vec pat_vec_at(pat_vec_t::size_type idx) const
Definition: exact_matcher.hh:1041
void print_epm(std::ostream &out, bool verbose) const
Definition: exact_matcher.hh:1216
void store_am(const Arc &a, const Arc &b)
Definition: exact_matcher.hh:1135
bool get_first_insertion() const
returns whether it is the first insertion into the EPM
Definition: exact_matcher.hh:921
score_t get_score() const
returns the score of the EPM
Definition: exact_matcher.hh:897
EPM()
Constructor.
Definition: exact_matcher.hh:881
void set_max_tol_left(score_t tol)
Definition: exact_matcher.hh:969
std::pair< ArcIdx, ArcIdx > PairArcIdx
pair of arc indices
Definition: exact_matcher.hh:820
void sort_patVec()
Definition: exact_matcher.hh:1159
SparsificationMapper::seq_pos_t seqpos_t
a type for a sequence position
Definition: exact_matcher.hh:814
void set_first_insertion(bool first_insertion_)
Definition: exact_matcher.hh:978
bool includes_am(const EPM &epm_to_test) const
Definition: exact_matcher.hh:1205
triple< seqpos_t, seqpos_t, char > el_pat_vec
Definition: exact_matcher.hh:826
void add_am(const Arc &a, const Arc &b)
Definition: exact_matcher.hh:1124
void overwrite(seqpos_t posA, seqpos_t posB, char c, pat_vec_t::size_type pos)
Definition: exact_matcher.hh:1108
bool is_invalid() const
Definition: exact_matcher.hh:929
BasePairs__Arc Arc
arc class of BasePairs
Definition: exact_matcher.hh:811
void set_cur_pos(const matpos_t &cur_pos_)
Definition: exact_matcher.hh:960
const matpos_t & get_cur_pos() const
returns the current matrix position of the EPM
Definition: exact_matcher.hh:909
void clear_am_to_do()
deletes the list am_to_do
Definition: exact_matcher.hh:1011
void set_state(int state_)
Definition: exact_matcher.hh:951
pair_seqpos_t last_matched_pos()
Definition: exact_matcher.hh:1078
pat_vec_t::size_type pat_vec_size() const
Definition: exact_matcher.hh:1051
SparsificationMapper::ArcIdx ArcIdx
arc index
Definition: exact_matcher.hh:817
pat_vec_t::const_iterator end() const
Definition: exact_matcher.hh:1069
const PairArcIdx & get_am(PairArcIdxVec::size_type idx) const
Definition: exact_matcher.hh:995
std::vector< PairArcIdx > PairArcIdxVec
a vector of pairs of arc indices
Definition: exact_matcher.hh:822
virtual ~EPM()
destructor
Definition: exact_matcher.hh:889
int get_state() const
return the current matrix state of the EPM
Definition: exact_matcher.hh:903
void set_invalid()
sets the flag invalid for the EPM
Definition: exact_matcher.hh:984
PairArcIdx next_arcmatch()
Definition: exact_matcher.hh:1146
void insert_epm(const EPM &epm_to_insert)
Definition: exact_matcher.hh:1177
void set_score(score_t score_)
Definition: exact_matcher.hh:942
const score_t & get_max_tol_left() const
returns the maximal tolerance that is left for the EPM
Definition: exact_matcher.hh:915
PairArcIdxVec::size_type number_of_am()
Definition: exact_matcher.hh:1005
std::vector< el_pat_vec > pat_vec_t
type for pattern vector
Definition: exact_matcher.hh:828
bool includes(const EPM &epm_to_test) const
Definition: exact_matcher.hh:1189
PairArcIdxVec::const_iterator am_begin() const
Definition: exact_matcher.hh:1021
Computes exact pattern matchings (EPM) between two RNA sequences.
Definition: exact_matcher.hh:1312
void compute_arcmatch_score()
Definition: exact_matcher.cc:227
ExactMatcher(const Sequence &seqA_, const Sequence &seqB_, const RnaData &rna_dataA_, const RnaData &rna_dataB_, const ArcMatches &arc_matches_, const SparseTraceController &sparse_trace_controller_, PatternPairMap &foundEPMs_, int alpha_1_, int alpha_2_, int alpha_3_, score_t difference_to_opt_score_, score_t min_score_, long int max_number_of_EPMs_, bool inexact_struct_match_, score_t struct_mismatch_score_, bool apply_filter_, bool verbose_)
Constructor.
Definition: exact_matcher.cc:10
void test_arcmatch_score()
for debugging
Definition: exact_matcher.cc:261
void trace_EPMs(bool suboptimal)
computes the traceback and traces all EPMs
Definition: exact_matcher.cc:589
Definition: infty_int.hh:325
computes the best chain of EPMs, the LCS-EPM
Definition: exact_matcher.hh:412
void output_locarna(const std::string &sequenceA, const std::string &sequenceB, const std::string &outfile)
outputs anchor constraints to be used as input for locarna
Definition: exact_matcher.cc:3119
LCSEPM(const Sequence &seqA_, const Sequence &seqB_, const PatternPairMap &myPatterns, PatternPairMap &myLCSEPM)
Definition: exact_matcher.hh:421
virtual ~LCSEPM()
Destructor.
Definition: exact_matcher.cc:2591
void calculateLCSEPM(bool quiet)
calculates the best chain of EPMs, the LCS-EPM
Definition: exact_matcher.cc:2599
std::pair< SequenceAnnotation, SequenceAnnotation > anchor_annotation()
get anchor annotation
Definition: exact_matcher.cc:3032
void output_clustal(const std::string &outfile_name)
writes chain as clustal alignment
Definition: exact_matcher.cc:3146
void MapToPS(const std::string &sequenceA, const std::string &sequenceB, PatternPairMap &myMap, const std::string &file1, const std::string &file2)
output chained EPMs to PS files
Definition: exact_matcher.cc:2945
pos_type length() const
Length of multiple aligment.
Definition: multiple_alignment.hh:561
manage a set of EPMs (PatternPair)
Definition: exact_matcher.hh:258
const patListTYPE & getList() const
Definition: exact_matcher.cc:2530
PatternPair * SelfValuePTR
pointer to PatternPair
Definition: exact_matcher.hh:261
const PatternPair & getPatternPair(const std::string &id) const
gets the PatternPair with the Id id
Definition: exact_matcher.cc:2520
PatternPair selfValueTYPE
PatternPair.
Definition: exact_matcher.hh:260
int getMapBases()
computes the number of mapped bases
Definition: exact_matcher.cc:2549
std::unordered_map< std::string, SelfValuePTR > PatternIdMapTYPE
map type patternId -> pointer to PatternPair
Definition: exact_matcher.hh:275
patListTYPE::const_iterator patListCITER
const iterator for the list of PatternPairs
Definition: exact_matcher.hh:273
orderedMapTYPE::const_iterator orderedMapCITER
const iterator for the map
Definition: exact_matcher.hh:266
virtual ~PatternPairMap()
Destructor.
Definition: exact_matcher.cc:2470
const int getMinPatternSize() const
Definition: exact_matcher.hh:387
PatternPairMap()
Contructor.
Definition: exact_matcher.cc:2463
orderedMapTYPE::iterator orderedMapITER
iterator for the map
Definition: exact_matcher.hh:268
std::multimap< int, SelfValuePTR, std::greater< int > > orderedMapTYPE
ordered map type
Definition: exact_matcher.hh:264
const int size() const
Definition: exact_matcher.cc:2544
orderedMapTYPE & getOrderedMap2()
Definition: exact_matcher.cc:2539
int getMapEPMScore()
computes the score of the list of PatternPairs patternList
Definition: exact_matcher.cc:2558
std::vector< std::unique_ptr< selfValueTYPE > > patListTYPE
list of patternPairs
Definition: exact_matcher.hh:269
const orderedMapTYPE & getOrderedMap() const
Definition: exact_matcher.cc:2534
void makeOrderedMap()
creates the ordered Map
Definition: exact_matcher.cc:2498
patListTYPE::iterator patListITER
iterator for the list of PatternPairs
Definition: exact_matcher.hh:271
void add(const std::string &id, const SinglePattern &first, const SinglePattern &second, const std::string &structure, int score)
adds a PatternPair consisting of two SinglePatterns to the PatternPairMap
Definition: exact_matcher.cc:2474
const SelfValuePTR getPatternPairPTR(const std::string &id) const
gets the pointer to the PatternPair with the Id id
Definition: exact_matcher.cc:2525
is able to manage an EPM, consists of 2 singlepatterns, one in each RNA
Definition: exact_matcher.hh:105
const int & getSize() const
Definition: exact_matcher.hh:153
const std::vector< intPPair > & getInsideBounds() const
Definition: exact_matcher.hh:204
void setOutsideBounds(intPPair myPPair)
Definition: exact_matcher.cc:2439
PatternPair(const std::string &myId, const SinglePattern &myFirstPat, const SinglePattern &mySecPat, const std::string &structure_, int &score_)
Constructor.
Definition: exact_matcher.hh:117
const std::string & get_struct() const
Definition: exact_matcher.hh:238
const SinglePattern & getSecPat() const
Definition: exact_matcher.hh:171
const int getScore() const
Definition: exact_matcher.hh:220
const int getEPMScore() const
Definition: exact_matcher.hh:229
void addInsideBounds(intPPair myPPair)
adds the inside Bound myPPair
Definition: exact_matcher.cc:2444
virtual ~PatternPair()
Destructor.
Definition: exact_matcher.hh:137
const std::string & getId() const
Definition: exact_matcher.hh:144
void setEPMScore(int myScore)
Definition: exact_matcher.cc:2449
void resetBounds()
clears the insideBounds
Definition: exact_matcher.cc:2434
const intPPair getOutsideBounds() const
Definition: exact_matcher.hh:191
const SinglePattern & getFirstPat() const
Definition: exact_matcher.hh:162
represent sparsified data of RNA ensemble
Definition: rna_data.hh:44
"Sequence View" of multiple alignment as array of column vectors
Definition: sequence.hh:17
stores a Pattern in one sequence
Definition: exact_matcher.hh:48
const std::string & getmyId() const
Definition: exact_matcher.hh:73
virtual ~SinglePattern()
Destructor.
Definition: exact_matcher.hh:66
const intVec & getPat() const
Definition: exact_matcher.hh:91
SinglePattern(const std::string &myId_, const std::string &seqId_, const intVec &mySinglePattern_)
constructor
Definition: exact_matcher.hh:58
const std::string & getseqId() const
Definition: exact_matcher.hh:82
combines the TraceController with the Mapper for both sequences
Definition: exact_matcher.hh:560
SparseTraceController(const SparsificationMapper &sparse_mapperA_, const SparsificationMapper &sparse_mapperB_, const TraceController &trace_controller_)
constructor
Definition: exact_matcher.hh:589
matpos_t diag_pos_bef(index_t indexA, index_t indexB, pair_seqpos_t cur_pos_seq, index_t left_endA=std::numeric_limits< index_t >::max(), index_t left_endB=std::numeric_limits< index_t >::max()) const
computes the first valid matrix position before a sequence position considering the trace controller
Definition: exact_matcher.hh:675
pair_seqpos_t pos_in_seq(index_t idxA, index_t idxB, const matpos_t &cur_pos) const
maps the matrix position cur_pos to the corresponding pair of positions in sequence A and B
Definition: exact_matcher.hh:748
const SparsificationMapper & get_sparse_mapperA() const
destructor
Definition: exact_matcher.hh:602
bool pos_unpaired(index_t idxA, index_t idxB, matpos_t pos) const
checks whether the matrix position pos can be unpaired in both sequences
Definition: exact_matcher.hh:787
bool matching_wo_gap(index_t idxA, index_t idxB, const matpos_t &idx_pos_diag, pair_seqpos_t seq_pos_to_be_matched) const
is a EPM without a gap in between possible
Definition: exact_matcher.hh:770
bool is_valid_idx_pos(index_t idxA, index_t idxB, matpos_t mat_pos) const
checks whether a matrix position is valid
Definition: exact_matcher.hh:800
matidx_t min_col_idx(index_t indexA, index_t indexB, matidx_t idx_i, index_t left_endB=std::numeric_limits< index_t >::max()) const
minimal column of trace in a row in the sparsified matrix
Definition: exact_matcher.hh:624
std::pair< matidx_t, matidx_t > matpos_t
a type for a position in a sparsified matrix
Definition: exact_matcher.hh:571
matidx_t idx_after_max_col_idx(index_t indexA, index_t indexB, matidx_t idx_i, index_t left_endB=std::numeric_limits< index_t >::max()) const
index after maximal column of trace in a row in the sparsified matrix
Definition: exact_matcher.hh:646
const SparsificationMapper & get_sparse_mapperB() const
returns reference to sparsification mapper for sequence B
Definition: exact_matcher.hh:608
std::pair< seqpos_t, seqpos_t > pair_seqpos_t
a type for a pair of positions in the sequences
Definition: exact_matcher.hh:573
Represents the mapping for sparsification.
Definition: sparsification_mapper.hh:30
seq_pos_t get_pos_in_seq_new(index_t idx, matidx_t pos) const
Definition: sparsification_mapper.hh:225
std::vector< ArcIdx > ArcIdxVec
vector of arc indices
Definition: sparsification_mapper.hh:34
matidx_t first_valid_mat_pos_before(index_t index, seq_pos_t pos, index_t left_end=std::numeric_limits< index_t >::max()) const
Definition: sparsification_mapper.hh:208
size_t ArcIdx
type of arc index
Definition: sparsification_mapper.hh:33
pos_type seq_pos_t
type for a sequence position
Definition: sparsification_mapper.hh:36
pos_type matidx_t
type for a matrix position
Definition: sparsification_mapper.hh:35
matidx_t idx_after_leq(index_t index, seq_pos_t max_col, index_t left_end=std::numeric_limits< index_t >::max()) const
Definition: sparsification_mapper.hh:344
size_t index_t
type for an index
Definition: sparsification_mapper.hh:38
matidx_t idx_geq(index_t index, seq_pos_t min_col, index_t left_end=std::numeric_limits< index_t >::max()) const
Definition: sparsification_mapper.hh:299
bool pos_unpaired(index_t idx, matidx_t pos) const
Definition: sparsification_mapper.hh:249
Controls the matrix cells valid for traces.
Definition: trace_controller.hh:200
bool is_valid(size_type i, size_type j) const
Is (i,j) a valid cell of the DP matrices (i.e. on some possible trace)?
Definition: trace_controller.hh:329
size_t max_col(size_t i) const
Maximal column of trace in a row.
Definition: trace_controller.hh:135
size_t min_col(size_t i) const
Minimal column of trace in a row.
Definition: trace_controller.hh:123
Represents a 5-tuple.
Definition: tuples.hh:64
Represents a 3-tuple.
Definition: tuples.hh:17
T3 third
third value
Definition: tuples.hh:19
Definition: aligner.cc:15
size_type pos_type
type of a sequence position
Definition: aux.hh:126
std::ostream & operator<<(std::ostream &out, const AlignerRestriction &r)
Definition: aligner_restriction.hh:135
TaintedInftyInt max(const TaintedInftyInt &x, const TaintedInftyInt &y)
Definition: infty_int.hh:567
T1 max4(const T1 &first, const T1 &second, const T1 &third, const T1 &fourth)
Definition: exact_matcher.hh:1293
bool operator<(const EPM &epm1, const EPM &epm2)
Definition: exact_matcher.hh:1254
size_t size_type
general size type
Definition: aux.hh:120
InftyInt infty_score_t
Definition: scoring_fwd.hh:17
long int score_t
type of the locarna score as defined by the class Scoring
Definition: scoring_fwd.hh:13
T1 max3(const T1 &first, const T1 &second, const T1 &third)
Definition: exact_matcher.hh:1279