LocARNA-2.0.0
arc_matches.hh
1 #ifndef LOCARNA_ARC_MATCHES_HH
2 #define LOCARNA_ARC_MATCHES_HH
3 
4 #ifdef HAVE_CONFIG_H
5 #include <config.h>
6 #endif
7 
8 #include <algorithm>
9 #include <vector>
10 #include <unordered_map>
11 
12 #include "scoring_fwd.hh"
13 #include "aux.hh"
14 #include "matrix.hh"
15 #include "basepairs.hh"
16 
17 #include <assert.h>
18 
19 namespace LocARNA {
20  class Scoring;
21  class Sequence;
22  class RnaData;
23  class AnchorConstraints;
24  class TraceController;
25  class MatchController;
26 
35  class ArcMatch {
36  public:
37  typedef std::vector<int>::size_type size_type;
38  typedef size_type idx_type;
39  typedef BasePairs__Arc Arc;
40  private:
41  const Arc *arcA_;
42  const Arc *arcB_;
43  idx_type idx_;
44 
45  public:
53  ArcMatch(const Arc *arcA, const Arc *arcB, idx_type idx)
54  : arcA_(arcA), arcB_(arcB), idx_(idx) {}
55 
61  const Arc &
62  arcA() const {
63  return *arcA_;
64  }
65 
71  const Arc &
72  arcB() const {
73  return *arcB_;
74  }
75 
81  idx_type
82  idx() const {
83  return idx_;
84  }
85  };
86 
//! Vector of arc matches.
88  typedef std::vector<ArcMatch> ArcMatchVec;
89 
//! Vector of arc match indices.
91  typedef std::vector<ArcMatch::idx_type> ArcMatchIdxVec;
92 
116  class ArcMatches {
117  public:
118  typedef std::vector<int>::size_type size_type;
119  typedef BasePairs__Arc Arc;
120  protected:
123 
126 
127  /* Constraints and Heuristics */
128 
130 
132 
135 
137 
150  bool
151  is_valid_arcmatch(const Arc &arcA, const Arc &arcB) const;
152 
153  /* END constraints and heuristics */
154 
158 
161 
167 
169  std::vector<score_t> scores;
170 
174 
178 
181 
183  void
185 
189  const ArcMatches &arc_matches;
190 
191  public:
197  explicit lex_greater_left_ends(const ArcMatches &arc_matches_)
198  : arc_matches(arc_matches_) {}
199 
210  bool
212  const ArcMatch::idx_type &j) const {
213  size_type ali = arc_matches.arcmatch(i).arcA().left();
214  size_type bli = arc_matches.arcmatch(i).arcB().left();
215  size_type alj = arc_matches.arcmatch(j).arcA().left();
216  size_type blj = arc_matches.arcmatch(j).arcB().left();
217 
218  return (ali > alj) || (ali == alj && bli > blj);
219  }
220  };
221 
226  class tuple5 {
227  public:
228  typedef std::vector<int>::size_type size_type;
229 
236 
247  size_type j_,
248  size_type k_,
249  size_type l_,
250  score_t score_)
251  : i(i_), j(j_), k(k_), l(l_), score(score_) {}
252  };
253 
254  public:
277  ArcMatches(const Sequence &seqA_,
278  const Sequence &seqB_,
279  const std::string &arcmatch_scores_file,
280  int probability_scale,
283  const MatchController &trace_controller,
285 
307  ArcMatches(const RnaData &rnadataA,
308  const RnaData &rnadataB,
309  double min_prob,
312  const MatchController &trace_controller,
314 
316  ~ArcMatches();
317 
318  // for the mea probabilistic consistency transformation, support to read
319  // and write the arcmatch scores
320  // this allows in general to have user defined arc-match scores
321 
334  void
335  read_arcmatch_scores(const std::string &arcmatch_scores_file,
336  int probability_scale);
337 
343  void
344  write_arcmatch_scores(const std::string &arcmatch_scores_file,
345  const Scoring &scoring) const;
346 
348  const BasePairs &
349  get_base_pairsA() const {
350  return *bpsA;
351  }
352 
354  const BasePairs &
355  get_base_pairsB() const {
356  return *bpsB;
357  }
358 
361  bool
362  explicit_scores() const {
364  }
365 
371  void
372  make_scores_explicit(const Scoring &scoring);
373 
380  score_t
381  get_score(const ArcMatch &am) const {
382  assert(maintain_explicit_scores);
383  return scores[am.idx()];
384  }
385 
387  size_type
388  num_arc_matches() const {
389  return number_of_arcmatches;
390  }
391 
393  const ArcMatch &
394  arcmatch(size_type idx) const {
395  assert(idx < number_of_arcmatches);
396  return arc_matches_vec[idx];
397  }
398 
399  // ============================================================
400  // Iteration over arc matches
401  //
402 
404  const ArcMatchIdxVec &
406  return common_right_end_lists(i, j);
407  }
408 
410  const ArcMatchIdxVec &
412  return common_left_end_lists(i, j);
413  }
414 
415  // ============================================================
416 
438  void
440  size_type bl,
441  size_type *max_ar,
442  size_type *max_br,
443  bool no_lonely_pairs) const;
444 
451  void
453  size_type bl,
454  size_type *min_ar,
455  size_type *min_br) const;
456 
457  // ------------------------------------------------------------
458  // inner arc matches
459 
464  bool
465  exists_inner_arc_match(const ArcMatch &am) const {
466  return inner_arcmatch_idxs[am.idx()] < num_arc_matches();
467  }
468 
474  const ArcMatch &
475  inner_arc_match(const ArcMatch &am) const {
476  return arcmatch(inner_arcmatch_idxs[am.idx()]);
477  }
478 
485  void
487 
488  // ------------------------------------------------------------
489  // iteration (in no specific order)
490 
492  typedef ArcMatchVec::const_iterator const_iterator;
493 
496  begin() const {
497  return arc_matches_vec.begin();
498  }
499 
502  end() const {
503  return arc_matches_vec.begin() + number_of_arcmatches;
504  }
505  };
506 
514  class ArcMatchesIndexed : public ArcMatches {
515  public:
539  const Sequence &seqB_,
540  const std::string &arcmatch_scores_file,
541  int probability_scale,
544  const MatchController &trace_controller,
546  : ArcMatches(seqA_,
547  seqB_,
548  arcmatch_scores_file,
549  probability_scale,
552  trace_controller,
553  constraints),
554  am_index_() {
555  build_arcmatch_index();
556  }
557 
579  ArcMatchesIndexed(const RnaData &rnadataA,
580  const RnaData &rnadataB,
581  double min_prob,
584  const MatchController &trace_controller,
586  : ArcMatches(rnadataA,
587  rnadataB,
588  min_prob,
591  trace_controller,
592  constraints),
593  am_index_() {
594  build_arcmatch_index();
595  }
596 
597  private:
599  typedef std::pair<size_type, size_type> idx_pair_t;
600 
602  typedef std::unordered_map<idx_pair_t, ArcMatch::idx_type>
603  am_index_type;
604 
606  am_index_type am_index_;
607 
612  void
613  build_arcmatch_index();
614 
615  public:
624  const ArcMatch::idx_type
626  // The invalid arc match index is implemented to be the
627  // maximum valid index + 1.
628  // This allows an efficient implementation, where we push
629  // an invalid arc match to the end of vector arc_matches_vec.
630  // Thus, we return size-1!
631 
632  return number_of_arcmatches;
633  }
634 
643  const ArcMatch::idx_type
644  am_index(const size_type &arcAIdx, const size_type &arcBIdx) const {
645  am_index_type::const_iterator it =
646  am_index_.find(idx_pair_t(arcAIdx, arcBIdx));
647  if (am_index_.end() != it) {
648  return it->second;
649  } else {
650  return invalid_am_index();
651  }
652  }
653 
663  const ArcMatch &
664  am_index(const Arc &arcA, const Arc &arcB) const {
665  return arc_matches_vec[am_index(arcA.idx(), arcB.idx())];
666  }
667  };
668 
669 } // end namespace LocARNA
670 
671 #endif // LOCARNA_ARC_MATCHES_HH
Represents anchor constraints between two sequences.
Definition: anchor_constraints.hh:66
Represents a match of two base pairs (arc match)
Definition: arc_matches.hh:35
const Arc & arcB() const
Definition: arc_matches.hh:72
BasePairs__Arc Arc
arc
Definition: arc_matches.hh:39
ArcMatch(const Arc *arcA, const Arc *arcB, idx_type idx)
Definition: arc_matches.hh:53
idx_type idx() const
Definition: arc_matches.hh:82
std::vector< int >::size_type size_type
size type
Definition: arc_matches.hh:37
size_type idx_type
arc match index
Definition: arc_matches.hh:38
const Arc & arcA() const
Definition: arc_matches.hh:62
class ArcMatches with additional mapping
Definition: arc_matches.hh:514
const ArcMatch::idx_type am_index(const size_type &arcAIdx, const size_type &arcBIdx) const
Lookup arc match index by pair of arc indices.
Definition: arc_matches.hh:644
const ArcMatch & am_index(const Arc &arcA, const Arc &arcB) const
Lookup arc match by pair of arcs.
Definition: arc_matches.hh:664
ArcMatchesIndexed(const RnaData &rnadataA, const RnaData &rnadataB, double min_prob, size_type max_length_diff, size_type max_diff_at_am, const MatchController &trace_controller, const AnchorConstraints &constraints)
construct from single base pair probabilities.
Definition: arc_matches.hh:579
const ArcMatch::idx_type invalid_am_index() const
the invalid arc match index
Definition: arc_matches.hh:625
ArcMatchesIndexed(const Sequence &seqA_, const Sequence &seqB_, const std::string &arcmatch_scores_file, int probability_scale, size_type max_length_diff, size_type max_diff_at_am, const MatchController &trace_controller, const AnchorConstraints &constraints)
construct with explicit arc match score list
Definition: arc_matches.hh:538
Definition: arc_matches.hh:188
bool operator()(const ArcMatch::idx_type &i, const ArcMatch::idx_type &j) const
Compare two arc matches.
Definition: arc_matches.hh:211
lex_greater_left_ends(const ArcMatches &arc_matches_)
Definition: arc_matches.hh:197
Definition: arc_matches.hh:226
size_type k
position k
Definition: arc_matches.hh:232
size_type j
position j
Definition: arc_matches.hh:231
tuple5(size_type i_, size_type j_, size_type k_, size_type l_, score_t score_)
Definition: arc_matches.hh:246
std::vector< int >::size_type size_type
size type
Definition: arc_matches.hh:228
size_type i
position i
Definition: arc_matches.hh:230
score_t score
Definition: arc_matches.hh:234
size_type l
position l
Definition: arc_matches.hh:233
Maintains the relevant arc matches and their scores.
Definition: arc_matches.hh:116
size_type num_arc_matches() const
total number of arc matches
Definition: arc_matches.hh:388
const BasePairs & get_base_pairsA() const
returns the base pairs object for RNA A
Definition: arc_matches.hh:349
const_iterator begin() const
begin of arc matches vector
Definition: arc_matches.hh:496
ArcMatches(const Sequence &seqA_, const Sequence &seqB_, const std::string &arcmatch_scores_file, int probability_scale, size_type max_length_diff, size_type max_diff_at_am, const MatchController &trace_controller, const AnchorConstraints &constraints)
construct with explicit arc match score list
Definition: arc_matches.cc:112
void get_min_right_ends(size_type al, size_type bl, size_type *min_ar, size_type *min_br) const
Definition: arc_matches.cc:358
Matrix< ArcMatchIdxVec > common_left_end_lists
Definition: arc_matches.hh:177
bool maintain_explicit_scores
Definition: arc_matches.hh:155
size_type max_diff_at_am
for max diff at arc matches heuristics
Definition: arc_matches.hh:131
BasePairs * bpsA
base pairs of RNA A
Definition: arc_matches.hh:124
Matrix< ArcMatchIdxVec > common_right_end_lists
Definition: arc_matches.hh:173
std::vector< score_t > scores
vector of scores (of arc matches with the same index)
Definition: arc_matches.hh:169
const AnchorConstraints & constraints
for constraints
Definition: arc_matches.hh:136
bool is_valid_arcmatch(const Arc &arcA, const Arc &arcB) const
Definition: arc_matches.cc:20
void make_scores_explicit(const Scoring &scoring)
Make arcmatch scores explicit.
Definition: arc_matches.cc:373
void write_arcmatch_scores(const std::string &arcmatch_scores_file, const Scoring &scoring) const
Definition: arc_matches.cc:285
const ArcMatch & inner_arc_match(const ArcMatch &am) const
Definition: arc_matches.hh:475
const BasePairs & get_base_pairsB() const
returns the base pairs object for RNA B
Definition: arc_matches.hh:355
bool exists_inner_arc_match(const ArcMatch &am) const
Definition: arc_matches.hh:465
void read_arcmatch_scores(const std::string &arcmatch_scores_file, int probability_scale)
Reads scores for arc matches.
Definition: arc_matches.cc:191
void init_inner_arc_matchs()
initialize the vector of inner arc match indices
Definition: arc_matches.cc:51
const MatchController & match_controller
Definition: arc_matches.hh:133
ArcMatchVec arc_matches_vec
vector of all maintained arc matches
Definition: arc_matches.hh:160
const_iterator end() const
end of arc matches vector
Definition: arc_matches.hh:502
size_type lenA
length of sequence A
Definition: arc_matches.hh:121
size_type lenB
length of sequence B
Definition: arc_matches.hh:122
ArcMatchVec::const_iterator const_iterator
const iterator over arc matches
Definition: arc_matches.hh:492
const ArcMatch & arcmatch(size_type idx) const
get arc match by its index
Definition: arc_matches.hh:394
void sort_right_adjacency_lists()
Definition: arc_matches.cc:77
size_type max_length_diff
for max-diff-am heuristics
Definition: arc_matches.hh:129
score_t get_score(const ArcMatch &am) const
Definition: arc_matches.hh:381
std::vector< int >::size_type size_type
size
Definition: arc_matches.hh:118
void get_max_right_ends(size_type al, size_type bl, size_type *max_ar, size_type *max_br, bool no_lonely_pairs) const
get the maximal right ends of any arc match with left ends (al,bl).
Definition: arc_matches.cc:314
ArcMatchIdxVec inner_arcmatch_idxs
vector of indices of inner arc matches
Definition: arc_matches.hh:180
bool explicit_scores() const
Definition: arc_matches.hh:362
BasePairs__Arc Arc
arc
Definition: arc_matches.hh:119
const ArcMatchIdxVec & common_left_end_list(size_type i, size_type j) const
list of all arc matches that share the common left end (i,j)
Definition: arc_matches.hh:411
const ArcMatchIdxVec & common_right_end_list(size_type i, size_type j) const
list of all arc matches that share the common right end (i,j)
Definition: arc_matches.hh:405
size_type number_of_arcmatches
Definition: arc_matches.hh:166
BasePairs * bpsB
base pairs of RNA B
Definition: arc_matches.hh:125
~ArcMatches()
clean up base pair objects
Definition: arc_matches.cc:14
Represents a base pair.
Definition: basepairs.hh:39
size_t idx() const
Definition: basepairs.hh:87
size_t left() const
Definition: basepairs.hh:67
Describes sequence and structure ensemble of an RNA.
Definition: basepairs.hh:108
abstract class that declares the method is_valid_match()
Definition: trace_controller.hh:175
represent sparsified data of RNA ensemble
Definition: rna_data.hh:44
Provides methods for the scoring of alignments.
Definition: scoring.hh:271
"Sequence View" of multiple alignment as array of column vectors
Definition: sequence.hh:17
Definition: aligner.cc:15
std::vector< ArcMatch::idx_type > ArcMatchIdxVec
Vector of arc match indices.
Definition: arc_matches.hh:91
std::vector< ArcMatch > ArcMatchVec
Vector of arc matches.
Definition: arc_matches.hh:88
size_t size_type
general size type
Definition: aux.hh:120
long int score_t
type of the locarna score as defined by the class Scoring
Definition: scoring_fwd.hh:13