LocARNA-2.0.0
edge_probs.hh
1 #ifndef LOCARNA_EDGE_PROBS
2 #define LOCARNA_EDGE_PROBS
3 
4 #ifdef HAVE_CONFIG_H
5 #include <config.h>
6 #endif
7 
8 #include <string>
9 #include <cmath>
10 
11 #include "aux.hh"
12 #include "matrix.hh"
13 
14 namespace LocARNA {
15 
// Forward declarations: these types appear in this header only as
// references / reference parameters.
16  class StralScore; // implements the stral-like scoring function (stral_score.hh)
17  class RnaData; // represents sparsified data of an RNA ensemble (rna_data.hh)
18  template <class T, size_t N> class Alphabet; // used below as Alphabet<char, 4>
19  class Sequence; // "sequence view" of a multiple alignment as array of column vectors (sequence.hh)
20  class TraceController; // controls the matrix cells valid for traces (trace_controller.hh)
21  class FreeEndgaps; // description of free end gaps (free_endgaps.hh)
22 
//! @brief Provides probabilities for alignment edges (match or trace
//! probabilities etc).
//!
//! Base class of MatchProbs and TraceProbs. The public interface is
//! read-only (lenA(), lenB(), prob(), write_sparse()); reading and default
//! construction are reserved for derived classes.
28  class EdgeProbs {
29  public:
    //! size type
30  using size_type=size_t;
31 
    //! @brief Construct from input stream (sparse format)
    //! @param in input stream; handed to read_sparse()
    //! @param lenA forwarded to read_sparse()
    //! @param lenB forwarded to read_sparse()
    //! NOTE(review): lenA/lenB are presumably the lengths of the first and
    //! second sequence; read_sparse() is defined in edge_probs.cc -- confirm.
35  EdgeProbs(std::istream &in, size_type lenA, size_type lenB) {
36  read_sparse(in, lenA, lenB);
37  }
38 
    //! @brief Write the probabilities to a stream (sparse format)
    //! @param out output stream
    //! @param threshold NOTE(review): presumably a probability cutoff for the
    //! entries that are written; the definition lives in edge_probs.cc and is
    //! not visible here -- confirm.
    //! @return output stream reference (presumably @p out -- confirm)
43  std::ostream &
44  write_sparse(std::ostream &out, double threshold) const;
45 
    //! @brief Get the length of the first sequence
    //! @return first component of the size pair of the probability matrix
47  size_type
48  lenA() const {
49  return probs_.sizes().first;
50  }
51 
    //! @brief Get the length of the second sequence
    //! @return second component of the size pair of the probability matrix
53  size_type
54  lenB() const {
55  return probs_.sizes().second;
56  }
57 
    //! @brief Return the probability stored for the index pair (i, j)
    //! @param i index into the first dimension of the matrix
    //! @param j index into the second dimension of the matrix
    //! @return probs_(i, j)
    //! @note The index range is checked by assert only, i.e. not at all when
    //! NDEBUG is defined.
    //! NOTE(review): this header does not include <cassert> itself; assert
    //! is presumably made available through aux.hh -- verify.
59  double
60  prob(size_t i, size_t j) const {
61  assert(i < probs_.sizes().first);
62  assert(j < probs_.sizes().second);
63 
64  return probs_(i, j);
65  }
66 
67  protected:
    // NOTE(review): original line 68 is not part of this listing. The
    // cross-reference text at the end of the file gives it as
    // "Matrix< double > probs_" (the base match probabilities).
69 
    //! @brief Read the probabilities from an input stream (sparse format)
    //! @param in input stream
    //! @param lenA see constructor
    //! @param lenB see constructor
    //! @return input stream reference (presumably @p in -- confirm in
    //! edge_probs.cc)
74  std::istream &
75  read_sparse(std::istream &in, size_type lenA, size_type lenB);
76 
    //! @brief Read the probabilities from a named file
    //! @param filename name of the input file
    //! @param lenB see constructor
    // NOTE(review): original line 83 is not part of this listing. The
    // cross-reference text gives the full parameter list as
    // (const std::string &filename, size_type lenA, size_type lenB).
81  void
82  read_sparse(const std::string &filename,
84  size_type lenB);
    //! Default construction is reserved for derived classes; it does not
    //! call read_sparse().
85  EdgeProbs() {}
86  };
87 
//! @brief Provide match probabilities
//!
//! Adds no members of its own; it only forwards construction to EdgeProbs.
91  class MatchProbs : public EdgeProbs {
92  public:
    //! @brief Construct from input stream (sparse format)
    //! @param in input stream
    //! @param lenA forwarded unchanged to the EdgeProbs constructor
    //! @param lenB forwarded unchanged to the EdgeProbs constructor
96  MatchProbs(std::istream &in, size_type lenA, size_type lenB)
97  : EdgeProbs(in, lenA, lenB)
98  {
99  }
100  protected:
    //! Default construction is reserved for derived classes.
101  MatchProbs() {}
102  };
103 
//! @brief Provide Gotoh partition functions
//!
//! @tparam pf_score_t numeric type of the partition function values; fail()
//! requires that it can be compared with zero and passed to std::isnan /
//! std::isinf.
117  template <class pf_score_t>
118  class PFGotoh {
119  public:
    //! size type
120  using size_type=size_t;
121 
    //! @brief Construct to provide partial partition functions from
    //! Gotoh-like matrices
    //!
    //! Only declared here; the definition is not part of this listing
    //! (presumably in edge_probs.icc, which is included at the end of this
    //! header -- confirm). Parameter meanings below are taken from the names
    //! only and should be confirmed against the definition.
    //! @param rnaA RNA data of the first input
    //! @param rnaB RNA data of the second input
    //! @param trace_controller controls the matrix cells valid for traces
    //! @param sim_mat similarity matrix
    //! @param alphabet alphabet of size 4 over char
    //! @param gap_opening gap opening parameter
    //! @param gap_extension gap extension parameter
    //! @param pf_struct_weight structure weight parameter
    //! @param temp temperature parameter
    //! @param free_endgaps description of free end gaps
    //! @param flag_local local-mode flag
140  PFGotoh(const RnaData &rnaA,
141  const RnaData &rnaB,
142  const TraceController &trace_controller,
143  const Matrix<double> &sim_mat,
144  const Alphabet<char, 4> &alphabet,
145  double gap_opening,
146  double gap_extension,
147  double pf_struct_weight,
148  double temp,
149  const FreeEndgaps &free_endgaps,
150  bool flag_local);
    //! @brief Get the partition function
    //! @return const reference to the stored value z_
155  const pf_score_t&
156  z() const {return z_;}
157 
158  protected:
159 
    // NOTE(review): what lenA_, lenB_, temp_ and flag_local_ hold is set in
    // the constructor definition, which is not visible here; the names
    // suggest they mirror the corresponding constructor arguments -- confirm.
160  size_type lenA_;
161  size_type lenB_;
162 
163  double temp_;
164 
165  bool flag_local_;
166 
    //! value returned by z() and tested by fail()
167  pf_score_t z_;
168 
    // NOTE(review): original lines 170, 173, 176 and 178-180 are not part of
    // this listing. The cross-reference text at the end of the file lists
    // them as Matrix< pf_score_t > members:
    //   zM_  (170) pfs over alignments ending in match i~j
    //   zA_  (173) pfs over alignments ending w/ gap in A
    //   zB_  (176) pfs over alignments ending w/ gap in B
    //   zMr_ (178), zAr_ (179), zBr_ (180) the respective reverse matrices
171 
174 
177 
181 
    //! @brief Perform the partition version of the Gotoh algorithm
    // NOTE(review): original line 195 is not part of this listing. The
    // cross-reference text gives the head of the declaration as
    // "pf_gotoh(Matrix< pf_score_t > &zM, ...", followed by the parameters
    // below.
194  void
196  Matrix<pf_score_t> &zA,
197  Matrix<pf_score_t> &zB,
198  const TraceController &trace_controller,
199  const StralScore &score,
200  const FreeEndgaps &free_endgaps);
201 
    //! @brief Check whether the partition function value is unusable
    //! @return true iff z_ equals zero, is NaN, or is infinite
202  bool fail() const {return z_==(pf_score_t)0 || std::isnan(z_) || std::isinf(z_);}
203 
204  };
205 
//! @brief Provide trace probabilities
//!
//! Adds no members of its own; it only forwards construction to EdgeProbs.
210  class TraceProbs : public EdgeProbs {
211  public:
    //! @brief Construct from input stream (sparse format)
    //! @param in input stream
    //! @param lenA forwarded unchanged to the EdgeProbs constructor
    //! @param lenB forwarded unchanged to the EdgeProbs constructor
215  TraceProbs(std::istream &in, size_type lenA, size_type lenB)
216  : EdgeProbs(in, lenA, lenB)
217  {}
218  protected:
    //! Default construction is reserved for derived classes.
219  TraceProbs() {}
220  };
221 
//! @brief Provide match probabilities calculated by pf approach
//!
//! Publicly a MatchProbs. PFGotoh<pf_score_t> is listed without an access
//! specifier and is therefore a private base of this class; fail() and z()
//! are re-exposed below through explicit forwarding.
//!
//! @tparam pf_score_t type of partition function values (see PFGotoh)
231  template <class pf_score_t>
232  class PFMatchProbs : public MatchProbs, PFGotoh<pf_score_t> {
233  public:
235 
    //! @brief Construct; run computation of pfs and probabilities
    //!
    //! Only declared here; the definition is not part of this listing. The
    //! parameter list is identical to that of the PFGotoh constructor
    //! (presumably forwarded to it -- confirm against the definition).
255  PFMatchProbs(const RnaData &rnaA,
256  const RnaData &rnaB,
257  const TraceController &trace_controller,
258  const Matrix<double> &sim_mat,
259  const Alphabet<char, 4> &alphabet,
260  double gap_opening,
261  double gap_extension,
262  double pf_struct_weight,
263  double temp,
264  const FreeEndgaps &free_endgaps,
265  bool flag_local);
266 
    //! @brief Forward to PFGotoh<pf_score_t>::fail()
    //! @return result of the base-class check of the partition function
267  bool fail() const {return PFGotoh<pf_score_t>::fail();}
268 
    //! @brief Forward to PFGotoh<pf_score_t>::z()
    //! @return const reference to the partition function held by the base
269  const pf_score_t &
270  z() const {return PFGotoh<pf_score_t>::z();}
271  };
272 
273 
//! @brief Provide trace probabilities calculated by pf approach
//!
//! Publicly a TraceProbs. PFGotoh<pf_score_t> is listed without an access
//! specifier and is therefore a private base of this class; fail() and z()
//! are re-exposed below through explicit forwarding.
//!
//! @tparam pf_score_t type of partition function values (see PFGotoh)
278  template <class pf_score_t>
279  class PFTraceProbs : public TraceProbs, PFGotoh<pf_score_t> {
280  public:
282 
    //! @brief Construct; run computation of pfs and probabilities
    //!
    //! Only declared here; the definition is not part of this listing. The
    //! parameter list is identical to that of the PFGotoh constructor
    //! (presumably forwarded to it -- confirm against the definition).
300  PFTraceProbs(const RnaData &rnaA,
301  const RnaData &rnaB,
302  const TraceController &trace_controller,
303  const Matrix<double> &sim_mat,
304  const Alphabet<char, 4> &alphabet,
305  double gap_opening,
306  double gap_extension,
307  double pf_struct_weight,
308  double temp,
309  const FreeEndgaps &free_endgaps,
310  bool flag_local);
311 
    //! @brief Forward to PFGotoh<pf_score_t>::fail()
    //! @return result of the base-class check of the partition function
312  bool fail() const {return PFGotoh<pf_score_t>::fail();}
313 
    //! @brief Forward to PFGotoh<pf_score_t>::z()
    //! @return const reference to the partition function held by the base
314  const pf_score_t &
315  z() const {return PFGotoh<pf_score_t>::z();}
316  };
317 
318  /**
319  * @brief Provide match probabilities calculated by pair HMM (deprecated)
320  *
321  * For computing the probabilities the class
322  * uses a pairHMM analogously to PROBCONS.
323  * Also, the class reads transition probabilities
324  * from a file in the format of Probcons, which
325  * allows to use their parameter files
326  *
327  * @note the pairHMM approach does not work very well yet (parametrization
328  * is not obvious, correctness of implementation is still critical, uses
329  * only first sequence of multiple alignments,
330  * numerical problems without log transformation). Instead of fixing
331  * the issues of the pairHMM implementation, the probalign-like partition
332  * function approach was implemented.
333  */
334  class PairHMMMatchProbs : public MatchProbs {
335  public:
336 
    // NOTE(review): original line 347 is not part of this listing. According
    // to the cross-reference text at the end of the file it is the head of
    // the nested class PairHMMParams ("Maintains parameter for computing
    // match probabilities"); the members up to line 378 belong to that class.
348  public:
349  // ------------------------------------------------------------
350  // transition probabilities
351  // there are three states M, X, Y (plus implicitly start and end
352  // state)
353  double initM; //!< transition probability initM
354  double initX; //!< transition probability initX
355  double initY; //!< transition probability initY
356  double startX; //!< transition probability startX
357  double startY; //!< transition probability startY
358  double extendM; //!< transition probability extendM
359  double extendX; //!< transition probability extendX
360  double extendY; //!< transition probability extendY
361  double startMFromX; //!< transition probability startMFromX
362  double startMFromY; //!< transition probability startMFromY
363 
364  std::string basenames; //!< base names
365 
    // NOTE(review): original lines 366 and 368 are not part of this listing.
    // The cross-reference text lists "Matrix< double > emmission" (366,
    // matrix of emission probabilities) and "std::vector< double >
    // background" (368, vector of background probabilities); the type on
    // line 367 belongs to the latter declaration.
    // NOTE(review): std::vector is used without a direct <vector> include in
    // this header; presumably provided through aux.hh or matrix.hh -- verify.
367  std::vector<double>
369 
    //! @brief Construct from a parameter file
    //! @param filename name of the parameter file (per the class comment
    //! above, in the format of Probcons)
377  explicit PairHMMParams(const std::string &filename);
378  };
379 
    //! @brief Construct predicting pair probs by pairHMM
    //!
    //! Has no member-initializer list, so the MatchProbs base is
    //! default-constructed; the body only calls pairHMM_probs().
    // NOTE(review): original line 386 is not part of this listing. The
    // cross-reference text gives the head of the declaration as
    // "PairHMMMatchProbs(const Sequence &seqA, ...".
387  const Sequence &seqB,
388  const PairHMMParams &params) {
389  pairHMM_probs(seqA, seqB, params);
390  }
391 
392  protected:
    //! @brief Computation step invoked by the constructor
    //! @param seqA first sequence
    //! @param seqB second sequence
    //! @param params pair HMM parameters
    //! NOTE(review): defined in edge_probs.cc; presumably fills the inherited
    //! probability matrix -- confirm against the definition.
398  void
399  pairHMM_probs(const Sequence &seqA,
400  const Sequence &seqB,
401  const PairHMMParams &params);
402  };
403 }
404 
405 #include "edge_probs.icc"
406 
407 
408 #endif // LOCARNA_EDGE_PROBS
Provides probabilities for alignment edges (match or trace probabilities etc).
Definition: edge_probs.hh:28
EdgeProbs(std::istream &in, size_type lenA, size_type lenB)
construct from input stream (sparse format)
Definition: edge_probs.hh:35
std::ostream & write_sparse(std::ostream &out, double threshold) const
Definition: edge_probs.cc:35
size_t size_type
size
Definition: edge_probs.hh:30
size_type lenB() const
get the length of the second sequence
Definition: edge_probs.hh:54
Matrix< double > probs_
the base match probabilities
Definition: edge_probs.hh:68
size_type lenA() const
get the length of the first sequence
Definition: edge_probs.hh:48
double prob(size_t i, size_t j) const
return the match probability for the two bases
Definition: edge_probs.hh:60
void read_sparse(const std::string &filename, size_type lenA, size_type lenB)
std::istream & read_sparse(std::istream &in, size_type lenA, size_type lenB)
Definition: edge_probs.cc:19
Description of free end gaps.
Definition: free_endgaps.hh:15
Provide match probabilities.
Definition: edge_probs.hh:91
MatchProbs(std::istream &in, size_type lenA, size_type lenB)
construct from input stream (sparse format)
Definition: edge_probs.hh:96
size_pair_type sizes() const
Definition: matrix.hh:91
Provide Gotoh partition functions.
Definition: edge_probs.hh:118
Matrix< pf_score_t > zBr_
reverse zB_
Definition: edge_probs.hh:180
Matrix< pf_score_t > zB_
pfs over alignments ending w/ gap in B
Definition: edge_probs.hh:176
Matrix< pf_score_t > zM_
pfs over alignments ending in match i~j
Definition: edge_probs.hh:170
size_t size_type
size
Definition: edge_probs.hh:120
const pf_score_t & z() const
Get the partition function.
Definition: edge_probs.hh:156
Matrix< pf_score_t > zAr_
reverse zA_
Definition: edge_probs.hh:179
Matrix< pf_score_t > zA_
pfs over alignments ending w/ gap in A
Definition: edge_probs.hh:173
Matrix< pf_score_t > zMr_
reverse zM_
Definition: edge_probs.hh:178
PFGotoh(const RnaData &rnaA, const RnaData &rnaB, const TraceController &trace_controller, const Matrix< double > &sim_mat, const Alphabet< char, 4 > &alphabet, double gap_opening, double gap_extension, double pf_struct_weight, double temp, const FreeEndgaps &free_endgaps, bool flag_local)
Construct to provide partial partition functions from Gotoh-like matrices.
void pf_gotoh(Matrix< pf_score_t > &zM, Matrix< pf_score_t > &zA, Matrix< pf_score_t > &zB, const TraceController &trace_controller, const StralScore &score, const FreeEndgaps &free_endgaps)
perform the partition version of Gotoh's algorithm
Provide match probabilities calculated by pf approach.
Definition: edge_probs.hh:232
PFMatchProbs(const RnaData &rnaA, const RnaData &rnaB, const TraceController &trace_controller, const Matrix< double > &sim_mat, const Alphabet< char, 4 > &alphabet, double gap_opening, double gap_extension, double pf_struct_weight, double temp, const FreeEndgaps &free_endgaps, bool flag_local)
construct; run computation of pfs and probabilities
Provide trace probabilities calculated by pf approach.
Definition: edge_probs.hh:279
PFTraceProbs(const RnaData &rnaA, const RnaData &rnaB, const TraceController &trace_controller, const Matrix< double > &sim_mat, const Alphabet< char, 4 > &alphabet, double gap_opening, double gap_extension, double pf_struct_weight, double temp, const FreeEndgaps &free_endgaps, bool flag_local)
construct; run computation of pfs and probabilities
Maintains parameters for computing match probabilities.
Definition: edge_probs.hh:347
double extendX
transition probability extendX
Definition: edge_probs.hh:359
double extendY
transition probability extendY
Definition: edge_probs.hh:360
double startX
transition probability startX
Definition: edge_probs.hh:356
double extendM
transition probability extendM
Definition: edge_probs.hh:358
Matrix< double > emmission
matrix of emission probabilities
Definition: edge_probs.hh:366
std::vector< double > background
vector of background probabilities
Definition: edge_probs.hh:368
double startY
transition probability startY
Definition: edge_probs.hh:357
double initM
transition probability initM
Definition: edge_probs.hh:353
double initX
transition probability initX
Definition: edge_probs.hh:354
double startMFromY
transition probability startMFromY
Definition: edge_probs.hh:362
std::string basenames
base names
Definition: edge_probs.hh:364
double startMFromX
transition probability startMFromX
Definition: edge_probs.hh:361
PairHMMParams(const std::string &filename)
Definition: edge_probs.cc:49
double initY
transition probability initY
Definition: edge_probs.hh:355
Definition: edge_probs.hh:334
PairHMMMatchProbs(const Sequence &seqA, const Sequence &seqB, const PairHMMParams &params)
construct predicting pair probs by pairHMM
Definition: edge_probs.hh:386
void pairHMM_probs(const Sequence &seqA, const Sequence &seqB, const PairHMMParams &params)
Definition: edge_probs.cc:121
represent sparsified data of RNA ensemble
Definition: rna_data.hh:44
"Sequence View" of multiple alignment as array of column vectors
Definition: sequence.hh:17
Implements the stral-like scoring function.
Definition: stral_score.hh:29
Controls the matrix cells valid for traces.
Definition: trace_controller.hh:200
Provide trace probabilities.
Definition: edge_probs.hh:210
TraceProbs(std::istream &in, size_type lenA, size_type lenB)
construct from input stream (sparse format)
Definition: edge_probs.hh:215
Definition: aligner.cc:15
size_t size_type
general size type
Definition: aux.hh:120