LocARNA-2.0.0
ribosum.hh
1 #ifndef LOCARNA_RIBOSUM_HH
2 #define LOCARNA_RIBOSUM_HH
3 
4 #ifdef HAVE_CONFIG_H
5 #include <config.h>
6 #endif
7 
8 #include <array>
9 #include <cstdlib>
10 #include <fstream>
11 #include <sstream>
12 #include <math.h>
13 #include <string>
14 
15 #include "alphabet.hh"
16 #include "matrix.hh"
17 
18 namespace LocARNA {
19 
24  class Ribosum {
25  public:
30 
36  explicit Ribosum(const std::string &filename);
37 
39  virtual ~Ribosum();
40 
45  const matrix_t &
47  return bm_;
48  }
49 
54  const matrix_t &
56  return am_;
57  }
58 
61  const base_alphabet_type &
62  string_alphabet() const {
63  return basename_alphabet_;
64  }
65 
68  const char_alphabet_type &
69  alphabet() const {
71  }
72 
75  const std::string &
76  get_name() const {
77  return name_;
78  }
79 
85  double
86  basematch_score(char i, char j) const {
87  return bm_(alphabet().idx(i), alphabet().idx(j));
88  }
89 
98  double
99  arcmatch_score(char i, char j, char k, char l) const {
100  return am_(alphabet().idx(i) * 4 + alphabet().idx(j),
101  alphabet().idx(k) * 4 + alphabet().idx(l));
102  }
103 
104  friend std::ostream &
105  operator<<(std::ostream &out, const Ribosum &ribosum);
106  protected:
107  std::string name_;
111 
114 
117 
127  template<size_t N>
128  std::istream &
129  read_matrix(std::istream &in,
130  matrix_t &mat,
131  const Alphabet<std::string, N> &names) const;
132 
142  template<size_t N>
143  std::ostream &
144  write_matrix(std::ostream &out,
145  const matrix_t &mat,
146  const Alphabet<std::string, N> &alph) const;
147 
149  Ribosum();
150 
153  void
154  read_ribosum(std::istream &in);
155 
158  make_char_alphabet() const;
159 
166  void
167  set_basename_alphabet(const std::array<std::string,4> &a) {
170  }
171 
178  void
179  set_arcname_alphabet(const std::array<std::string,16> &a) {
181  }
182  };
183 
190  class RibosumFreq : public Ribosum {
191  public:
201  explicit RibosumFreq(const std::string &filename);
202 
206  double
207  base_prob(char i) const {
208  return base_probs_(alphabet().idx(i), 0);
209  }
210 
215  double
216  base_nonstruct_prob(char i) const {
217  return base_nonstruct_probs_(alphabet().idx(i), 0);
218  }
219 
222  const matrix_t &
223  get_base_probs() const {
224  return base_probs_;
225  }
226 
230  const matrix_t &
232  return base_nonstruct_probs_;
233  }
234 
239  double
240  basepair_prob(char i, char j) const {
241  return basepair_probs_(alphabet().idx(i), alphabet().idx(j));
242  }
243 
246  const matrix_t &
248  return basepair_probs_;
249  }
250 
255  double
256  basematch_prob(char i, char j) const {
257  return basematch_probs_(alphabet().idx(i), alphabet().idx(j));
258  }
259 
262  const matrix_t &
264  return basematch_probs_;
265  }
266 
275  double
276  arcmatch_prob(char i, char j, char k, char l) const {
277  return arcmatch_probs_(alphabet().idx(i) * 4 + alphabet().idx(j),
278  alphabet().idx(k) * 4 + alphabet().idx(l));
279  }
280 
283  const matrix_t &
285  return arcmatch_probs_;
286  }
287 
291  double
292  base_unpaired_prob(char i) const;
293 
307  double
308  basematch_score_corrected(char i, char j) const;
309 
312  void
313  print_basematch_scores_corrected(std::ostream &out) const;
314 
324  void
325  read_matrix(std::istream &in,
326  const std::string &header,
327  matrix_t &mat,
328  size_t xdim,
329  size_t ydim);
330 
338  void
339  write_ICC_code(std::ostream &out, const std::string &ribname) const;
340 
350  std::ostream &
351  write_matrix(std::ostream &out,
352  const std::string &name,
353  const Matrix<double> &mat) const;
354 
355  friend std::ostream &
356  operator<<(std::ostream &out, const RibosumFreq &ribosum);
357 
358  protected:
359  RibosumFreq();
360 
367 
368  void
369  write_CC_matrix(std::ostream &out,
370  const std::string &ribname,
371  const std::string &matname,
372  int x,
373  int y,
374  const Ribosum::matrix_t &m) const;
375 
376  private:
377  void
378  read_frequencies(std::istream &in);
379  };
380 
381 
382  template <size_t N>
383  std::istream &
384  Ribosum::read_matrix(std::istream &in,
385  Matrix<double> &mat,
386  const Alphabet<std::string, N> &alph) const {
387  auto siz = alph.size();
388 
389  std::string line;
390 
391  while (std::getline(in, line) && line == "")
392  ;
393 
394  {
395  std::istringstream linestream(line);
396 
397  for (size_t i = 0; i < siz; i++) {
398  std::string name;
399  linestream >> name;
400 
401  if (name != alph[i])
402  throw(std::ifstream::failure(
403  "Expecting correct table header. Found: " + line));
404  }
405  }
406 
407  mat.resize(siz, siz);
408  for (size_t i = 0; i < siz; i++) {
409  std::getline(in, line);
410  std::istringstream linestream(line);
411  std::string base;
412  linestream >> base;
413  if (base != alph[i])
414  throw(std::ifstream::failure("Expecting base name " +
415  alph[i] + " as row header"));
416 
417  for (size_t j = 0; j <= i; j++) {
418  double number;
419  linestream >> number;
420  mat(i, j) = mat(j, i) = number;
421  }
422  }
423  return in;
424  }
425 
426  template<size_t N>
427  std::ostream &
428  Ribosum::write_matrix(std::ostream &out,
429  const Matrix<double> &mat,
430  const Alphabet<std::string, N> &alph) const {
431  out << alph << std::endl;
432  out << mat << std::endl;
433  return out;
434  }
435 
436 
437 } // end namespace LocARNA
438 
439 #endif // LOCARNA_RIBOSUM_HH
void resize(size_type xdim, size_type ydim)
Definition: matrix.hh:102
Represents ribosum similarity matrices including raw frequencies.
Definition: ribosum.hh:190
std::ostream & write_matrix(std::ostream &out, const std::string &name, const Matrix< double > &mat) const
Definition: ribosum.cc:201
matrix_t base_probs_
matrix of base probabilities
Definition: ribosum.hh:361
const matrix_t & get_basematch_probs() const
Definition: ribosum.hh:263
double base_nonstruct_prob(char i) const
Definition: ribosum.hh:216
matrix_t base_nonstruct_probs_
Definition: ribosum.hh:362
const matrix_t & get_base_nonstruct_probs() const
Definition: ribosum.hh:231
double base_prob(char i) const
Definition: ribosum.hh:207
matrix_t arcmatch_probs_
matrix of arc match probabilities
Definition: ribosum.hh:366
const matrix_t & get_arcmatch_probs() const
Definition: ribosum.hh:284
matrix_t basematch_probs_
matrix of base match probabilities
Definition: ribosum.hh:365
double arcmatch_prob(char i, char j, char k, char l) const
Definition: ribosum.hh:276
void read_matrix(std::istream &in, const std::string &header, matrix_t &mat, size_t xdim, size_t ydim)
Definition: ribosum.cc:170
const matrix_t & get_basepair_probs() const
Definition: ribosum.hh:247
double basematch_prob(char i, char j) const
Definition: ribosum.hh:256
matrix_t basepair_probs_
matrix of base pair probabilities
Definition: ribosum.hh:364
friend std::ostream & operator<<(std::ostream &out, const RibosumFreq &ribosum)
Definition: ribosum.cc:218
void print_basematch_scores_corrected(std::ostream &out) const
Print the corrected score of base matches.
Definition: ribosum.cc:334
double basematch_score_corrected(char i, char j) const
Get corrected score for a base match.
Definition: ribosum.cc:325
double base_unpaired_prob(char i) const
probability that a nucleotide/base occurs unpaired
void write_ICC_code(std::ostream &out, const std::string &ribname) const
Definition: ribosum.cc:259
double basepair_prob(char i, char j) const
Definition: ribosum.hh:240
const matrix_t & get_base_probs() const
Definition: ribosum.hh:223
Represents ribosum similarity matrices.
Definition: ribosum.hh:24
char_alphabet_type char_basename_alphabet_
alphabet of base names as characters
Definition: ribosum.hh:116
std::ostream & write_matrix(std::ostream &out, const matrix_t &mat, const Alphabet< std::string, N > &alph) const
Definition: ribosum.hh:428
Alphabet< std::string, 4 > base_alphabet_type
type of alphabet
Definition: ribosum.hh:27
virtual ~Ribosum()
virtual destructor
Definition: ribosum.cc:39
matrix_t am_
16x16 matrix
Definition: ribosum.hh:109
void read_ribosum(std::istream &in)
Definition: ribosum.cc:42
std::istream & read_matrix(std::istream &in, matrix_t &mat, const Alphabet< std::string, N > &names) const
Definition: ribosum.hh:384
base_alphabet_type basename_alphabet_
alphabet of base names
Definition: ribosum.hh:112
void set_basename_alphabet(const std::array< std::string, 4 > &a)
Definition: ribosum.hh:167
Alphabet< char, 4 > char_alphabet_type
type of alphabet
Definition: ribosum.hh:28
char_alphabet_type make_char_alphabet() const
transform the basename alphabet to alphabet over characters
Definition: ribosum.cc:88
const std::string & get_name() const
Definition: ribosum.hh:76
std::string name_
name of ribosum
Definition: ribosum.hh:107
Ribosum()
Construct empty.
Definition: ribosum.cc:31
Matrix< double > matrix_t
type of a matrix
Definition: ribosum.hh:26
friend std::ostream & operator<<(std::ostream &out, const Ribosum &ribosum)
Definition: ribosum.cc:106
double arcmatch_score(char i, char j, char k, char l) const
Get arc match score.
Definition: ribosum.hh:99
Alphabet< std::string, 16 > arc_alphabet_type
type of alphabet
Definition: ribosum.hh:29
const char_alphabet_type & alphabet() const
Definition: ribosum.hh:69
arc_alphabet_type arcname_alphabet_
alphabet of arc names
Definition: ribosum.hh:113
double basematch_score(char i, char j) const
Get base match score.
Definition: ribosum.hh:86
const matrix_t & get_basematch_scores() const
Get base match scores.
Definition: ribosum.hh:46
void set_arcname_alphabet(const std::array< std::string, 16 > &a)
Definition: ribosum.hh:179
matrix_t bm_
scores for base matches, 4x4 matrix
Definition: ribosum.hh:108
const matrix_t & get_arcmatch_scores() const
Definition: ribosum.hh:55
const base_alphabet_type & string_alphabet() const
Definition: ribosum.hh:62
Definition: aligner.cc:15