1 #ifndef LOCARNA_SCORING_HH
2 #define LOCARNA_SCORING_HH
13 #include "scoring_fwd.hh"
15 #include "sequence.hh"
16 #include "arc_matches.hh"
17 #include "named_arguments.hh"
110 DEFINE_NAMED_ARG_FEATURE(exp_probA,
double);
112 DEFINE_NAMED_ARG_FEATURE(exp_probB,
double);
143 using valid_args = std::tuple<
190 template <
typename... Args>
195 "Invalid type in named arguments pack." );
197 auto args = std::make_tuple(argpack...);
200 exp_probA_ = get_named_arg<exp_probA>(args);
201 exp_probB_ = get_named_arg<exp_probB>(args);
204 match_ = get_named_arg_opt<match>(args);
205 mismatch_ = get_named_arg_opt<mismatch>(args);
206 indel_ = get_named_arg_opt<indel>(args);
207 indel_loop_ = get_named_arg_opt<indel_loop>(args);
208 indel_opening_ = get_named_arg_opt<indel_opening>(args);
209 indel_opening_loop_ = get_named_arg_opt<indel_opening_loop>(args);
210 ribosum_ = get_named_arg_opt<ribosum>(args);
211 ribofit_ = get_named_arg_opt<ribofit>(args);
212 unpaired_penalty_ = get_named_arg_opt<unpaired_penalty>(args);
213 struct_weight_ = get_named_arg_opt<struct_weight>(args);
214 tau_factor_ = get_named_arg_opt<tau_factor>(args);
215 exclusion_ = get_named_arg_opt<exclusion>(args);
216 temperature_alipf_ = get_named_arg_opt<temperature_alipf>(args);
217 stacking_ = get_named_arg_opt<stacking>(args);
218 new_stacking_ = get_named_arg_opt<new_stacking>(args);
219 mea_scoring_ = get_named_arg_opt<mea_scoring>(args);
220 mea_alpha_ = get_named_arg_opt<mea_alpha>(args);
221 mea_beta_ = get_named_arg_opt<mea_beta>(args);
222 mea_gamma_ = get_named_arg_opt<mea_gamma>(args);
223 probability_scale_ = get_named_arg_opt<probability_scale>(args);
363 std::vector<score_t> weightsA;
364 std::vector<score_t> weightsB;
366 std::vector<score_t> stack_weightsA;
368 std::vector<score_t> stack_weightsB;
374 precompute_sequence_identities();
386 return (
score_t)((d < 0) ? (d - 0.5) : (d + 0.5));
399 sigma_(
int i,
int j)
const;
430 std::vector<score_t> &weights,
431 std::vector<score_t> &stack_weights);
533 bool stacked =
false)
const;
551 template <
bool gapAorB>
576 template <
bool gapInA>
612 return params->exclusion_;
618 return params->indel_opening_;
629 return params->indel_opening_loop_;
692 bool stacked)
const {
697 ->explicit_scores()) {
699 <<
"ERROR sparse explicit scores is not supported!"
704 if (!
params->mea_scoring_) {
708 gapX<isA>(arcX.
left()) +
709 gapX<isA>(arcX.
right())) +
712 (stacked ? (isA ? stack_weightsA[arcX.
idx()]
713 : stack_weightsB[arcX.
idx()])
714 : (isA ? weightsA[arcX.
idx()] : weightsB[arcX.
idx()]));
716 std::cerr <<
"ERROR sparse mea_scoring is not supported!"
718 assert(!
params->mea_scoring_);
726 Scoring::gapX<true>(
size_type alignedToGap)
const {
727 return gapA(alignedToGap);
733 Scoring::gapX<false>(
size_type alignedToGap)
const {
734 return gapB(alignedToGap);
747 using pf_score_t = T;
787 return exp_sigma_tab(i, j);
799 return boltzmann_weight(
arcmatch(am));
812 return exp_gapcost_tabA[posA];
826 return exp_gapcost_tabB[posB];
832 return exp_indel_opening_score;
838 return exp_indel_opening_loop_score;
858 boltzmann_weight(
score_t s)
const {
859 return std::exp(s / (pf_score_t)
params->temperature_alipf_);
869 pf_score_t exp_indel_opening_score;
871 pf_score_t exp_indel_opening_loop_score;
874 std::vector<pf_score_t>
876 std::vector<pf_score_t>
881 template <
typename T>
889 :
Scoring(seqA,seqB,rna_dataA,rna_dataB,
890 arc_matches,match_probs,params) {
892 exp_indel_opening_score = boltzmann_weight(
params.indel_opening_);
893 exp_indel_opening_loop_score =
894 boltzmann_weight(
params.indel_opening_loop_);
899 template <
typename T>
905 exp_sigma_tab.resize(lenA + 1, lenB + 1);
909 exp_sigma_tab(i, j) = boltzmann_weight(sigma_tab(i, j));
914 template <
typename T>
921 exp_gapcost_tabA.resize(lenA + 1);
922 exp_gapcost_tabB.resize(lenB + 1);
924 for (
size_type i = 1; i < lenA + 1; i++) {
925 exp_gapcost_tabA[i] = boltzmann_weight(gapcost_tabA[i]);
928 for (
size_type i = 1; i < lenB + 1; i++) {
929 exp_gapcost_tabB[i] = boltzmann_weight(gapcost_tabB[i]);
Represents a match of two base pairs (arc match)
Definition: arc_matches.hh:35
Maintains the relevant arc matches and their scores.
Definition: arc_matches.hh:116
bool explicit_scores() const
Definition: arc_matches.hh:362
Represents a base pair.
Definition: basepairs.hh:39
size_t right() const
Definition: basepairs.hh:77
size_t idx() const
Definition: basepairs.hh:87
size_t left() const
Definition: basepairs.hh:67
Describes sequence and structure ensemble of an RNA.
Definition: basepairs.hh:108
Provide match probabilities.
Definition: edge_probs.hh:91
pos_type length() const
Length of multiple alignment.
Definition: multiple_alignment.hh:561
Definition: scoring.hh:745
std::vector< pf_score_t > PFScoreVector
Vector of partition functions.
Definition: scoring.hh:750
pf_score_t exp_indel_opening_loop() const
exp of cost to begin a new indel in loops
Definition: scoring.hh:837
pf_score_t exp_basematch(size_type i, size_type j) const
Boltzmann weight of score of a base match (without structure)
Definition: scoring.hh:786
pf_score_t exp_arcmatch(const ArcMatch &am) const
Boltzmann weight of score of arc match.
Definition: scoring.hh:798
void precompute_exp_gapcost()
Precompute the tables for Boltzmann weights of gapcost.
Definition: scoring.hh:916
pf_score_t exp_gapA(size_type posA) const
Boltzmann weight of score of deletion.
Definition: scoring.hh:810
PFScoring(const Sequence &seqA, const Sequence &seqB, const RnaData &rna_dataA, const RnaData &rna_dataB, const ArcMatches &arc_matches, const MatchProbs *match_probs, const ScoringParams &params)
construct scoring object
Definition: scoring.hh:882
pf_score_t exp_indel_opening() const
exp of cost to begin a new indel
Definition: scoring.hh:831
pf_score_t exp_gapB(size_type posB) const
Boltzmann weight of score of insertion.
Definition: scoring.hh:823
void precompute_exp_sigma()
Precompute all Boltzmann weights of base similarities.
Definition: scoring.hh:901
Family of Ribofit matrices.
Definition: ribofit.hh:25
Represents ribosum similarity matrices including raw frequencies.
Definition: ribosum.hh:190
represent sparsified data of RNA ensemble
Definition: rna_data.hh:44
Parameters for scoring.
Definition: scoring.hh:51
ScoringParams(Args... argpack)
Definition: scoring.hh:191
DEFINE_NAMED_ARG_DEFAULT_FEATURE(stacking, bool, false)
turn on/off stacking terms
DEFINE_NAMED_ARG_DEFAULT_FEATURE(indel_loop, score_t, -200)
cost per indel for loops (for linear or affine gap cost).
DEFINE_NAMED_ARG_DEFAULT_FEATURE(indel_opening_loop, score_t, -800)
DEFINE_NAMED_ARG_DEFAULT_FEATURE(indel_opening, score_t, -750)
cost per gap (for affine gap-cost). Use affine gap cost if non-zero.
DEFINE_NAMED_ARG_DEFAULT_FEATURE(exclusion, score_t, 0)
cost of one exclusion.
DEFINE_NAMED_ARG_DEFAULT_FEATURE(mea_gamma, score_t, 100)
weight for mea contribution "consensus"
DEFINE_NAMED_ARG_DEFAULT_FEATURE(indel, score_t, -150)
cost per indel (for linear or affine gap cost).
DEFINE_NAMED_ARG_DEFAULT_FEATURE(match, score_t, 50)
DEFINE_NAMED_ARG_DEFAULT_FEATURE(ribofit, const Ribofit *, nullptr)
DEFINE_NAMED_ARG_DEFAULT_FEATURE(mea_beta, score_t, 200)
weight for mea contribution "structure"
DEFINE_NAMED_ARG_DEFAULT_FEATURE(probability_scale, score_t, 10000)
DEFINE_NAMED_ARG_DEFAULT_FEATURE(unpaired_penalty, score_t, 0)
penalty/cost for unpaired bases matched/mismatched/gapped
DEFINE_NAMED_ARG_DEFAULT_FEATURE(struct_weight, score_t, 200)
DEFINE_NAMED_ARG_DEFAULT_FEATURE(new_stacking, bool, false)
turn on/off new stacking terms
DEFINE_NAMED_ARG_DEFAULT_FEATURE(mea_scoring, bool, false)
turn on/off mea scoring
DEFINE_NAMED_ARG_DEFAULT_FEATURE(mea_alpha, score_t, 0)
weight for mea contribution "unstructured"
DEFINE_NAMED_ARG_DEFAULT_FEATURE(ribosum, const RibosumFreq *, nullptr)
DEFINE_NAMED_ARG_DEFAULT_FEATURE(tau_factor, score_t, 50)
DEFINE_NAMED_ARG_DEFAULT_FEATURE(mismatch, score_t, 0)
constant cost of a base mismatch
Provides methods for the scoring of alignments.
Definition: scoring.hh:271
score_t gapB(size_type posB) const
Definition: scoring.hh:603
score_t lambda_
Definition: scoring.hh:291
double ribosum_arcmatch_prob(const Arc &arcA, const Arc &arcB) const
Definition: scoring.cc:318
const RnaData & rna_dataA
rna data for RNA A
Definition: scoring.hh:282
score_t indel_opening_loop() const
cost to begin a new indel in loops
Definition: scoring.hh:628
Scoring(const Sequence &seqA, const Sequence &seqB, const RnaData &rna_dataA, const RnaData &rna_dataB, const ArcMatches &arc_matches, const MatchProbs *match_probs, const ScoringParams &params)
construct scoring object
Definition: scoring.cc:28
const ScoringParams * params
a collection of parameters for scoring
Definition: scoring.hh:276
bool is_stackable_am(const ArcMatch &am) const
Is arc match stackable.
Definition: scoring.cc:567
const MatchProbs * match_probs
base match probabilities
Definition: scoring.hh:280
void precompute_sigma()
Precompute all base similarities.
Definition: scoring.cc:112
score_t arcmatch_stacked(const ArcMatch &am) const
Score of stacked arc match.
Definition: scoring.hh:563
const Sequence & seqA
sequence A
Definition: scoring.hh:284
bool stacking() const
Query stacking flag.
Definition: scoring.hh:653
BasePairs__Arc Arc
arc
Definition: scoring.hh:273
score_t gapA(size_type posA) const
Definition: scoring.hh:589
void subtract(std::vector< score_t > &v, score_t x) const
subtract from each element of a score_t vector v a value x
Definition: scoring.cc:55
score_t sigma_(int i, int j) const
Compute base similarity.
Definition: scoring.cc:141
void precompute_gapcost()
Precompute the tables for gapcost.
Definition: scoring.cc:272
score_t arcmatch(const ArcMatch &am, bool stacked=false) const
Score of arc match, support explicit arc match scores.
Definition: scoring.cc:541
score_t riboX_arcmatch_score(const Arc &arcA, const Arc &arcB) const
ribofit or ribosum arcmatch score contribution
Definition: scoring.cc:369
double prob_exp(size_type len) const
Expected base pair probability.
Matrix< score_t > sigma_tab
precomputed table of base match similarities
Definition: scoring.hh:358
score_t lambda() const
Get factor lambda for normalized alignment.
Definition: scoring.hh:349
score_t arcDel(const BasePairs__Arc &arc, bool stacked=false) const
Very basic interface, score of aligning a basepair to gap.
Definition: scoring.hh:691
std::vector< score_t > gapcost_tabB
table for gapcost in B
Definition: scoring.hh:361
bool is_stackable_arcB(const Arc &a) const
Is arc of B stackable.
Definition: scoring.cc:562
score_t loop_indel_score(const score_t score) const
multiply a score by the ratio of indel_loop/indel
Definition: scoring.hh:623
const Sequence & seqB
sequence B
Definition: scoring.hh:285
void precompute_weights()
Precompute weights/stacked weights for all arcs in A and B.
Definition: scoring.cc:240
score_t exclusion() const
cost of an exclusion
Definition: scoring.hh:611
score_t gapX(size_type alignedToGap) const
const RnaData & rna_dataB
rna data for RNA B
Definition: scoring.hh:283
const ArcMatches * arc_matches_
arc matches
Definition: scoring.hh:278
score_t round2score(double d) const
Round a double to score_t.
Definition: scoring.hh:385
bool is_stackable_arcA(const Arc &a) const
Is arc of A stackable.
Definition: scoring.cc:557
void modify_by_parameter(score_t lambda)
modify scoring by a parameter lambda.
Definition: scoring.cc:77
score_t indel_opening() const
cost to begin a new indel
Definition: scoring.hh:617
std::vector< score_t > gapcost_tabA
table for gapcost in A
Definition: scoring.hh:360
Matrix< size_t > identity
sequence identities in percent
Definition: scoring.hh:371
void apply_unpaired_penalty()
Definition: scoring.cc:65
score_t probToWeight(double p, double prob_exp) const
convert probability to weight for scoring
Definition: scoring.cc:251
const ArcMatches * arc_matches() const
Definition: scoring.hh:538
score_t basematch(size_type i, size_type j) const
Score of a match of bases (without structure)
Definition: scoring.hh:489
"Sequence View" of multiple alignment as array of column vectors
Definition: sequence.hh:17
Definition: aligner.cc:15
std::vector< infty_score_t > ScoreVector
vector of scores supporting infinity
Definition: scoring.hh:31
Matrix< double > ProbMatrix
matrix for storing probabilities
Definition: aligner_p.hh:27
Matrix< infty_score_t > ScoreMatrix
matrix of scores supporting infinity
Definition: scoring.hh:37
size_t size_type
general size type
Definition: aux.hh:120
long int score_t
type of the locarna score as defined by the class Scoring
Definition: scoring_fwd.hh:13
Definition: named_arguments.hh:56