1 #ifndef LOCARNA_MULTIPLE_ALIGNMENT_HH
2 #define LOCARNA_MULTIPLE_ALIGNMENT_HH
15 #include "scoring_fwd.hh"
16 #include "sequence_annotation.hh"
26 template <
class T,
size_t N>
100 return annotation_tags.size();
114 typedef std::pair<pos_type, pos_type>
135 : name_(
name), description_(
""), seq_(
seq) {}
146 const std::string &
seq)
239 std::string description_;
252 using value_type = char;
261 : ma_(ma), col_index_(col_index) {
262 assert(1 <= col_index);
263 assert(col_index <= ma.
length());
295 bool ret = this->
size() == ac.
size();
297 ret = (this->ma_.
seqentry(i).
seq()[this->col_index_] ==
312 return !(*
this == ac);
331 return col_[row_index_];
336 return row_index_ != it.row_index_ || col_!=it.col_;
343 : col_(col), row_index_(row_index) {
446 const std::string &nameB,
447 const std::string &alistringA,
448 const std::string &alistringB);
464 bool only_local =
false,
465 bool special_gap_symbols =
false);
511 return alig_.empty();
543 return annotations_.find(annotype) != annotations_.end();
562 return alig_.empty() ? 0 : alig_[0].seq().length();
565 using value_type = SeqEntry;
574 return alig_.begin();
592 contains(
const std::string &name)
const;
604 index(
const std::string &name)
const {
605 str2idx_map_t::const_iterator it = name2idx_.find(name);
606 assert(it != name2idx_.end());
630 return alig_[
index(name)];
784 write(std::ostream &out,
803 write(std::ostream &out,
822 const std::string &name,
823 const std::string &sequence,
824 size_t namewidth)
const;
840 write(std::ostream &out,
881 bool special_gap_symbols);
889 class annotation_tags_t :
public std::map< FormatType, std::map< AnnoType, std::string> > {
896 typedef std::map<std::string, size_type> str2idx_map_t;
899 typedef std::map<AnnoType, SequenceAnnotation> annotation_map_t;
905 static annotation_tags_t annotation_tags;
908 std::vector<SeqEntry> alig_;
911 annotation_map_t annotations_;
917 str2idx_map_t name2idx_;
934 deviation2(
const string1 &a1,
937 const string1 &ref2);
954 pairwise_match_score(
const SeqEntry &a1,
956 const SeqEntry &ref1,
957 const SeqEntry &ref2,
958 bool score_common_gaps);
970 static std::vector<int>
971 match_vector(
const string1 &s,
const string1 &t);
983 static std::vector<int>
984 match_vector2(
const string1 &s,
const string1 &t);
995 count_matches(
const SeqEntry &a1,
const SeqEntry &a2);
1010 count_exclusive_matches(
const SeqEntry &a1,
1012 const SeqEntry &ref1,
1013 const SeqEntry &ref2);
1033 pairwise_deviation_score(
const SeqEntry &a1,
1035 const SeqEntry &ref1,
1036 const SeqEntry &ref2);
1040 create_name2idx_map();
1051 read_clustallike(std::istream &in,
FormatType format);
1060 read_stockholm(std::istream &in);
1075 read_clustalw(std::istream &in);
1097 read_fasta(std::istream &in);
1107 operator<<(std::ostream &out,
const MultipleAlignment &ma);
1113 return std::all_of(alig_.begin(), alig_.end(),
1114 [&a](
const auto &row) {
1115 return std::all_of(row.seq().begin(), row.seq().end(),
1116 [&a](const auto &c) {
Definition: alignment.hh:73
Represents a structure-annotated sequence alignment.
Definition: alignment.hh:83
Specifies an alphabet of static size.
Definition: alphabet.hh:21
const iterator
Definition: multiple_alignment.hh:320
read-only proxy class representing a column of the alignment
Definition: multiple_alignment.hh:250
bool operator!=(const AliColumn &ac) const
Test inequality.
Definition: multiple_alignment.hh:311
bool operator==(const AliColumn &ac) const
Test equality.
Definition: multiple_alignment.hh:294
auto end() const
end iterator (always const)
Definition: multiple_alignment.hh:369
const char & operator[](size_type row_index) const
element access
Definition: multiple_alignment.hh:273
auto begin() const
begin iterator (always const)
Definition: multiple_alignment.hh:361
size_type size() const
Size / Number of rows.
Definition: multiple_alignment.hh:282
AliColumn(const MultipleAlignment &ma, size_type col_index)
Construct from multiple alignment column.
Definition: multiple_alignment.hh:260
A row in a multiple alignment.
Definition: multiple_alignment.hh:111
pos_pair_t col_to_pos(pos_type col) const
Definition: multiple_alignment.cc:540
std::pair< pos_type, pos_type > pos_pair_t
pair of positions
Definition: multiple_alignment.hh:115
SeqEntry(const std::string &name, const std::string &description, const string1 &seq)
Construct from strings name, description and 1-based string seq.
Definition: multiple_alignment.hh:157
void push_back(char c)
append character to sequence
Definition: multiple_alignment.hh:227
const std::string & name() const
(read-only) access to name
Definition: multiple_alignment.hh:166
void set_seq(const string1 &seq)
write access to seq
Definition: multiple_alignment.hh:233
pos_type pos_to_col(pos_type pos) const
map sequence position -> alignment column.
Definition: multiple_alignment.cc:520
SeqEntry(const std::string &name, const std::string &seq)
Construct from strings name and seq.
Definition: multiple_alignment.hh:124
void reverse()
reverse sequence
Definition: multiple_alignment.hh:218
size_type length_wogaps() const
length without gaps
Definition: multiple_alignment.cc:509
SeqEntry(const std::string &name, const string1 &seq)
Construct from strings name and 1-based string seq.
Definition: multiple_alignment.hh:134
SeqEntry(const std::string &name, const std::string &description, const std::string &seq)
Construct from strings name, description and seq.
Definition: multiple_alignment.hh:144
const std::string & description() const
(read-only) access to description
Definition: multiple_alignment.hh:172
const string1 & seq() const
(read-only) access to seq
Definition: multiple_alignment.hh:178
Represents a multiple alignment.
Definition: multiple_alignment.hh:65
void write_debug(std::ostream &out=std::cout) const
Print contents of object to stream.
Definition: multiple_alignment.cc:957
void operator+=(const AliColumn &c)
Append a column.
Definition: multiple_alignment.cc:1117
size_type index(const std::string &name) const
Access index by name.
Definition: multiple_alignment.hh:604
MultipleAlignment(const MultipleAlignment &ma)=default
Copy construct.
void set_annotation(const AnnoType &annotype, const SequenceAnnotation &annotation)
Write access to annotation.
Definition: multiple_alignment.hh:531
size_type num_of_rows() const
Number of rows of multiple alignment.
Definition: multiple_alignment.hh:497
size_type deviation(const MultipleAlignment &ma) const
Deviation of a multiple alignment from a reference alignment.
Definition: multiple_alignment.cc:668
void append(const SeqEntry &seqentry)
Append sequence entry.
Definition: multiple_alignment.cc:1102
void prepend(const SeqEntry &seqentry)
Prepend sequence entry.
Definition: multiple_alignment.cc:1109
FormatType
file format type for multiple alignments
Definition: multiple_alignment.hh:70
@ STOCKHOLM
stockholm file format
@ CLUSTAL
(extended) clustal file format
bool empty() const
Emptiness check.
Definition: multiple_alignment.hh:510
AliColumn column(size_type col_index) const
Access alignment column.
Definition: multiple_alignment.hh:717
std::vector< SeqEntry >::const_iterator const_iterator
const iterator of sequence entries
Definition: multiple_alignment.hh:380
double avg_deviation_score(const MultipleAlignment &ma) const
Average deviation score.
Definition: multiple_alignment.cc:733
const_iterator end() const
End for read-only traversal of name/sequence pairs.
Definition: multiple_alignment.hh:582
std::vector< SeqEntry >::iterator iterator
iterator of sequence entries
Definition: multiple_alignment.hh:382
bool contains(const std::string &name) const
Test whether name exists.
Definition: multiple_alignment.cc:613
std::ostream & write(std::ostream &out, FormatType format=MultipleAlignment::FormatType::CLUSTAL) const
Write alignment to stream.
Definition: multiple_alignment.cc:1088
static size_t num_of_annotypes()
number of annotation types
Definition: multiple_alignment.hh:99
std::string consensus_sequence() const
Consensus sequence of multiple alignment.
Definition: multiple_alignment.cc:964
AnnoType
type of sequence annotation. enumerates legal annotation types
Definition: multiple_alignment.hh:81
@ anchors
anchor annotation (anchor constraints)
@ consensus_structure
consensus structure annotation (consensus structure)
bool has_annotation(const AnnoType &annotype) const
Definition: multiple_alignment.hh:542
const SeqEntry & seqentry(size_type index) const
Access name/sequence pair by index.
Definition: multiple_alignment.hh:618
bool checkAlphabet(const Alphabet< char, N > &alphabet) const
check character constraints
Definition: multiple_alignment.hh:1112
double sps(const MultipleAlignment &ma, bool compalign=true) const
Sum-of-pairs score between a multiple alignment and a reference alignment.
Definition: multiple_alignment.cc:690
void init(const AlignmentEdges &edges, const Sequence &seqA, const Sequence &seqB, bool special_gap_symbols)
Initialize from alignment edges and sequences.
Definition: multiple_alignment.cc:186
MultipleAlignment(MultipleAlignment &&ma)=default
Move construct.
pos_type length() const
Length of multiple alignment.
Definition: multiple_alignment.hh:561
bool is_proper() const
Test whether alignment is proper.
Definition: multiple_alignment.cc:597
const SequenceAnnotation & annotation(const AnnoType &annotype) const
Read access of annotation by prefix.
Definition: multiple_alignment.cc:587
static const std::vector< AnnoType > AnnoTypes
collection of the annotation types
Definition: multiple_alignment.hh:94
virtual ~MultipleAlignment()
virtual destructor
Definition: multiple_alignment.cc:250
std::ostream & write_name_sequence_line(std::ostream &out, const std::string &name, const std::string &sequence, size_t namewidth) const
Write formatted line of name and sequence.
Definition: multiple_alignment.cc:995
void reverse()
reverse the multiple alignment
Definition: multiple_alignment.cc:1095
MultipleAlignment()
Construct empty.
Definition: multiple_alignment.cc:64
MultipleAlignment & operator=(const MultipleAlignment &ma)=default
Copy assignment.
static const std::vector< FormatType > FormatTypes
collection of the format types
Definition: multiple_alignment.hh:77
void normalize_rna_symbols()
normalize rna symbols
Definition: multiple_alignment.cc:577
const SeqEntry & seqentry(const std::string &name) const
Access name/sequence pair by name.
Definition: multiple_alignment.hh:629
const_iterator begin() const
Begin for read-only traversal of name/sequence pairs.
Definition: multiple_alignment.hh:573
double cmfinder_realignment_score(const MultipleAlignment &ma) const
Cmfinder realignment score of a multiple alignment to a reference alignment.
Definition: multiple_alignment.cc:812
Annotation of a sequence.
Definition: sequence_annotation.hh:24
"Sequence View" of multiple alignment as array of column vectors
Definition: sequence.hh:17
A simple 1-based string.
Definition: string1.hh:21
void push_back(char c)
push back character
Definition: string1.hh:126
void reverse()
reverse string
Definition: string1.hh:116
Definition: aligner.cc:15
size_type pos_type
type of a sequence position
Definition: aux.hh:126
std::ostream & operator<<(std::ostream &out, const AlignerRestriction &r)
Definition: aligner_restriction.hh:135
size_t size_type
general size type
Definition: aux.hh:120