Documentation ¶
Overview ¶
Package nucleic provides support for manipulation of single nucleic acid sequences with and without quality data.
Two basic nucleic acid sequence types are provided, Seq and QSeq. Interfaces for more complex sequence types are also defined.
Index ¶
- Variables
- type Aligned
- type AlignedAppender
- type Consensifyer
- type Extracter
- type Getter
- type GetterAppender
- type QSeq
- func (self *QSeq) Alphabet() alphabet.Alphabet
- func (self *QSeq) AppendLetters(a ...alphabet.Letter) (err error)
- func (self *QSeq) AppendQLetters(a ...alphabet.QLetter) (err error)
- func (self *QSeq) At(pos seq.Position) alphabet.QLetter
- func (self *QSeq) Circular(c bool)
- func (self *QSeq) Compose(f feat.FeatureSet) (err error)
- func (self *QSeq) Copy() seq.Sequence
- func (self *QSeq) Count() int
- func (self *QSeq) Description() *string
- func (self *QSeq) EAt(pos seq.Position) float64
- func (self *QSeq) Encoding() alphabet.Encoding
- func (self *QSeq) End() int
- func (self *QSeq) IsCircular() bool
- func (self *QSeq) Join(p *QSeq, where int) (err error)
- func (self *QSeq) Len() int
- func (self *QSeq) Location() *string
- func (self *QSeq) Moltype() bio.Moltype
- func (self *QSeq) Name() *string
- func (self *QSeq) Nucleic()
- func (self *QSeq) Offset(o int)
- func (self *QSeq) QDecode(l byte) alphabet.Qphred
- func (self *QSeq) QEncode(pos seq.Position) byte
- func (self *QSeq) Raw() interface{}
- func (self *QSeq) RevComp()
- func (self *QSeq) Reverse()
- func (self *QSeq) Set(pos seq.Position, l alphabet.QLetter)
- func (self *QSeq) SetE(pos seq.Position, e float64)
- func (self *QSeq) SetEncoding(e alphabet.Encoding)
- func (self *QSeq) Start() int
- func (self *QSeq) Stitch(f feat.FeatureSet) (err error)
- func (self *QSeq) String() string
- func (self *QSeq) Subseq(start int, end int) (sub seq.Sequence, err error)
- func (self *QSeq) Truncate(start int, end int) (err error)
- func (self *QSeq) Validate() (bool, int)
- type Quality
- type Seq
- func (self *Seq) Alphabet() alphabet.Alphabet
- func (self *Seq) AppendLetters(a ...alphabet.Letter) (err error)
- func (self *Seq) AppendQLetters(a ...alphabet.QLetter) (err error)
- func (self *Seq) At(pos seq.Position) alphabet.QLetter
- func (self *Seq) Circular(c bool)
- func (self *Seq) Compose(f feat.FeatureSet) (err error)
- func (self *Seq) Copy() seq.Sequence
- func (self *Seq) Count() int
- func (self *Seq) Description() *string
- func (self *Seq) End() int
- func (self *Seq) IsCircular() bool
- func (self *Seq) Join(p *Seq, where int) (err error)
- func (self *Seq) Len() int
- func (self *Seq) Location() *string
- func (self *Seq) Moltype() bio.Moltype
- func (self *Seq) Name() *string
- func (self *Seq) Nucleic()
- func (self *Seq) Offset(o int)
- func (self *Seq) Raw() interface{}
- func (self *Seq) RevComp()
- func (self *Seq) Reverse()
- func (self *Seq) Set(pos seq.Position, l alphabet.QLetter)
- func (self *Seq) Start() int
- func (self *Seq) Stitch(f feat.FeatureSet) (err error)
- func (self *Seq) String() string
- func (self *Seq) Subseq(start int, end int) (sub seq.Sequence, err error)
- func (self *Seq) Truncate(start int, end int) (err error)
- func (self *Seq) Validate() (bool, int)
- type Sequence
- type Strand
Examples ¶
Constants ¶
This section is empty.
Variables ¶
var Consensify = func(a Aligned, pos int, fill bool) alphabet.QLetter { alpha := a.Alphabet() w := make([]int, alpha.Len()) c := a.Column(pos, fill) for _, l := range c { if alpha.IsValid(l) { w[alpha.IndexOf(l)]++ } } var max, maxi int for i, v := range w { if v > max { max, maxi = v, i } } return alphabet.QLetter{ L: alpha.Letter(maxi), Q: alphabet.Ephred(1 - (float64(max) / float64(len(c)))), } }
The default Consensifyer function.
var DefaultQphred alphabet.Qphred = 40
The default value for Qphred scores from non-quality sequences.
var FloatTolerance float64 = 1e-10
Tolerance on float comparison for QConsensify
var LowQFilter = func(s seq.Sequence, _ alphabet.Letter) alphabet.Letter { return s.(*QSeq).alphabet.Ambiguous() }
The default LowQFilter function for QSeq.
var QConsensify = func(a Aligned, pos int, fill bool) alphabet.QLetter { alpha := a.Alphabet() w := make([]float64, alpha.Len()) for i := range w { w[i] = 1 } others := float64(alpha.Len() - 1) c := a.ColumnQL(pos, fill) for _, l := range c { if alpha.IsValid(l.L) { i, alt := alpha.IndexOf(l.L), l.Q.ProbE() p := (1 - alt) alt /= others for b := range w { if i == b { w[b] *= p } else { w[b] *= alt } } } } var ( max = 0. sum float64 best, count int ) for _, p := range w { sum += p } for i, v := range w { if v /= sum; v > max { max, best = v, i count = 0 } if v == max || math.Abs(max-v) < FloatTolerance { count++ } } if count > 1 { return alphabet.QLetter{ L: alpha.Ambiguous(), Q: 0, } } return alphabet.QLetter{ L: alpha.Letter(best), Q: alphabet.Ephred(1 - max), } }
A default Consensifyer function that takes letter quality into account. http://staden.sourceforge.net/manual/gap4_unix_120.html
var QStringify = func(s seq.Polymer) string { t := s.(*QSeq) gap := t.Alphabet().Gap() cs := make([]alphabet.Letter, 0, len(t.S)) for _, ql := range t.S { if alphabet.Qphred(ql.Q) > t.Threshold || ql.L == gap { cs = append(cs, ql.L) } else { cs = append(cs, t.LowQFilter(t, ql.L)) } } return alphabet.Letters(cs).String() }
The default Stringify function for QSeq.
var Stringify = func(s seq.Polymer) string { return alphabet.Letters(s.(*Seq).S).String() }
The default Stringify function for Seq.
Functions ¶
This section is empty.
Types ¶
type Aligned ¶
type Aligned interface { Sequence Column(pos int, fill bool) []alphabet.Letter ColumnQL(pos int, fill bool) []alphabet.QLetter Consensus(fill bool) *QSeq }
Aligned describes the interface for aligned multiple sequences.
type AlignedAppender ¶
type AlignedAppender interface { Aligned AppendColumns(a ...[]alphabet.QLetter) (err error) AppendEach(a [][]alphabet.QLetter) (err error) }
An AlignedAppender is a multiple sequence alignment that can append letters.
type Consensifyer ¶
Consensifyer is a function type that returns the consensus letter for a column of an alignment.
type GetterAppender ¶
GetterAppender is a type for sets of sequences or aligned multiple sequences that can append letters to individual or grouped sequences.
type QSeq ¶
type QSeq struct { ID string Desc string Loc string S []alphabet.QLetter Strand Strand Threshold alphabet.Qphred // Threshold for returning valid letter. LowQFilter seq.Filter // How to represent below threshold letter. Stringify seq.Stringify // Function allowing user specified string representation. Meta interface{} // No operation implicitly copies or changes the contents of Meta. // contains filtered or unexported fields }
QSeq is a basic nucleic acid sequence with Phred quality scores.
func NewQSeq ¶
func NewQSeq(id string, ql []alphabet.QLetter, alpha alphabet.Nucleic, encode alphabet.Encoding) *QSeq
Create a new QSeq with the given id, letter sequence, alphabet and quality encoding.
Example ¶
d := NewQSeq("example DNA", []alphabet.QLetter{{'A', 40}, {'C', 39}, {'G', 40}, {'C', 38}, {'T', 35}, {'G', 20}}, alphabet.DNA, alphabet.Sanger) fmt.Println(d, d.Moltype())
Output: ACGCTG DNA
func (*QSeq) AppendLetters ¶
Append Letters to the sequence; the DefaultQphred value is used for quality scores.
func (*QSeq) AppendQLetters ¶
Append letters with quality scores to the seq.
func (*QSeq) Compose ¶
func (self *QSeq) Compose(f feat.FeatureSet) (err error)
Join segments of the sequence, returning any error.
func (*QSeq) Description ¶
Description returns a pointer to the Desc string of the sequence.
func (*QSeq) IsCircular ¶
Return whether the sequence is circular.
func (*QSeq) QDecode ¶
Decode a quality letter to a phred score based on the sequence encoding setting.
func (*QSeq) QEncode ¶
Encode the quality at position pos to a letter based on the sequence encoding setting.
func (*QSeq) Raw ¶
func (self *QSeq) Raw() interface{}
Raw returns a pointer to the underlying []alphabet.QLetter slice.
func (*QSeq) SetEncoding ¶
Set the quality encoding type to e.
func (*QSeq) Stitch ¶
func (self *QSeq) Stitch(f feat.FeatureSet) (err error)
Join sequentially ordered disjunct segments of the sequence, returning any error.
func (*QSeq) String ¶
Return a string representation of the sequence. Representation is determined by the Stringify field.
func (*QSeq) Truncate ¶
Truncate the sequence from start to end, wrapping if the sequence is circular.
func (*QSeq) Validate ¶
Validate the letters of the sequence according to the specified alphabet.
Example ¶
r := NewQSeq("example RNA", []alphabet.QLetter{{'A', 40}, {'C', 39}, {'G', 40}, {'C', 38}, {'T', 35}, {'G', 20}}, alphabet.RNA, alphabet.Sanger) fmt.Println(r, r.Moltype()) if ok, pos := r.Validate(); ok { fmt.Println("valid RNA") } else { fmt.Println(strings.Repeat(" ", pos-1), "^ first invalid RNA position") }
Output: ACGCTG RNA ^ first invalid RNA position
type Seq ¶
type Seq struct { ID string Desc string Loc string S []alphabet.Letter Strand Strand Stringify seq.Stringify // Function allowing user specified string representation. Meta interface{} // No operation implicitly copies or changes the contents of Meta. // contains filtered or unexported fields }
Seq is a basic nucleic acid sequence.
func NewSeq ¶
Create a new Seq with the given id, letter sequence and alphabet.
Example ¶
d := NewSeq("example DNA", []alphabet.Letter("ACGCTGACTTGGTGCACGT"), alphabet.DNA) fmt.Println(d, d.Moltype())
Output: ACGCTGACTTGGTGCACGT DNA
func (*Seq) AppendLetters ¶
Append Letters to the sequence.
func (*Seq) AppendQLetters ¶
Append QLetters to the sequence, ignoring Q component.
func (*Seq) Compose ¶
func (self *Seq) Compose(f feat.FeatureSet) (err error)
Join segments of the sequence, returning any error.
Example ¶
s := NewSeq("example DNA", []alphabet.Letter("aAGTATAAgtcagtgcagtgtctggcag<TS>gtagtgaagtagggttagttta"), alphabet.DNA) f := feat.FeatureSet{ &feat.Feature{Start: 0, End: 32}, &feat.Feature{Start: 1, End: 8, Strand: -1}, &feat.Feature{Start: 28, End: s.Len() - 1}, } fmt.Println(s) if err := s.Compose(f); err == nil { fmt.Println(s) }
Output: aAGTATAAgtcagtgcagtgtctggcag<TS>gtagtgaagtagggttagttta aAGTATAAgtcagtgcagtgtctggcag<TS>TTATACT<TS>gtagtgaagtagggttagttt
func (*Seq) Description ¶
Description returns a pointer to the Desc string of the sequence.
func (*Seq) IsCircular ¶
Return whether the sequence is circular.
func (*Seq) Join ¶
Join p to the sequence at the end specified by where.
Example ¶
var s1, s2 *Seq s1 = NewSeq("a", []alphabet.Letter("agctgtgctga"), alphabet.DNA) s2 = NewSeq("b", []alphabet.Letter("CGTGCAGTCATGAGTGA"), alphabet.DNA) fmt.Println(s1, s2) if err := s1.Join(s2, seq.Start); err == nil { fmt.Println(s1) } s1 = NewSeq("a", []alphabet.Letter("agctgtgctga"), alphabet.DNA) s2 = NewSeq("b", []alphabet.Letter("CGTGCAGTCATGAGTGA"), alphabet.DNA) if err := s1.Join(s2, seq.End); err == nil { fmt.Println(s1) }
Output: agctgtgctga CGTGCAGTCATGAGTGA CGTGCAGTCATGAGTGAagctgtgctga agctgtgctgaCGTGCAGTCATGAGTGA
func (*Seq) Raw ¶
func (self *Seq) Raw() interface{}
Raw returns a pointer to the underlying []alphabet.Letter slice.
func (*Seq) RevComp ¶
func (self *Seq) RevComp()
Reverse complement the sequence.
Example ¶
s := NewSeq("example DNA", []alphabet.Letter("ATGCtGACTTGGTGCACGT"), alphabet.DNA) fmt.Println(s) s.RevComp() fmt.Println(s)
Output: ATGCtGACTTGGTGCACGT ACGTGCACCAAGTCaGCAT
func (*Seq) Stitch ¶
func (self *Seq) Stitch(f feat.FeatureSet) (err error)
Join sequentially ordered disjunct segments of the sequence, returning any error.
Example ¶
s := NewSeq("example DNA", []alphabet.Letter("aAGTATAAgtcagtgcagtgtctggcagTGCTCGTGCgtagtgaagtagGGTTAGTTTa"), alphabet.DNA) f := feat.FeatureSet{ &feat.Feature{Start: 1, End: 8}, &feat.Feature{Start: 28, End: 37}, &feat.Feature{Start: 49, End: s.Len() - 1}, } fmt.Println(s) if err := s.Stitch(f); err == nil { fmt.Println(s) }
Output: aAGTATAAgtcagtgcagtgtctggcagTGCTCGTGCgtagtgaagtagGGTTAGTTTa AGTATAATGCTCGTGCGGTTAGTTT
func (*Seq) String ¶
Return a string representation of the sequence. Representation is determined by the Stringify field.
func (*Seq) Truncate ¶
Truncate the sequence from start to end, wrapping if the sequence is circular.
func (*Seq) Validate ¶
Validate the letters of the sequence according to the specified alphabet.
Example ¶
r := NewSeq("example RNA", []alphabet.Letter("ACGCTGACTTGGTGCACGT"), alphabet.RNA) fmt.Println(r, r.Moltype()) if ok, pos := r.Validate(); ok { fmt.Println("valid RNA") } else { fmt.Println(strings.Repeat(" ", pos-1), "^ first invalid RNA position") }
Output: ACGCTGACTTGGTGCACGT RNA ^ first invalid RNA position
Directories ¶
Path | Synopsis |
---|---|
alignment | Package alignment handles aligned sequences stored as columns. |
multi | Package multi handles collections of sequences as alignments or sets. |
packed | Package packed provides support for manipulation of single nucleic acid sequences with and without quality data. |