nucleic

package
v0.0.0-...-25502c3 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Oct 9, 2012 License: GPL-3.0 Imports: 6 Imported by: 0

Documentation

Overview

Package nucleic provides support for manipulation of single nucleic acid sequences with and without quality data.

Two basic nucleic acid sequence types are provided, Seq and QSeq. Interfaces for more complex sequence types are also defined.

Index

Examples

Constants

This section is empty.

Variables

View Source
var Consensify = func(a Aligned, pos int, fill bool) alphabet.QLetter {
	alpha := a.Alphabet()
	w := make([]int, alpha.Len())
	c := a.Column(pos, fill)

	for _, l := range c {
		if alpha.IsValid(l) {
			w[alpha.IndexOf(l)]++
		}
	}

	var max, maxi int
	for i, v := range w {
		if v > max {
			max, maxi = v, i
		}
	}

	return alphabet.QLetter{
		L: alpha.Letter(maxi),
		Q: alphabet.Ephred(1 - (float64(max) / float64(len(c)))),
	}
}

The default Consensifyer function.

View Source
var DefaultQphred alphabet.Qphred = 40

The default value for Qphred scores from non-quality sequences.

View Source
var FloatTolerance float64 = 1e-10

Tolerance on float comparison for QConsensify

View Source
var LowQFilter = func(s seq.Sequence, _ alphabet.Letter) alphabet.Letter { return s.(*QSeq).alphabet.Ambiguous() }

The default LowQFilter function for QSeq.

View Source
var QConsensify = func(a Aligned, pos int, fill bool) alphabet.QLetter {
	alpha := a.Alphabet()

	w := make([]float64, alpha.Len())
	for i := range w {
		w[i] = 1
	}

	others := float64(alpha.Len() - 1)
	c := a.ColumnQL(pos, fill)
	for _, l := range c {
		if alpha.IsValid(l.L) {
			i, alt := alpha.IndexOf(l.L), l.Q.ProbE()
			p := (1 - alt)
			alt /= others
			for b := range w {
				if i == b {
					w[b] *= p
				} else {
					w[b] *= alt
				}
			}
		}
	}

	var (
		max         = 0.
		sum         float64
		best, count int
	)
	for _, p := range w {
		sum += p
	}
	for i, v := range w {
		if v /= sum; v > max {
			max, best = v, i
			count = 0
		}
		if v == max || math.Abs(max-v) < FloatTolerance {
			count++
		}
	}

	if count > 1 {
		return alphabet.QLetter{
			L: alpha.Ambiguous(),
			Q: 0,
		}
	}

	return alphabet.QLetter{
		L: alpha.Letter(best),
		Q: alphabet.Ephred(1 - max),
	}
}

A default Consensifyer function that takes letter quality into account. http://staden.sourceforge.net/manual/gap4_unix_120.html

View Source
var QStringify = func(s seq.Polymer) string {
	t := s.(*QSeq)
	gap := t.Alphabet().Gap()
	cs := make([]alphabet.Letter, 0, len(t.S))
	for _, ql := range t.S {
		if alphabet.Qphred(ql.Q) > t.Threshold || ql.L == gap {
			cs = append(cs, ql.L)
		} else {
			cs = append(cs, t.LowQFilter(t, ql.L))
		}
	}

	return alphabet.Letters(cs).String()
}

The default Stringify function for QSeq.

View Source
var Stringify = func(s seq.Polymer) string { return alphabet.Letters(s.(*Seq).S).String() }

The default Stringify function for Seq.

Functions

This section is empty.

Types

type Aligned

type Aligned interface {
	Sequence
	Column(pos int, fill bool) []alphabet.Letter
	ColumnQL(pos int, fill bool) []alphabet.QLetter
	Consensus(fill bool) *QSeq
}

Aligned describes the interface for aligned multiple sequences.

type AlignedAppender

type AlignedAppender interface {
	Aligned
	AppendColumns(a ...[]alphabet.QLetter) (err error)
	AppendEach(a [][]alphabet.QLetter) (err error)
}

An AlignedAppenderis a multiple sequence alignment that can append letters.

type Consensifyer

type Consensifyer func(a Aligned, pos int, fill bool) alphabet.QLetter

Consensifyer is a function type that returns the consensus letter for a column of an alignment.

type Extracter

type Extracter interface {
	Sequence
	Extract(i int) Sequence
}

Extracter describes the interface for column based aligned multiple sequences.

type Getter

type Getter interface {
	Sequence
	Get(i int) Sequence
}

Getter describes the interface for sets of sequences or aligned multiple sequences.

type GetterAppender

type GetterAppender interface {
	Getter
	AppendEach(a [][]alphabet.QLetter) (err error)
}

GetterAppender is a type for sets of sequences or aligned multiple sequences that can append letters to individual or grouped seqeunces.

type QSeq

type QSeq struct {
	ID         string
	Desc       string
	Loc        string
	S          []alphabet.QLetter
	Strand     Strand
	Threshold  alphabet.Qphred // Threshold for returning valid letter.
	LowQFilter seq.Filter      // How to represent below threshold letter.
	Stringify  seq.Stringify   // Function allowing user specified string representation.
	Meta       interface{}     // No operation implicitly copies or changes the contents of Meta.
	// contains filtered or unexported fields
}

QSeq is a basic nucleic acid sequence with Phred quality scores.

func NewQSeq

func NewQSeq(id string, ql []alphabet.QLetter, alpha alphabet.Nucleic, encode alphabet.Encoding) *QSeq

Create a new QSeq with the given id, letter sequence, alphabet and quality encoding.

Example
d := NewQSeq("example DNA", []alphabet.QLetter{{'A', 40}, {'C', 39}, {'G', 40}, {'C', 38}, {'T', 35}, {'G', 20}}, alphabet.DNA, alphabet.Sanger)
fmt.Println(d, d.Moltype())
Output:

ACGCTG DNA

func (*QSeq) Alphabet

func (self *QSeq) Alphabet() alphabet.Alphabet

Return the Alphabet used by the sequence.

func (*QSeq) AppendLetters

func (self *QSeq) AppendLetters(a ...alphabet.Letter) (err error)

Append QLetters to the sequence, the DefaultQphred value is used for quality scores.

func (*QSeq) AppendQLetters

func (self *QSeq) AppendQLetters(a ...alphabet.QLetter) (err error)

Append letters with quality scores to the seq.

func (*QSeq) At

func (self *QSeq) At(pos seq.Position) alphabet.QLetter

Return the letter at position pos.

func (*QSeq) Circular

func (self *QSeq) Circular(c bool)

Specify that the sequence is circular.

func (*QSeq) Compose

func (self *QSeq) Compose(f feat.FeatureSet) (err error)

Join segments of the sequence, returning any error.

func (*QSeq) Copy

func (self *QSeq) Copy() seq.Sequence

Return a copy of the sequence.

func (*QSeq) Count

func (self *QSeq) Count() int

Satisfy Counter.

func (*QSeq) Description

func (self *QSeq) Description() *string

Description returns a pointer to the Desc string of the sequence.

func (*QSeq) EAt

func (self *QSeq) EAt(pos seq.Position) float64

Return the probability of a sequence error at position pos.

func (*QSeq) Encoding

func (self *QSeq) Encoding() alphabet.Encoding

Return the quality encoding type.

func (*QSeq) End

func (self *QSeq) End() int

Return the end position of the sequence in global coordinates.

func (*QSeq) IsCircular

func (self *QSeq) IsCircular() bool

Return whether the sequence is circular.

func (*QSeq) Join

func (self *QSeq) Join(p *QSeq, where int) (err error)

Join p to the sequence at the end specified by where.

func (*QSeq) Len

func (self *QSeq) Len() int

Return the length of the sequence.

func (*QSeq) Location

func (self *QSeq) Location() *string

Location returns a pointer to the Loc string of the sequence.

func (*QSeq) Moltype

func (self *QSeq) Moltype() bio.Moltype

Return the molecule type of the sequence.

func (*QSeq) Name

func (self *QSeq) Name() *string

Name returns a pointer to the ID string of the sequence.

func (*QSeq) Nucleic

func (self *QSeq) Nucleic()

Required to satisfy nucleic.Sequence interface.

func (*QSeq) Offset

func (self *QSeq) Offset(o int)

Set the global offset of the sequence to o.

func (*QSeq) QDecode

func (self *QSeq) QDecode(l byte) alphabet.Qphred

Decode a quality letter to a phred score based on the sequence encoding setting.

func (*QSeq) QEncode

func (self *QSeq) QEncode(pos seq.Position) byte

Encode the quality at position pos to a letter based on the sequence encoding setting.

func (*QSeq) Raw

func (self *QSeq) Raw() interface{}

Raw returns a pointer to the underlying []Qphred slice.

func (*QSeq) RevComp

func (self *QSeq) RevComp()

Reverse complement the sequence.

func (*QSeq) Reverse

func (self *QSeq) Reverse()

Reverse the sequence.

func (*QSeq) Set

func (self *QSeq) Set(pos seq.Position, l alphabet.QLetter)

Set the letter at position pos to l.

func (*QSeq) SetE

func (self *QSeq) SetE(pos seq.Position, e float64)

Set the quality at position pos to e to reflect the given p(Error).

func (*QSeq) SetEncoding

func (self *QSeq) SetEncoding(e alphabet.Encoding)

Set the quality encoding type to e.

func (*QSeq) Start

func (self *QSeq) Start() int

Return the start position of the sequence in global coordinates.

func (*QSeq) Stitch

func (self *QSeq) Stitch(f feat.FeatureSet) (err error)

Join sequentially order disjunct segments of the sequence, returning any error.

func (*QSeq) String

func (self *QSeq) String() string

Return a string representation of the sequence. Representation is determined by the Stringify field.

func (*QSeq) Subseq

func (self *QSeq) Subseq(start int, end int) (sub seq.Sequence, err error)

Return a subsequence from start to end, wrapping if the sequence is circular.

func (*QSeq) Truncate

func (self *QSeq) Truncate(start int, end int) (err error)

Truncate the sequenc from start to end, wrapping if the sequence is circular.

func (*QSeq) Validate

func (self *QSeq) Validate() (bool, int)

Validate the letters of the sequence according to the specified alphabet.

Example
r := NewQSeq("example RNA", []alphabet.QLetter{{'A', 40}, {'C', 39}, {'G', 40}, {'C', 38}, {'T', 35}, {'G', 20}}, alphabet.RNA, alphabet.Sanger)
fmt.Println(r, r.Moltype())
if ok, pos := r.Validate(); ok {
	fmt.Println("valid RNA")
} else {
	fmt.Println(strings.Repeat(" ", pos-1), "^ first invalid RNA position")
}
Output:

ACGCTG RNA
    ^ first invalid RNA position

type Quality

type Quality interface {
	seq.Polymer
	seq.Sequence
	seq.Scorer
	seq.Complementer
	Nucleic()
}

Quality describes the interface for nucleic acid sequences with quality scores.

type Seq

type Seq struct {
	ID        string
	Desc      string
	Loc       string
	S         []alphabet.Letter
	Strand    Strand
	Stringify seq.Stringify // Function allowing user specified string representation.
	Meta      interface{}   // No operation implicitly copies or changes the contents of Meta.
	// contains filtered or unexported fields
}

Seq is a basic nucleic acid sequence.

func NewSeq

func NewSeq(id string, b []alphabet.Letter, alpha alphabet.Nucleic) *Seq

Create a new Seq with the given id, letter sequence and alphabet.

Example
d := NewSeq("example DNA", []alphabet.Letter("ACGCTGACTTGGTGCACGT"), alphabet.DNA)
fmt.Println(d, d.Moltype())
Output:

ACGCTGACTTGGTGCACGT DNA

func (*Seq) Alphabet

func (self *Seq) Alphabet() alphabet.Alphabet

Return the Alphabet used by the sequence.

func (*Seq) AppendLetters

func (self *Seq) AppendLetters(a ...alphabet.Letter) (err error)

Append Letters to the sequence.

func (*Seq) AppendQLetters

func (self *Seq) AppendQLetters(a ...alphabet.QLetter) (err error)

Append QLetters to the sequence, ignoring Q component.

func (*Seq) At

func (self *Seq) At(pos seq.Position) alphabet.QLetter

Return the letter at position pos.

func (*Seq) Circular

func (self *Seq) Circular(c bool)

Specify that the sequence is circular.

func (*Seq) Compose

func (self *Seq) Compose(f feat.FeatureSet) (err error)

Join segments of the sequence, returning any error.

Example
s := NewSeq("example DNA", []alphabet.Letter("aAGTATAAgtcagtgcagtgtctggcag<TS>gtagtgaagtagggttagttta"), alphabet.DNA)
f := feat.FeatureSet{
	&feat.Feature{Start: 0, End: 32},
	&feat.Feature{Start: 1, End: 8, Strand: -1},
	&feat.Feature{Start: 28, End: s.Len() - 1},
}
fmt.Println(s)
if err := s.Compose(f); err == nil {
	fmt.Println(s)
}
Output:

aAGTATAAgtcagtgcagtgtctggcag<TS>gtagtgaagtagggttagttta
aAGTATAAgtcagtgcagtgtctggcag<TS>TTATACT<TS>gtagtgaagtagggttagttt

func (*Seq) Copy

func (self *Seq) Copy() seq.Sequence

Return a copy of the sequence.

func (*Seq) Count

func (self *Seq) Count() int

Satisfy Counter.

func (*Seq) Description

func (self *Seq) Description() *string

Description returns a pointer to the Desc string of the sequence.

func (*Seq) End

func (self *Seq) End() int

Return the end position of the sequence in global coordinates.

func (*Seq) IsCircular

func (self *Seq) IsCircular() bool

Return whether the sequence is circular.

func (*Seq) Join

func (self *Seq) Join(p *Seq, where int) (err error)

Join p to the sequence at the end specified by where.

Example
var s1, s2 *Seq

s1 = NewSeq("a", []alphabet.Letter("agctgtgctga"), alphabet.DNA)
s2 = NewSeq("b", []alphabet.Letter("CGTGCAGTCATGAGTGA"), alphabet.DNA)
fmt.Println(s1, s2)
if err := s1.Join(s2, seq.Start); err == nil {
	fmt.Println(s1)
}

s1 = NewSeq("a", []alphabet.Letter("agctgtgctga"), alphabet.DNA)
s2 = NewSeq("b", []alphabet.Letter("CGTGCAGTCATGAGTGA"), alphabet.DNA)
if err := s1.Join(s2, seq.End); err == nil {
	fmt.Println(s1)
}
Output:

agctgtgctga CGTGCAGTCATGAGTGA
CGTGCAGTCATGAGTGAagctgtgctga
agctgtgctgaCGTGCAGTCATGAGTGA

func (*Seq) Len

func (self *Seq) Len() int

Return the length of the sequence.

func (*Seq) Location

func (self *Seq) Location() *string

Location returns a pointer to the Loc string of the sequence.

func (*Seq) Moltype

func (self *Seq) Moltype() bio.Moltype

Return the molecule type of the sequence.

func (*Seq) Name

func (self *Seq) Name() *string

Name returns a pointer to the ID string of the sequence.

func (*Seq) Nucleic

func (self *Seq) Nucleic()

Required to satisfy nucleic.Sequence interface.

func (*Seq) Offset

func (self *Seq) Offset(o int)

Set the global offset of the sequence to o.

func (*Seq) Raw

func (self *Seq) Raw() interface{}

Raw returns a pointer to the the underlying []alphabet.Letter slice.

func (*Seq) RevComp

func (self *Seq) RevComp()

Reverse complement the sequence.

Example
s := NewSeq("example DNA", []alphabet.Letter("ATGCtGACTTGGTGCACGT"), alphabet.DNA)
fmt.Println(s)
s.RevComp()
fmt.Println(s)
Output:

ATGCtGACTTGGTGCACGT
ACGTGCACCAAGTCaGCAT

func (*Seq) Reverse

func (self *Seq) Reverse()

Reverse the sequence.

func (*Seq) Set

func (self *Seq) Set(pos seq.Position, l alphabet.QLetter)

Set the letter at position pos to l.

func (*Seq) Start

func (self *Seq) Start() int

Return the start position of the sequence in global coordinates.

func (*Seq) Stitch

func (self *Seq) Stitch(f feat.FeatureSet) (err error)

Join sequentially order disjunct segments of the sequence, returning any error.

Example
s := NewSeq("example DNA", []alphabet.Letter("aAGTATAAgtcagtgcagtgtctggcagTGCTCGTGCgtagtgaagtagGGTTAGTTTa"), alphabet.DNA)
f := feat.FeatureSet{
	&feat.Feature{Start: 1, End: 8},
	&feat.Feature{Start: 28, End: 37},
	&feat.Feature{Start: 49, End: s.Len() - 1},
}
fmt.Println(s)
if err := s.Stitch(f); err == nil {
	fmt.Println(s)
}
Output:

aAGTATAAgtcagtgcagtgtctggcagTGCTCGTGCgtagtgaagtagGGTTAGTTTa
AGTATAATGCTCGTGCGGTTAGTTT

func (*Seq) String

func (self *Seq) String() string

Return a string representation of the sequence. Representation is determined by the Stringify field.

func (*Seq) Subseq

func (self *Seq) Subseq(start int, end int) (sub seq.Sequence, err error)

Return a subsequence from start to end, wrapping if the sequence is circular.

func (*Seq) Truncate

func (self *Seq) Truncate(start int, end int) (err error)

Truncate the sequence from start to end, wrapping if the sequence is circular.

func (*Seq) Validate

func (self *Seq) Validate() (bool, int)

Validate the letters of the sequence according to the specified alphabet.

Example
r := NewSeq("example RNA", []alphabet.Letter("ACGCTGACTTGGTGCACGT"), alphabet.RNA)
fmt.Println(r, r.Moltype())
if ok, pos := r.Validate(); ok {
	fmt.Println("valid RNA")
} else {
	fmt.Println(strings.Repeat(" ", pos-1), "^ first invalid RNA position")
}
Output:

ACGCTGACTTGGTGCACGT RNA
    ^ first invalid RNA position

type Sequence

type Sequence interface {
	seq.Polymer
	seq.Sequence
	seq.Complementer
	Nucleic() // No op function to tag nucleic type sequence data.
}

Sequence describes the interface for nucleic acid sequences.

type Strand

type Strand int8

Strand stores nucleic acid sequence strand information.

const (
	Minus Strand = iota - 1
	None
	Plus
)

func (Strand) String

func (self Strand) String() string

Directories

Path Synopsis
Package alignment handles aligned sequences stored as columns.
Package alignment handles aligned sequences stored as columns.
Package multi handles collections of sequences as alignments or sets.
Package multi handles collections of sequences as alignments or sets.
Package packed provides support for manipulation of single nucleic acid sequences with and without quality data.
Package packed provides support for manipulation of single nucleic acid sequences with and without quality data.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL