alignment

package
v0.0.0-...-8225a94 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Jan 6, 2017 License: BSD-3-Clause Imports: 9 Imported by: 0

Documentation

Overview

Package alignment handles aligned sequences stored as columns.

Index

Examples

Constants

This section is empty.

Variables

This section is empty.

Functions

This section is empty.

Types

type QRow

type QRow struct {
	Align *QSeq
	Row   int
}

A QRow is a pointer into an alignment that satisfies the seq.Sequence and seq.Scorer interfaces.

func (QRow) Alphabet

func (r QRow) Alphabet() alphabet.Alphabet

func (QRow) At

func (r QRow) At(i int) alphabet.QLetter

At returns the letter at position i.

func (QRow) Clone

func (r QRow) Clone() seq.Sequence

func (QRow) CloneAnnotation

func (r QRow) CloneAnnotation() *seq.Annotation

func (QRow) Conformation

func (r QRow) Conformation() feat.Conformation

func (QRow) Description

func (r QRow) Description() string

func (QRow) EAt

func (r QRow) EAt(i int) float64

EAt returns the probability of a sequence error at position i.

func (QRow) Encoding

func (r QRow) Encoding() alphabet.Encoding

func (QRow) End

func (r QRow) End() int

End returns the end position of the sequence in coordinates relative to the sequence location.

func (QRow) Format

func (r QRow) Format(fs fmt.State, c rune)

Format is a support routine for fmt.Formatter. It accepts the formats 'v' and 's' (string), 'a' (fasta) and 'q' (fastq). String, fasta and fastq formats support truncated output via the verb's precision. Fasta format supports sequence line specification via the verb's width field. Fastq format supports optional inclusion of the '+' line descriptor line with the '+' flag. The 'v' verb supports the '#' flag for Go syntax output. The 's' and 'v' formats support the '-' flag for omission of the sequence name.

func (QRow) Len

func (r QRow) Len() int

Len returns the length of the alignment.

func (QRow) Location

func (r QRow) Location() feat.Feature

Location returns the feature containing the row's sequence.

func (QRow) Name

func (r QRow) Name() string

func (QRow) New

func (r QRow) New() seq.Sequence

func (QRow) QEncode

func (r QRow) QEncode(i int) byte

QEncode encodes the quality at position i to a letter based on the sequence encoding setting.

func (QRow) RevComp

func (r QRow) RevComp()

func (QRow) Reverse

func (r QRow) Reverse()

func (QRow) Set

func (r QRow) Set(i int, l alphabet.QLetter) error

Set sets the letter at position i to l.

func (QRow) SetConformation

func (r QRow) SetConformation(c feat.Conformation) error

func (QRow) SetE

func (r QRow) SetE(i int, e float64) error

SetE sets the quality at position i to e to reflect the given p(Error).

func (QRow) SetEncoding

func (r QRow) SetEncoding(e alphabet.Encoding) error

func (QRow) SetOffset

func (r QRow) SetOffset(o int) error

func (QRow) SetSlice

func (r QRow) SetSlice(_ alphabet.Slice)

SetSlice unconditionally panics.

func (QRow) Slice

func (r QRow) Slice() alphabet.Slice

Slice unconditionally panics.

func (QRow) Start

func (r QRow) Start() int

Start returns the start position of the sequence in coordinates relative to the sequence location.

func (QRow) String

func (r QRow) String() string

String returns a string representation of the sequence data only.

type QSeq

type QSeq struct {
	seq.Annotation
	SubAnnotations []seq.Annotation
	Seq            alphabet.QColumns
	ColumnConsense seq.ConsenseFunc
	Threshold      alphabet.Qphred // Threshold for returning valid letter.
	QFilter        seq.QFilter     // How to represent below threshold letter.
	Encode         alphabet.Encoding
}

A QSeq is an aligned sequence with quality scores.

func NewQSeq

func NewQSeq(id string, subids []string, ql [][]alphabet.QLetter, alpha alphabet.Alphabet, enc alphabet.Encoding, cons seq.ConsenseFunc) (*QSeq, error)

NewQSeq creates a new QSeq with the given id, letter sequence and alphabet.

Example
qm, err := NewQSeq("example alignment",
	[]string{"seq 1", "seq 2", "seq 3"},
	[][]alphabet.QLetter{
		{{'A', 40}, {'A', 40}, {'A', 40}},
		{{'C', 40}, {'C', 40}, {'C', 40}},
		{{'G', 40}, {'G', 40}, {'G', 40}},
		{{'C', 40}, {'G', 40}, {'A', 40}},
		{{'T', 40}, {'T', 40}, {'T', 40}},
		{{'G', 40}, {'G', 40}, {'G', 40}},
		{{'A', 40}, {'A', 40}, {'A', 40}},
		{{'C', 40}, {'C', 40}, {'C', 40}},
		{{'T', 40}, {'C', 40}, {'G', 40}},
		{{'T', 40}, {'T', 40}, {'T', 40}},
		{{'G', 40}, {'G', 40}, {'G', 40}},
		{{'G', 40}, {'G', 40}, {'G', 40}},
		{{'T', 40}, {'C', 40}, {'C', 40}},
		{{'G', 40}, {'G', 40}, {'G', 40}},
		{{'C', 40}, {'C', 40}, {'C', 40}},
		{{'A', 40}, {'G', 40}, {'T', 40}},
		{{'C', 40}, {'C', 40}, {'C', 40}},
		{{'G', 40}, {'A', 40}, {'A', 40}},
		{{'T', 40}, {'T', 40}, {'T', 40}},
	},
	alphabet.DNA,
	alphabet.Sanger,
	seq.DefaultQConsensus)
if err == nil {
	fmt.Printf("%-s\n\n%-s\n", qm, qm.Consensus(false))
}
Output:

ACGCTGACTTGGTGCACGT
ACGGTGACCTGGCGCGCAT
ACGATGACGTGGCGCTCAT

acgntgacntggcgcncat

func (*QSeq) Add

func (s *QSeq) Add(n ...seq.Sequence) error

Add adds the sequences n to the alignment. Sequences in n must align start and end with the receiving alignment. Additional sequence will be clipped.

Example
fmt.Printf("%v %-s\n", qm.Rows(), qm.Consensus(false))
qm.Add(linear.NewQSeq("example DNA",
	[]alphabet.QLetter{{'a', 40}, {'c', 39}, {'g', 40}, {'C', 38}, {'t', 35}, {'g', 20}},
	alphabet.DNA, alphabet.Sanger))
fmt.Printf("%v %-s\n", qm.Rows(), qm.Consensus(false))
Output:

3 acgntgacntggcgcncat
4 acgctgacntggcgcncat

func (*QSeq) AppendColumns

func (s *QSeq) AppendColumns(a ...[]alphabet.QLetter) error

AppendColumns appends each QLetter of each element of a to the appropriate sequence in the receiver.

func (*QSeq) AppendEach

func (s *QSeq) AppendEach(a [][]alphabet.QLetter) error

AppendEach appends each []alphabet.QLetter in a to the appropriate sequence in the receiver.

func (*QSeq) Clone

func (s *QSeq) Clone() seq.Rower

Clone returns a copy of the sequence.

func (*QSeq) Column

func (s *QSeq) Column(pos int, _ bool) []alphabet.Letter

Column returns a slice of letters reflecting the column at pos.

func (*QSeq) ColumnQL

func (s *QSeq) ColumnQL(pos int, _ bool) []alphabet.QLetter

ColumnQL returns a slice of quality letters reflecting the column at pos.

func (*QSeq) Consensus

func (s *QSeq) Consensus(_ bool) *linear.QSeq

Consensus returns a quality sequence reflecting the consensus of the receiver determined by the ColumnConsense field.

func (*QSeq) Delete

func (s *QSeq) Delete(i int)

Delete removes the sequence represented at row i of the alignment. It panics if i is out of range.

func (*QSeq) Encoding

func (s *QSeq) Encoding() alphabet.Encoding

Encoding returns the quality encoding scheme.

func (*QSeq) End

func (s *QSeq) End() int

End returns the end position of the sequence in coordinates relative to the sequence location.

func (*QSeq) Format

func (s *QSeq) Format(fs fmt.State, c rune)

Format is a support routine for fmt.Formatter. It accepts the formats 'v' and 's' (string), 'a' (fasta) and 'q' (fastq). String, fasta and fastq formats support truncated output via the verb's precision. Fasta format supports sequence line specification via the verb's width field. Fastq format supports optional inclusion of the '+' line descriptor line with the '+' flag. The 'v' verb supports the '#' flag for Go syntax output. The 's' and 'v' formats support the '-' flag for omission of the sequence name.

func (*QSeq) Len

func (s *QSeq) Len() int

Len returns the length of the alignment.

func (*QSeq) New

func (s *QSeq) New() *QSeq

New returns an empty *QSeq sequence with the same alphabet.

func (*QSeq) RevComp

func (s *QSeq) RevComp()

RevComp reverse complements the sequence. RevComp will panic if the alphabet used by the receiver is not a Complementor.

Example
fmt.Printf("%-s\n\n%-s\n\n", qm, qm.Consensus(false))
qm.RevComp()
fmt.Printf("%-s\n\n%-s\n", qm, qm.Consensus(false))
Output:

ACGCTGACTTGGTGCACGT
ACGGTGACCTGGCGCGCAT
ACGATGACGTGGCGCTCAT
acgCtg-------------

acgctgacntggcgcncat

ACGTGCACCAAGTCAGCGT
ATGCGCGCCAGGTCACCGT
ATGAGCGCCACGTCATCGT
-------------caGcgt

atgngcgccangtcagcgt

func (*QSeq) Reverse

func (s *QSeq) Reverse()

Reverse reverses the order of letters in the sequence without complementing them.

func (*QSeq) Row

func (s *QSeq) Row(i int) seq.Sequence

Row returns the sequence represented at row i of the alignment. It panics if i is out of range.

func (*QSeq) Rows

func (s *QSeq) Rows() int

Rows returns the number of rows in the alignment.

func (*QSeq) SetEncoding

func (s *QSeq) SetEncoding(e alphabet.Encoding) error

SetEncoding sets the quality encoding scheme to e.

func (*QSeq) SetSlice

func (s *QSeq) SetSlice(sl alphabet.Slice)

SetSlice sets the sequence data represented by the Seq. SetSlice will panic if sl is not a QColumns.

func (*QSeq) Slice

func (s *QSeq) Slice() alphabet.Slice

Slice returns the sequence data as an alphabet.Slice.

func (*QSeq) Start

func (s *QSeq) Start() int

Start returns the start position of the sequence in coordinates relative to the sequence location.

func (*QSeq) String

func (s *QSeq) String() string

type Row

type Row struct {
	Align *Seq
	Row   int
}

A Row is a pointer into an alignment that satisfies the seq.Sequence interface.

func (Row) Alphabet

func (r Row) Alphabet() alphabet.Alphabet

func (Row) At

func (r Row) At(i int) alphabet.QLetter

At returns the letter at position i.

func (Row) Clone

func (r Row) Clone() seq.Sequence

func (Row) CloneAnnotation

func (r Row) CloneAnnotation() *seq.Annotation

func (Row) Conformation

func (r Row) Conformation() feat.Conformation

func (Row) Description

func (r Row) Description() string

func (Row) End

func (r Row) End() int

End returns the end position of the sequence in coordinates relative to the sequence location.

func (Row) Format

func (r Row) Format(fs fmt.State, c rune)

Format is a support routine for fmt.Formatter. It accepts the formats 'v' and 's' (string), 'a' (fasta) and 'q' (fastq). String, fasta and fastq formats support truncated output via the verb's precision. Fasta format supports sequence line specification via the verb's width field. Fastq format supports optional inclusion of the '+' line descriptor line with the '+' flag. The 'v' verb supports the '#' flag for Go syntax output. The 's' and 'v' formats support the '-' flag for omission of the sequence name.

func (Row) Len

func (r Row) Len() int

Len returns the length of the row.

func (Row) Location

func (r Row) Location() feat.Feature

Location returns the feature containing the row's sequence.

func (Row) Name

func (r Row) Name() string

func (Row) New

func (r Row) New() seq.Sequence

func (Row) RevComp

func (r Row) RevComp()

func (Row) Reverse

func (r Row) Reverse()

func (Row) Set

func (r Row) Set(i int, l alphabet.QLetter) error

Set sets the letter at position i to l.

func (Row) SetConformation

func (r Row) SetConformation(c feat.Conformation) error

func (Row) SetOffset

func (r Row) SetOffset(o int) error

func (Row) SetSlice

func (r Row) SetSlice(_ alphabet.Slice)

SetSlice unconditionally panics.

func (Row) Slice

func (r Row) Slice() alphabet.Slice

Slice unconditionally panics.

func (Row) Start

func (r Row) Start() int

Start returns the start position of the sequence in coordinates relative to the sequence location.

func (Row) String

func (r Row) String() string

String returns a string representation of the sequence data only.

type Seq

type Seq struct {
	seq.Annotation
	SubAnnotations []seq.Annotation
	Seq            alphabet.Columns
	ColumnConsense seq.ConsenseFunc
}

A Seq is an aligned sequence.

func NewSeq

func NewSeq(id string, subids []string, b [][]alphabet.Letter, alpha alphabet.Alphabet, cons seq.ConsenseFunc) (*Seq, error)

NewSeq creates a new Seq with the given id, letter sequence and alphabet.

Example
m, err := NewSeq("example alignment",
	[]string{"seq 1", "seq 2", "seq 3"},
	[][]alphabet.Letter{
		[]alphabet.Letter("AAA"),
		[]alphabet.Letter("CCC"),
		[]alphabet.Letter("GGG"),
		[]alphabet.Letter("CGA"),
		[]alphabet.Letter("TTT"),
		[]alphabet.Letter("GGG"),
		[]alphabet.Letter("AAA"),
		[]alphabet.Letter("CCC"),
		[]alphabet.Letter("TCG"),
		[]alphabet.Letter("TTT"),
		[]alphabet.Letter("GGG"),
		[]alphabet.Letter("GGG"),
		[]alphabet.Letter("TCC"),
		[]alphabet.Letter("GGG"),
		[]alphabet.Letter("CCC"),
		[]alphabet.Letter("AGT"),
		[]alphabet.Letter("CCC"),
		[]alphabet.Letter("GAA"),
		[]alphabet.Letter("TTT"),
	},
	alphabet.DNA,
	seq.DefaultConsensus)
if err == nil {
	fmt.Printf("%-s\n\n%-s\n", m, m.Consensus(false))
}
Output:

ACGCTGACTTGGTGCACGT
ACGGTGACCTGGCGCGCAT
ACGATGACGTGGCGCTCAT

acgntgacntggcgcncat

func (*Seq) Add

func (s *Seq) Add(n ...seq.Sequence) error

Add adds the sequences n to Seq. Sequences in n should align start and end with the receiving alignment. Additional sequence will be clipped and missing sequence will be filled with the gap letter.

Example
fmt.Printf("%v %-s\n", m.Rows(), m.Consensus(false))
m.Add(linear.NewQSeq("example DNA",
	[]alphabet.QLetter{{'a', 40}, {'c', 39}, {'g', 40}, {'C', 38}, {'t', 35}, {'g', 20}},
	alphabet.DNA, alphabet.Sanger))
fmt.Printf("%v %-s\n", m.Rows(), m.Consensus(false))
Output:

3 acgntgacntggcgcncat
4 acgctgacntggcgcncat

func (*Seq) AppendColumns

func (s *Seq) AppendColumns(a ...[]alphabet.QLetter) error

AppendColumns appends each QLetter of each element of a to the appropriate sequence in the receiver.

func (*Seq) AppendEach

func (s *Seq) AppendEach(a [][]alphabet.QLetter) error

AppendEach appends each []alphabet.QLetter in a to the appropriate sequence in the receiver.

func (*Seq) Clone

func (s *Seq) Clone() seq.Rower

Clone returns a copy of the sequence.

func (*Seq) Column

func (s *Seq) Column(pos int, _ bool) []alphabet.Letter

Column returns a slice of letters reflecting the column at pos.

func (*Seq) ColumnQL

func (s *Seq) ColumnQL(pos int, _ bool) []alphabet.QLetter

ColumnQL returns a slice of quality letters reflecting the column at pos.

func (*Seq) Consensus

func (s *Seq) Consensus(_ bool) *linear.QSeq

Consensus returns a quality sequence reflecting the consensus of the receiver determined by the ColumnConsense field.

func (*Seq) Delete

func (s *Seq) Delete(i int)

Delete removes the sequence represented at row i of the alignment. It panics if i is out of range.

func (*Seq) End

func (s *Seq) End() int

End returns the end position of the sequence in coordinates relative to the sequence location.

func (*Seq) Format

func (s *Seq) Format(fs fmt.State, c rune)

Format is a support routine for fmt.Formatter. It accepts the formats 'v' and 's' (string), 'a' (fasta) and 'q' (fastq). String, fasta and fastq formats support truncated output via the verb's precision. Fasta format supports sequence line specification via the verb's width field. Fastq format supports optional inclusion of the '+' line descriptor line with the '+' flag. The 'v' verb supports the '#' flag for Go syntax output. The 's' and 'v' formats support the '-' flag for omission of the sequence name.

func (*Seq) Len

func (s *Seq) Len() int

Len returns the length of the alignment.

func (*Seq) New

func (s *Seq) New() *Seq

New returns an empty *Seq sequence with the same alphabet.

func (*Seq) RevComp

func (s *Seq) RevComp()

RevComp reverse complements the sequence. RevComp will panic if the alphabet used by the receiver is not a Complementor.

Example
fmt.Printf("%-s\n\n%-s\n", m, m.Consensus(false))
fmt.Println()
m.RevComp()
fmt.Printf("%-s\n\n%-s\n", m, m.Consensus(false))
Output:

ACGCTGACTTGGTGCACGT
ACGGTGACCTGGCGCGCAT
ACGATGACGTGGCGCTCAT
acgCtg-------------

acgctgacntggcgcncat

ACGTGCACCAAGTCAGCGT
ATGCGCGCCAGGTCACCGT
ATGAGCGCCACGTCATCGT
-------------caGcgt

atgngcgccangtcagcgt

func (*Seq) Reverse

func (s *Seq) Reverse()

Reverse reverses the order of letters in the sequence without complementing them.

func (*Seq) Row

func (s *Seq) Row(i int) seq.Sequence

Row returns the sequence represented at row i of the alignment. It panics if i is out of range.

func (*Seq) Rows

func (s *Seq) Rows() int

Rows returns the number of rows in the alignment.

func (*Seq) SetSlice

func (s *Seq) SetSlice(sl alphabet.Slice)

SetSlice sets the sequence data represented by the Seq. SetSlice will panic if sl is not a Columns.

func (*Seq) Slice

func (s *Seq) Slice() alphabet.Slice

Slice returns the sequence data as an alphabet.Slice.

func (*Seq) Start

func (s *Seq) Start() int

Start returns the start position of the sequence in coordinates relative to the sequence location.

func (*Seq) String

func (s *Seq) String() string

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL