align

package
v0.3.0-alpha4 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Dec 13, 2018 License: GPL-2.0 Imports: 12 Imported by: 8

Documentation

Index

Constants

View Source
// Integer constants used by the pairwise aligner.
// NOTE(review): ALIGN_UP/LEFT/DIAG/STOP look like traceback directions of the
// alignment matrix — confirm against the aligner implementation.
const (
	ALIGN_UP = iota
	ALIGN_LEFT
	ALIGN_DIAG
	ALIGN_STOP

	// iota keeps counting within a single const block, so ALIGN_ALGO_SW is 4
	// and ALIGN_ALGO_ATG is 5 (not 0 and 1).
	// NOTE(review): presumably the values expected by the algo argument of
	// NewPwAligner — confirm.
	ALIGN_ALGO_SW = iota
	ALIGN_ALGO_ATG
)
View Source
const (
	// Sequence alphabet identifiers.
	AMINOACIDS = 0 // Amino acid sequence alphabet
	NUCLEOTIDS = 1 // Nucleotide sequence alphabet
	BOTH       = 2 // Could be both
	UNKNOWN    = 3 // Unknown alphabet

	// Special sequence characters (untyped rune constants).
	// NOTE(review): exact meaning of POINT and OTHER is not visible here — confirm.
	GAP   = '-'
	POINT = '.'
	OTHER = '*'

	// NOTE(review): presumably the "any amino acid" / "any nucleotide"
	// wildcard characters — confirm.
	ALL_AMINO = 'X'
	ALL_NUCLE = 'N'

	// Normalization modes for the normalization argument of Alignment.Pssm.
	PSSM_NORM_NONE = 0 // No normalization
	PSSM_NORM_FREQ = 1 // Normalization by freq in the site
	PSSM_NORM_DATA = 2 // Normalization by aa/nt frequency in data
	PSSM_NORM_UNIF = 3 // Normalization by uniform frequency

	// Alignment file format identifiers.
	FORMAT_FASTA   = 0
	FORMAT_PHYLIP  = 1
	FORMAT_NEXUS   = 2
	FORMAT_CLUSTAL = 3

	// Conservation levels of an alignment position.
	// NOTE(review): presumably the values returned by
	// Alignment.SiteConservation — confirm.
	POSITION_IDENTICAL      = 0 // All characters in a position are the same
	POSITION_CONSERVED      = 1 // Same strong group
	POSITION_SEMI_CONSERVED = 2 // Same weak group
	POSITION_NOT_CONSERVED  = 3 // None of the above values
)

Variables

This section is empty.

Functions

func AlphabetFromString added in v0.2.3

func AlphabetFromString(alphabet string) int

func Complement added in v0.3.0

func Complement(seq []rune) (err error)

Complement sequence

func DetectAlphabet

func DetectAlphabet(seq string) int

func NewAlign

func NewAlign(alphabet int) *align

func NewPwAligner added in v0.3.0

func NewPwAligner(seq1, seq2 Sequence, algo int) *pwaligner

func NewSeqBag added in v0.3.0

func NewSeqBag(alphabet int) *seqbag

func NewSequence

func NewSequence(name string, sequence []rune, comment string) *seq

func RandomSequence added in v0.1.3

func RandomSequence(alphabet, length int) ([]rune, error)

func Reverse added in v0.3.0

func Reverse(seq []rune)

Reverses a sequence

Types

type AlignChannel added in v0.2.4

// AlignChannel bundles a channel of Alignment values with an error field.
// NOTE(review): presumably Err carries a failure of whatever produces the
// alignments sent on Achan — confirm when it is set relative to the channel
// being closed.
type AlignChannel struct {
	Achan chan Alignment
	Err   error
}

type Alignment

// Alignment embeds SeqBag and adds operations that apply to aligned
// sequences: per-site statistics, sub-alignment extraction, resampling and
// in-place perturbations such as gaps, mutations and recombination.
type Alignment interface {
	SeqBag
	AddGaps(rate, lenprop float64)
	AvgAllelesPerSite() float64
	BuildBootstrap() Alignment
	CharStatsSite(site int) (map[rune]int, error)
	Clone() (Alignment, error)
	CodonAlign(ntseqs SeqBag) (codonAl *align, err error)
	Concat(Alignment) error                             // Concatenates the given alignment with this alignment
	Entropy(site int, removegaps bool) (float64, error) // Entropy of the given site
	Frameshifts() []struct{ Start, End int }            // Positions of potential frameshifts
	Stops() []int                                       // Positions of potential in-frame stops
	Length() int                                        // Length of the alignment
	Mask(start, length int) error                       // Masks given positions
	MaxCharStats() ([]rune, []int)
	Mutate(rate float64)                                                                        // Adds uniform substitutions in the alignment (~sequencing errors)
	NbVariableSites() int                                                                       // Nb of variable sites
	Pssm(log bool, pseudocount float64, normalization int) (pssm map[rune][]float64, err error) // Normalization: PSSM_NORM_NONE, PSSM_NORM_UNIF, PSSM_NORM_DATA
	Rarefy(nb int, counts map[string]int) (Alignment, error)                                    // Take a new rarefied sample taking into account weights
	RandSubAlign(length int) (Alignment, error)                                                 // Extract a random subalignment with given length from this alignment
	Recombine(rate float64, lenprop float64)
	RemoveGapSeqs(cutoff float64)     // Removes sequences having >= cutoff gaps
	RemoveGapSites(cutoff float64)    // Removes sites having >= cutoff gaps
	Sample(nb int) (Alignment, error) // Generates a sub sample of the sequences
	ShuffleSites(rate float64, roguerate float64, randroguefirst bool) []string
	SimulateRogue(prop float64, proplen float64) ([]string, []string) // Adds "rogue" sequences
	SiteConservation(position int) (int, error)                       // Conservation level of the site; NOTE(review): presumably one of the POSITION_* constants — confirm
	SubAlign(start, length int) (Alignment, error)                    // Extract a subalignment from this alignment
	Swap(rate float64)
	TrimSequences(trimsize int, fromStart bool) error
}

func RandomAlignment added in v0.1.3

func RandomAlignment(alphabet, length, nbseq int) (Alignment, error)

type PairwiseAligner added in v0.3.0

// PairwiseAligner exposes the scoring parameters and the results of a
// pairwise sequence alignment.
// NOTE(review): NewPwAligner returns the unexported *pwaligner, presumably
// the implementation of this interface — confirm.
type PairwiseAligner interface {
	AlignEnds() (int, int)
	AlignStarts() (int, int)
	SetGapOpenScore(open float64)
	SetGapExtendScore(extend float64)
	SetScore(match, mismatch float64)
	MaxScore() float64 // Maximum score of the alignment
	NbMatches() int    // Number of matches
	NbMisMatches() int // Number of mismatches
	NbGaps() int       // Number of gaps
	Length() int       // Length of the alignment
	Alignment() (Alignment, error)
	AlignmentStr() string
}

type PhasedSequence added in v0.3.0

// PhasedSequence is the element type of the channel returned by
// Phaser.Phase.
// NOTE(review): presumably one value is produced per input sequence, with
// Removed set for discarded sequences and Position holding the start
// position in the nucleotide sequence — confirm.
type PhasedSequence struct {
	Err      error
	Removed  bool
	Position int
	// phased nt sequence
	NtSeq Sequence
	// phased aa sequence
	AaSeq Sequence
	// Aligned sequences
	// 1st: best found orf
	// 2nd: sequence
	Ali Alignment
}

type Phaser added in v0.3.0

// Phaser aligns sequences to reference ORFs and trims them to the ORF start.
// Options are set through the Set* methods, then Phase is called; results
// are delivered as PhasedSequence values on the returned channel.
type Phaser interface {
	Phase(orfs, seqs SeqBag) (chan PhasedSequence, error)
	SetLenCutoff(cutoff float64)   // lencutoff used to decide whether an alignment is good enough
	SetMatchCutoff(cutoff float64) // matchcutoff used to decide whether an alignment is good enough
	SetReverse(reverse bool)       // Also consider the reverse strand
	SetCutEnd(cutend bool)         // Also remove sequence ends that do not align with the orf
	SetCpus(cpus int)              // NOTE(review): presumably the number of parallel workers — confirm
	SetTranslate(translate bool)   // Selects between the translated and the nucleotide-only mode
}

* If SetTranslate(true):

Aligns all sequences to the given ORF and trims sequences to the start position. If orf is nil, searches for the longest ORF (in 3 or 6 phases depending on reverse arg) in all sequences.

To do so, Phase() will:

  1. Translate the given ORF into amino acids;
  2. For each sequence of the dataset: translate it in the 3 phases (forward) if reverse is false or 6 phases (forward and reverse) if reverse is true, align it with the translated orf, and take the phase giving the best alignment; If no phase gives a good alignment (>lencutoff * orf length, >matchcutoff matches over the align length and starting at first position of the ORF), then the sequence is discarded;
  3. For each sequence, take the Start corresponding to the Start of the ORF, and remove nucleotides before;
  4. Return the trimmed nucleotidic sequences (phased), the corresponding amino-acid sequences (phasedaa), the positions of starts in the nucleotidic sequences, and the removed sequence names.

If cutend is true, then also remove the end of sequences that do not align with the orf.

It does not modify the input object.

* If SetTranslate(false):

Aligns all sequences to the given ORF and trims sequences to the start position; it does not take into account protein information.

If orf is nil, searches for the longest ORF (in forward only or both strands depending on reverse arg) in all sequences

To do so:

  1. If the alignment is not good (a good alignment being >lencutoff * orf length, >matchcutoff matches over the align length and starting at first position of the ORF), then the sequence is discarded;
  2. For each sequence, take the Start corresponding to the Start of the ORF, and remove nucleotides before;
  3. Return the trimmed nucleotidic sequences (phased), the positions of starts in the nucleotidic sequences, and the removed sequence names.

If cutend is true, then also remove the end of sequences that do not align with the orf.

It does not modify the input object.

func NewPhaser added in v0.3.0

func NewPhaser() Phaser

type SeqBag added in v0.3.0

// SeqBag is a collection of named sequences sharing an alphabet. It is
// embedded by Alignment.
// NOTE(review): Unalign suggests the sequences need not be aligned — confirm.
type SeqBag interface {
	AddSequence(name string, sequence string, comment string) error
	AddSequenceChar(name string, sequence []rune, comment string) error
	AppendSeqIdentifier(identifier string, right bool)
	Alphabet() int
	AlphabetStr() string
	AlphabetCharacters() []rune
	AutoAlphabet() // Detects and sets alphabet automatically for all the sequences
	CharStats() map[rune]int64
	CleanNames()                            // Clean sequence names (newick special char)
	Clear()                                 // Removes all sequences
	CloneSeqBag() (seqs SeqBag, err error)  // Clones the seqbag
	Deduplicate() error                     // Remove duplicate sequences
	GetSequence(name string) (string, bool) // Get a sequence by name
	GetSequenceById(ith int) (string, bool)
	GetSequenceChar(name string) ([]rune, bool)
	GetSequenceCharById(ith int) ([]rune, bool)
	GetSequenceNameById(ith int) (string, bool)
	SetSequenceChar(ithAlign, ithSite int, char rune) error
	Sequence(ith int) (Sequence, bool)
	SequenceByName(name string) (Sequence, bool)
	Identical(SeqBag) bool
	Iterate(it func(name string, sequence string))
	IterateChar(it func(name string, sequence []rune))
	IterateAll(it func(name string, sequence []rune, comment string))
	Sequences() []Sequence
	SequencesChan() chan Sequence
	LongestORF(reverse bool) (orf Sequence, err error)
	MaxNameLength() int // Maximum sequence name length
	NbSequences() int
	Rename(namemap map[string]string)
	RenameRegexp(regex, replace string, namemap map[string]string) error
	ShuffleSequences()               // Shuffle sequence order
	String() string                  // Raw string representation (just write all sequences)
	Translate(phase int) (err error) // Translates nt sequence in aa
	TrimNames(namemap map[string]string, size int) error
	TrimNamesAuto(namemap map[string]string, curid *int) error
	Sort() // Sorts the sequences by name
	Unalign() SeqBag
}

type Sequence

// Sequence is a single named sequence with an optional comment. It gives
// access to the characters as a string or as runes, and supports
// reverse/complement, translation, ORF detection and cloning.
type Sequence interface {
	Sequence() string
	SequenceChar() []rune
	CharAt(int) rune
	Name() string
	SetName(name string)
	Comment() string
	Length() int
	LongestORF() (start, end int) // Detects the longest ORF in forward strand only
	Reverse()
	Complement() error                     // Returns an error if not nucleotide sequence
	Translate(phase int) (Sequence, error) // Translates the sequence using the standard code
	DetectAlphabet() int                   // Try to detect alphabet (nt or aa)
	Clone() Sequence
}

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL