align

package

v0.3.1-alpha2 Latest Latest Go to latest Published: May 6, 2019 License: GPL-2.0 Imports: 13 Imported by: 8

Details

Valid go.mod file

The Go module system was introduced in Go 1.11 and is the official dependency management solution for Go.
Redistributable license

Redistributable licenses place minimal restrictions on how software can be used, modified, and redistributed.
Tagged version

Modules with tagged versions give importers more predictable builds.
Stable version

When a project reaches major version v1 it is considered stable.
Learn more about best practices

Repository

github.com/evolbioinfo/goalign

Documentation ¶

Index ¶

Constants
func AA2Index(aa rune) (idx int, err error)
func AlphabetFromString(alphabet string) int
func Complement(seq []rune) (err error)
func DetectAlphabet(seq string) int
func Index2AA(index int) (aa rune, err error)
func Index2Nt(index int) (nt rune, err error)
func NewAlign(alphabet int) *align
func NewPwAligner(seq1, seq2 Sequence, algo int) *pwaligner
func NewSeqBag(alphabet int) *seqbag
func NewSequence(name string, sequence []rune, comment string) *seq
func Nt2Index(nt rune) (idx int, err error)
func RandomSequence(alphabet, length int) ([]rune, error)
func Reverse(seq []rune)
type AlignChannel
type Alignment
- func RandomAlignment(alphabet, length, nbseq int) (Alignment, error)
type PairwiseAligner
type PhasedSequence
type Phaser
- func NewPhaser() Phaser
type SeqBag
type Sequence

Constants ¶

View Source

// Alignment constants, with their values written out explicitly.
// The ALIGN_ALGO_* values follow directly after ALIGN_STOP (4 and 5);
// they do not restart at 0.
const (
	// NOTE(review): presumably the traceback moves of the pairwise aligner — confirm.
	ALIGN_UP   = 0
	ALIGN_LEFT = 1
	ALIGN_DIAG = 2
	ALIGN_STOP = 3

	// NOTE(review): presumably the values accepted as the algo argument of NewPwAligner — confirm.
	ALIGN_ALGO_SW  = 4
	ALIGN_ALGO_ATG = 5
)

View Source

// Sequence alphabets.
const (
	// AMINOACIDS (0) is the amino acid sequence alphabet.
	AMINOACIDS = iota
	// NUCLEOTIDS (1) is the nucleotide sequence alphabet.
	NUCLEOTIDS
	// BOTH (2) means the alphabet could be either of the two.
	BOTH
	// UNKNOWN (3) is an unknown alphabet.
	UNKNOWN
)

// Special characters.
const (
	GAP   = '-'
	POINT = '.'
	OTHER = '*'

	ALL_AMINO = 'X'
	ALL_NUCLE = 'N'
)

// PSSM normalization modes.
const (
	// PSSM_NORM_NONE (0): no normalization.
	PSSM_NORM_NONE = iota
	// PSSM_NORM_FREQ (1): normalization by frequency in the site.
	PSSM_NORM_FREQ
	// PSSM_NORM_DATA (2): normalization by aa/nt frequency in the data.
	PSSM_NORM_DATA
	// PSSM_NORM_UNIF (3): normalization by uniform frequency.
	PSSM_NORM_UNIF
	// PSSM_NORM_LOGO (4): normalization like LOGO:
	// v(site) = freq*(log2(alphabet)-H(site)-pseudocount)
	// NOTE(review): position of the closing parenthesis is assumed — confirm.
	PSSM_NORM_LOGO
)

// Alignment file formats.
const (
	FORMAT_FASTA = iota
	FORMAT_PHYLIP
	FORMAT_NEXUS
	FORMAT_CLUSTAL
)

// Conservation status of an alignment position.
const (
	// POSITION_IDENTICAL (0): all characters in the position are the same.
	POSITION_IDENTICAL = iota
	// POSITION_CONSERVED (1): same strong group.
	POSITION_CONSERVED
	// POSITION_SEMI_CONSERVED (2): same weak group.
	POSITION_SEMI_CONSERVED
	// POSITION_NOT_CONSERVED (3): none of the above.
	POSITION_NOT_CONSERVED
)

// Genetic codes.
const (
	// GENETIC_CODE_STANDARD (0): standard genetic code.
	GENETIC_CODE_STANDARD = iota
	// GENETIC_CODE_VETEBRATE_MITO (1): vertebrate mitochondrial genetic code.
	GENETIC_CODE_VETEBRATE_MITO
	// GENETIC_CODE_INVETEBRATE_MITO (2): invertebrate mitochondrial genetic code.
	GENETIC_CODE_INVETEBRATE_MITO
)

Variables ¶

This section is empty.

Functions ¶

func AA2Index ¶ added in v0.3.1

func AA2Index(aa rune) (idx int, err error)

func AlphabetFromString ¶ added in v0.2.3

func AlphabetFromString(alphabet string) int

func Complement ¶ added in v0.3.0

func Complement(seq []rune) (err error)

Complement sequence

func DetectAlphabet ¶

func DetectAlphabet(seq string) int

func Index2AA ¶ added in v0.3.1

func Index2AA(index int) (aa rune, err error)

func Index2Nt ¶ added in v0.3.1

func Index2Nt(index int) (nt rune, err error)

Returns the nucleotide corresponding to each index: 0=A 1=C 2=G 3=T

func NewAlign ¶

func NewAlign(alphabet int) *align

func NewPwAligner ¶ added in v0.3.0

func NewPwAligner(seq1, seq2 Sequence, algo int) *pwaligner

func NewSeqBag ¶ added in v0.3.0

func NewSeqBag(alphabet int) *seqbag

func NewSequence ¶

func NewSequence(name string, sequence []rune, comment string) *seq

func Nt2Index ¶ added in v0.3.1

func Nt2Index(nt rune) (idx int, err error)

Returns the index of each nucleotide: A=0 C=1 G=2 T=3

func RandomSequence ¶ added in v0.1.3

func RandomSequence(alphabet, length int) ([]rune, error)

func Reverse ¶ added in v0.3.0

func Reverse(seq []rune)

Reverses a sequence

Types ¶

type AlignChannel ¶ added in v0.2.4

// AlignChannel bundles a channel of Alignment values with an error value.
// NOTE(review): presumably Err reports a failure encountered while producing
// the alignments sent on Achan — confirm against the code that fills it.
type AlignChannel struct {
	// Channel carrying Alignment values
	Achan chan Alignment
	// Error associated with the channel
	Err   error
}

type Alignment ¶

// Alignment is a SeqBag (embedded below) extended with operations that apply
// to aligned sequences: per-site statistics, sub-sampling, masking,
// simulation of noise, etc.
type Alignment interface {
	SeqBag
	AddGaps(rate, lenprop float64)
	AvgAllelesPerSite() float64
	BuildBootstrap() Alignment
	CharStatsSite(site int) (map[rune]int, error)
	Clone() (Alignment, error)
	CodonAlign(ntseqs SeqBag) (codonAl *align, err error)
	// Removes identical patterns/sites and returns the number of occurrences
	// of each pattern (order of patterns/sites may have changed)
	Compress() []int
	// Concatenates the given alignment with this alignment
	Concat(Alignment) error
	// Compares all sequences to the first one and counts all differences per sequence
	//
	// - alldiffs: The set of all differences that have been seen at least once
	// - diffs   : The number of occurrences of each difference, for each sequence
	//             Sequences are ordered as the original alignment. Differences are
	//             written as REFNEW, ex: diffs["AC"]=12 .
	CountDifferences() (alldiffs []string, diffs []map[string]int)
	// Compares all sequences to the first one and replaces identical characters with .
	DiffWithFirst()
	Entropy(site int, removegaps bool) (float64, error) // Entropy of the given site
	// Positions of potential frameshifts.
	// If startingGapsAsIncomplete is true, then considers gaps at the beginning
	// as an incomplete sequence, then takes the right phase
	Frameshifts(startingGapsAsIncomplete bool) []struct{ Start, End int }
	// Positions of potential stops in frame.
	// If startingGapsAsIncomplete is true, then considers gaps at the beginning
	// as an incomplete sequence, then takes the right phase
	Stops(startingGapsAsIncomplete bool, geneticode int) (stops []int, err error)
	Length() int                  // Length of the alignment
	Mask(start, length int) error // Masks given positions
	MaxCharStats() ([]rune, []int)
	Mutate(rate float64)                                                                        // Adds uniform substitutions in the alignment (~sequencing errors)
	NbVariableSites() int                                                                       // Number of variable sites
	Pssm(log bool, pseudocount float64, normalization int) (pssm map[rune][]float64, err error) // Normalization: PSSM_NORM_NONE, PSSM_NORM_UNIF, PSSM_NORM_DATA
	Rarefy(nb int, counts map[string]int) (Alignment, error)                                    // Takes a new rarefied sample taking into account weights
	RandSubAlign(length int) (Alignment, error)                                                 // Extracts a random subalignment with given length from this alignment
	Recombine(rate float64, lenprop float64)
	RemoveGapSeqs(cutoff float64)             // Removes sequences having >= cutoff gaps
	RemoveGapSites(cutoff float64, ends bool) // Removes sites having >= cutoff gaps
	// Replaces match characters (.) by their corresponding characters on the first sequence
	ReplaceMatchChars()
	Sample(nb int) (Alignment, error) // Generates a sub sample of the sequences
	ShuffleSites(rate float64, roguerate float64, randroguefirst bool) []string
	SimulateRogue(prop float64, proplen float64) ([]string, []string) // Adds "rogue" sequences
	SiteConservation(position int) (int, error)                       // Conservation status of the site. NOTE(review): presumably one of the POSITION_* constants — confirm
	SubAlign(start, length int) (Alignment, error)                    // Extracts a subalignment from this alignment
	Swap(rate float64)
	TrimSequences(trimsize int, fromStart bool) error
}

func RandomAlignment ¶ added in v0.1.3

func RandomAlignment(alphabet, length, nbseq int) (Alignment, error)

type PairwiseAligner ¶ added in v0.3.0

// PairwiseAligner exposes the configuration (gap and match/mismatch scores)
// and the results (aligned sequences, score, counts) of a pairwise alignment.
type PairwiseAligner interface {
	AlignEnds() (int, int)
	AlignStarts() (int, int)
	Seq1Ali() []rune
	Seq2Ali() []rune
	SetGapOpenScore(open float64)
	SetGapExtendScore(extend float64)
	SetScore(match, mismatch float64)
	MaxScore() float64 // Maximum score of the alignment
	NbMatches() int    // Number of matches
	NbMisMatches() int // Number of mismatches
	NbGaps() int       // Number of gaps
	Length() int       // Length of the alignment
	Alignment() (Alignment, error)
	AlignmentStr() string
}

type PhasedSequence ¶ added in v0.3.0

// PhasedSequence is the element type of the channel returned by Phaser.Phase.
type PhasedSequence struct {
	// NOTE(review): presumably the error encountered while phasing this sequence — confirm
	Err      error
	// NOTE(review): presumably true when the sequence was discarded — confirm
	Removed  bool
	// NOTE(review): presumably the start position in the nucleotide sequence — confirm
	Position int
	// Phased nt sequence
	NtSeq Sequence
	// Phased nt sequence
	// with first nt corresponding to the
	// first position of the aa codon
	CodonSeq Sequence
	// Phased aa sequence
	AaSeq Sequence
	// Aligned sequences
	// 1st: best found orf
	// 2nd: sequence
	Ali Alignment
}

type Phaser ¶ added in v0.3.0

// Phaser phases sequences against ORFs: Phase takes a bag of ORFs and a bag
// of sequences and returns a channel of PhasedSequence. The Set* methods
// configure the cutoffs, strand handling, end cutting, number of CPUs,
// translation and alignment scores.
type Phaser interface {
	Phase(orfs, seqs SeqBag) (chan PhasedSequence, error)
	SetLenCutoff(cutoff float64)
	SetMatchCutoff(cutoff float64)
	SetReverse(reverse bool)
	SetCutEnd(cutend bool)
	SetCpus(cpus int)
	SetTranslate(translate bool, geneticcode int) (err error)
	SetAlignScores(match, mismatch float64)
}

* If SetTranslate(true):

Aligns all sequences to the given ORF and trims sequences to the start position. If orf is nil, searches for the longest ORF (in 3 or 6 phases depending on reverse arg) in all sequences.

To do so, Phase() will:

Translate the given ORF in aminoacids;
For each sequence of the dataset: translate it in the 3 phases (forward) if reverse is false or 6 phases (forward and reverse) if reverse is true, align it with the translated orf, and take the phase giving the best alignment; If no phase gives a good alignment (>lencutoff * orf length, >matchcutoff matches over the align length and starting at first position of the ORF), then the sequence is discarded;
For each sequence, take the Start corresponding to the Start of the ORF, and remove nucleotides before;
Return the trimmed nucleotidic sequences (phased), the corresponding amino-acid sequences (phasedaa), the positions of starts in the nucleotidic sequences, and the removed sequence names.

If cutend is true, then also remove the end of sequences that do not align with orf.

It does not modify the input object.

* If SetTranslate(false):

Aligns all sequences to the given ORF and trims sequences to the start position. It does not take into account protein information.

If orf is nil, searches for the longest ORF (in forward only or both strands depending on reverse arg) in all sequences.

To do so:

1. If the alignment is bad (i.e. it does not satisfy: >lencutoff * orf length, >matchcutoff matches over the align length and starting at first position of the ORF), then the sequence is discarded;
2. For each sequence, take the Start corresponding to the Start of the ORF, and remove nucleotides before;
3. Return the trimmed nucleotidic sequences (phased), the positions of starts in the nucleotidic sequences, and the removed sequence names.

If cutend is true, then also remove the end of sequences that do not align with orf.

It does not modify the input object.

func NewPhaser ¶ added in v0.3.0

func NewPhaser() Phaser

type SeqBag ¶ added in v0.3.0

// SeqBag is a collection of named sequences sharing an alphabet. It offers
// sequence insertion, lookup by name or index, iteration, renaming and
// other collection-wide operations.
type SeqBag interface {
	AddSequence(name string, sequence string, comment string) error
	AddSequenceChar(name string, sequence []rune, comment string) error
	AppendSeqIdentifier(identifier string, right bool)
	Alphabet() int
	AlphabetStr() string
	AlphabetCharacters() []rune
	AlphabetCharToIndex(c rune) int // Returns index of the character (nt or aa) in the AlphabetCharacters() array
	AutoAlphabet()                  // Detects and sets alphabet automatically for all the sequences
	CharStats() map[rune]int64
	CleanNames(namemap map[string]string)   // Cleans sequence names (newick special char)
	Clear()                                 // Removes all sequences
	CloneSeqBag() (seqs SeqBag, err error)  // Clones the seqbag
	Deduplicate() error                     // Removes duplicate sequences
	GetSequence(name string) (string, bool) // Gets a sequence by name
	GetSequenceById(ith int) (string, bool)
	GetSequenceChar(name string) ([]rune, bool)
	GetSequenceCharById(ith int) ([]rune, bool)
	GetSequenceNameById(ith int) (string, bool)
	SetSequenceChar(ithAlign, ithSite int, char rune) error
	Sequence(ith int) (Sequence, bool)
	SequenceByName(name string) (Sequence, bool)
	Identical(SeqBag) bool
	Iterate(it func(name string, sequence string))
	IterateChar(it func(name string, sequence []rune))
	IterateAll(it func(name string, sequence []rune, comment string))
	Sequences() []Sequence
	SequencesChan() chan Sequence
	LongestORF(reverse bool) (orf Sequence, err error)
	MaxNameLength() int // Maximum sequence name length
	NbSequences() int
	Rename(namemap map[string]string)
	RenameRegexp(regex, replace string, namemap map[string]string) error
	Replace(old, new string, regex bool) error        // Replaces old string with new string in sequences of the alignment
	ShuffleSequences()                                // Shuffles sequence order
	String() string                                   // Raw string representation (just write all sequences)
	Translate(phase int, geneticcode int) (err error) // Translates nt sequence in aa
	TrimNames(namemap map[string]string, size int) error
	TrimNamesAuto(namemap map[string]string, curid *int) error
	Sort() // Sorts the sequences by name
	Unalign() SeqBag
}

type Sequence ¶

// Sequence is a single named sequence with an optional comment. It gives
// access to the characters as a string or as runes, and offers in-place
// transformations (Reverse, Complement) as well as translation and cloning.
type Sequence interface {
	Sequence() string
	SequenceChar() []rune
	CharAt(int) rune
	Name() string
	SetName(name string)
	Comment() string
	Length() int
	LongestORF() (start, end int) // Detects the longest ORF in forward strand only
	Reverse()
	Complement() error                                      // Returns an error if not a nucleotide sequence
	Translate(phase int, geneticcode int) (Sequence, error) // Translates the sequence using the given code
	DetectAlphabet() int                                    // Tries to detect the alphabet (nt or aa)
	Clone() Sequence
}

Source Files ¶

View all Source files

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL