Documentation ¶
Index ¶
- Constants
- func AA2Index(aa rune) (idx int, err error)
- func AlphabetFromString(alphabet string) int
- func Complement(seq []rune) (err error)
- func DetectAlphabet(seq string) int
- func Index2AA(index int) (aa rune, err error)
- func Index2Nt(index int) (nt rune, err error)
- func NewAlign(alphabet int) *align
- func NewPwAligner(seq1, seq2 Sequence, algo int) *pwaligner
- func NewSeqBag(alphabet int) *seqbag
- func NewSequence(name string, sequence []rune, comment string) *seq
- func Nt2Index(nt rune) (idx int, err error)
- func RandomSequence(alphabet, length int) ([]rune, error)
- func Reverse(seq []rune)
- type AlignChannel
- type Alignment
- type PairwiseAligner
- type PartitionSet
- func (ps *PartitionSet) AddRange(partName, modelName string, start, end, modulo int) (err error)
- func (ps *PartitionSet) AliLength() int
- func (ps *PartitionSet) CheckSites() (err error)
- func (ps *PartitionSet) ModeleName(code int) string
- func (ps *PartitionSet) NPartitions() int
- func (ps *PartitionSet) Partition(position int) int
- func (ps *PartitionSet) PartitionName(code int) string
- func (ps *PartitionSet) String() string
- type PhasedSequence
- type Phaser
- type SeqBag
- type Sequence
Constants ¶
const ( ALIGN_UP = iota ALIGN_LEFT ALIGN_DIAG ALIGN_STOP ALIGN_ALGO_SW = iota ALIGN_ALGO_ATG )
const ( AMINOACIDS = 0 // Amino acid sequence alphabet NUCLEOTIDS = 1 // Nucleotid sequence alphabet BOTH = 2 // Could be both UNKNOWN = 3 // Unkown alphabet GAP = '-' POINT = '.' OTHER = '*' ALL_AMINO = 'X' ALL_NUCLE = 'N' PSSM_NORM_NONE = 0 // No normalization PSSM_NORM_FREQ = 1 // Normalization by freq in the site PSSM_NORM_DATA = 2 // Normalization by aa/nt frequency in data PSSM_NORM_UNIF = 3 // Normalization by uniform frequency PSSM_NORM_LOGO = 4 // Normalization like LOGO : v(site)=freq*(log2(alphabet)-H(site)-pseudocount FORMAT_FASTA = 0 FORMAT_PHYLIP = 1 FORMAT_NEXUS = 2 FORMAT_CLUSTAL = 3 POSITION_IDENTICAL = 0 // All characters in a position are the same POSITION_CONSERVED = 1 // Same strong group POSITION_SEMI_CONSERVED = 2 // Same weak group POSITION_NOT_CONSERVED = 3 // None of the above values GENETIC_CODE_STANDARD = 0 // Standard genetic code GENETIC_CODE_VETEBRATE_MITO = 1 // Vertebrate mitochondrial genetic code GENETIC_CODE_INVETEBRATE_MITO = 2 // Invertebrate mitochondrial genetic code )
Variables ¶
This section is empty.
Functions ¶
func AlphabetFromString ¶ added in v0.2.3
func DetectAlphabet ¶
func NewPwAligner ¶ added in v0.3.0
func NewSequence ¶
func RandomSequence ¶ added in v0.1.3
Types ¶
type AlignChannel ¶ added in v0.2.4
type Alignment ¶
type Alignment interface { SeqBag AddGaps(rate, lenprop float64) AvgAllelesPerSite() float64 BuildBootstrap() Alignment // Bootstrap alignment CharStatsSite(site int) (map[rune]int, error) Clone() (Alignment, error) CodonAlign(ntseqs SeqBag) (codonAl *align, err error) // Remove identical patterns/sites and return number of occurence // of each pattern (order of patterns/sites may have changed) Compress() []int // concatenates the given alignment with this alignment Concat(Alignment) error // Compares all sequences to the first one and counts all differences per sequence // // - alldiffs: The set of all differences that have been seen at least once // - diffs : The number of occurences of each difference, for each sequence // Sequences are ordered as the original alignment. Differences are // written as REFNEW, ex: diffs["AC"]=12 . CountDifferences() (alldiffs []string, diffs []map[string]int) // Compares all sequences to the first one and replace identical characters with . DiffWithFirst() Entropy(site int, removegaps bool) (float64, error) // Entropy of the given site // Positions of potential frameshifts // if startinggapsasincomplete is true, then considers gaps as the beginning // as incomplete sequence, then take the right phase Frameshifts(startingGapsAsIncomplete bool) []struct{ Start, End int } // Positions of potential stop in frame // if startinggapsasincomplete is true, then considers gaps as the beginning // as incomplete sequence, then take the right phase Stops(startingGapsAsIncomplete bool, geneticode int) (stops []int, err error) Length() int // Length of the alignment Mask(start, length int) error // Masks given positions MaxCharStats() ([]rune, []int) Mutate(rate float64) // Adds uniform substitutions in the alignment (~sequencing errors) NbVariableSites() int // Nb of variable sites Pssm(log bool, pseudocount float64, normalization int) (pssm map[rune][]float64, err error) // Normalization: PSSM_NORM_NONE, PSSM_NORM_UNIF, PSSM_NORM_DATA 
Rarefy(nb int, counts map[string]int) (Alignment, error) // Take a new rarefied sample taking into accounts weights RandSubAlign(length int) (Alignment, error) // Extract a random subalignment with given length from this alignment Recombine(rate float64, lenprop float64) RemoveGapSeqs(cutoff float64) // Removes sequences having >= cutoff gaps RemoveGapSites(cutoff float64, ends bool) // Removes sites having >= cutoff gaps // Replaces match characters (.) by their corresponding characters on the first sequence ReplaceMatchChars() Sample(nb int) (Alignment, error) // generate a sub sample of the sequences ShuffleSites(rate float64, roguerate float64, randroguefirst bool) []string SimulateRogue(prop float64, proplen float64) ([]string, []string) // add "rogue" sequences SiteConservation(position int) (int, error) // If the site is conserved: Split(part *PartitionSet) ([]Alignment, error) //Splits the alignment given the paritions in argument SubAlign(start, length int) (Alignment, error) // Extract a subalignment from this alignment Swap(rate float64) TrimSequences(trimsize int, fromStart bool) error }
func RandomAlignment ¶ added in v0.1.3
type PairwiseAligner ¶ added in v0.3.0
type PairwiseAligner interface { AlignEnds() (int, int) AlignStarts() (int, int) Seq1Ali() []rune Seq2Ali() []rune SetGapOpenScore(open float64) SetGapExtendScore(extend float64) SetScore(match, mismatch float64) MaxScore() float64 // Maximum score of the alignment NbMatches() int // Number of matches NbMisMatches() int // Number of mismatches NbGaps() int // Nuber of gaps Length() int // Length of the alignment Alignment() (Alignment, error) AlignmentStr() string }
type PartitionSet ¶ added in v0.3.2
type PartitionSet struct {
// contains filtered or unexported fields
}
func NewPartitionSet ¶ added in v0.3.2
func NewPartitionSet(alignmentLength int) (ps *PartitionSet)
func (*PartitionSet) AddRange ¶ added in v0.3.2
func (ps *PartitionSet) AddRange(partName, modelName string, start, end, modulo int) (err error)
func (*PartitionSet) AliLength ¶ added in v0.3.2
func (ps *PartitionSet) AliLength() int
Returns the length of the alignment.
func (*PartitionSet) CheckSites ¶ added in v0.3.2
func (ps *PartitionSet) CheckSites() (err error)
Returns an error if not all sites are in a partition.
func (*PartitionSet) ModeleName ¶ added in v0.3.2
func (ps *PartitionSet) ModeleName(code int) string
Returns the name of the model associated with the given index. If the code does not exist, then returns "".
func (*PartitionSet) NPartitions ¶ added in v0.3.2
func (ps *PartitionSet) NPartitions() int
func (*PartitionSet) Partition ¶ added in v0.3.2
func (ps *PartitionSet) Partition(position int) int
Returns the partition code associated with the given position.
If the position is outside the alignment, then returns -1.
func (*PartitionSet) PartitionName ¶ added in v0.3.2
func (ps *PartitionSet) PartitionName(code int) string
Returns the name of the partition associated with the given index. If the code does not exist, then returns "".
func (*PartitionSet) String ¶ added in v0.3.2
func (ps *PartitionSet) String() string
type PhasedSequence ¶ added in v0.3.0
type PhasedSequence struct { Err error Removed bool Position int // phased nt sequence NtSeq Sequence // phased nt sequence // with first nt corresponding // first position of aa codon CodonSeq Sequence // phased aa sequence AaSeq Sequence // Aligned sequences // 1st: best found orf // 2nd: sequence Ali Alignment }
type Phaser ¶ added in v0.3.0
type Phaser interface { Phase(orfs, seqs SeqBag) (chan PhasedSequence, error) SetLenCutoff(cutoff float64) SetMatchCutoff(cutoff float64) SetReverse(reverse bool) SetCutEnd(cutend bool) SetCpus(cpus int) SetTranslate(translate bool, geneticcode int) (err error) SetAlignScores(match, mismatch float64) SetGapOpen(float64) SetGapExtend(float64) }
* If SetTranslate(true):
Aligns all sequences to the given ORF and trims sequences to the start position. If orf is nil, searches for the longest ORF (in 3 or 6 phases depending on reverse arg) in all sequences.
To do so, Phase() will:
- Translate the given ORF in aminoacids;
- For each sequence of the dataset: translate it in the 3 phases (forward) if reverse is false or 6 phases (forward and reverse) if reverse is true, align it with the translated orf, and take the phase giving the best alignment; If no phase gives a good alignment (>lencutoff * orf length, >matchcutoff matches over the align length and starting at first position of the ORF), then the sequence is discarded;
- For each sequence, take the Start corresponding to the Start of the ORF, and remove nucleotides before;
- Return the trimmed nucleotidic sequences (phased), the corresponding amino-acid sequences (phasedaa), the positions of starts in the nucleotidic sequences, and the removed sequence names.
If cutend is true, then also remove the end of sequences that do not align with orf.
It does not modify the input object.
* If SetTranslate(false):
Aligns all sequences to the given ORF and trims sequences to the start position. It does not take into account protein information.
If orf is nil, searches for the longest ORF (in forward only or both strands depending on reverse arg) in all sequences.
To do so:
1. If the alignment is bad (i.e. it does not satisfy: >lencutoff * orf length, >matchcutoff matches over the align length and starting at first position of the ORF), then the sequence is discarded;
2. For each sequence, take the Start corresponding to the Start of the ORF, and remove nucleotides before;
3. Return the trimmed nucleotidic sequences (phased), the positions of starts in the nucleotidic sequences, and the removed sequence names.
If cutend is true, then also remove the end of sequences that do not align with orf.
It does not modify the input object.
type SeqBag ¶ added in v0.3.0
type SeqBag interface { AddSequence(name string, sequence string, comment string) error AddSequenceChar(name string, sequence []rune, comment string) error AppendSeqIdentifier(identifier string, right bool) Alphabet() int AlphabetStr() string AlphabetCharacters() []rune AlphabetCharToIndex(c rune) int // Returns index of the character (nt or aa) in the AlphabetCharacters() array AutoAlphabet() // detects and sets alphabet automatically for all the sequences CharStats() map[rune]int64 CleanNames(namemap map[string]string) // Clean sequence names (newick special char) Clear() // Removes all sequences CloneSeqBag() (seqs SeqBag, err error) // Clones the seqqbag Deduplicate() (identical [][]string, err error) // Remove duplicate sequences GetSequence(name string) (string, bool) // Get a sequence by names GetSequenceById(ith int) (string, bool) GetSequenceChar(name string) ([]rune, bool) GetSequenceCharById(ith int) ([]rune, bool) GetSequenceNameById(ith int) (string, bool) SetSequenceChar(ithAlign, ithSite int, char rune) error IgnoreIdentical(bool) // if true, then it won't add the sequence if a sequence with the same name AND same sequence exists Sequence(ith int) (Sequence, bool) SequenceByName(name string) (Sequence, bool) Identical(SeqBag) bool Iterate(it func(name string, sequence string)) IterateChar(it func(name string, sequence []rune)) IterateAll(it func(name string, sequence []rune, comment string)) Sequences() []Sequence SequencesChan() chan Sequence LongestORF(reverse bool) (orf Sequence, err error) MaxNameLength() int // maximum sequence name length NbSequences() int Rename(namemap map[string]string) RenameRegexp(regex, replace string, namemap map[string]string) error Replace(old, new string, regex bool) error // Replaces old string with new string in sequences of the alignment ShuffleSequences() // Shuffle sequence order String() string // Raw string representation (just write all sequences) Translate(phase int, geneticcode int) (err error) // Translates 
nt sequence in aa TrimNames(namemap map[string]string, size int) error TrimNamesAuto(namemap map[string]string, curid *int) error Sort() // Sorts the sequences by name Unalign() SeqBag }
type Sequence ¶
type Sequence interface { Sequence() string SequenceChar() []rune SameSequence([]rune) bool CharAt(int) rune Name() string SetName(name string) Comment() string Length() int LongestORF() (start, end int) // Detects the longest ORF in forward strand only Reverse() Complement() error // Returns an error if not nucleotide sequence Translate(phase int, geneticcode int) (Sequence, error) // Translates the sequence using the given code DetectAlphabet() int // Try to detect alphabet (nt or aa) Clone() Sequence }