Documentation ¶
Index ¶
- Constants
- Variables
- func CalcSignatureSize(numElements uint64, numHashes int, falsePositiveRate float64) uint64
- func Execute()
- type IndexQuery
- type Match
- type Matches
- type Meta
- type Name2Idx
- type Options
- type Query
- type QueryResult
- type SearchOptions
- type SortByQCov
- type SortBySum
- type SortByTCov
- type UnikFileInfo
- type UnikFileInfoGroup
- type UnikFileInfoGroups
- type UnikFileInfos
- type UnikFileInfosByName
- type UnikIndex
- type UnikIndexDB
- type UnikIndexDBInfo
- type UnikIndexDBSearchEngine
Constants ¶
const PosPopCountBufSize = 128
PosPopCountBufSize defines the buffer size of byte slice feeding to pospopcount (github.com/clausecker/pospop).
Theoretically, a size >240 is better, but in this scenario we first need to transpose the signature matrix, which is the performance bottleneck. The column size of the matrix is fixed, so we must control the row size to balance the time spent on matrix transposition and pospopcount.
128 is the best value for my machine (AMD Ryzen 2700X).
const UnikIndexDBVersion uint8 = 4
UnikIndexDBVersion is the version of database.
Variables ¶
var BufferSize = 65536 //os.Getpagesize()
BufferSize is the size of the buffer.
var ErrVersionMismatch = errors.New("kmcp/index: version mismatch")
ErrVersionMismatch indicates mismatched version
var RootCmd = &cobra.Command{ Use: "kmcp", Short: "Kmer-based Metagenomics Classification and Profilling", Long: fmt.Sprintf(` Program: kmcp (Kmer-based Metagenomics Classification and Profiling) Version: v%s Documents: https://shenwei356.github.io/kmcp Source code: https://github.com/shenwei356/kmcp kmcp is a tool for metagenomic classification and profiling. `, VERSION), }
RootCmd represents the base command when called without any subcommands
var VERSION = "0.2.0"
VERSION is the version
Functions ¶
func CalcSignatureSize ¶
CalcSignatureSize is from https://github.com/bingmann/cobs/blob/master/cobs/util/calc_signature_size.cpp, but we round up to 2^n.
def roundup(x):
    x -= 1
    x |= x >> 1
    x |= x >> 2
    x |= x >> 4
    x |= x >> 8
    x |= x >> 16
    x |= x >> 32
    return (x | x>>64) + 1
f=lambda ne,nh,fpr: math.ceil(-nh/(math.log(1-math.pow(fpr,1/nh)))*ne)
roundup(f(300000, 1, 0.25))
Types ¶
type IndexQuery ¶
type IndexQuery struct { // Kmers []uint64 Hashes [][]uint64 // related to database Ch chan []Match // result channel }
IndexQuery is a query sent to multiple indices of a database.
type Match ¶
type Match struct { Target []string // target name TargetIdx []uint32 NumKmers int // matched k-mers QCov float64 // coverage of query TCov float64 // coverage of target }
Match is the struct of matching detail.
type Matches ¶
type Matches []Match
Matches is a list of Match values, used for sorting.
type Meta ¶
type Meta struct { SeqID string `json:"id"` // sequence ID FragIdx uint32 `json:"idx"` // sequence location index Syncmer bool `json:"sm"` // syncmer SyncmerS int `json:"sm-s"` Minimizer bool `json:"mm"` // minimizer MinimizerW int `json:"mm-w"` SplitSeq bool `json:"sp"` // split sequence SplitSize int `json:"sp-s"` SplitOverlap int `json:"sp-o"` }
Meta contains some meta information
type Query ¶
type Query struct { Idx uint64 // id for keep output in order ID []byte Seq *seq.Seq Ch chan QueryResult // result channel }
Query stands for a query sequence.
type QueryResult ¶
type QueryResult struct { QueryIdx uint64 // id for keep output in order QueryID []byte QueryLen int DBId int // id of database, for getting database name with few space FPR float64 // fpr, p is related to database NumKmers int // number of k-mers Matches []Match // all matches }
QueryResult is the search result of a query sequence.
type SearchOptions ¶
type SearchOptions struct { UseMMap bool Threads int KeepUnmatched bool TopN int SortBy string MinMatched int MinQueryCov float64 MinTargetCov float64 LoadDefaultNameMap bool }
SearchOptions defines options for searching
type UnikFileInfo ¶
UnikFileInfo stores basic info of a .unik file.
func (UnikFileInfo) String ¶
func (i UnikFileInfo) String() string
type UnikFileInfoGroup ¶
type UnikFileInfoGroup struct { Infos []UnikFileInfo Kmers uint64 }
UnikFileInfoGroup represents a slice of UnikFileInfos
func (UnikFileInfoGroup) String ¶
func (i UnikFileInfoGroup) String() string
type UnikFileInfoGroups ¶
type UnikFileInfoGroups []UnikFileInfoGroup
UnikFileInfoGroups is just a slice of UnikFileInfoGroup
func (UnikFileInfoGroups) Len ¶
func (l UnikFileInfoGroups) Len() int
func (UnikFileInfoGroups) Swap ¶
func (l UnikFileInfoGroups) Swap(i int, j int)
type UnikFileInfos ¶
type UnikFileInfos []UnikFileInfo
UnikFileInfos is a list of UnikFileInfo.
func (UnikFileInfos) Len ¶
func (l UnikFileInfos) Len() int
func (UnikFileInfos) Swap ¶
func (l UnikFileInfos) Swap(i int, j int)
type UnikFileInfosByName ¶
type UnikFileInfosByName []UnikFileInfo
UnikFileInfosByName is used to sort infos by name and indices
func (UnikFileInfosByName) Len ¶
func (l UnikFileInfosByName) Len() int
func (UnikFileInfosByName) Swap ¶
func (l UnikFileInfosByName) Swap(i int, j int)
type UnikIndex ¶
type UnikIndex struct { Options SearchOptions InCh chan IndexQuery Path string Header index.Header // contains filtered or unexported fields }
UnikIndex defines a unik index struct.
func NewUnixIndex ¶
func NewUnixIndex(file string, opt SearchOptions) (*UnikIndex, error)
NewUnixIndex creates an index from a file.
type UnikIndexDB ¶
type UnikIndexDB struct { Options SearchOptions DBId int // id for current database InCh chan Query Info UnikIndexDBInfo Header index.Header Indices []*UnikIndex // contains filtered or unexported fields }
UnikIndexDB is a database for multiple .unik indices.
func NewUnikIndexDB ¶
func NewUnikIndexDB(path string, opt SearchOptions, dbID int) (*UnikIndexDB, error)
NewUnikIndexDB opens and reads from a database directory.
func (*UnikIndexDB) CompatibleWith ¶
func (db *UnikIndexDB) CompatibleWith(db2 *UnikIndexDB) bool
CompatibleWith has loose restrictions, to enable searching across databases with different parameters.
func (*UnikIndexDB) String ¶
func (db *UnikIndexDB) String() string
type UnikIndexDBInfo ¶
type UnikIndexDBInfo struct { Version uint8 `yaml:"version"` IndexVersion uint8 `yaml:"unikiVersion"` Alias string `yaml:"alias"` K int `yaml:"k"` Hashed bool `yaml:"hashed"` Canonical bool `yaml:"canonical"` Scaled bool `yaml:"scaled"` Scale uint32 `yaml:"scale"` Minimizer bool `yaml:"minimizer"` MinimizerW uint32 `yaml:"minimizer-w"` Syncmer bool `yaml:"syncmer"` SyncmerS uint32 `yaml:"syncmer-s"` SplitSeq bool `yaml:"split-seq"` SplitSize int `yaml:"split-size"` SplitOverlap int `yaml:"split-overlap"` NumHashes int `yaml:"hashes"` FPR float64 `yaml:"fpr"` NumNames int `yaml:"numNameGroups"` BlockSize int `yaml:"blocksize"` Kmers uint64 `yaml:"totalKmers"` Files []string `yaml:"files"` NameMapping map[string]string `yaml:"name-mapping,omitempty"` MappingNames bool `yaml:"mapping-names,omitempty"` // contains filtered or unexported fields }
UnikIndexDBInfo is the meta data of a database.
func NewUnikIndexDBInfo ¶
func NewUnikIndexDBInfo(files []string) UnikIndexDBInfo
NewUnikIndexDBInfo creates UnikIndexDBInfo from index files, but you have to manually assign other values.
func UnikIndexDBInfoFromFile ¶
func UnikIndexDBInfoFromFile(file string) (UnikIndexDBInfo, error)
UnikIndexDBInfoFromFile creates UnikIndexDBInfo from a file.
func (UnikIndexDBInfo) Check ¶
func (i UnikIndexDBInfo) Check() error
Check checks whether all index files exist.
func (UnikIndexDBInfo) CompatibleWith ¶
func (i UnikIndexDBInfo) CompatibleWith(j UnikIndexDBInfo) bool
CompatibleWith checks whether two databases have the same parameters.
func (UnikIndexDBInfo) String ¶
func (i UnikIndexDBInfo) String() string
type UnikIndexDBSearchEngine ¶
type UnikIndexDBSearchEngine struct { Options SearchOptions DBs []*UnikIndexDB DBNames []string InCh chan Query // queries OutCh chan QueryResult // contains filtered or unexported fields }
UnikIndexDBSearchEngine searches sequences on multiple databases.
func NewUnikIndexDBSearchEngine ¶
func NewUnikIndexDBSearchEngine(opt SearchOptions, dbPaths ...string) (*UnikIndexDBSearchEngine, error)
NewUnikIndexDBSearchEngine returns a search engine based on multiple databases.
func (*UnikIndexDBSearchEngine) Close ¶
func (sg *UnikIndexDBSearchEngine) Close() error
Close closes the search engine.