Documentation ¶
Overview ¶
Copyright 2017 Joseph Lewis III <joseph@josephlewis.net> Licensed under the MIT License. See LICENSE file for full details.
Index ¶
- Constants
- Variables
- func FindDbPath(directory string) string
- func FormatDocuments(w io.Writer, docs []Document, templateFormat string, shortSha bool, ...)
- func FormatSearchResults(w io.Writer, docs []SearchResult, templateFormat string, db *ParaphraseDb)
- func GlobToRegexStr(glob string) string
- func NewDocument(path, namespace string, body []byte) (*Document, *DocumentData)
- func RenderDocument(templateFormat string, doc *Document, db *ParaphraseDb, ...) error
- func WriteDocuments(w io.Writer, docs []Document, shortSha bool)
- type BucketSet
- func (hs BucketSet) AddAll(elements []uint64) BucketSet
- func (hs BucketSet) GetOrDefault(key uint64, defaultVal float64) float64
- func (hs BucketSet) Intersect(other BucketSet) BucketSet
- func (hs BucketSet) Mult(other BucketSet) BucketSet
- func (hs BucketSet) OverlapProportion(other BucketSet) float64
- func (hs BucketSet) Sum() float64
- func (hs BucketSet) TfIdf() float64
- func (hs BucketSet) Union(other BucketSet) BucketSet
- type ChangeLogEntry
- type Document
- type DocumentData
- type Fingerprint
- type IndexEntry
- type ParaphraseDb
- func (p *ParaphraseDb) AddDocuments(producer provider.DocumentProducer) (added []Document, ok bool)
- func (p *ParaphraseDb) Close() error
- func (p *ParaphraseDb) CountDocuments() (int, error)
- func (p *ParaphraseDb) CreateDocument(path, namespace string, body []byte) (*Document, error)
- func (p *ParaphraseDb) FindDocumentById(id int64) (*Document, error)
- func (p *ParaphraseDb) FindDocumentDataById(id int64) (*DocumentData, error)
- func (p *ParaphraseDb) FindDocumentsBySha1(sha1 string) (results []Document, err error)
- func (p *ParaphraseDb) FindDocumentsLike(query Document) (results []Document, err error)
- func (p *ParaphraseDb) GetSettings() Settings
- func (p *ParaphraseDb) ImportDocumentsMatching(from *ParaphraseDb, query Document) error
- func (p *ParaphraseDb) QueryById(id int64) (results []SearchResult, err error)
- func (p *ParaphraseDb) QueryByString(query string) (results []SearchResult, err error)
- func (p *ParaphraseDb) QueryByVector(query TermCountVector) (results []SearchResult, err error)
- func (p *ParaphraseDb) WinnowData(bytes []byte) (TermCountVector, error)
- func (p *ParaphraseDb) WriteChanges(writer io.Writer)
- func (p *ParaphraseDb) WriteStats(writer io.Writer)
- type SearchResult
- type Settings
- type TermCountVector
Constants ¶
const ( DbExt = ".ppdb" DbName = "paraphrasedb.ppdb" DocumentBucket = "documents" IndexBucket = "index" SettingsBucket = "settings" FileBucket = "files" MinIndex = "00000000000000000000" MaxIndex = "99999999999999999999" CurrentSettingsVersion = 1 // the version of the settings file, won't match the version of paraphrase )
Variables ¶
var ( SettingsNotDefinedErr = errors.New("No settings found. If you meant to create a database run 'paraphrase init'") AlreadyInitializedErr = errors.New("It looks like paraphrase has already been initialized.") ImportErr = errors.New("Errors encountered while importing documents") DatabaseDNEErr = errors.New("It looks like the database does not exist, try running paraphrase init to create it") )
Functions ¶
func FindDbPath ¶
func FormatDocuments ¶
func FormatDocuments(w io.Writer, docs []Document, templateFormat string, shortSha bool, db *ParaphraseDb)
FormatDocuments writes the documents in a fashion suitable for displaying on-screen.
func FormatSearchResults ¶
func FormatSearchResults(w io.Writer, docs []SearchResult, templateFormat string, db *ParaphraseDb)
func GlobToRegexStr ¶
GlobToRegexStr converts a basic glob string to a regex, e.g. "foo*bar.java" becomes "^foo.*bar\.java$". Everything that isn't a * gets escaped.
func NewDocument ¶
func NewDocument(path, namespace string, body []byte) (*Document, *DocumentData)
func RenderDocument ¶
Types ¶
type BucketSet ¶
func NewBucketSet ¶
func NewBucketSet() BucketSet
func (BucketSet) GetOrDefault ¶
func (BucketSet) OverlapProportion ¶
type ChangeLogEntry ¶
type Document ¶
type Document struct { Id int64 `storm:"id,unique"` Path string Namespace string IndexDate time.Time Sha1 string `storm:"index"` Hashes TermCountVector }
func (*Document) NormalizedTermFrequency ¶
type DocumentData ¶
type DocumentData struct { Id int64 `storm:"id,unique"` Path string Namespace string IndexDate time.Time Body []byte }
func NewDocumentData ¶
func NewDocumentData(doc *Document, body []byte) *DocumentData
func (*DocumentData) BodySha1 ¶
func (dd *DocumentData) BodySha1() string
type Fingerprint ¶
type Fingerprint uint64
type IndexEntry ¶
type ParaphraseDb ¶
type ParaphraseDb struct {
// contains filtered or unexported fields
}
func Create ¶
func Create(directory string, settings Settings) (*ParaphraseDb, error)
Creates a new database in the given directory with the given settings
func Open ¶
func Open(directory string) (*ParaphraseDb, error)
Open or create a new paraphrase database in the given directory
func (*ParaphraseDb) AddDocuments ¶
func (p *ParaphraseDb) AddDocuments(producer provider.DocumentProducer) (added []Document, ok bool)
func (*ParaphraseDb) Close ¶
func (p *ParaphraseDb) Close() error
func (*ParaphraseDb) CountDocuments ¶
func (p *ParaphraseDb) CountDocuments() (int, error)
func (*ParaphraseDb) CreateDocument ¶
func (p *ParaphraseDb) CreateDocument(path, namespace string, body []byte) (*Document, error)
func (*ParaphraseDb) FindDocumentById ¶
func (p *ParaphraseDb) FindDocumentById(id int64) (*Document, error)
func (*ParaphraseDb) FindDocumentDataById ¶
func (p *ParaphraseDb) FindDocumentDataById(id int64) (*DocumentData, error)
func (*ParaphraseDb) FindDocumentsBySha1 ¶
func (p *ParaphraseDb) FindDocumentsBySha1(sha1 string) (results []Document, err error)
func (*ParaphraseDb) FindDocumentsLike ¶
func (p *ParaphraseDb) FindDocumentsLike(query Document) (results []Document, err error)
FindDocumentsLike finds documents like the one given:
- Ids are matched exactly.
- SHA1s are matched as a prefix (you can give just the first n characters).
- Namespaces are searched like globs.
- Paths are searched like globs.
func (*ParaphraseDb) GetSettings ¶
func (p *ParaphraseDb) GetSettings() Settings
func (*ParaphraseDb) ImportDocumentsMatching ¶
func (p *ParaphraseDb) ImportDocumentsMatching(from *ParaphraseDb, query Document) error
func (*ParaphraseDb) QueryById ¶
func (p *ParaphraseDb) QueryById(id int64) (results []SearchResult, err error)
func (*ParaphraseDb) QueryByString ¶
func (p *ParaphraseDb) QueryByString(query string) (results []SearchResult, err error)
func (*ParaphraseDb) QueryByVector ¶
func (p *ParaphraseDb) QueryByVector(query TermCountVector) (results []SearchResult, err error)
func (*ParaphraseDb) WinnowData ¶
func (p *ParaphraseDb) WinnowData(bytes []byte) (TermCountVector, error)
func (*ParaphraseDb) WriteChanges ¶
func (p *ParaphraseDb) WriteChanges(writer io.Writer)
func (*ParaphraseDb) WriteStats ¶
func (p *ParaphraseDb) WriteStats(writer io.Writer)
WriteStats writes information about Paraphrase and the database to the given writer. The output format _may change without warning_.
type SearchResult ¶
type SearchResult struct { Query *TermCountVector Doc *Document // contains filtered or unexported fields }
func (*SearchResult) Similarity ¶
func (sr *SearchResult) Similarity() float64
type Settings ¶
type Settings struct { Version int `storm:"id,unique"` WindowSize int FingerprintSize int RobustHash bool CreatedAt time.Time }
func NewDefaultSettings ¶
func NewDefaultSettings() Settings
type TermCountVector ¶
func (TermCountVector) NormalizedTermFrequency ¶
func (vec TermCountVector) NormalizedTermFrequency() linalg.IFVector
Source Files ¶
Directories ¶
Path | Synopsis |
---|---|
snappyjson | Package snappyjson implements snappy compression of JSON objects for StormDB. |