Documentation ¶
Overview ¶
Package gcse is the core supporting library for go-code-search-engine (GCSE). Its exported types and functions are mainly for sub packages. If you want to use some of the functions elsewhere, copy the code into your own project.
Index ¶
- Constants
- Variables
- func AddBiValueAndProcess(aggr bi.AggregateMethod, name string, value int)
- func AppendPackages(pkgs []string) bool
- func AppendTokens(tokens stringsp.Set, text []byte) stringsp.Set
- func AuthorOfPackage(pkg string) string
- func CalcMatchScore(doc *HitInfo, tokenList []string, textIdfs, nameIdfs []float64) float64
- func CalcPackagePartition(pkg string, totalParts int) int
- func CalcStaticScore(doc *HitInfo) float64
- func CalcTestStaticScore(doc *HitInfo, realImported []string) float64
- func CheckCamel(last, current rune) index.RuneType
- func CheckRuneType(last, current rune) index.RuneType
- func ChooseImportantSentenses(text string, name, pkg string) []string
- func ClearWatcherEvents(watcher *fsnotify.Watcher)
- func CrawlRepoInfo(ctx context.Context, site, user, name string) *gpb.RepoInfo
- func FullProjectOfPackage(pkg string) string
- func GenHttpClient(proxy string) doc.HttpClient
- func HostOfPackage(pkg string) string
- func IdOfPerson(site, username string) string
- func Index(docDB mr.Input, outDir string) (*index.TokenSetSearcher, error)
- func IsBadPackage(err error) bool
- func LikeButton(httpClient doc.HttpClient, Url string) (int, error)
- func NewDocInfo() sophie.Sophier
- func NewNewDocAction() sophie.Sophier
- func NormWord(word string) string
- func ParsePersonId(id string) (site, username string)
- func Plusone(httpClient doc.HttpClient, url string) (int, error)
- func ProjectOfPackage(pkg string) string
- func ReadPackages(segm utils.Segment) ([]string, error)
- func ReadmeToText(fn, data string) string
- func SplitSentences(text string) []string
- func TrimPackageName(pkg string) string
- func WaitForWatcherEvents(watcher *fsnotify.Watcher)
- type BlackRequest
- type CrawlerDB
- func (cdb *CrawlerDB) AppendPackage(pkg string, inDocs func(pkg string) bool)
- func (cdb *CrawlerDB) AppendPerson(site, username string) bool
- func (cdb *CrawlerDB) PushToCrawlPackage(pkg string)
- func (cdb *CrawlerDB) SchedulePackage(pkg string, sTime time.Time, etag string) error
- func (cdb *CrawlerDB) SchedulePerson(id string, sTime time.Time) error
- func (cdb *CrawlerDB) Sync() error
- type CrawlingEntry
- type DocDB
- type DocInfo
- type HitInfo
- type MemDB
- func (mdb *MemDB) Count() int
- func (mdb *MemDB) Delete(key string)
- func (mdb *MemDB) Export(root villa.Path, kind string) error
- func (mdb *MemDB) Get(key string, data interface{}) bool
- func (mdb *MemDB) Iterate(output func(key string, val interface{}) error) error
- func (mdb *MemDB) LastModified() time.Time
- func (mdb *MemDB) Load() error
- func (mdb *MemDB) Modified() bool
- func (mdb *MemDB) Put(key string, data interface{})
- func (mdb *MemDB) Sync() error
- type NewDocAction
- type Package
- type PackedDocDB
- type Person
- type TokenIndexer
- func (ti *TokenIndexer) Export(root villa.Path, kind string) error
- func (ti *TokenIndexer) IdsOfToken(token string) []string
- func (ti *TokenIndexer) LastModified() time.Time
- func (ti *TokenIndexer) Load() error
- func (ti *TokenIndexer) Modified() bool
- func (ti *TokenIndexer) Put(id string, tokens stringsp.Set)
- func (ti *TokenIndexer) Sync() error
- func (ti *TokenIndexer) TokensOfId(id string) []string
Constants ¶
const ( // whole document updated NDA_UPDATE = iota // only stars updated NDA_STARS // deleted NDA_DEL // Original document NDA_ORIGINAL )
const ( KindIndex = "index" KindDocDB = "docdb" KindPackage = "package" KindPerson = "person" KindToCheck = "tocheck" IndexFn = KindIndex + ".gob" )
const ( HitsArrFn = "hits" IndexTextField = "text" IndexNameField = "name" IndexPkgField = "pkg" )
const (
/*
Increase this to ignore etag of last versions to crawl and parse all
packages.
ChangeLog:
0 First version
1 Add TestImports/XTestImports to Imports
2 Parse markdown readme to text before selecting synopsis
from it
3 Add exported tokens to indexes
4 Move TestImports/XTestImports out of Imports, to TestImports
4 A bug of checking CrawlerVersion is fixed
*/
CrawlerVersion = 5
)
const (
DOCS_PARTS = 128
)
Variables ¶
var ( ErrPackageNotModifed = errors.New("package not modified") ErrInvalidPackage = errors.New("invalid package") )
var GithubSpider *github.Spider
Functions ¶
func AddBiValueAndProcess ¶
func AppendPackages ¶
AppendPackages appends a list of packages to the imports folder for the crawler backend to read.
func AppendTokens ¶
func AppendTokens(tokens stringsp.Set, text []byte) stringsp.Set
Tokenizes text into the current token set.
func AuthorOfPackage ¶
func CalcMatchScore ¶
func CalcPackagePartition ¶
func CalcStaticScore ¶
func CalcTestStaticScore ¶
func CheckCamel ¶
func CheckCamel(last, current rune) index.RuneType
func CheckRuneType ¶
func CheckRuneType(last, current rune) index.RuneType
func ClearWatcherEvents ¶
func FullProjectOfPackage ¶
func GenHttpClient ¶
func GenHttpClient(proxy string) doc.HttpClient
func HostOfPackage ¶
func IdOfPerson ¶
func IsBadPackage ¶
func LikeButton ¶
func LikeButton(httpClient doc.HttpClient, Url string) (int, error)
func NewDocInfo ¶
Returns a new instance of DocInfo as a sophie.Sophier
func NewNewDocAction ¶
Returns a new instance of *NewDocAction as a Sophier
func ParsePersonId ¶
func ReadmeToText ¶
func SplitSentences ¶
func TrimPackageName ¶
func WaitForWatcherEvents ¶
Types ¶
type BlackRequest ¶
type CrawlerDB ¶
CrawlerDB contains all the crawler entry databases.
func LoadCrawlerDB ¶
func LoadCrawlerDB() *CrawlerDB
LoadCrawlerDB loads PackageDB and PersonDB and returns a new *CrawlerDB
func (*CrawlerDB) AppendPackage ¶
AppendPackage appends a package. If the package did not exist in either PackageDB or Docs, schedule it (immediately).
func (*CrawlerDB) AppendPerson ¶
AppendPerson appends a person to the PersonDB and schedules an immediate crawl for a new person.
func (*CrawlerDB) PushToCrawlPackage ¶
PushToCrawlPackage schedules a package to be crawled at a specific time if not scheduled earlier.
func (*CrawlerDB) SchedulePackage ¶
SchedulePackage schedules a package to be crawled at a specific time.
func (*CrawlerDB) SchedulePerson ¶
SchedulePerson schedules a person to be crawled at a specific time.
type CrawlingEntry ¶
type DocInfo ¶
type DocInfo struct { Name string // Package name Package string // Package path Author string LastUpdated time.Time StarCount int Synopsis string Description string ProjectURL string ReadmeFn string ReadmeData string Imports []string TestImports []string Exported []string // exported tokens(funcs/types) }
DocInfo is the information stored in backend docDB
type HitInfo ¶
type HitInfo struct { DocInfo Imported []string ImportedLen int TestImported []string TestImportedLen int ImportantSentences []string AssignedStarCount float64 StaticScore float64 TestStaticScore float64 StaticRank int // zero-based }
HitInfo is the information provided to frontend
type MemDB ¶
func (*MemDB) Export ¶
Export saves the data to some space, but not affecting the modified property.
func (*MemDB) Get ¶
Get fetches an entry of the specified key. data is a pointer. Returns false if the key does not exist.
func (*MemDB) LastModified ¶
type NewDocAction ¶
* If Action equals NDA_DEL, DocInfo is undefined.
type Package ¶
type Package struct { Package string Name string Synopsis string Doc string ProjectURL string StarCount int ReadmeFn string ReadmeData string Imports []string TestImports []string Exported []string // exported tokens(funcs/types) References []string Etag string }
Package stores information from crawler
func CrawlPackage ¶
func CrawlPackage(ctx context.Context, httpClient doc.HttpClient, pkg string, etag string) (p *Package, folders []*gpb.FolderInfo, err error)
type PackedDocDB ¶
type PackedDocDB struct {
*MemDB
}
func (PackedDocDB) Get ¶
func (db PackedDocDB) Get(key string, data interface{}) bool
func (PackedDocDB) Iterate ¶
func (db PackedDocDB) Iterate( output func(key string, val interface{}) error) error
func (PackedDocDB) Put ¶
func (db PackedDocDB) Put(key string, data interface{})
type Person ¶
func CrawlPerson ¶
type TokenIndexer ¶
type TokenIndexer struct { index.TokenIndexer sync.RWMutex // contains filtered or unexported fields }
TokenIndexer is thread-safe.
func NewTokenIndexer ¶
func NewTokenIndexer(root villa.Path, kind string) *TokenIndexer
func (*TokenIndexer) Export ¶
func (ti *TokenIndexer) Export(root villa.Path, kind string) error
func (*TokenIndexer) IdsOfToken ¶
func (ti *TokenIndexer) IdsOfToken(token string) []string
func (*TokenIndexer) LastModified ¶
func (ti *TokenIndexer) LastModified() time.Time
func (*TokenIndexer) Load ¶
func (ti *TokenIndexer) Load() error
func (*TokenIndexer) Modified ¶
func (ti *TokenIndexer) Modified() bool
func (*TokenIndexer) Put ¶
func (ti *TokenIndexer) Put(id string, tokens stringsp.Set)
func (*TokenIndexer) Sync ¶
func (ti *TokenIndexer) Sync() error
func (*TokenIndexer) TokensOfId ¶
func (ti *TokenIndexer) TokensOfId(id string) []string
Source Files ¶
Directories ¶
Path | Synopsis |
---|---|
Package configs defines and loads all configurations.
|
Package configs defines and loads all configurations. |
pipelines
|
|
crawler
GCSE Crawler background program.
|
GCSE Crawler background program. |
mergedocs
Input FnDocs FnNewDocs
|
Input FnDocs FnNewDocs |
service
|
|
web
GCSE HTTP server.
|
GCSE HTTP server. |
shared
|
|
proto
Package gcsepb is a generated protocol buffer package.
|
Package gcsepb is a generated protocol buffer package. |
Package store handles all the storage in the GCSE backend.
|
Package store handles all the storage in the GCSE backend. |
tools
|
|