Documentation ¶
Overview ¶
Package gcse is the core supporting library for go-code-search-engine (GCSE). Its exported types and functions are mainly intended for its sub-packages. If you want to reuse some of the functionality, copy the code into your own project.
Sub-projects ¶
- crawler: crawling packages
- indexer: creating index data for the web server
- server: providing web services, including home/top/search services
Data-flows ¶
project  Read           Write
-------  ----           -----
crawler  fnCrawlerDB    fnCrawlerDB
         fnDocDB        fnDocDB
                        DBOutSegments
indexer  DBOutSegments  IndexSegments
server   IndexSegments
Index ¶
- Constants
- Variables
- func AppendPackages(pkgs []string) bool
- func AppendTokens(tokens stringsp.Set, text []byte) stringsp.Set
- func AuthorOfPackage(pkg string) string
- func CalcMatchScore(doc *HitInfo, tokenList []string, textIdfs, nameIdfs []float64) float64
- func CalcPackagePartition(pkg string, totalParts int) int
- func CalcStaticScore(doc *HitInfo) float64
- func CalcTestStaticScore(doc *HitInfo, realImported []string) float64
- func CheckCamel(last, current rune) index.RuneType
- func CheckRuneType(last, current rune) index.RuneType
- func ChooseImportantSentenses(text string, name, pkg string) []string
- func ClearWatcherEvents(watcher *fsnotify.Watcher)
- func DumpMemStats()
- func FetchAllPackagesInGodoc(httpClient doc.HttpClient) ([]string, error)
- func FullProjectOfPackage(pkg string) string
- func GenHttpClient(proxy string) doc.HttpClient
- func GithubUpdates() (map[string]time.Time, error)
- func HostOfPackage(pkg string) string
- func IdOfPerson(site, username string) string
- func Index(docDB mr.Input) (*index.TokenSetSearcher, error)
- func IsBadPackage(err error) bool
- func LikeButton(httpClient doc.HttpClient, Url string) (int, error)
- func NewDocInfo() sophie.Sophier
- func NewNewDocAction() sophie.Sophier
- func NormWord(word string) string
- func ParsePersonId(id string) (site, username string)
- func Plusone(httpClient doc.HttpClient, url string) (int, error)
- func ProjectOfPackage(pkg string) string
- func ReadJsonFile(fn villa.Path, data interface{}) error
- func ReadPackages(segm Segment) (pkgs []string, err error)
- func ReadmeToText(fn, data string) string
- func SegmentLess(a, b Segment) bool
- func SplitSentences(text string) []string
- func TrimPackageName(pkg string) string
- func WaitForWatcherEvents(watcher *fsnotify.Watcher)
- func WriteJsonFile(fn villa.Path, data interface{}) error
- type BlackRequest
- type CrawlerDB
- func (cdb *CrawlerDB) AppendPackage(pkg string, inDocs func(pkg string) bool)
- func (cdb *CrawlerDB) AppendPerson(site, username string) bool
- func (cdb *CrawlerDB) SchedulePackage(pkg string, sTime time.Time, etag string) error
- func (cdb *CrawlerDB) SchedulePerson(id string, sTime time.Time) error
- func (cdb *CrawlerDB) Sync() error
- type CrawlingEntry
- type DocDB
- type DocInfo
- type HitInfo
- type MemDB
- func (mdb *MemDB) Count() int
- func (mdb *MemDB) Delete(key string)
- func (mdb *MemDB) Export(root villa.Path, kind string) error
- func (mdb *MemDB) Get(key string, data interface{}) bool
- func (mdb *MemDB) Iterate(output func(key string, val interface{}) error) error
- func (mdb *MemDB) LastModified() time.Time
- func (mdb *MemDB) Load() error
- func (mdb *MemDB) Modified() bool
- func (mdb *MemDB) Put(key string, data interface{})
- func (mdb *MemDB) Sync() error
- type NewDocAction
- type Package
- type PackedDocDB
- type Person
- type Segment
- type Segments
- type Size
- type TokenIndexer
- func (ti *TokenIndexer) Export(root villa.Path, kind string) error
- func (ti *TokenIndexer) IdsOfToken(token string) []string
- func (ti *TokenIndexer) LastModified() time.Time
- func (ti *TokenIndexer) Load() error
- func (ti *TokenIndexer) Modified() bool
- func (ti *TokenIndexer) Put(id string, tokens stringsp.Set)
- func (ti *TokenIndexer) Sync() error
- func (ti *TokenIndexer) TokensOfId(id string) []string
Constants ¶
const ( KindIndex = "index" IndexFn = KindIndex + ".gob" KindDocDB = "docdb" FnCrawlerDB = "crawler" KindPackage = "package" KindPerson = "person" KindToCheck = "tocheck" FnToCrawl = "tocrawl" FnPackage = "package" FnPerson = "person" // key: RawString, value: DocInfo FnDocs = "docs" FnNewDocs = "newdocs" )
const ( // whole document updated NDA_UPDATE = iota // only stars updated NDA_STARS // deleted NDA_DEL // Original document NDA_ORIGINAL )
const ( IndexTextField = "text" IndexNameField = "name" IndexPkgField = "pkg" )
const (
DOCS_PARTS = 128
)
Variables ¶
var ( ServerAddr = ":8080" ServerRoot = villa.Path("./server/") LoadTemplatePass = "" AutoLoadTemplate = false DataRoot = villa.Path("./data/") CrawlerDBPath = DataRoot.Join(FnCrawlerDB) DocsDBPath = DataRoot.Join(FnDocs) // producer: server, consumer: crawler ImportPath villa.Path ImportSegments Segments // producer: crawler, consumer: indexer DBOutPath villa.Path DBOutSegments Segments // producer: indexer, consumer: server. // The server never deletes index segments; the indexer clears updated segments. IndexPath villa.Path IndexSegments Segments // crawler configuration CrawlByGodocApi = true CrawlGithubUpdate = true CrawlerDuePerRun = 1 * time.Hour CrawlerGithubClientID = "" CrawlerGithubClientSecret = "" /* Increase this to ignore etag of last versions to crawl and parse all packages. ChangeLog: 0 First version 1 Add TestImports/XTestImports to Imports 2 Parse markdown readme to text before selecting synopsis from it 3 Add exported tokens to indexes 4 Move TestImports/XTestImports out of Imports, to TestImports 4 A bug in checking CrawlerVersion is fixed */ CrawlerVersion = 5 NonCrawlHosts = stringsp.Set{} NonStorePackageRegexps = []string{} )
var ( ErrPackageNotModifed = errors.New("package not modified") ErrInvalidPackage = errors.New("invalid package") )
Functions ¶
func AppendPackages ¶
AppendPackages appends a list of packages to the imports folder for the crawler backend to read.
func AppendTokens ¶
func AppendTokens(tokens stringsp.Set, text []byte) stringsp.Set
func AuthorOfPackage ¶
func CalcMatchScore ¶
func CalcPackagePartition ¶
func CalcStaticScore ¶
func CalcTestStaticScore ¶
func CheckCamel ¶
func CheckCamel(last, current rune) index.RuneType
func CheckRuneType ¶
func CheckRuneType(last, current rune) index.RuneType
func ClearWatcherEvents ¶
func DumpMemStats ¶
func DumpMemStats()
func FetchAllPackagesInGodoc ¶
func FetchAllPackagesInGodoc(httpClient doc.HttpClient) ([]string, error)
FetchAllPackagesInGodoc fetches the list of all packages on godoc.org
func FullProjectOfPackage ¶
func GenHttpClient ¶
func GenHttpClient(proxy string) doc.HttpClient
func HostOfPackage ¶
func IdOfPerson ¶
func IsBadPackage ¶
func LikeButton ¶
func LikeButton(httpClient doc.HttpClient, Url string) (int, error)
func NewDocInfo ¶
Returns a new instance of DocInfo as a sophie.Sophier
func NewNewDocAction ¶
Returns a new instance of *NewDocAction as a Sophier
func ParsePersonId ¶
func ReadJsonFile ¶
func ReadJsonFile(fn villa.Path, data interface{}) error
func ReadPackages ¶
func ReadmeToText ¶
func SegmentLess ¶
func SplitSentences ¶
func TrimPackageName ¶
func WaitForWatcherEvents ¶
func WriteJsonFile ¶
func WriteJsonFile(fn villa.Path, data interface{}) error
Types ¶
type BlackRequest ¶
type CrawlerDB ¶
CrawlerDB includes the databases of all crawler entries.
func LoadCrawlerDB ¶
func LoadCrawlerDB() *CrawlerDB
LoadCrawlerDB loads PackageDB and PersonDB and returns a new *CrawlerDB
func (*CrawlerDB) AppendPackage ¶
AppendPackage appends a package. If the package did not exist in either PackageDB or Docs, it is scheduled (immediately).
func (*CrawlerDB) AppendPerson ¶
AppendPerson appends a person to the PersonDB and schedules an immediate crawl if the person is new.
func (*CrawlerDB) SchedulePackage ¶
SchedulePackage schedules a package to be crawled at a specific time.
func (*CrawlerDB) SchedulePerson ¶
SchedulePerson schedules a person to be crawled at a specific time.
type CrawlingEntry ¶
type DocInfo ¶
type DocInfo struct { Name string Package string Author string LastUpdated time.Time StarCount int Synopsis string Description string ProjectURL string ReadmeFn string ReadmeData string Imports []string TestImports []string Exported []string // exported tokens(funcs/types) }
DocInfo is the information stored in backend docDB
type HitInfo ¶
type HitInfo struct { DocInfo Imported []string TestImported []string ImportantSentences []string AssignedStarCount float64 StaticScore float64 TestStaticScore float64 StaticRank int // zero-based }
HitInfo is the information provided to frontend
type MemDB ¶
func (*MemDB) Export ¶
Export saves the data to another location without affecting the modified property.
func (*MemDB) Get ¶
Get fetches the entry for the specified key. data must be a pointer. It returns false if the entry does not exist.
func (*MemDB) LastModified ¶
type NewDocAction ¶
If Action equals NDA_DEL, DocInfo is undefined.
type Package ¶
type Package struct { Package string Name string Synopsis string Doc string ProjectURL string StarCount int ReadmeFn string ReadmeData string Imports []string TestImports []string Exported []string // exported tokens(funcs/types) References []string Etag string }
Package stores information from crawler
func CrawlPackage ¶
type PackedDocDB ¶
type PackedDocDB struct {
*MemDB
}
func (PackedDocDB) Get ¶
func (db PackedDocDB) Get(key string, data interface{}) bool
func (PackedDocDB) Iterate ¶
func (db PackedDocDB) Iterate( output func(key string, val interface{}) error) error
func (PackedDocDB) Put ¶
func (db PackedDocDB) Put(key string, data interface{})
type Person ¶
func CrawlPerson ¶
func CrawlPerson(httpClient doc.HttpClient, id string) (*Person, error)
type Segments ¶
type Segments interface { Watch(watcher *fsnotify.Watcher) error ListAll() ([]Segment, error) // all done ListDones() ([]Segment, error) // max done FindMaxDone() (Segment, error) // generates an arbitrary new segment GenNewSegment() (Segment, error) // generates a segment greater than all existing ones GenMaxSegment() (Segment, error) // clear ClearUndones() error }
type TokenIndexer ¶
type TokenIndexer struct { index.TokenIndexer sync.RWMutex // contains filtered or unexported fields }
TokenIndexer is thread-safe.
func NewTokenIndexer ¶
func NewTokenIndexer(root villa.Path, kind string) *TokenIndexer
func (*TokenIndexer) Export ¶
func (ti *TokenIndexer) Export(root villa.Path, kind string) error
func (*TokenIndexer) IdsOfToken ¶
func (ti *TokenIndexer) IdsOfToken(token string) []string
func (*TokenIndexer) LastModified ¶
func (ti *TokenIndexer) LastModified() time.Time
func (*TokenIndexer) Load ¶
func (ti *TokenIndexer) Load() error
func (*TokenIndexer) Modified ¶
func (ti *TokenIndexer) Modified() bool
func (*TokenIndexer) Put ¶
func (ti *TokenIndexer) Put(id string, tokens stringsp.Set)
func (*TokenIndexer) Sync ¶
func (ti *TokenIndexer) Sync() error
func (*TokenIndexer) TokensOfId ¶
func (ti *TokenIndexer) TokensOfId(id string) []string