Documentation ¶
Overview ¶
Package index provides a generic indexing system on top of the abstract Storage interface.
The following keys & values are populated by receiving blobs and queried for search operations:
Recent Permanodes "recpn|<pgp-keyid>|<reverse-modtime>|<claim-blobref>" -> "<permanode-blobref>" where reverse-modtime flips each digit to '9'-<digit> and prepends "rt" (for reverse time) "2011-11-27T01:23:45Z" = "rt7988-88-72T98:76:54Z"
signer blobref of ascii public key -> gpg key id "signerkeyid:sha1-ad87ca5c78bd0ce1195c46f7c98e6025abbaf007" = "2931A67C26F5ABDA"
PermanodeOfSignerAttrValue: "signerattrvalue|<keyid>|<URLEscape(attr)>|<URLEscape(value)>|<reverse-claimtime>|<claim-blobref>" -> "<permanode>" e.g. "signerattrvalue|2931A67C26F5ABDA|camliRoot|rootval|"+ "rt7988-88-71T98:67:60.999876543Z|sha1-bf115940641f1aae2e007edcf36b3b18c17256d9" = "sha1-7a14cce982aa73ab519e63050f82e2a2adfcf039"
Other: "meta:<blobref>" -> "<size>|<mimetype>" "have:<blobref>" -> "<size>" (used for enumeration, which doesn't need mime type)
For GetOwnerClaims(permanode, signer): "claim|<permanode-blobref>|<keyid>|<date>|<claim-blobref>" -> "<URL:type>|<URL:attr>|<URL:value>"
Index ¶
- func ClaimsAttrValue(claims []camtypes.Claim, attr string, at time.Time, signerFilter blob.Ref) string
- func IsBlobReferenceAttribute(attr string) bool
- func IsFulltextAttribute(attr string) bool
- func IsIndexedAttribute(attr string) bool
- func ReindexMaxProcs() int
- func SetReindexMaxProcs(n int)
- func SetVerboseCorpusLogging(v bool)
- type BlobSniffer
- func (sn *BlobSniffer) Body() ([]byte, error)
- func (sn *BlobSniffer) CamliType() string
- func (sn *BlobSniffer) IsTruncated() bool
- func (sn *BlobSniffer) MIMEType() string
- func (sn *BlobSniffer) Parse()
- func (sn *BlobSniffer) SchemaBlob() (meta *schema.Blob, ok bool)
- func (sn *BlobSniffer) Size() int64
- func (sn *BlobSniffer) Write(d []byte) (int, error)
- type Corpus
- func (c *Corpus) AppendClaims(ctx context.Context, dst []camtypes.Claim, permaNode blob.Ref, ...) ([]camtypes.Claim, error)
- func (c *Corpus) AppendPermanodeAttrValues(dst []string, permaNode blob.Ref, attr string, at time.Time, ...) []string
- func (c *Corpus) EnumerateBlobMeta(fn func(camtypes.BlobMeta) bool)
- func (c *Corpus) EnumerateCamliBlobs(camType string, fn func(camtypes.BlobMeta) bool)
- func (c *Corpus) EnumeratePermanodesCreated(fn func(camtypes.BlobMeta) bool, newestFirst bool)
- func (c *Corpus) EnumeratePermanodesLastModified(fn func(camtypes.BlobMeta) bool)
- func (c *Corpus) FileLatLong(fileRef blob.Ref) (lat, long float64, ok bool)
- func (c *Corpus) ForeachClaim(permaNode blob.Ref, at time.Time, fn func(*camtypes.Claim) bool)
- func (c *Corpus) ForeachClaimBack(value blob.Ref, at time.Time, fn func(*camtypes.Claim) bool)
- func (c *Corpus) GetBlobMeta(ctx context.Context, br blob.Ref) (camtypes.BlobMeta, error)
- func (c *Corpus) GetFileInfo(ctx context.Context, fileRef blob.Ref) (fi camtypes.FileInfo, err error)
- func (c *Corpus) GetImageInfo(ctx context.Context, fileRef blob.Ref) (ii camtypes.ImageInfo, err error)
- func (c *Corpus) GetMediaTags(ctx context.Context, fileRef blob.Ref) (map[string]string, error)
- func (c *Corpus) GetWholeRef(ctx context.Context, fileRef blob.Ref) (wholeRef blob.Ref, ok bool)
- func (c *Corpus) IsDeleted(br blob.Ref) bool
- func (c *Corpus) KeyId(ctx context.Context, signer blob.Ref) (string, error)
- func (c *Corpus) PermanodeAnyTime(pn blob.Ref) (t time.Time, ok bool)
- func (c *Corpus) PermanodeAttrValue(permaNode blob.Ref, attr string, at time.Time, signerFilter blob.Ref) string
- func (c *Corpus) PermanodeHasAttrValue(pn blob.Ref, at time.Time, attr, val string) bool
- func (c *Corpus) PermanodeModtime(pn blob.Ref) (t time.Time, ok bool)
- func (c *Corpus) PermanodeTime(pn blob.Ref) (t time.Time, ok bool)
- type Index
- func (x *Index) AppendClaims(ctx context.Context, dst []camtypes.Claim, permaNode blob.Ref, ...) ([]camtypes.Claim, error)
- func (x *Index) Close() error
- func (ix *Index) DisableOutOfOrderIndexing()
- func (x *Index) EdgesTo(ref blob.Ref, opts *camtypes.EdgesToOpts) (edges []*camtypes.Edge, err error)
- func (x *Index) EnumerateBlobMeta(ctx context.Context, fn func(camtypes.BlobMeta) bool) error
- func (ix *Index) EnumerateBlobs(ctx context.Context, dest chan<- blob.SizedRef, after string, limit int) (err error)
- func (x *Index) ExistingFileSchemas(wholeRef blob.Ref) (schemaRefs []blob.Ref, err error)
- func (x *Index) GetBlobMeta(ctx context.Context, br blob.Ref) (camtypes.BlobMeta, error)
- func (x *Index) GetDirMembers(dir blob.Ref, dest chan<- blob.Ref, limit int) (err error)
- func (x *Index) GetFileInfo(ctx context.Context, fileRef blob.Ref) (camtypes.FileInfo, error)
- func (x *Index) GetFileLocation(ctx context.Context, fileRef blob.Ref) (camtypes.Location, error)
- func (x *Index) GetImageInfo(ctx context.Context, fileRef blob.Ref) (camtypes.ImageInfo, error)
- func (x *Index) GetMediaTags(ctx context.Context, fileRef blob.Ref) (tags map[string]string, err error)
- func (x *Index) GetRecentPermanodes(ctx context.Context, dest chan<- camtypes.RecentPermanode, owner blob.Ref, ...) (err error)
- func (x *Index) InitBlobSource(blobSource blobserver.FetcherEnumerator)
- func (x *Index) IsDeleted(br blob.Ref) bool
- func (x *Index) KeepInMemory() (*Corpus, error)
- func (x *Index) KeyId(ctx context.Context, signer blob.Ref) (string, error)
- func (x *Index) Lock()
- func (x *Index) PathLookup(ctx context.Context, signer, base blob.Ref, suffix string, at time.Time) (*camtypes.Path, error)
- func (x *Index) PathsLookup(ctx context.Context, signer, base blob.Ref, suffix string) (paths []*camtypes.Path, err error)
- func (x *Index) PathsOfSignerTarget(ctx context.Context, signer, target blob.Ref) (paths []*camtypes.Path, err error)
- func (x *Index) PermanodeOfSignerAttrValue(ctx context.Context, signer blob.Ref, attr, val string) (permaNode blob.Ref, err error)
- func (x *Index) PreventStorageAccessForTesting()
- func (x *Index) RLock()
- func (x *Index) RUnlock()
- func (ix *Index) ReceiveBlob(blobRef blob.Ref, source io.Reader) (blob.SizedRef, error)
- func (x *Index) Reindex() error
- func (x *Index) SearchPermanodesWithAttr(ctx context.Context, dest chan<- blob.Ref, ...) (err error)
- func (ix *Index) StatBlobs(dest chan<- blob.SizedRef, blobs []blob.Ref) error
- func (x *Index) Storage() sorted.KeyValue
- func (x *Index) String() string
- func (x *Index) Unlock()
- type Interface
- type LocationHelper
- type PermanodeMeta
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
func ClaimsAttrValue ¶
func ClaimsAttrValue(claims []camtypes.Claim, attr string, at time.Time, signerFilter blob.Ref) string
ClaimsAttrValue returns the value of attr from claims, or the empty string if not found. Claims should be sorted by claim.Date.
func IsBlobReferenceAttribute ¶
IsBlobReferenceAttribute returns whether attr is an attribute whose value is a blob reference (e.g. camliMember) and thus something the indexers should keep inverted indexes on for parent/child-type relationships.
func IsIndexedAttribute ¶
TODO(bradfitz): rename this? This is really about signer-attr-value (PermanodeOfSignerAttrValue), and not about indexed attributes in general.
func ReindexMaxProcs ¶
func ReindexMaxProcs() int
ReindexMaxProcs returns the maximum number of concurrent goroutines that are used during reindexing.
Types ¶
type BlobSniffer ¶
type BlobSniffer struct {
// contains filtered or unexported fields
}
func NewBlobSniffer ¶
func NewBlobSniffer(ref blob.Ref) *BlobSniffer
func (*BlobSniffer) Body ¶
func (sn *BlobSniffer) Body() ([]byte, error)
Body returns the bytes written to the BlobSniffer.
func (*BlobSniffer) CamliType ¶
func (sn *BlobSniffer) CamliType() string
func (*BlobSniffer) IsTruncated ¶
func (sn *BlobSniffer) IsTruncated() bool
IsTruncated reports whether the BlobSniffer had more than schema.MaxSchemaBlobSize bytes written to it.
func (*BlobSniffer) MIMEType ¶
func (sn *BlobSniffer) MIMEType() string
MIMEType returns the sniffed blob's content-type or the empty string if unknown. If the blob is a Camlistore schema metadata blob, the MIME type will be of the form "application/json; camliType=foo".
func (*BlobSniffer) Parse ¶
func (sn *BlobSniffer) Parse()
func (*BlobSniffer) SchemaBlob ¶
func (sn *BlobSniffer) SchemaBlob() (meta *schema.Blob, ok bool)
func (*BlobSniffer) Size ¶
func (sn *BlobSniffer) Size() int64
Size returns the number of bytes written to the BlobSniffer. It might be more than schema.MaxSchemaBlobSize. See IsTruncated.
func (*BlobSniffer) Write ¶
func (sn *BlobSniffer) Write(d []byte) (int, error)
type Corpus ¶
type Corpus struct {
// contains filtered or unexported fields
}
Corpus is an in-memory summary of all of a user's blobs' metadata.
A Corpus is not safe for concurrent use. Callers should use Lock or RLock on the parent index instead.
func (*Corpus) AppendClaims ¶
func (*Corpus) AppendPermanodeAttrValues ¶
func (c *Corpus) AppendPermanodeAttrValues(dst []string, permaNode blob.Ref, attr string, at time.Time, signerFilter blob.Ref) []string
AppendPermanodeAttrValues appends to dst all the values for the attribute attr set on permaNode. signerFilter is optional. dst must start with length 0 (laziness, mostly)
func (*Corpus) EnumerateBlobMeta ¶
EnumerateBlobMeta calls fn for all known meta blobs in an undefined order. If fn returns false, iteration ends.
func (*Corpus) EnumerateCamliBlobs ¶
EnumerateCamliBlobs calls fn for all known meta blobs.
If camType is not empty, it specifies a filter for which meta blob types to call fn for. If empty, all are emitted.
If fn returns false, iteration ends.
func (*Corpus) EnumeratePermanodesCreated ¶
EnumeratePermanodesCreated calls fn for all permanodes. They are sorted using the contents creation date if any, the permanode modtime otherwise, and in the order specified by newestFirst. Iteration ends prematurely if fn returns false.
func (*Corpus) EnumeratePermanodesLastModified ¶
EnumeratePermanodesLastModified calls fn for all permanodes, sorted by most recently modified first. Iteration ends prematurely if fn returns false.
func (*Corpus) FileLatLong ¶
func (*Corpus) ForeachClaim ¶
ForeachClaim calls fn for each claim of permaNode. If at is zero, all claims are yielded. If at is non-zero, claims after that point are skipped. If fn returns false, iteration ends. Iteration is in an undefined order.
func (*Corpus) ForeachClaimBack ¶
ForeachClaimBack calls fn for each claim with a value referencing the blob ref value. If at is zero, all claims are yielded. If at is non-zero, claims after that point are skipped. If fn returns false, iteration ends. Iteration is in an undefined order.
func (*Corpus) GetBlobMeta ¶
func (*Corpus) GetFileInfo ¶
func (*Corpus) GetImageInfo ¶
func (*Corpus) GetMediaTags ¶
func (*Corpus) GetWholeRef ¶
func (*Corpus) IsDeleted ¶
IsDeleted reports whether the provided blobref (of a permanode or claim) should be considered deleted.
func (*Corpus) PermanodeAnyTime ¶
PermanodeAnyTime returns the time that best qualifies the permanode. It tries content-specific times first, the permanode modtime otherwise.
func (*Corpus) PermanodeAttrValue ¶
func (c *Corpus) PermanodeAttrValue(permaNode blob.Ref, attr string, at time.Time, signerFilter blob.Ref) string
PermanodeAttrValue returns a single-valued attribute or "".
func (*Corpus) PermanodeHasAttrValue ¶
PermanodeHasAttrValue reports whether the permanode pn at time at (zero means now) has the given attribute with the given value. If the attribute is multi-valued, any may match.
func (*Corpus) PermanodeModtime ¶
PermanodeModtime returns the latest modification time of the given permanode.
The ok value is true only if the permanode is known and has any non-deleted claims. A deleted claim is ignored and neither its claim date nor the date of the delete claim affect the modtime of the permanode.
type Index ¶
type Index struct { *blobserver.NoImplStorage KeyFetcher blob.Fetcher // for verifying claims // contains filtered or unexported fields }
func New ¶
New returns a new index using the provided key/value storage implementation.
func NewMemoryIndex ¶
func NewMemoryIndex() *Index
NewMemoryIndex returns an Index backed only by memory, for use in tests.
func (*Index) AppendClaims ¶
func (*Index) Close ¶
Close closes the underlying sorted.KeyValue, if the storage has a Close method. The return value is the return value of the underlying Close, or nil otherwise.
func (*Index) DisableOutOfOrderIndexing ¶
func (ix *Index) DisableOutOfOrderIndexing()
DisableOutOfOrderIndexing should only be used for tests. It disables the asynchronous, out of order, indexing to demonstrate that e.g. reindexing fails without it.
func (*Index) EdgesTo ¶
func (*Index) EnumerateBlobMeta ¶
EnumerateBlobMeta calls fn for all known meta blobs. If fn returns false, iteration stops and a nil error is returned. If ctx becomes done, iteration stops and ctx.Err() is returned.
func (*Index) EnumerateBlobs ¶
func (*Index) ExistingFileSchemas ¶
func (*Index) GetBlobMeta ¶
func (*Index) GetDirMembers ¶
GetDirMembers sends on dest the children of the static directory dir.
func (*Index) GetFileInfo ¶
func (*Index) GetFileLocation ¶
func (*Index) GetImageInfo ¶
func (*Index) GetMediaTags ¶
func (*Index) GetRecentPermanodes ¶
func (x *Index) GetRecentPermanodes(ctx context.Context, dest chan<- camtypes.RecentPermanode, owner blob.Ref, limit int, before time.Time) (err error)
GetRecentPermanodes sends results to dest filtered by owner, limit, and before. A zero value for before will default to the current time. The results will have duplicates suppressed, with most recent permanode returned. Note, permanodes more recent than before will still be fetched from the index then skipped. This means runtime scales linearly with the number of nodes more recent than before.
func (*Index) InitBlobSource ¶
func (x *Index) InitBlobSource(blobSource blobserver.FetcherEnumerator)
InitBlobSource sets the index's blob source and starts the background out-of-order indexing loop. It panics if the blobSource is already set. If the index's key fetcher is nil, it is also set to the blobSource argument.
func (*Index) IsDeleted ¶
IsDeleted reports whether the provided blobref (of a permanode or claim) should be considered deleted.
func (*Index) PathLookup ¶
func (*Index) PathsLookup ¶
func (*Index) PathsOfSignerTarget ¶
func (*Index) PermanodeOfSignerAttrValue ¶
func (*Index) PreventStorageAccessForTesting ¶
func (x *Index) PreventStorageAccessForTesting()
PreventStorageAccessForTesting causes any access to the index's underlying Storage interface to panic.
func (*Index) ReceiveBlob ¶
func (*Index) SearchPermanodesWithAttr ¶
func (x *Index) SearchPermanodesWithAttr(ctx context.Context, dest chan<- blob.Ref, request *camtypes.PermanodeByAttrRequest) (err error)
SearchPermanodesWithAttr is just like PermanodeOfSignerAttrValue, except that it returns multiple permanodes and suppresses duplicates. If request.Query is "", it is not used in the prefix search.
func (*Index) StatBlobs ¶
func (*Index) Storage ¶
Storage returns the index's underlying Storage implementation.
type Interface ¶
type Interface interface { sync.Locker RLock() RUnlock() // os.ErrNotExist should be returned if the blob isn't known GetBlobMeta(context.Context, blob.Ref) (camtypes.BlobMeta, error) // Should return os.ErrNotExist if not found. GetFileInfo(ctx context.Context, fileRef blob.Ref) (camtypes.FileInfo, error) // Should return os.ErrNotExist if not found. GetImageInfo(ctx context.Context, fileRef blob.Ref) (camtypes.ImageInfo, error) // Should return os.ErrNotExist if not found. GetMediaTags(ctx context.Context, fileRef blob.Ref) (map[string]string, error) // GetFileLocation returns the location info (currently Exif) of the fileRef. // Should return os.ErrNotExist if fileRef is not found, // is not a file, or it has no location info. GetFileLocation(ctx context.Context, fileRef blob.Ref) (camtypes.Location, error) // KeyId returns the GPG keyid (e.g. "2931A67C26F5ABDA) // given the blobref of its ASCII-armored blobref. // The error is ErrNotFound if not found. KeyId(context.Context, blob.Ref) (string, error) // AppendClaims appends to dst claims on the given permanode. // The signerFilter and attrFilter are both optional. If non-zero, // they filter the return items to only claims made by the given signer // or claims about the given attribute, respectively. // Deleted claims are never returned. // The items may be appended in any order. // // TODO: this should take a context and a callback func // instead of a dst, then it can append to a channel instead, // and the context lets it be interrupted. The callback should // take the context too, so the channel send's select can read // from the Done channel. AppendClaims(ctx context.Context, dst []camtypes.Claim, permaNode blob.Ref, signerFilter blob.Ref, attrFilter string) ([]camtypes.Claim, error) // dest must be closed, even when returning an error. // limit <= 0 means unlimited. 
GetRecentPermanodes(ctx context.Context, dest chan<- camtypes.RecentPermanode, owner blob.Ref, limit int, before time.Time) error // SearchPermanodes finds permanodes matching the provided // request and sends unique permanode blobrefs to dest. // In particular, if request.FuzzyMatch is true, a fulltext // search is performed (if supported by the attribute(s)) // instead of an exact match search. // If request.Query is blank, the permanodes which have // request.Attribute as an attribute (regardless of its value) // are searched. // Additionally, if request.Attribute is blank, all attributes // are searched (as fulltext), otherwise the search is // restricted to the named attribute. // // dest is always closed, regardless of the error return value. SearchPermanodesWithAttr(ctx context.Context, dest chan<- blob.Ref, request *camtypes.PermanodeByAttrRequest) error // ExistingFileSchemas returns 0 or more blobrefs of "bytes" // (TODO(bradfitz): or file?) schema blobs that represent the // bytes of a file given in bytesRef. The file schema blobs // returned are not guaranteed to reference chunks that still // exist on the blobservers, though. It's purely a hint for // clients to avoid uploads if possible. Before re-using any // returned blobref they should be checked. // // Use case: a user drag & drops a large file onto their // browser to upload. (imagine that "large" means anything // larger than a blobserver's max blob size) JavaScript can // first SHA-1 the large file locally, then send the // wholeFileRef to this call and see if they'd previously // uploaded the same file in the past. If so, the upload // can be avoided if at least one of the returned schemaRefs // can be validated (with a validating HEAD request) to still // all exist on the blob server. ExistingFileSchemas(wholeFileRef blob.Ref) (schemaRefs []blob.Ref, err error) // GetDirMembers sends on dest the children of the static // directory dirRef. It returns os.ErrNotExist if dirRef // is nil. 
// dest must be closed, even when returning an error. // limit <= 0 means unlimited. GetDirMembers(dirRef blob.Ref, dest chan<- blob.Ref, limit int) error // Given an owner key, a camliType 'claim', 'attribute' name, // and specific 'value', find the most recent permanode that has // a corresponding 'set-attribute' claim attached. // Returns os.ErrNotExist if none is found. // Only attributes white-listed by IsIndexedAttribute are valid. // TODO(bradfitz): ErrNotExist here is a weird error message ("file" not found). change. // TODO(bradfitz): use keyId instead of signer? PermanodeOfSignerAttrValue(ctx context.Context, signer blob.Ref, attr, val string) (blob.Ref, error) // PathsOfSignerTarget queries the index about "camliPath:" // URL-dispatch attributes. // // It returns a list of all the path claims that have been signed // by the provided signer and point at the given target. // // This is used when editing a permanode, to figure work up // the name resolution tree backwards ultimately to a // camliRoot permanode (which should know its base URL), and // then the complete URL(s) of a target can be found. PathsOfSignerTarget(ctx context.Context, signer, target blob.Ref) ([]*camtypes.Path, error) // All Path claims for (signer, base, suffix) PathsLookup(ctx context.Context, signer, base blob.Ref, suffix string) ([]*camtypes.Path, error) // Most recent Path claim for (signer, base, suffix) as of // provided time 'at', or most recent if 'at' is nil. PathLookup(ctx context.Context, signer, base blob.Ref, suffix string, at time.Time) (*camtypes.Path, error) // EdgesTo finds references to the provided ref. // // For instance, if ref is a permanode, it might find the parent permanodes // that have ref as a member. // Or, if ref is a static file, it might find static directories which contain // that file. // This is a way to go "up" or "back" in a hierarchy. // // opts may be nil to accept the defaults. 
EdgesTo(ref blob.Ref, opts *camtypes.EdgesToOpts) ([]*camtypes.Edge, error) // EnumerateBlobMeta calls fn for each blob known to the // indexer (which may be a subset of all total blobs, since // the indexer is typically configured to not see non-metadata // blobs). The blobs may be sent in any order. If the context // finishes, the return error is ctx.Err(). // If the provided function returns false, iteration ends with a nil // return value. EnumerateBlobMeta(context.Context, func(camtypes.BlobMeta) bool) error }
type LocationHelper ¶
type LocationHelper struct {
// contains filtered or unexported fields
}
LocationHelper queries permanode locations.
A LocationHelper is not safe for concurrent use. Callers should use Lock or RLock on the underlying index instead.
func NewLocationHelper ¶
func NewLocationHelper(ix *Index) *LocationHelper
NewLocationHelper returns a new LocationHelper that uses ix to query blob attributes.
func (*LocationHelper) PermanodeLocation ¶
func (lh *LocationHelper) PermanodeLocation(ctx context.Context, permaNode blob.Ref, at time.Time, signer blob.Ref) (camtypes.Location, error)
PermanodeLocation returns the location info for a permanode, from one of the following sources:
- Permanode attributes "latitude" and "longitude"
- Referenced permanode attributes (e.g. for "foursquare.com:checkin" its "foursquareVenuePermanode")
- Location in permanode camliContent file metadata
The sources are checked in this order; the location from the first source yielding a valid result is returned.
func (*LocationHelper) SetCorpus ¶
func (lh *LocationHelper) SetCorpus(corpus *Corpus)
SetCorpus sets the corpus to be used for location lookups.
Source Files ¶
Directories ¶
| Path | Synopsis |
|---|---|
| indextest | Package indextest contains the unit tests for the indexer so they can be re-used for each specific implementation of the index Storage interface. |
| sqlindex | Package sqlindex implements the sorted.KeyValue interface using an *sql.DB. |