Documentation
¶
Index ¶
- Constants
- func Compress(s string) []byte
- func CountUrls(s string) int
- func Decompress(b []byte) (string, error)
- func GetIthUrl(strs string, num uint64) string
- type Corpus
- func (c *Corpus) ClusterToIndex(i uint) uint
- func (c *Corpus) Clusters() []uint
- func (c *Corpus) GetCompressUrl() bool
- func (c *Corpus) GetEmbedding(index uint64) []int8
- func (c *Corpus) GetEmbeddingSlots() uint64
- func (c *Corpus) GetNumDocs() uint64
- func (c *Corpus) GetParams() Params
- func (c *Corpus) GetSlotBits() uint64
- func (c *Corpus) GetSubcluster(index uint) []byte
- func (c *Corpus) GetUrlBytes() uint64
- func (c *Corpus) GetUrlsInCluster(i uint64) string
- func (c *Corpus) IndexOfSubclusterWithinCluster(cluster, sc uint) int
- func (c *Corpus) NumClusters() int
- func (c *Corpus) NumDocsInCluster(i uint) uint64
- func (c *Corpus) NumSubclusters() int
- func (c *Corpus) NumSubclustersInCluster(i uint) int
- func (c *Corpus) SizeOfSubcluster(i uint) int
- func (c *Corpus) SizeOfSubclusterByIndex(cluster uint, index int) int
- func (c *Corpus) SubclusterToClusterMap() map[uint]uint
- type Params
- type Subcluster
Constants ¶
View Source
const (
	SUBCLUSTER_DELIM        = "-------------------------"
	URL_DELIM               = " "
	MAX_URL_LEN             = 500
	DISALLOW_EMPTY_CLUSTERS = false
)
Variables ¶
This section is empty.
Functions ¶
func Decompress ¶
Types ¶
type Corpus ¶
type Corpus struct {
// contains filtered or unexported fields
}
func ReadEmbeddingsCsv ¶
func ReadEmbeddingsTxt ¶
func ReadUrlsCsv ¶
func (*Corpus) ClusterToIndex ¶
func (*Corpus) GetCompressUrl ¶
func (*Corpus) GetEmbedding ¶
func (*Corpus) GetEmbeddingSlots ¶
func (*Corpus) GetNumDocs ¶
func (*Corpus) GetSlotBits ¶
func (*Corpus) GetSubcluster ¶
func (*Corpus) GetUrlBytes ¶
func (*Corpus) GetUrlsInCluster ¶
func (*Corpus) IndexOfSubclusterWithinCluster ¶
func (*Corpus) NumClusters ¶
func (*Corpus) NumDocsInCluster ¶
func (*Corpus) NumSubclusters ¶
func (*Corpus) NumSubclustersInCluster ¶
func (*Corpus) SizeOfSubcluster ¶
Returns the size of the subcluster in bytes.
func (*Corpus) SizeOfSubclusterByIndex ¶
func (*Corpus) SubclusterToClusterMap ¶
type Params ¶
type Params struct {
	NumDocs        uint64 // number of docs in corpus
	EmbeddingSlots uint64 // number of slots per embedding
	SlotBits       uint64 // precision of each slot (in bits)
	UrlBytes       uint64 // max bytes/url -- after optional compression
	CompressUrl    bool   // whether the urls are compressed with gzip
}
func (*Params) Consistent ¶
type Subcluster ¶
type Subcluster struct {
// contains filtered or unexported fields
}
TODO: Change to uint
func NewSubcluster ¶
func NewSubcluster(i, s uint64) *Subcluster
func (*Subcluster) GobDecode ¶
func (c *Subcluster) GobDecode(buf []byte) error
func (*Subcluster) GobEncode ¶
func (c *Subcluster) GobEncode() ([]byte, error)
func (Subcluster) Index ¶
func (sc Subcluster) Index() uint64
func (*Subcluster) SetIndex ¶
func (sc *Subcluster) SetIndex(i uint64)
func (*Subcluster) SetSize ¶
func (sc *Subcluster) SetSize(s uint64)
func (Subcluster) Size ¶
func (sc Subcluster) Size() uint64
Click to show internal directories.
Click to hide internal directories.