Documentation
¶
Index ¶
- Constants
- Variables
- func TrainBayes(nClassifications, nWords int, wordBags <-chan *WordBagClassification) <-chan *Bayes
- type Bayes
- type Classification
- func (Classification) Descriptor() protoreflect.EnumDescriptor
- func (x Classification) Enum() *Classification
- func (Classification) EnumDescriptor() ([]byte, []int) deprecated
- func (x Classification) Number() protoreflect.EnumNumber
- func (x Classification) String() string
- func (Classification) Type() protoreflect.EnumType
- type ClassificationP
- type ClassifiedArticles
- func (*ClassifiedArticles) Descriptor() ([]byte, []int) deprecated
- func (x *ClassifiedArticles) GetArticles() map[uint32]Classification
- func (*ClassifiedArticles) ProtoMessage()
- func (x *ClassifiedArticles) ProtoReflect() protoreflect.Message
- func (x *ClassifiedArticles) Reset()
- func (x *ClassifiedArticles) String() string
- func (x *ClassifiedArticles) ToIDs() []uint
- type ClassifiedIDs
- type ClassifiedTitles
- func (*ClassifiedTitles) Descriptor() ([]byte, []int) deprecated
- func (x *ClassifiedTitles) GetPages() map[string]Classification
- func (*ClassifiedTitles) ProtoMessage()
- func (x *ClassifiedTitles) ProtoReflect() protoreflect.Message
- func (x *ClassifiedTitles) Reset()
- func (x *ClassifiedTitles) String() string
- func (x *ClassifiedTitles) ToClassifiedIDs(idOf map[string]uint32) *ClassifiedIDs
- type Classifier
- type Counts
- type Distribution
- type LogDistribution
- type Model
- type PClassification
- type PClassifications
- type PageClassifications
- func (*PageClassifications) Descriptor() ([]byte, []int) deprecated
- func (x *PageClassifications) GetClassifications() []Classification
- func (*PageClassifications) ProtoMessage()
- func (x *PageClassifications) ProtoReflect() protoreflect.Message
- func (x *PageClassifications) Reset()
- func (x *PageClassifications) String() string
- type PageClassificationsMap
- func (x *PageClassificationsMap) AddPage(known map[uint32]Classification, pageTitles map[uint32]string, pageID uint32, categories []uint32, pageCategories *documents.PageCategories)
- func (*PageClassificationsMap) Descriptor() ([]byte, []int) deprecated
- func (x *PageClassificationsMap) GetPages() map[uint32]*PageClassifications
- func (*PageClassificationsMap) ProtoMessage()
- func (x *PageClassificationsMap) ProtoReflect() protoreflect.Message
- func (x *PageClassificationsMap) Reset()
- func (x *PageClassificationsMap) String() string
- type WordBagClassification
Constants ¶
const (
BaseCount = 5
)
const Ignored = 55
Ignored is the number of top words to ignore in training and analysis.
Variables ¶
var ( Classification_name = map[int32]string{ 0: "UNKNOWN", 1: "PHILOSOPHY", 2: "PSYCHOLOGY", 3: "RELIGION", 4: "HISTORY", 5: "GEOGRAPHY", 6: "SOCIAL_SCIENCES", 7: "POLITICAL_SCIENCE", 8: "LAW", 9: "EDUCATION", 10: "MUSIC", 11: "FINE_ARTS", 12: "PHILOLOGY_AND_LINGUISTICS", 13: "LITERATURE", 14: "SCIENCE", 15: "MEDICINE", 16: "AGRICULTURE", 17: "TECHNOLOGY", 18: "MILITARY_SCIENCE", 19: "INFORMATION_SCIENCE", } Classification_value = map[string]int32{ "UNKNOWN": 0, "PHILOSOPHY": 1, "PSYCHOLOGY": 2, "RELIGION": 3, "HISTORY": 4, "GEOGRAPHY": 5, "SOCIAL_SCIENCES": 6, "POLITICAL_SCIENCE": 7, "LAW": 8, "EDUCATION": 9, "MUSIC": 10, "FINE_ARTS": 11, "PHILOLOGY_AND_LINGUISTICS": 12, "LITERATURE": 13, "SCIENCE": 14, "MEDICINE": 15, "AGRICULTURE": 16, "TECHNOLOGY": 17, "MILITARY_SCIENCE": 18, "INFORMATION_SCIENCE": 19, } )
Enum value maps for Classification.
var File_pkg_classify_classify_proto protoreflect.FileDescriptor
Functions ¶
func TrainBayes ¶
func TrainBayes(nClassifications, nWords int, wordBags <-chan *WordBagClassification) <-chan *Bayes
Types ¶
type Bayes ¶
func (*Bayes) Classify ¶
func (c *Bayes) Classify(page *ordinality.PageWordBag) []ClassificationP
type Classification ¶
type Classification int32
const ( Classification_UNKNOWN Classification = 0 Classification_PHILOSOPHY Classification = 1 Classification_PSYCHOLOGY Classification = 2 Classification_RELIGION Classification = 3 Classification_HISTORY Classification = 4 Classification_GEOGRAPHY Classification = 5 Classification_SOCIAL_SCIENCES Classification = 6 Classification_POLITICAL_SCIENCE Classification = 7 Classification_LAW Classification = 8 Classification_EDUCATION Classification = 9 Classification_MUSIC Classification = 10 Classification_FINE_ARTS Classification = 11 Classification_PHILOLOGY_AND_LINGUISTICS Classification = 12 Classification_LITERATURE Classification = 13 Classification_SCIENCE Classification = 14 Classification_MEDICINE Classification = 15 Classification_AGRICULTURE Classification = 16 Classification_TECHNOLOGY Classification = 17 Classification_MILITARY_SCIENCE Classification = 18 Classification_INFORMATION_SCIENCE Classification = 19 )
func (Classification) Descriptor ¶
func (Classification) Descriptor() protoreflect.EnumDescriptor
func (Classification) Enum ¶
func (x Classification) Enum() *Classification
func (Classification) EnumDescriptor
deprecated
func (Classification) EnumDescriptor() ([]byte, []int)
Deprecated: Use Classification.Descriptor instead.
func (Classification) Number ¶
func (x Classification) Number() protoreflect.EnumNumber
func (Classification) String ¶
func (x Classification) String() string
func (Classification) Type ¶
func (Classification) Type() protoreflect.EnumType
type ClassificationP ¶
type ClassificationP struct { Classification P float64 }
func Normalize ¶
func Normalize(results []ClassificationP) []ClassificationP
type ClassifiedArticles ¶
type ClassifiedArticles struct { Articles map[uint32]Classification `` /* 179-byte string literal not displayed */ // contains filtered or unexported fields }
ClassifiedArticles is a map from article IDs to their Classification.
func Base ¶
func Base() *ClassifiedArticles
func (*ClassifiedArticles) Descriptor
deprecated
func (*ClassifiedArticles) Descriptor() ([]byte, []int)
Deprecated: Use ClassifiedArticles.ProtoReflect.Descriptor instead.
func (*ClassifiedArticles) GetArticles ¶
func (x *ClassifiedArticles) GetArticles() map[uint32]Classification
func (*ClassifiedArticles) ProtoMessage ¶
func (*ClassifiedArticles) ProtoMessage()
func (*ClassifiedArticles) ProtoReflect ¶
func (x *ClassifiedArticles) ProtoReflect() protoreflect.Message
func (*ClassifiedArticles) Reset ¶
func (x *ClassifiedArticles) Reset()
func (*ClassifiedArticles) String ¶
func (x *ClassifiedArticles) String() string
func (*ClassifiedArticles) ToIDs ¶
func (x *ClassifiedArticles) ToIDs() []uint
type ClassifiedIDs ¶
type ClassifiedIDs struct { Pages map[uint32]Classification `` /* 173-byte string literal not displayed */ // contains filtered or unexported fields }
func (*ClassifiedIDs) Descriptor
deprecated
func (*ClassifiedIDs) Descriptor() ([]byte, []int)
Deprecated: Use ClassifiedIDs.ProtoReflect.Descriptor instead.
func (*ClassifiedIDs) GetPages ¶
func (x *ClassifiedIDs) GetPages() map[uint32]Classification
func (*ClassifiedIDs) ProtoMessage ¶
func (*ClassifiedIDs) ProtoMessage()
func (*ClassifiedIDs) ProtoReflect ¶
func (x *ClassifiedIDs) ProtoReflect() protoreflect.Message
func (*ClassifiedIDs) Reset ¶
func (x *ClassifiedIDs) Reset()
func (*ClassifiedIDs) String ¶
func (x *ClassifiedIDs) String() string
type ClassifiedTitles ¶
type ClassifiedTitles struct { Pages map[string]Classification `` /* 172-byte string literal not displayed */ // contains filtered or unexported fields }
func (*ClassifiedTitles) Descriptor
deprecated
func (*ClassifiedTitles) Descriptor() ([]byte, []int)
Deprecated: Use ClassifiedTitles.ProtoReflect.Descriptor instead.
func (*ClassifiedTitles) GetPages ¶
func (x *ClassifiedTitles) GetPages() map[string]Classification
func (*ClassifiedTitles) ProtoMessage ¶
func (*ClassifiedTitles) ProtoMessage()
func (*ClassifiedTitles) ProtoReflect ¶
func (x *ClassifiedTitles) ProtoReflect() protoreflect.Message
func (*ClassifiedTitles) Reset ¶
func (x *ClassifiedTitles) Reset()
func (*ClassifiedTitles) String ¶
func (x *ClassifiedTitles) String() string
func (*ClassifiedTitles) ToClassifiedIDs ¶
func (x *ClassifiedTitles) ToClassifiedIDs(idOf map[string]uint32) *ClassifiedIDs
type Classifier ¶
type Classifier interface { // Classify returns the sorted, normalized log probabilities of // each possible classification. Classify(page *ordinality.PageWordBag) []ClassificationP }
type Distribution ¶
type Distribution []float64
Distribution is a (possibly non-normalized) collection of non-negative probabilities.
func (Distribution) Normalize ¶
func (d Distribution) Normalize()
func (Distribution) ToLogDistribution ¶
func (d Distribution) ToLogDistribution() LogDistribution
type LogDistribution ¶
type LogDistribution []float64
LogDistribution is a (possibly non-normalized) collection of log probabilities.
func (LogDistribution) Add ¶
func (d LogDistribution) Add(other LogDistribution)
Add adds the elements of other to d. The result may not be normalized.
func (LogDistribution) ToDistribution ¶
func (d LogDistribution) ToDistribution() Distribution
ToDistribution returns an equivalent normalized Distribution.
type Model ¶
type Model [][]float64
Model holds log probabilities: for a given classification, the log probability that a random word in an article of that classification is a particular word.
The outer slice is indexed by word index; the inner slice is indexed by classification ID.
type PClassification ¶
type PClassification struct { P float64 Classification }
PClassification is a Classification and its probability.
type PClassifications ¶
type PClassifications []PClassification
func ToPClassifications ¶
func ToPClassifications(d Distribution) PClassifications
ToPClassifications converts a distribution to the respective Classifications and probabilities it represents.
func (PClassifications) Sort ¶
func (r PClassifications) Sort()
Sort sorts r from the highest probability to the lowest probability.
type PageClassifications ¶
type PageClassifications struct { Classifications []Classification `protobuf:"varint,1,rep,packed,name=classifications,proto3,enum=Classification" json:"classifications,omitempty"` // contains filtered or unexported fields }
func (*PageClassifications) Descriptor
deprecated
func (*PageClassifications) Descriptor() ([]byte, []int)
Deprecated: Use PageClassifications.ProtoReflect.Descriptor instead.
func (*PageClassifications) GetClassifications ¶
func (x *PageClassifications) GetClassifications() []Classification
func (*PageClassifications) ProtoMessage ¶
func (*PageClassifications) ProtoMessage()
func (*PageClassifications) ProtoReflect ¶
func (x *PageClassifications) ProtoReflect() protoreflect.Message
func (*PageClassifications) Reset ¶
func (x *PageClassifications) Reset()
func (*PageClassifications) String ¶
func (x *PageClassifications) String() string
type PageClassificationsMap ¶
type PageClassificationsMap struct { Pages map[uint32]*PageClassifications `` /* 152-byte string literal not displayed */ // contains filtered or unexported fields }
func (*PageClassificationsMap) AddPage ¶
func (x *PageClassificationsMap) AddPage(known map[uint32]Classification, pageTitles map[uint32]string, pageID uint32, categories []uint32, pageCategories *documents.PageCategories)
func (*PageClassificationsMap) Descriptor
deprecated
func (*PageClassificationsMap) Descriptor() ([]byte, []int)
Deprecated: Use PageClassificationsMap.ProtoReflect.Descriptor instead.
func (*PageClassificationsMap) GetPages ¶
func (x *PageClassificationsMap) GetPages() map[uint32]*PageClassifications
func (*PageClassificationsMap) ProtoMessage ¶
func (*PageClassificationsMap) ProtoMessage()
func (*PageClassificationsMap) ProtoReflect ¶
func (x *PageClassificationsMap) ProtoReflect() protoreflect.Message
func (*PageClassificationsMap) Reset ¶
func (x *PageClassificationsMap) Reset()
func (*PageClassificationsMap) String ¶
func (x *PageClassificationsMap) String() string
type WordBagClassification ¶
type WordBagClassification struct { Classification *ordinality.PageWordBag }