classify

package
v0.1.0 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Mar 5, 2024 License: Apache-2.0 Imports: 10 Imported by: 0

Documentation

Index

Constants

View Source
const (
	BaseCount = 5
)
View Source
const Ignored = 55

Ignored is the number n of top words to ignore in training and analysis.

Variables

View Source
var (
	Classification_name = map[int32]string{
		0:  "UNKNOWN",
		1:  "PHILOSOPHY",
		2:  "PSYCHOLOGY",
		3:  "RELIGION",
		4:  "HISTORY",
		5:  "GEOGRAPHY",
		6:  "SOCIAL_SCIENCES",
		7:  "POLITICAL_SCIENCE",
		8:  "LAW",
		9:  "EDUCATION",
		10: "MUSIC",
		11: "FINE_ARTS",
		12: "PHILOLOGY_AND_LINGUISTICS",
		13: "LITERATURE",
		14: "SCIENCE",
		15: "MEDICINE",
		16: "AGRICULTURE",
		17: "TECHNOLOGY",
		18: "MILITARY_SCIENCE",
		19: "INFORMATION_SCIENCE",
	}
	Classification_value = map[string]int32{
		"UNKNOWN":                   0,
		"PHILOSOPHY":                1,
		"PSYCHOLOGY":                2,
		"RELIGION":                  3,
		"HISTORY":                   4,
		"GEOGRAPHY":                 5,
		"SOCIAL_SCIENCES":           6,
		"POLITICAL_SCIENCE":         7,
		"LAW":                       8,
		"EDUCATION":                 9,
		"MUSIC":                     10,
		"FINE_ARTS":                 11,
		"PHILOLOGY_AND_LINGUISTICS": 12,
		"LITERATURE":                13,
		"SCIENCE":                   14,
		"MEDICINE":                  15,
		"AGRICULTURE":               16,
		"TECHNOLOGY":                17,
		"MILITARY_SCIENCE":          18,
		"INFORMATION_SCIENCE":       19,
	}
)

Enum value maps for Classification.

View Source
var File_pkg_classify_classify_proto protoreflect.FileDescriptor

Functions

func TrainBayes

func TrainBayes(nClassifications, nWords int, wordBags <-chan *WordBagClassification) <-chan *Bayes

Types

type Bayes

type Bayes struct {
	Classifications int
	Words           int

	Model
}

func (*Bayes) Classify

func (c *Bayes) Classify(page *ordinality.PageWordBag) []ClassificationP

type Classification

type Classification int32
const (
	Classification_UNKNOWN                   Classification = 0
	Classification_PHILOSOPHY                Classification = 1
	Classification_PSYCHOLOGY                Classification = 2
	Classification_RELIGION                  Classification = 3
	Classification_HISTORY                   Classification = 4
	Classification_GEOGRAPHY                 Classification = 5
	Classification_SOCIAL_SCIENCES           Classification = 6
	Classification_POLITICAL_SCIENCE         Classification = 7
	Classification_LAW                       Classification = 8
	Classification_EDUCATION                 Classification = 9
	Classification_MUSIC                     Classification = 10
	Classification_FINE_ARTS                 Classification = 11
	Classification_PHILOLOGY_AND_LINGUISTICS Classification = 12
	Classification_LITERATURE                Classification = 13
	Classification_SCIENCE                   Classification = 14
	Classification_MEDICINE                  Classification = 15
	Classification_AGRICULTURE               Classification = 16
	Classification_TECHNOLOGY                Classification = 17
	Classification_MILITARY_SCIENCE          Classification = 18
	Classification_INFORMATION_SCIENCE       Classification = 19
)

func (Classification) Descriptor

func (Classification) Enum

func (x Classification) Enum() *Classification

func (Classification) EnumDescriptor deprecated

func (Classification) EnumDescriptor() ([]byte, []int)

Deprecated: Use Classification.Descriptor instead.

func (Classification) Number

func (Classification) String

func (x Classification) String() string

func (Classification) Type

type ClassificationP

type ClassificationP struct {
	Classification
	P float64
}

func Normalize

func Normalize(results []ClassificationP) []ClassificationP

type ClassifiedArticles

type ClassifiedArticles struct {
	Articles map[uint32]Classification `` /* 179-byte string literal not displayed */
	// contains filtered or unexported fields
}

ClassifiedArticles holds a map from article IDs to their Classifications.

func Base

func Base() *ClassifiedArticles

func (*ClassifiedArticles) Descriptor deprecated

func (*ClassifiedArticles) Descriptor() ([]byte, []int)

Deprecated: Use ClassifiedArticles.ProtoReflect.Descriptor instead.

func (*ClassifiedArticles) GetArticles

func (x *ClassifiedArticles) GetArticles() map[uint32]Classification

func (*ClassifiedArticles) ProtoMessage

func (*ClassifiedArticles) ProtoMessage()

func (*ClassifiedArticles) ProtoReflect

func (x *ClassifiedArticles) ProtoReflect() protoreflect.Message

func (*ClassifiedArticles) Reset

func (x *ClassifiedArticles) Reset()

func (*ClassifiedArticles) String

func (x *ClassifiedArticles) String() string

func (*ClassifiedArticles) ToIDs

func (x *ClassifiedArticles) ToIDs() []uint

type ClassifiedIDs

type ClassifiedIDs struct {
	Pages map[uint32]Classification `` /* 173-byte string literal not displayed */
	// contains filtered or unexported fields
}

func (*ClassifiedIDs) Descriptor deprecated

func (*ClassifiedIDs) Descriptor() ([]byte, []int)

Deprecated: Use ClassifiedIDs.ProtoReflect.Descriptor instead.

func (*ClassifiedIDs) GetPages

func (x *ClassifiedIDs) GetPages() map[uint32]Classification

func (*ClassifiedIDs) ProtoMessage

func (*ClassifiedIDs) ProtoMessage()

func (*ClassifiedIDs) ProtoReflect

func (x *ClassifiedIDs) ProtoReflect() protoreflect.Message

func (*ClassifiedIDs) Reset

func (x *ClassifiedIDs) Reset()

func (*ClassifiedIDs) String

func (x *ClassifiedIDs) String() string

type ClassifiedTitles

type ClassifiedTitles struct {
	Pages map[string]Classification `` /* 172-byte string literal not displayed */
	// contains filtered or unexported fields
}

func (*ClassifiedTitles) Descriptor deprecated

func (*ClassifiedTitles) Descriptor() ([]byte, []int)

Deprecated: Use ClassifiedTitles.ProtoReflect.Descriptor instead.

func (*ClassifiedTitles) GetPages

func (x *ClassifiedTitles) GetPages() map[string]Classification

func (*ClassifiedTitles) ProtoMessage

func (*ClassifiedTitles) ProtoMessage()

func (*ClassifiedTitles) ProtoReflect

func (x *ClassifiedTitles) ProtoReflect() protoreflect.Message

func (*ClassifiedTitles) Reset

func (x *ClassifiedTitles) Reset()

func (*ClassifiedTitles) String

func (x *ClassifiedTitles) String() string

func (*ClassifiedTitles) ToClassifiedIDs

func (x *ClassifiedTitles) ToClassifiedIDs(idOf map[string]uint32) *ClassifiedIDs

type Classifier

type Classifier interface {
	// Classify returns the sorted, normalized log probabilities of
	// each possible classification.
	Classify(page *ordinality.PageWordBag) []ClassificationP
}

type Counts

type Counts [][]uint32

func NewCounts

func NewCounts(nClassifications, nWords int) Counts

type Distribution

type Distribution []float64

Distribution is a (possibly non-normalized) collection of non-negative probabilities.

func (Distribution) Normalize

func (d Distribution) Normalize()

func (Distribution) ToLogDistribution

func (d Distribution) ToLogDistribution() LogDistribution

type LogDistribution

type LogDistribution []float64

LogDistribution is a (possibly non-normalized) collection of log probabilities.

func (LogDistribution) Add

func (d LogDistribution) Add(other LogDistribution)

Add adds the elements of other to d. The result may not be normalized.

func (LogDistribution) ToDistribution

func (d LogDistribution) ToDistribution() Distribution

ToDistribution returns an equivalent normalized Distribution.

type Model

type Model [][]float64

Model holds log probabilities: for each word and each classification, the log probability that a random word in an article of that classification is that particular word.

The outer slice is indexed by word index; each inner slice is indexed by classification ID.

func NewModel

func NewModel(nClassifications, nWords int) Model

type PClassification

type PClassification struct {
	P float64
	Classification
}

PClassification is a Classification and its probability.

type PClassifications

type PClassifications []PClassification

func ToPClassifications

func ToPClassifications(d Distribution) PClassifications

ToPClassifications converts a Distribution into the Classifications it represents, each paired with its probability.

func (PClassifications) Sort

func (r PClassifications) Sort()

Sort sorts r from the highest probability to the lowest probability.

type PageClassifications

type PageClassifications struct {
	Classifications []Classification `protobuf:"varint,1,rep,packed,name=classifications,proto3,enum=Classification" json:"classifications,omitempty"`
	// contains filtered or unexported fields
}

func (*PageClassifications) Descriptor deprecated

func (*PageClassifications) Descriptor() ([]byte, []int)

Deprecated: Use PageClassifications.ProtoReflect.Descriptor instead.

func (*PageClassifications) GetClassifications

func (x *PageClassifications) GetClassifications() []Classification

func (*PageClassifications) ProtoMessage

func (*PageClassifications) ProtoMessage()

func (*PageClassifications) ProtoReflect

func (x *PageClassifications) ProtoReflect() protoreflect.Message

func (*PageClassifications) Reset

func (x *PageClassifications) Reset()

func (*PageClassifications) String

func (x *PageClassifications) String() string

type PageClassificationsMap

type PageClassificationsMap struct {
	Pages map[uint32]*PageClassifications `` /* 152-byte string literal not displayed */
	// contains filtered or unexported fields
}

func (*PageClassificationsMap) AddPage

func (x *PageClassificationsMap) AddPage(known map[uint32]Classification, pageTitles map[uint32]string, pageID uint32, categories []uint32, pageCategories *documents.PageCategories)

func (*PageClassificationsMap) Descriptor deprecated

func (*PageClassificationsMap) Descriptor() ([]byte, []int)

Deprecated: Use PageClassificationsMap.ProtoReflect.Descriptor instead.

func (*PageClassificationsMap) GetPages

func (*PageClassificationsMap) ProtoMessage

func (*PageClassificationsMap) ProtoMessage()

func (*PageClassificationsMap) ProtoReflect

func (x *PageClassificationsMap) ProtoReflect() protoreflect.Message

func (*PageClassificationsMap) Reset

func (x *PageClassificationsMap) Reset()

func (*PageClassificationsMap) String

func (x *PageClassificationsMap) String() string

type WordBagClassification

type WordBagClassification struct {
	Classification
	*ordinality.PageWordBag
}

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL