extractors

package
v0.0.0-...-c5d900d Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Jan 10, 2019 License: Apache-2.0 Imports: 38 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

View Source
var AntiwordPath string
View Source
var ExifToolBinary = "exiftool"
View Source
var ExifToolService = fx.Provide(func(lc fx.Lifecycle, logger log15.Logger) ExifTool {
	t := NewExifTool(logger)
	if t != nil {
		utils.Append(lc, t, logger)
	}
	return t
})
View Source
var PDFInfoPath string
View Source
var PDFToTextPath string
View Source
var StopWordsEnglish = set.NewSet()
View Source
var StopWordsFrench = set.NewSet()

Functions

func Absent

func Absent(exe string) error

func Asset

func Asset(name string) ([]byte, error)

Asset loads and returns the asset for the given name. It returns an error if the asset could not be found or could not be loaded.

func AssetDigest

func AssetDigest(name string) ([sha256.Size]byte, error)

AssetDigest returns the digest of the file with the given name. It returns an error if the asset could not be found or the digest could not be loaded.

func AssetDir

func AssetDir(name string) ([]string, error)

AssetDir returns the file names directly below the given directory embedded in the file by go-bindata. For example, if you run go-bindata on data/... and data contains the following hierarchy:

data/
  foo.txt
  img/
    a.png
    b.png

then AssetDir("data") would return []string{"foo.txt", "img"}, AssetDir("data/img") would return []string{"a.png", "b.png"}, AssetDir("foo.txt") and AssetDir("notexist") would return an error, and AssetDir("") would return []string{"data"}.

func AssetInfo

func AssetInfo(name string) (os.FileInfo, error)

AssetInfo loads and returns the asset info for the given name. It returns an error if the asset could not be found or could not be loaded.

func AssetNames

func AssetNames() []string

AssetNames returns the names of the assets.

func AssetString

func AssetString(name string) (string, error)

AssetString returns the asset contents as a string (instead of a []byte).

func BagOfWords

func BagOfWords(text string, language string) map[string]int

func ConvertBytesDoc

func ConvertBytesDoc(b []byte) (content string, err error)

func ConvertBytesDocx

func ConvertBytesDocx(b []byte) (content string, props map[string]interface{}, hasMacro bool, err error)

func ConvertBytesODT

func ConvertBytesODT(b []byte) (content string, props map[string]interface{}, err error)

func ConvertDoc

func ConvertDoc(filename string) (string, error)

func ConvertDocx

func ConvertDocx(filename string) (string, map[string]interface{}, bool, error)

func ConvertODT

func ConvertODT(filename string) (string, map[string]interface{}, error)

func Digests

func Digests() (map[string][sha256.Size]byte, error)

Digests returns a map of all known files and their checksums.

func DocxXMLToText

func DocxXMLToText(r io.Reader) (string, error)

func ExtractWords

func ExtractWords(text string) (words []string)

func HTML2Text

func HTML2Text(h string) (text string, links []string, images []string)

func IsExecutable

func IsExecutable(mimetype string) bool

func Keywords

func Keywords(content string, stems map[string]string, language string) ([]string, []string)

func Language

func Language(content string) string

func Language2

func Language2(content string) string

func MustAsset

func MustAsset(name string) []byte

MustAsset is like Asset but panics when Asset would return an error. It simplifies safe initialization of global variables.

func MustAssetString

func MustAssetString(name string) string

MustAssetString is like AssetString but panics when AssetString would return an error. It simplifies safe initialization of global variables.

func PDFBytesInfo

func PDFBytesInfo(pdf []byte, meta *models.PDFMeta) (*models.PDFMeta, error)

func PDFBytesToText

func PDFBytesToText(content []byte) (result string, err error)

func PDFInfo

func PDFInfo(filename string, meta *models.PDFMeta) (*models.PDFMeta, error)

func PDFToText

func PDFToText(filename string) (string, error)

func ParseReceivedHeader

func ParseReceivedHeader(h string, geoip utils.GeoIP, logger log15.Logger) (e models.ReceivedElement)

func RestoreAsset

func RestoreAsset(dir, name string) error

RestoreAsset restores an asset under the given directory.

func RestoreAssets

func RestoreAssets(dir, name string) error

RestoreAssets restores an asset under the given directory recursively.

func Stems

func Stems(bag map[string]int, language string) map[string]string

func TextRank

func TextRank(content string, stems map[string]string, language string) ([]rank.SingleWord, []rank.Phrase)

func XMLToMap

func XMLToMap(r io.Reader) (map[string]interface{}, error)

XMLToMap converts XML to a nested string map.

func XMLToText

func XMLToText(r io.Reader, breaks []string, skip []string, strict bool) (string, error)

Types

type AbsentUtil

type AbsentUtil struct {
	Exe string
}

func (*AbsentUtil) Error

func (err *AbsentUtil) Error() string

type ExifTool

type ExifTool interface {
	utils.Service
	utils.Prestartable
	utils.Closeable
	Extract(content []byte, meta map[string]interface{}, flags ...string) (map[string]interface{}, error)
	ExtractFromFile(filename string, meta map[string]interface{}, flags ...string) (map[string]interface{}, error)
}

func NewExifTool

func NewExifTool(logger log15.Logger) ExifTool

type ExifToolImpl

type ExifToolImpl struct {
	// contains filtered or unexported fields
}

func (*ExifToolImpl) Close

func (w *ExifToolImpl) Close() error

func (*ExifToolImpl) Extract

func (w *ExifToolImpl) Extract(content []byte, meta map[string]interface{}, flags ...string) (map[string]interface{}, error)

func (*ExifToolImpl) ExtractFromFile

func (w *ExifToolImpl) ExtractFromFile(filename string, meta map[string]interface{}, flags ...string) (map[string]interface{}, error)

func (*ExifToolImpl) Name

func (w *ExifToolImpl) Name() string

func (*ExifToolImpl) Prestart

func (w *ExifToolImpl) Prestart() error

type IcalConsumer

type IcalConsumer struct {
	Events []*models.Event
}

func (*IcalConsumer) ConsumeICal

func (c *IcalConsumer) ConsumeICal(d *goics.Calendar, err error) error

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL