pipeline

package
v1.3.0 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Apr 10, 2024 License: GPL-3.0 Imports: 20 Imported by: 0

Documentation

Overview

Package pipeline is used by the bookpipeline command and handles its core functionality, using channels heavily to coordinate jobs. Note that it is considered an "internal" package: it is not intended for external use, and no guarantee is made about the stability of any of the interfaces it provides.

Index

Constants

View Source
const HeartbeatSeconds = 60

Variables

This section is empty.

Functions

func Analyse

func Analyse(conn Downloader, mkfullpdf bool) func(context.Context, chan string, chan string, chan error, *log.Logger)

func CheckImages

func CheckImages(ctx context.Context, dir string) error

CheckImages checks that all files with a ".jpg" or ".png" suffix in a directory (skipping dotfiles) are images that can be decoded.

func DetectQueueType

func DetectQueueType(dir string, conn Queuer, nowipe bool) string

DetectQueueType detects which queue to use based on the preponderance of files of a particular extension in a directory.

func DownloadAll

func DownloadAll(dir string, name string, conn DownloadLister) error

func DownloadAnalyses

func DownloadAnalyses(dir string, name string, conn Downloader) error

func DownloadBestPages

func DownloadBestPages(dir string, name string, conn Downloader) error

func DownloadBestPngs added in v1.0.0

func DownloadBestPngs(dir string, name string, conn Downloader) error

func DownloadPdfs

func DownloadPdfs(dir string, name string, conn Downloader) error

func GetMailSettings

func GetMailSettings() (mailSettings, error)

func HideCmd added in v1.0.0

func HideCmd(cmd *exec.Cmd)

HideCmd adds a flag to cmd that prevents any console window from being displayed, if necessary for the platform.

func Ocr

func Ocr(training string, tesscmd string) func(context.Context, chan string, chan string, chan error, *log.Logger)

func OcrPage

func OcrPage(ctx context.Context, msg bookpipeline.Qmsg, conn Pipeliner, process func(context.Context, chan string, chan string, chan error, *log.Logger), fromQueue string, toQueue string) error

OcrPage OCRs a page based on a message. It may make sense to roll this back into ProcessBook (on which it is based) once it is working well.

func Preprocess

func Preprocess(thresholds []float64, nowipe bool) func(context.Context, chan string, chan string, chan error, *log.Logger)

func ProcessBook

func ProcessBook(ctx context.Context, msg bookpipeline.Qmsg, conn Pipeliner, process func(context.Context, chan string, chan string, chan error, *log.Logger), match *regexp.Regexp, fromQueue string, toQueue string) error

func SaveLogs

func SaveLogs(conn Uploader, starttime int64, hostname string) error

func UploadImages

func UploadImages(ctx context.Context, dir string, bookname string, conn Uploader) error

UploadImages uploads all files with a suffix of ".jpg" or ".png" (except those which start with a ".") from a directory (recursively) into conn.WIPStorageId(), prefixed with the given bookname and a slash. It also appends sequential numbers, like 0001, to all file names, to ensure they are appropriately named for further processing in the pipeline.

func Wipe

func Wipe(ctx context.Context, towipe chan string, up chan string, errc chan error, logger *log.Logger)

Types

type DownloadLister added in v0.5.0

type DownloadLister interface {
	Download(bucket string, key string, fn string) error
	ListObjects(bucket string, prefix string) ([]string, error)
	Log(v ...interface{})
	WIPStorageId() string
}

type Downloader added in v0.5.0

type Downloader interface {
	Download(bucket string, key string, fn string) error
	Log(v ...interface{})
	WIPStorageId() string
}

type Lister added in v0.5.0

type Lister interface {
	ListObjects(bucket string, prefix string) ([]string, error)
	Log(v ...interface{})
	WIPStorageId() string
}

type MinPipeliner

type MinPipeliner interface {
	Pipeliner
	MinimalInit() error
}

type NullWriter

type NullWriter bool

NullWriter is a null writer that enables non-verbose logging to be discarded.

func (NullWriter) Write

func (w NullWriter) Write(p []byte) (n int, err error)

type Pipeliner

type Pipeliner interface {
	AddToQueue(url string, msg string) error
	AnalyseQueueId() string
	CheckQueue(url string, timeout int64) (bookpipeline.Qmsg, error)
	DelFromQueue(url string, handle string) error
	Download(bucket string, key string, fn string) error
	GetLogger() *log.Logger
	Init() error
	ListObjects(bucket string, prefix string) ([]string, error)
	Log(v ...interface{})
	OCRPageQueueId() string
	PreNoWipeQueueId() string
	PreQueueId() string
	QueueHeartbeat(msg bookpipeline.Qmsg, qurl string, duration int64) (bookpipeline.Qmsg, error)
	Upload(bucket string, key string, path string) error
	WipeQueueId() string
	WIPStorageId() string
}

type Queuer added in v0.5.0

type Queuer interface {
	AddToQueue(url string, msg string) error
	AnalyseQueueId() string
	CheckQueue(url string, timeout int64) (bookpipeline.Qmsg, error)
	DelFromQueue(url string, handle string) error
	Log(v ...interface{})
	OCRPageQueueId() string
	PreNoWipeQueueId() string
	PreQueueId() string
	QueueHeartbeat(msg bookpipeline.Qmsg, qurl string, duration int64) (bookpipeline.Qmsg, error)
	WipeQueueId() string
}

type UploadQueuer added in v0.5.0

type UploadQueuer interface {
	Log(v ...interface{})
	Upload(bucket string, key string, path string) error
	WIPStorageId() string
	PreNoWipeQueueId() string
	PreQueueId() string
	WipeQueueId() string
	OCRPageQueueId() string
	AnalyseQueueId() string
	CheckQueue(url string, timeout int64) (bookpipeline.Qmsg, error)
	AddToQueue(url string, msg string) error
	DelFromQueue(url string, handle string) error
	QueueHeartbeat(msg bookpipeline.Qmsg, qurl string, duration int64) (bookpipeline.Qmsg, error)
}

type Uploader added in v0.5.0

type Uploader interface {
	Log(v ...interface{})
	Upload(bucket string, key string, path string) error
	WIPStorageId() string
}

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL