rinser

package
v0.24.0 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Jan 22, 2025 License: MIT Imports: 43 Imported by: 0

Documentation

Index

Constants

View Source
const FormFileKey = "file"
View Source
const FormLangKey = "lang"
View Source
const FormURLKey = "url"
View Source
const WorkerImage = "ghcr.io/linkdata/rinseworker"

Variables

View Source
var ErrContentEncoded = errors.New("Content-Encoding is set")
View Source
var ErrDocumentTooLarge = errors.New("document too large")
View Source
var ErrDuplicateUUID = errors.New("duplicate UUID")
View Source
var ErrIllegalLanguage = errors.New("illegal language string")
View Source
var ErrIllegalURLScheme = errors.New("illegal URL scheme")
View Source
var ErrImageSeenTwice = errors.New("image file seen twice")
View Source
var ErrMissingDocument = errors.New("no document found")
View Source
var ErrMultipleDocuments = errors.New("multiple documents found")
View Source
var ErrNoJWTFoundInHeader = fmt.Errorf("no JWT found in header")
View Source
var ErrWorkerRootDirNotFound = errors.New("/opt/rinseworker not found")
View Source
var LanguageCode = map[string]string{}/* 103 elements not displayed */
View Source
var LanguageTika = map[string]string{
	"da": "dan",
	"de": "deu",
	"et": "est",
	"el": "ell",
	"en": "eng",
	"es": "spa",
	"fi": "fin",
	"fr": "fra",
	"hu": "hun",
	"is": "isl",
	"it": "ita",
	"nl": "nld",
	"no": "nor",
	"pl": "pol",
	"pt": "por",
	"ru": "rus",
	"sv": "swe",
	"th": "tha",
}

Functions

func GetJWTFromHeader added in v0.19.0

func GetJWTFromHeader(r *http.Request) (string, error)

GetJWTFromHeader parses the Authorization header and matches the pattern {string}.{string}.{string} to find a potential JWT. For example, if the header is 'Authorization': 'Bearer {JWT}', only the JWT itself is returned. It returns an error if no JWT is found or the format is invalid.

func HTTPJSON

func HTTPJSON(hw http.ResponseWriter, code int, obj any)

func SendHTTPError

func SendHTTPError(hw http.ResponseWriter, code int, err error)

Types

type AddJobURL

type AddJobURL struct {
	URL           string `json:"url" example:"https://getsamplefiles.com/download/pdf/sample-1.pdf"`
	Lang          string `json:"lang" example:"auto"`
	MaxSizeMB     int    `json:"maxsizemb" example:"2048"`
	MaxTimeSec    int    `json:"maxtimesec" example:"86400"`
	TimeoutSec    int    `json:"timeoutsec" example:"60"`
	CleanupSec    int    `json:"cleanupsec" example:"86400"`
	CleanupGotten bool   `json:"cleanupgotten" example:"true"`
	Private       bool   `json:"private" example:"false"`
}

type HTTPError

type HTTPError struct {
	Code  int
	Error string
}

type Job

type Job struct {
	Rinse         *Rinse        `json:"-"`
	Workdir       string        `json:"workdir" example:"/tmp/rinse-550e8400-e29b-41d4-a716-446655440000"`
	Datadir       string        `json:"-"`
	Name          string        `json:"name" example:"example.docx"`
	Created       time.Time     `json:"created" example:"2024-01-01T12:00:00+00:00" format:"dateTime"`
	UUID          uuid.UUID     `json:"uuid" example:"550e8400-e29b-41d4-a716-446655440000" format:"uuid"`
	MaxSizeMB     int           `json:"maxsizemb" example:"2048"`
	MaxTimeSec    int           `json:"maxtimesec" example:"86400"`
	CleanupSec    int           `json:"cleanupsec" example:"600"`
	TimeoutSec    int           `json:"timeoutsec" example:"60"`
	CleanupGotten bool          `json:"cleanupgotten" example:"true"`
	Private       bool          `json:"private" example:"false"`
	Email         string        `json:"email,omitempty" example:"user@example.com"`
	StoppedCh     chan struct{} `json:"-"` // closed when job stopped

	Error     error  `json:"error,omitempty"`
	PdfName   string `json:"pdfname,omitempty" example:"example-docx-rinsed.pdf"` // rinsed PDF file name
	Language  string `json:"lang,omitempty" example:"auto"`
	Done      bool   `json:"done,omitempty" example:"false"`
	Diskuse   int64  `json:"diskuse,omitempty" example:"1234"`
	Pages     int    `json:"pages,omitempty" example:"1"`
	Downloads int    `json:"downloads,omitempty" example:"0"`
	// contains filtered or unexported fields
}

func NewJob

func NewJob(rns *Rinse, name, lang string, maxsizemb, maxtimesec, cleanupsec, timeoutsec int, cleanupgotten, private bool, email string) (job *Job, err error)

func (*Job) Button

func (job *Job) Button() jaws.ClickHandler

func (*Job) Close

func (job *Job) Close(err error)

func (*Job) DocumentName

func (job *Job) DocumentName() (s string)

func (*Job) HasLog added in v0.24.0

func (job *Job) HasLog() (yes bool)

func (*Job) HasMeta added in v0.8.2

func (job *Job) HasMeta() (yes bool)

func (*Job) Lang

func (job *Job) Lang() (s string)

func (*Job) LogPath added in v0.24.0

func (job *Job) LogPath() string

func (*Job) MaxUploadSize added in v0.9.8

func (job *Job) MaxUploadSize() (n int64)

func (*Job) MetaPath added in v0.8.2

func (job *Job) MetaPath() string

func (*Job) Preview

func (job *Job) Preview(numPages, imgWidth int) (b []byte, err error)

func (*Job) Previewable

func (job *Job) Previewable() (yes bool)

func (*Job) ResultName

func (job *Job) ResultName() (s string)

func (*Job) ResultPath

func (job *Job) ResultPath() string

func (*Job) Start

func (job *Job) Start() (err error)

func (*Job) State

func (job *Job) State() (state JobState)

func (*Job) Stopped added in v0.9.8

func (job *Job) Stopped() (t time.Time)

func (*Job) UiJobLog added in v0.24.0

func (job *Job) UiJobLog() jaws.Updater

func (*Job) UiJobMeta added in v0.8.2

func (job *Job) UiJobMeta() jaws.Updater

func (*Job) UiJobPreview added in v0.8.2

func (job *Job) UiJobPreview() jaws.Updater
func (job *Job) UiLink() jaws.HTMLGetter

func (*Job) UiStatus

func (job *Job) UiStatus() (ui jaws.HTMLGetter)

type JobState

type JobState int
const (
	JobNew JobState = iota
	JobStarting
	JobDownload
	JobExtractMeta
	JobDetectLanguage
	JobDocToPdf
	JobPdfToImages
	JobTesseract
	JobEnding
	JobFinished
	JobFailed
)

type Rinse

type Rinse struct {
	Config     *webserv.Config
	Jaws       *jaws.Jaws
	JawsAuth   *jawsauth.Server
	RunscBin   string
	RootDir    string
	FaviconURI string
	Languages  []string

	OAuth2Settings jawsauth.Config

	JWTPublicKeys jwt.JSONWebKeySet
	// contains filtered or unexported fields
}

func New

func New(cfg *webserv.Config, mux *http.ServeMux, jw *jaws.Jaws, devel bool) (rns *Rinse, err error)

func (*Rinse) AddJob

func (rns *Rinse) AddJob(job *Job) (err error)

func (*Rinse) AuthFn added in v0.9.9

func (rns *Rinse) AuthFn(fn http.HandlerFunc) http.Handler

func (*Rinse) CheckAuth added in v0.19.0

func (rns *Rinse) CheckAuth(w http.ResponseWriter, r *http.Request, fn http.HandlerFunc)

CheckAuth checks for a JWT in the header. If no JWT is found, it redirects to login. If a JWT is found in the header but is invalid, an error response is returned to the caller. If a JWT is found in the header and is valid, it sets EmailKey in the session to the 'username' obtained from the JWT.

func (*Rinse) CleanupSec added in v0.9.8

func (rns *Rinse) CleanupSec() (n int)

func (*Rinse) Close

func (rns *Rinse) Close()

func (*Rinse) Error added in v0.24.0

func (rns *Rinse) Error(msg string, keyValuePairs ...any)

func (*Rinse) FindJob

func (rns *Rinse) FindJob(s string) *Job

func (*Rinse) FormFileKey

func (rns *Rinse) FormFileKey() string

func (*Rinse) FormLangKey

func (rns *Rinse) FormLangKey() string

func (*Rinse) FormURLKey

func (rns *Rinse) FormURLKey() string

func (*Rinse) FoundValidJWTInSession added in v0.19.0

func (rns *Rinse) FoundValidJWTInSession() (bool, error)

func (*Rinse) GetEmail added in v0.14.0

func (rns *Rinse) GetEmail(hr *http.Request) (s string)

func (*Rinse) GetExternalIP added in v0.13.0

func (rns *Rinse) GetExternalIP() (publicip string, err error)

func (*Rinse) Info added in v0.24.0

func (rns *Rinse) Info(msg string, keyValuePairs ...any)

func (*Rinse) IsAdmin added in v0.14.0

func (rns *Rinse) IsAdmin(email string) (yes bool)

func (*Rinse) IsClosed

func (rns *Rinse) IsClosed() (yes bool)

func (*Rinse) JawsContains

func (rns *Rinse) JawsContains(e *jaws.Element) (contents []jaws.UI)

JawsContains implements jaws.Container.

func (*Rinse) JobList

func (rns *Rinse) JobList(email string) (jobs []*Job)

func (*Rinse) LanguageName

func (rns *Rinse) LanguageName(code string) string

func (*Rinse) MaxConcurrent

func (rns *Rinse) MaxConcurrent() (n int)

func (*Rinse) MaxTimeSec added in v0.9.8

func (rns *Rinse) MaxTimeSec() (n int)

func (*Rinse) MaybeStartJob

func (rns *Rinse) MaybeStartJob() (err error)

func (*Rinse) PkgName

func (rns *Rinse) PkgName() string

func (*Rinse) PkgVersion

func (rns *Rinse) PkgVersion() string

func (*Rinse) ProxyURL added in v0.15.0

func (rns *Rinse) ProxyURL() string

func (*Rinse) RESTDELETEJobsUUID

func (rns *Rinse) RESTDELETEJobsUUID(hw http.ResponseWriter, hr *http.Request)

RESTDELETEJobsUUID godoc

@Summary		Delete a job
@Description	Delete by job UUID
@Tags			jobs
@Accept			*/*
@Produce		json
@Param			Authorization	header		string	false	"JWT token"
@Param			uuid			path		string	true	"49d1e304-d2b8-46bf-b6a6-f1e9b797e1b0"
@Success		200				{object}	Job
@Failure		404				{object}	HTTPError
@Router			/jobs/{uuid} [delete]

func (*Rinse) RESTGETJobs

func (rns *Rinse) RESTGETJobs(hw http.ResponseWriter, hr *http.Request)

RESTGETJobs godoc

@Summary		List jobs
@Description	Get a list of all jobs.
@Tags			jobs
@Accept			*/*
@Produce		json
@Param			Authorization	header	string	false	"JWT token"
@Success		200				{array}	Job
@Router			/jobs [get]

func (*Rinse) RESTGETJobsUUID

func (rns *Rinse) RESTGETJobsUUID(hw http.ResponseWriter, hr *http.Request)

RESTGETJobsUUID godoc

@Summary		Get job metadata.
@Description	Get job metadata by UUID.
@Tags			jobs
@Accept			json
@Produce		json
@Param			Authorization	header		string	false	"JWT token"
@Param			uuid			path		string	true	"49d1e304-d2b8-46bf-b6a6-f1e9b797e1b0"
@Success		200				{object}	Job
@Failure		404				{object}	HTTPError
@Router			/jobs/{uuid} [get]

func (*Rinse) RESTGETJobsUUIDLog added in v0.24.0

func (rns *Rinse) RESTGETJobsUUIDLog(hw http.ResponseWriter, hr *http.Request)

RESTGETJobsUUIDLog godoc

@Summary		Get the job's log.
@Description	Get the job's log.
@Tags			jobs
@Accept			*/*
@Produce		text/plain
@Param			uuid	path		string	true	"49d1e304-d2b8-46bf-b6a6-f1e9b797e1b0"
@Success		200		{file}		file	""
@Success		202		{object}	Job		"Log not yet ready."
@Failure		404		{object}	HTTPError
@Failure		410		{object}	HTTPError "Job failed."
@Failure		500		{object}	HTTPError
@Router			/jobs/{uuid}/log [get]

func (*Rinse) RESTGETJobsUUIDMeta added in v0.8.1

func (rns *Rinse) RESTGETJobsUUIDMeta(hw http.ResponseWriter, hr *http.Request)

RESTGETJobsUUIDMeta godoc

@Summary		Get the job's document metadata.
@Description	Get the job's document metadata.
@Tags			jobs
@Accept			*/*
@Produce		json
@Param			Authorization	header		string	false	"JWT token"
@Param			uuid			path		string	true	"49d1e304-d2b8-46bf-b6a6-f1e9b797e1b0"
@Success		200				{file}		file	""
@Success		202				{object}	Job		"Metadata not yet ready."
@Failure		404				{object}	HTTPError
@Failure		410				{object}	HTTPError	"Job failed."
@Failure		500				{object}	HTTPError
@Router			/jobs/{uuid}/meta [get]

func (*Rinse) RESTGETJobsUUIDPreview

func (rns *Rinse) RESTGETJobsUUIDPreview(w http.ResponseWriter, r *http.Request)

RESTGETJobsUUIDPreview godoc

@Summary		Show a job preview image
@Description	Show a job preview image by UUID.
@Tags			jobs
@Accept			*/*
@Produce		html
@Produce		jpeg
@Param			Authorization	header		string	false	"JWT token"
@Param			uuid			path		string	true	"49d1e304-d2b8-46bf-b6a6-f1e9b797e1b0"
@Param			pages			query		int		false	"1"
@Param			width			query		int		false	"172"
@Success		200				{html}		html	""
@Success		200				{jpeg}		jpeg	""
@Success		202				{object}	Job		"Preview not yet ready."
@Failure		400				{object}	HTTPError
@Failure		404				{object}	HTTPError
@Failure		410				{object}	HTTPError	"Job failed."
@Failure		500				{object}	HTTPError
@Router			/jobs/{uuid}/preview [get]

func (*Rinse) RESTGETJobsUUIDRinsed

func (rns *Rinse) RESTGETJobsUUIDRinsed(hw http.ResponseWriter, hr *http.Request)

RESTGETJobsUUIDRinsed godoc

@Summary		Get the job's rinsed document.
@Description	Get the job's rinsed document.
@Tags			jobs
@Accept			*/*
@Produce		application/pdf
@Produce		json
@Param			Authorization	header		string	false	"JWT token"
@Param			uuid			path		string	true	"49d1e304-d2b8-46bf-b6a6-f1e9b797e1b0"
@Success		200				{file}		file	""
@Success		202				{object}	Job		"Rinsed version not yet ready."
@Failure		404				{object}	HTTPError
@Failure		410				{object}	HTTPError	"Job failed."
@Failure		500				{object}	HTTPError
@Router			/jobs/{uuid}/rinsed [get]

func (*Rinse) RESTPOSTJobs

func (rns *Rinse) RESTPOSTJobs(hw http.ResponseWriter, hr *http.Request)

RESTPOSTJobs godoc

@Summary		Add a job
@Description	Add job with either a file using multipart/form-data or a URL using json.
@Tags			jobs
@Accept			json
@Accept			multipart/form-data
@Produce		json
@Param			Authorization	header		string		false	"JWT token"
@Param			addjoburl		body		AddJobURL	false	"Add job by URL"
@Param			file			formData	file		false	"this is a test file"
@Param			lang			query		string		false	"eng"
@Param			maxsizemb		query		int			false	"2048"
@Param			maxtimesec		query		int			false	"86400"
@Param			cleanupsec		query		int			false	"600"
@Param			timeoutsec		query		int			false	"600"
@Param			cleanupgotten	query		bool		false	"true"
@Param			private			query		bool		false	"false"
@Success		200				{object}	Job
@Failure		400				{object}	HTTPError
@Failure		404				{object}	HTTPError
@Failure		415				{object}	HTTPError
@Failure		500				{object}	HTTPError
@Router			/jobs [post]

func (*Rinse) RedirectAuthFn added in v0.19.0

func (rns *Rinse) RedirectAuthFn(fn http.HandlerFunc) http.Handler

func (*Rinse) RemoveJob

func (rns *Rinse) RemoveJob(job *Job)

func (*Rinse) SelfTest added in v0.20.0

func (rns *Rinse) SelfTest() int

func (*Rinse) SettingsFile added in v0.10.0

func (rns *Rinse) SettingsFile() string

func (*Rinse) TimeoutSec added in v0.22.0

func (rns *Rinse) TimeoutSec() (n int)

func (*Rinse) UiAdmins added in v0.14.0

func (rns *Rinse) UiAdmins() jaws.ClickHandler

func (*Rinse) UiAutoCleanup

func (rns *Rinse) UiAutoCleanup() jaws.HTMLGetter

func (*Rinse) UiCleanupGotten added in v0.9.8

func (rns *Rinse) UiCleanupGotten() any

func (*Rinse) UiClock

func (rns *Rinse) UiClock() jaws.HTMLGetter

func (*Rinse) UiExternalIP added in v0.13.0

func (rns *Rinse) UiExternalIP() (ui jaws.HTMLGetter)

func (*Rinse) UiImpersonate added in v0.14.0

func (rns *Rinse) UiImpersonate() jaws.ClickHandler

func (*Rinse) UiMaxConcurrent

func (rns *Rinse) UiMaxConcurrent() jaws.HTMLGetter

func (*Rinse) UiMaxRuntime

func (rns *Rinse) UiMaxRuntime() jaws.HTMLGetter

func (*Rinse) UiMaxSize

func (rns *Rinse) UiMaxSize() jaws.HTMLGetter
func (rns *Rinse) UiNavLink(rq *http.Request, url, title string) template.HTML

func (*Rinse) UiProxy added in v0.13.0

func (rns *Rinse) UiProxy() *uiProxy

func (*Rinse) UiTimeout added in v0.22.0

func (rns *Rinse) UiTimeout() jaws.HTMLGetter

func (*Rinse) UiUser added in v0.9.9

func (rns *Rinse) UiUser() jaws.HTMLGetter

func (*Rinse) UpdateExternalIP added in v0.13.0

func (rns *Rinse) UpdateExternalIP()

func (*Rinse) Warn added in v0.24.0

func (rns *Rinse) Warn(msg string, keyValuePairs ...any)

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL