Documentation ¶
Index ¶
- Constants
- Variables
- func CheckForAcceptRequest(urlQueue, urlStat string) bool
- func GenerateLandingPage(appStop, technicalError bool, version string) string
- func InstrumentHttpStatusHandler(ocrHttpHandler *OcrHTTPStatusHandler) http.Handler
- func SetResManagerState(ampqAPIConfig *RabbitConfig)
- func StripPasswordFromUrl(urlToLog *url.URL) string
- type ConvertPdf
- type FlagFunction
- type FlagFunctionWorker
- type IdentityPreprocessor
- type MockEngine
- type OcrEngine
- type OcrEngineType
- type OcrHTTPStatusHandler
- type OcrHttpMultipartHandler
- type OcrHttpStatusHandler
- type OcrQueueManager
- type OcrRequest
- type OcrResult
- type OcrRpcClient
- type OcrRpcWorker
- type Preprocessor
- type PreprocessorRpcWorker
- type RabbitConfig
- type SandwichEngine
- type SandwichEngineArgs
- type StrokeWidthTransformer
- type TesseractEngine
- type TesseractEngineArgs
- type WorkerConfig
Constants ¶
const ( EngineTesseract = OcrEngineType(iota) EngineGoTesseract EngineSandwichTesseract EngineMock )
const ( PreprocessorIdentity = "identity" PreprocessorStrokeWidthTransform = "stroke-width-transform" PreprocessorConvertPdf = "convert-pdf" )
const MockEngineResponse = "mock engine decoder response"
Variables ¶
var ( // AppStop and ServiceCanAccept are global. Used to set the flag for logging and stopping the application AppStop bool ServiceCanAccept bool ServiceCanAcceptMu sync.RWMutex )
var ( // StopChan is used to gracefully stop http daemon StopChan = make(chan bool, 1) TechnicalErrorResManager bool )
var ( RequestsTrack = sync.Map{} RequestTrackLength = uint32(0) )
Functions ¶
func CheckForAcceptRequest ¶
CheckForAcceptRequest will check, by reading the RabbitMQ API, whether resources for incoming requests are available
func GenerateLandingPage ¶
GenerateLandingPage will generate a simple landing page
func InstrumentHttpStatusHandler ¶
func InstrumentHttpStatusHandler(ocrHttpHandler *OcrHTTPStatusHandler) http.Handler
InstrumentHttpStatusHandler wraps httpHandler to provide prometheus metrics
func SetResManagerState ¶
func SetResManagerState(ampqAPIConfig *RabbitConfig)
SetResManagerState sets the boolean value of the resource manager, depending on whether the memory of RabbitMQ and the number of messages do not exceed the limit
func StripPasswordFromUrl ¶ added in v1.8.0
StripPasswordFromUrl strips passwords from URL
Types ¶
type ConvertPdf ¶
type ConvertPdf struct{}
type FlagFunction ¶
type FlagFunction func()
func NoOpFlagFunction ¶
func NoOpFlagFunction() FlagFunction
type FlagFunctionWorker ¶
type FlagFunctionWorker func()
FlagFunctionWorker will be used as argument type for DefaultConfigFlagsWorkerOverride
func NoOpFlagFunctionWorker ¶
func NoOpFlagFunctionWorker() FlagFunctionWorker
NoOpFlagFunctionWorker will return an empty set of CLI parameters. In this case the default parameters will be used
type IdentityPreprocessor ¶
type IdentityPreprocessor struct{}
type MockEngine ¶
type MockEngine struct{}
func (MockEngine) ProcessRequest ¶
func (MockEngine) ProcessRequest(_ *OcrRequest, _ *WorkerConfig) (OcrResult, error)
ProcessRequest will process incoming OCR request by routing it through the whole process chain
type OcrEngine ¶
type OcrEngine interface {
ProcessRequest(ocrRequest *OcrRequest, workerConfig *WorkerConfig) (OcrResult, error)
}
func NewOcrEngine ¶
func NewOcrEngine(engineType OcrEngineType) OcrEngine
type OcrEngineType ¶
type OcrEngineType int
func (OcrEngineType) String ¶
func (e OcrEngineType) String() string
func (*OcrEngineType) UnmarshalJSON ¶
func (e *OcrEngineType) UnmarshalJSON(b []byte) (err error)
type OcrHTTPStatusHandler ¶
type OcrHTTPStatusHandler struct {
RabbitConfig RabbitConfig
}
OcrHTTPStatusHandler is for initial handling of ocr request
func NewOcrHttpHandler ¶
func NewOcrHttpHandler(r *RabbitConfig) *OcrHTTPStatusHandler
func (*OcrHTTPStatusHandler) ServeHTTP ¶
func (s *OcrHTTPStatusHandler) ServeHTTP(w http.ResponseWriter, req *http.Request)
type OcrHttpMultipartHandler ¶
type OcrHttpMultipartHandler struct {
RabbitConfig RabbitConfig
}
func NewOcrHttpMultipartHandler ¶
func NewOcrHttpMultipartHandler(r *RabbitConfig) *OcrHttpMultipartHandler
func (*OcrHttpMultipartHandler) ServeHTTP ¶
func (s *OcrHttpMultipartHandler) ServeHTTP(w http.ResponseWriter, req *http.Request)
type OcrHttpStatusHandler ¶
type OcrHttpStatusHandler struct{}
func NewOcrHttpStatusHandler ¶
func NewOcrHttpStatusHandler() *OcrHttpStatusHandler
func (*OcrHttpStatusHandler) ServeHTTP ¶
func (*OcrHttpStatusHandler) ServeHTTP(w http.ResponseWriter, req *http.Request)
type OcrQueueManager ¶
type OcrQueueManager struct { NumMessages uint `json:"messages"` // TODO: do not read the number of messages from API because it is slow, and the clients of this product may not behave and put too many requests in too fast. NumConsumers uint `json:"consumers"` MessageBytes uint `json:"message_bytes"` }
OcrQueueManager is used as a main component of resource manager
type OcrRequest ¶
type OcrRequest struct { ImgUrl string `json:"img_url"` ImgBase64 string `json:"img_base64"` EngineType OcrEngineType `json:"engine"` ImgBytes []byte `json:"img_bytes"` PreprocessorChain []string `json:"preprocessors"` PreprocessorArgs map[string]interface{} `json:"preprocessor-args"` EngineArgs map[string]interface{} `json:"engine_args"` Deferred bool `json:"deferred"` ReplyTo string `json:"reply_to"` DocType string `json:"doc_type"` RequestID string `json:"req_id"` PageNumber uint16 `json:"page_number"` UserAgent string `json:"user_agent"` TimeOut uint `json:"time_out"` ReferenceID string `json:"reference_id"` // decode ocr in http handler rather than putting in queue InplaceDecode bool `json:"inplace_decode"` }
func (*OcrRequest) String ¶
func (ocrRequest *OcrRequest) String() string
type OcrResult ¶
type OcrResult struct { Text string `json:"text"` Status string `json:"status"` ID string `json:"id"` }
func CheckOcrStatusByID ¶
CheckOcrStatusByID checks the status of an OCR request based on the origin of the request
func HandleOcrRequest ¶
func HandleOcrRequest(ocrRequest *OcrRequest, workerConfig *RabbitConfig) (OcrResult, int, error)
HandleOcrRequest will process incoming OCR request by routing it through the whole process chain
type OcrRpcClient ¶
type OcrRpcClient struct {
// contains filtered or unexported fields
}
func NewOcrRpcClient ¶
func NewOcrRpcClient(rc *RabbitConfig) (*OcrRpcClient, error)
func (*OcrRpcClient) DecodeImage ¶
func (c *OcrRpcClient) DecodeImage(ocrRequest *OcrRequest) (OcrResult, int, error)
DecodeImage is the main function to perform OCR on an incoming request. It handles the parameters and the whole workflow
type OcrRpcWorker ¶
type OcrRpcWorker struct { Done chan error // contains filtered or unexported fields }
func NewOcrRpcWorker ¶
func NewOcrRpcWorker(wc *WorkerConfig) (*OcrRpcWorker, error)
NewOcrRpcWorker is needed to establish a connection to a message broker
func (*OcrRpcWorker) Run ¶
func (w *OcrRpcWorker) Run() error
func (*OcrRpcWorker) Shutdown ¶
func (w *OcrRpcWorker) Shutdown() error
type Preprocessor ¶
type Preprocessor interface {
// contains filtered or unexported methods
}
type PreprocessorRpcWorker ¶
type PreprocessorRpcWorker struct { Done chan error // contains filtered or unexported fields }
func NewPreprocessorRpcWorker ¶
func NewPreprocessorRpcWorker(rc *RabbitConfig, preprocessor string) (*PreprocessorRpcWorker, error)
func (*PreprocessorRpcWorker) Run ¶
func (w *PreprocessorRpcWorker) Run() error
func (*PreprocessorRpcWorker) Shutdown ¶
func (w *PreprocessorRpcWorker) Shutdown() error
type RabbitConfig ¶
type RabbitConfig struct { AmqpURI string Exchange string ExchangeType string RoutingKey string Reliable bool AmqpAPIURI string APIPathQueue string APIQueueName string APIPathStats string QueuePrio map[string]uint8 QueuePrioArg string /* ResponseCacheTimeout sets default(!!!) global timeout in seconds for request engine will be killed after reaching the time limit, user will get timeout error */ ResponseCacheTimeout uint // MaximalResponseCacheTimeout client won't be able to set the ResponseCacheTimeout higher of it's value MaximalResponseCacheTimeout uint FactorForMessageAccept uint }
func DefaultConfigFlagsOverride ¶
func DefaultConfigFlagsOverride(flagFunction FlagFunction) RabbitConfig
func DefaultTestConfig ¶
func DefaultTestConfig() RabbitConfig
type SandwichEngine ¶
type SandwichEngine struct{}
SandwichEngine calls pdfsandwich via exec. This implementation returns either the PDF with the OCR layer only or a merged variant of the PDF plus the OCR layer, with the ability to optimize the output PDF file by calling the "gs" tool
func (SandwichEngine) ProcessRequest ¶
func (t SandwichEngine) ProcessRequest(ocrRequest *OcrRequest, workerConfig *WorkerConfig) (OcrResult, error)
ProcessRequest will process incoming OCR request by routing it through the whole process chain
type SandwichEngineArgs ¶
type SandwichEngineArgs struct {
// contains filtered or unexported fields
}
func NewSandwichEngineArgs ¶
func NewSandwichEngineArgs(ocrRequest *OcrRequest, workerConfig *WorkerConfig) (*SandwichEngineArgs, error)
NewSandwichEngineArgs generates arguments for SandwichEngine which will be used to start involved tools
func (*SandwichEngineArgs) Export ¶
func (t *SandwichEngineArgs) Export() []string
Export returns a slice that can be passed to the tesseract binary as command line args, e.g. ["-c", "tessedit_char_whitelist=0123456789", "-c", "foo=bar"]
type StrokeWidthTransformer ¶
type StrokeWidthTransformer struct{}
type TesseractEngine ¶
type TesseractEngine struct{}
TesseractEngine calls tesseract via exec
func (TesseractEngine) ProcessRequest ¶
func (t TesseractEngine) ProcessRequest(ocrRequest *OcrRequest, _ *WorkerConfig) (OcrResult, error)
ProcessRequest will process incoming OCR request by routing it through the whole process chain
type TesseractEngineArgs ¶
type TesseractEngineArgs struct {
// contains filtered or unexported fields
}
func NewTesseractEngineArgs ¶
func NewTesseractEngineArgs(ocrRequest *OcrRequest) (*TesseractEngineArgs, error)
func (TesseractEngineArgs) Export ¶
func (t TesseractEngineArgs) Export() []string
Export returns a slice that can be passed to the tesseract binary as command line args, e.g. ["-c", "tessedit_char_whitelist=0123456789", "-c", "foo=bar"]
type WorkerConfig ¶
type WorkerConfig struct { AmqpURI string Exchange string ExchangeType string RoutingKey string Reliable bool AmqpAPIURI string APIPathQueue string APIQueueName string APIPathStats string SaveFiles bool Debug bool Tiff2pdfConverter string NumParallelJobs uint FlgVersion bool }
WorkerConfig will be passed to ocr engines and is used to establish connection to a message broker
func DefaultConfigFlagsWorkerOverride ¶
func DefaultConfigFlagsWorkerOverride(flagFunction FlagFunctionWorker) (WorkerConfig, error)
func DefaultWorkerConfig ¶
func DefaultWorkerConfig() WorkerConfig
DefaultWorkerConfig will set the default set of worker parameters which are needed for testing and connecting to a broker
Source Files ¶
- convert-pdf.go
- generate_landing_page.go
- mock_engine.go
- ocr_engine.go
- ocr_http_handler.go
- ocr_http_multipart_handler.go
- ocr_http_status_handler.go
- ocr_postback_client.go
- ocr_request.go
- ocr_res_manager.go
- ocr_results_storage.go
- ocr_rpc_client.go
- ocr_rpc_worker.go
- ocr_util.go
- preprocessor.go
- preprocessor_rpc_worker.go
- prometheus_metrics.go
- rabbit_config.go
- sandwich_engine.go
- stroke_width_transform.go
- tesseract_engine.go
- worker_config.go