Documentation ¶
Overview ¶
Package converter implements functions for converting files to PDF.
Index ¶
- Constants
- Variables
- func DupFilter(ctx context.Context, inch <-chan i18nmail.MailPart, ...)
- func ExtractingFilter(ctx context.Context, inch <-chan i18nmail.MailPart, ...)
- func FixContentType(body []byte, contentType, fileName string) (ct string)
- func GetRequestID(ctx context.Context) string
- func HTMLPartFilter(ctx context.Context, inch <-chan i18nmail.MailPart, ...)
- func HTMLToPdf(ctx context.Context, destfn string, r io.Reader, contentType string) error
- func ImageToPdf(ctx context.Context, destfn string, r io.Reader, contentType string) error
- func ImageToPdfGm(ctx context.Context, w io.Writer, r io.Reader, contentType string) error
- func ImageToPdfPdfCPU(w io.Writer, r io.Reader) error
- func LoadConfig(ctx context.Context, fn string) error
- func MIMEMatch(b []byte) string
- func MPRelatedToPdf(ctx context.Context, destfn string, r io.Reader, contentType string) error
- func MailToPdfZip(ctx context.Context, destfn string, body io.Reader, contentType string) error
- func MailToSplittedPdfZip(ctx context.Context, destfn string, body io.Reader, contentType string, ...) error
- func MailToTree(ctx context.Context, outdir string, r io.Reader) error
- func MailToZip(ctx context.Context, destfn string, body io.Reader, contentType string) error
- func NewB64QuoPriDecoder(r io.Reader) io.Reader
- func NewCidMapper(cids map[string]string, subDir string, r io.Reader) io.Reader
- func NewEqsignStripper(r io.Reader) io.Reader
- func NewOLEStorageReader(ctx context.Context, r io.Reader) (io.ReadCloser, error)
- func NewQuoPriDecoder(r io.Reader) io.Reader
- func NewScannerReader(s *bufio.Scanner) io.Reader
- func NewTextReader(ctx context.Context, r io.Reader, charset string) io.Reader
- func NewULID() ulid.ULID
- func OfficeToPdf(ctx context.Context, destfn string, r io.Reader, contentType string) error
- func OutlookToEML(ctx context.Context, destfn string, r io.Reader, contentType string) error
- func PdfClean(ctx context.Context, fn string) (err error)
- func PdfDumpFdf(ctx context.Context, destfn, inpfn string) error
- func PdfDumpFields(ctx context.Context, inpfn string) ([]string, error)
- func PdfFillFdf(ctx context.Context, destfn, inpfn string, values map[string]string) error
- func PdfMerge(ctx context.Context, destfn string, filenames ...string) error
- func PdfPageNum(ctx context.Context, srcfn string) (numberofpages int, err error)
- func PdfRewrite(ctx context.Context, destfn, srcfn string) error
- func PdfSplit(ctx context.Context, srcfn string, pages []uint16) (filenames []string, cleanup func() error, err error)
- func PdfToImage(ctx context.Context, w io.Writer, r io.Reader, contentType, size string) error
- func PdfToImageCairo(ctx context.Context, w io.Writer, r io.Reader, contentType, size string) error
- func PdfToImageGm(ctx context.Context, w io.Writer, r io.Reader, contentType, size string) error
- func PdfToImageMulti(ctx context.Context, sfiles []string, imgmime, imgsize string) (imgfilenames []string, err error)
- func PdfToPdf(ctx context.Context, destfn string, r io.Reader, _ string) error
- func PdfToPs(ctx context.Context, destfn, srcfn string) error
- func PngToImage(ctx context.Context, w io.Writer, imgtyp string, r io.Reader) error
- func PrependHeaderFilter(ctx context.Context, inch <-chan i18nmail.MailPart, ...)
- func PsToPdf(ctx context.Context, destfn, srcfn string) error
- func SaveOriHTMLFilter(ctx context.Context, inch <-chan i18nmail.MailPart, ...)
- func ScanLines(data []byte, atEOF bool) (advance int, token []byte, err error)
- func SetLogger(lgr *slog.Logger)
- func SetRequestID(ctx context.Context, reqID string) context.Context
- func SetupFilters(ctx context.Context, inch <-chan i18nmail.MailPart, ...) <-chan i18nmail.MailPart
- func Skip(ctx context.Context, destfn string, r io.Reader, contentType string) error
- func SlurpMail(ctx context.Context, partch chan<- i18nmail.MailPart, errch chan<- error, ...)
- func TextDecodeFilter(ctx context.Context, inch <-chan i18nmail.MailPart, ...)
- func TextToPdf(ctx context.Context, destfn string, r io.Reader, contentType string) error
- func ZipFiles(dest io.Writer, skipOnError, unsafeArchFn bool, files ...ArchFileItem) (err error)
- func ZipTree(dest io.Writer, root string, skipOnError, unsafeArchFn bool) (err error)
- type ArchFileItem
- type ArchItems
- type Converter
- type FieldSetter
- type FileLike
- type FileMIMEDetector
- type FilterFunc
- type Gotenberg
- type HTTPMIMEDetector
- type MIMEDetector
- type MagicMIMEDetector
- type MultiMIMEDetector
- type PortLock
- type RateLimiter
- type ReadCloserFileLike
- type ScannerReader
- type Statter
- type Token
- type VasileMIMEDetector
Constants ¶
const DefaultMaxSubprocMemoryBytes = 2 << 30 // 2GiB
const ErrTextFn = "ZZZ-errors.txt"
ErrTextFn is the name of the error list file in the resulting archive.
const LofficeLockPort = 27999
port for LibreOffice locking (only one instance should be running)
Variables ¶
var ( // ConfPdftk is the path for PdfTk ConfPdftk = config.String("pdftk", lookPath("pdftk")) // ConfPdfseparate is the path for pdfseparate (member of poppler-utils) ConfPdfseparate = config.String("pdfseparate", "pdfseparate") // ConfLoffice is the path for LibreOffice ConfLoffice = config.String("loffice", lookPath("loffice")) // ConfGm is the path for GraphicsMagick ConfGm = config.String("gm", lookPath("gm")) // ConfGs is the path for GhostScript ConfGs = config.String("gs", lookPath("gs")) // ConfPdfClean is the path for pdfclean ConfPdfClean = config.String("pdfclean", lookPath("pdfclean")) // ConfMutool is the path for mutool ConfMutool = config.String("mutool", lookPath("mutool")) // ConfWkhtmltopdf is the path for wkhtmltopdf ConfWkhtmltopdf = config.String("wkhtmltopdf", lookPath("wkhtmltopdf")) // ConfSortBeforeMerge should be true if generally we should sort files by filename before merge ConfSortBeforeMerge = config.Bool("sortBeforeMerge", false) // ConfChildTimeout is the time before the child gets killed ConfChildTimeout = config.Duration("childTimeout", 10*time.Minute) // ConfLofficeTimeout is the time before LibreOffice gets killed. ConfLofficeTimeout = config.Duration("lofficeTimeout", time.Minute) // ConcLimit limits the concurrently running child processes ConcLimit = NewRateLimiter(Concurrency) // ConfWorkdir is the working directory (will be os.TempDir() if empty) ConfWorkdir = config.String("workdir", "") // ConfListenAddr is a listen address for HTTP requests ConfListenAddr = config.String("listen", ":9500") // ConfDefaultIsService decides whether start as service without args ConfDefaultIsService = config.Bool("defaultIsService", false) // ConfLofficeUsePortLock defines whether to limit Loffice usage by a port lock ConfLofficeUsePortLock = config.Bool("lofficeUsePortLock", !osgroup.IsInsideDocker()) // ConfLogFile specifies the file to log - instead of command line. 
ConfLogFile = config.String("logfile", "") // ConfKeepRemoteImage specifies whether to keep the remote sources of images (img src="http://mailtrack..."). ConfKeepRemoteImage = config.Bool("keepRemoteImage", false) // ConfGotenbergURL is the working Gotenberg (https://pkg.go.dev/github.com/gotenberg/gotenberg/v7) service URL ConfGotenbergURL = &gotenberg.URL // ConfMaxSubprocMemoryBytes is the limit for subprocess' memory. ConfMaxSubprocMemoryBytes = config.Uint64("max-subproc-mem-bytes", DefaultMaxSubprocMemoryBytes) )
var Cache *filecache.Cache
var Concurrency = int(8)
Concurrency is the default number of concurrent goroutines.
var DefaultMIMEDetector = MIMEDetector(MultiMIMEDetector{Detectors: []MIMEDetector{ FileMIMEDetector{}, HTTPMIMEDetector{}, VasileMIMEDetector{}, MagicMIMEDetector{}, }})
var ErrBadPDF = errors.New("bad pdf")
var ErrPasswordProtected = errors.New("password protected")
var ErrSkip = errors.New("skip this part")
var Exec procRunner
var ExtContentType = map[string]string{
"doc": "application/vnd.ms-word",
"docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
"dotx": "application/vnd.openxmlformats-officedocument.wordprocessingml.template",
"xls": "application/vnd.ms-excel",
"xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
"xltx": "application/vnd.openxmlformats-officedocument.spreadsheetml.template",
"ppt": "application/vnd.ms-powerpoint",
"pptx": "application/vnd.openxmlformats-officedocument.presentationml.presentation",
"ppsx": "application/vnd.openxmlformats-officedocument.presentationml.slideshow",
"potx": "application/vnd.openxmlformats-officedocument.presentationml.template",
"odg": "application/vnd.oasis.opendocument.graphics",
"otg": "application/vnd.oasis.opendocument.graphics-template",
"otp": "application/vnd.oasis.opendocument.presentation-template",
"odp": "application/vnd.oasis.opendocument.presentation",
"odm": "application/vnd.oasis.opendocument.text-master",
"odt": "application/vnd.oasis.opendocument.text",
"oth": "application/vnd.oasis.opendocument.text-web",
"ott": "application/vnd.oasis.opendocument.text-template",
"ods": "application/vnd.oasis.spreadsheet",
"ots": "application/vnd.oasis.spreadsheet-template",
"odc": "application/vnd.oasis.chart",
"odf": "application/vnd.oasis.formula",
"odb": "application/vnd.oasis.database",
"odi": "application/vnd.oasis.image",
"txt": textPlain,
"msg": mimeOutlook,
"jpg": "image/jpeg",
"jpeg": "image/jpeg",
"gif": "image/gif",
"png": "image/png",
"tif": "image/tif",
"tiff": "image/tiff",
}
file extension -> content-type map
var Filters = make([]FilterFunc, 0, 6)
Filters is the filter pipeline - order is application order
var LeaveTempFiles = false
LeaveTempFiles should be true only for debugging purposes (leaves temp files)
var OtherToPdf = OfficeToPdf
OtherToPdf is the default converter
var PrependHeaders = []string{"From", "To", "Cc", "Subject", "Date"}
PrependHeaders are the headers which should be prepended to the printed mail
var SaveOriginalHTML = false
SaveOriginalHTML should be true to save the original HTML (do not delete it).
var Workdir = os.TempDir()
Workdir is the main working directory
Functions ¶
func ExtractingFilter ¶
func ExtractingFilter(ctx context.Context, inch <-chan i18nmail.MailPart, outch chan<- i18nmail.MailPart, files chan<- ArchFileItem, errch chan<- error, )
ExtractingFilter is a filter for the mail pipeline which extracts archives
func FixContentType ¶
FixContentType ensures proper content-type (uses magic for "" and application/octet-stream)
func GetRequestID ¶ added in v0.2.0
func HTMLPartFilter ¶
func HTMLPartFilter(ctx context.Context, inch <-chan i18nmail.MailPart, outch chan<- i18nmail.MailPart, files chan<- ArchFileItem, errch chan<- error, )
HTMLPartFilter reads multipart/alternative (text/plain + text/html), preferring the html part + groups the multipart/related images which are referred in the html.
multipart/related encapsulates multipart/alternative, which contains text/plain and text/html; the related part contains images, too - at least usually.
func ImageToPdf ¶
ImageToPdf converts an image (image/...) to PDF.
func ImageToPdfGm ¶
ImageToPdfGm converts image to PDF using GraphicsMagick
func ImageToPdfPdfCPU ¶ added in v0.2.0
ImageToPdfPdfCPU converts image to PDF using pdfcpu
func LoadConfig ¶
LoadConfig loads TOML config file
func MPRelatedToPdf ¶
MPRelatedToPdf converts multipart/related to PDF
func MailToPdfZip ¶
func MailToSplittedPdfZip ¶
func MailToSplittedPdfZip(ctx context.Context, destfn string, body io.Reader, contentType string, split bool, imgmime, imgsize string, pages []uint16, ) error
MailToSplittedPdfZip converts mail to ZIP of PDFs and images
func MailToTree ¶
MailToTree writes mail parts as files starting at outdir as root, trying to reimplement the mime hierarchy in the directory hierarchy
func NewB64QuoPriDecoder ¶
NewB64QuoPriDecoder replaces bork encoding (+base64-)
func NewCidMapper ¶
NewCidMapper remaps Content-Id urls to ContentDir/filename and returns the map
func NewEqsignStripper ¶
NewEqsignStripper returns a reader which strips equal signs from line endings
func NewOLEStorageReader ¶
NewOLEStorageReader converts Outlook .msg files to .eml RFC822 email files. For this it uses perl Email::Outlook::Message (thanks, @matijs), and returns an io.ReadCloser with the converted data.
This calls out to perl, and needs Email::Outlook::Message (can be installed with `cpan -i Email::Outlook::Message`).
func NewQuoPriDecoder ¶
NewQuoPriDecoder replaces =A0= with \n
func NewScannerReader ¶
NewScannerReader turns a bufio.Scanner to an io.Reader
func NewTextReader ¶
NewTextReader wraps a reader with a proper charset converter
func OfficeToPdf ¶
OfficeToPdf converts other (non-PDF) documents to PDF with LibreOffice.
func OutlookToEML ¶ added in v0.0.3
func PdfDumpFdf ¶
PdfDumpFdf dumps the FDF from the given PDF.
func PdfDumpFields ¶
PdfDumpFields dumps the field names from the given PDF.
func PdfFillFdf ¶
PdfFillFdf fills the FDF and generates PDF.
func PdfPageNum ¶
PdfPageNum returns the number of pages
func PdfRewrite ¶
PdfRewrite converts PDF to PDF (rewrites as PDF->PS->PDF)
func PdfSplit ¶
func PdfSplit(ctx context.Context, srcfn string, pages []uint16) (filenames []string, cleanup func() error, err error)
PdfSplit splits the PDF into pages and returns the filenames of those pages.
func PdfToImage ¶
PdfToImage converts PDF to image using PdfToImageGm if it is available and its result is OK; otherwise it uses PdfToImageCairo.
func PdfToImageCairo ¶
PdfToImageCairo converts PDF to image using pdftocairo from poppler-utils.
func PdfToImageGm ¶
PdfToImageGm converts PDF to image using GraphicsMagick.
func PdfToImageMulti ¶
func PdfToImageMulti(ctx context.Context, sfiles []string, imgmime, imgsize string) (imgfilenames []string, err error)
PdfToImageMulti converts PDF pages to images, using parallel threads
func PngToImage ¶ added in v0.2.0
func PrependHeaderFilter ¶
func PrependHeaderFilter(ctx context.Context, inch <-chan i18nmail.MailPart, outch chan<- i18nmail.MailPart, files chan<- ArchFileItem, errch chan<- error, )
PrependHeaderFilter writes Subject, From... headers at the beginning of the html/plain parts.
func SaveOriHTMLFilter ¶
func SaveOriHTMLFilter(ctx context.Context, inch <-chan i18nmail.MailPart, outch chan<- i18nmail.MailPart, files chan<- ArchFileItem, errch chan<- error, )
SaveOriHTMLFilter reads text/html and saves it.
func ScanLines ¶
ScanLines is a split function for a Scanner that returns each line of text, unmodified. The returned line may be empty. The end-of-line marker is one optional carriage return followed by one mandatory newline. In regular expression notation, it is `\r?\n`. The last non-empty line of input will be returned even if it has no newline.
func SetRequestID ¶ added in v0.2.0
func SetupFilters ¶
func SetupFilters( ctx context.Context, inch <-chan i18nmail.MailPart, resultch chan<- ArchFileItem, errch chan<- error, ) <-chan i18nmail.MailPart
SetupFilters applies the filters to the parts received on inch, and returns the filtered parts on the returned channel.
func SlurpMail ¶
func SlurpMail(ctx context.Context, partch chan<- i18nmail.MailPart, errch chan<- error, body io.Reader, contentType string)
SlurpMail splits mail to parts, returns parts and/or error on the given channels
func TextDecodeFilter ¶
func TextDecodeFilter(ctx context.Context, inch <-chan i18nmail.MailPart, outch chan<- i18nmail.MailPart, files chan<- ArchFileItem, errch chan<- error, )
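TextDecodeFilter is a filter for the mail pipeline; judging by its name it decodes text parts. (NOTE: the previous description here was a verbatim copy of PrependHeaderFilter's - "writes Subject, From... headers at the beginning of the html/plain parts" - and should be confirmed against the implementation.)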
Types ¶
type ArchFileItem ¶
type ArchFileItem struct { File FileLike //opened file handle Error error //error Filename string //name of the file Archive string //name in the archive }
ArchFileItem groups an archive item
func MailToPdfFiles ¶
func MailToPdfFiles(ctx context.Context, r io.Reader, contentType string) (files []ArchFileItem, err error)
MailToPdfFiles converts an email to PDF files. Every mail part goes through every filter in Filters, in reverse order (last first).
func (ArchFileItem) ArchiveName ¶
func (a ArchFileItem) ArchiveName() string
ArchiveName returns the archive name - Archive, Filename if set, otherwise File's name
type ArchItems ¶
type ArchItems []ArchFileItem
ArchItems is a wrapper for []ArchFileItem for sort.Sort
type Converter ¶
Converter converts to Pdf (destination filename, source reader and source content-type)
func GetConverter ¶
GetConverter gets converter for the content-type
func NewTextConverter ¶
NewTextConverter converts encoded text to pdf - by decoding it
type FieldSetter ¶
type FileMIMEDetector ¶ added in v0.0.3
type FileMIMEDetector struct{}
func (FileMIMEDetector) Match ¶ added in v0.0.3
func (d FileMIMEDetector) Match(b []byte) string
type FilterFunc ¶
type FilterFunc func(context.Context, <-chan i18nmail.MailPart, chan<- i18nmail.MailPart, chan<- ArchFileItem, chan<- error)
FilterFunc is the type of the pipeline filter function. It must close the out channel on finish!
type Gotenberg ¶ added in v0.2.0
func (*Gotenberg) PostFileNames ¶ added in v0.2.0
type HTTPMIMEDetector ¶ added in v0.0.3
type HTTPMIMEDetector struct{}
func (HTTPMIMEDetector) Match ¶ added in v0.0.3
func (d HTTPMIMEDetector) Match(b []byte) string
type MIMEDetector ¶ added in v0.0.3
type MagicMIMEDetector ¶ added in v0.2.4
type MagicMIMEDetector struct{}
func (MagicMIMEDetector) Match ¶ added in v0.2.4
func (d MagicMIMEDetector) Match(b []byte) string
type MultiMIMEDetector ¶ added in v0.0.3
type MultiMIMEDetector struct { Detectors []MIMEDetector Parallel bool }
func (MultiMIMEDetector) Match ¶ added in v0.0.3
func (d MultiMIMEDetector) Match(b []byte) string
type PortLock ¶
type PortLock struct {
// contains filtered or unexported fields
}
PortLock is a locker which locks by binding to a port on the loopback IPv4 interface
type RateLimiter ¶
type RateLimiter interface { //Acquire acquires a token (blocks if none accessible) Acquire() Token //Release releases the token Release(Token) }
RateLimiter is the interface for rate limiting
type ReadCloserFileLike ¶
type ScannerReader ¶
type ScannerReader struct {
// contains filtered or unexported fields
}
ScannerReader uses a bufio.Scanner as an io.Reader
type VasileMIMEDetector ¶ added in v0.0.3
type VasileMIMEDetector struct{}
func (VasileMIMEDetector) Match ¶ added in v0.0.3
func (d VasileMIMEDetector) Match(b []byte) string