Documentation
¶
Overview ¶
Package converter implements functions for converting files to PDF.
Index ¶
- Constants
- Variables
- func ExtractingFilter(inch <-chan i18nmail.MailPart, outch chan<- i18nmail.MailPart, ...)
- func FixContentType(body []byte, contentType, fileName string) string
- func GetCidMap(text []byte, subDir string) ([]byte, map[string]string, error)
- func HTMLPartFilter(inch <-chan i18nmail.MailPart, outch chan<- i18nmail.MailPart, ...)
- func HTMLToPdf(destfn string, r io.Reader, contentType string, ctx *Context) error
- func ImageToPdf(destfn string, r io.Reader, contentType string, ctx *Context) error
- func ImageToPdfGm(w io.Writer, r io.Reader, contentType string) error
- func LoadConfig(fn string) error
- func MPRelatedToPdf(destfn string, r io.Reader, contentType string, ctx *Context) error
- func MailToPdfZip(destfn string, body io.Reader, contentType string, ctx *Context) error
- func MailToSplittedPdfZip(destfn string, body io.Reader, contentType string, split bool, ...) error
- func MailToTree(outdir string, r io.Reader) error
- func MailToZip(destfn string, body io.Reader, contentType string, ctx *Context) error
- func NewB64QuoPriDecoder(r io.Reader) io.Reader
- func NewCidMapper(cids map[string]string, subDir string, r io.Reader) io.Reader
- func NewEqsignStripper(r io.Reader) io.Reader
- func NewOLEStorageReader(r io.Reader) (io.ReadCloser, error)
- func NewQuoPriDecoder(r io.Reader) io.Reader
- func NewScannerReader(s *bufio.Scanner) io.Reader
- func NewTextReader(r io.Reader, charset string) io.Reader
- func OfficeToPdf(destfn string, r io.Reader, contentType string, ctx *Context) error
- func PdfClean(fn string) (err error)
- func PdfDumpFdf(destfn, inpfn string) error
- func PdfDumpFields(inpfn string) ([]string, error)
- func PdfFillFdf(destfn, inpfn string, values map[string]string) error
- func PdfMerge(ctx context.Context, destfn string, filenames ...string) error
- func PdfPageNum(srcfn string) (numberofpages int, err error)
- func PdfRewrite(destfn, srcfn string) error
- func PdfSplit(srcfn string) (filenames []string, err error)
- func PdfToImage(w io.Writer, r io.Reader, contentType, size string) error
- func PdfToImageCairo(w io.Writer, r io.Reader, contentType, size string) error
- func PdfToImageGm(w io.Writer, r io.Reader, contentType, size string) error
- func PdfToImageMulti(sfiles []string, imgmime, imgsize string) (imgfilenames []string, err error)
- func PdfToPdf(destfn string, r io.Reader, _ string, ctx *Context) error
- func PdfToPs(destfn, srcfn string) error
- func PrependHeaderFilter(inch <-chan i18nmail.MailPart, outch chan<- i18nmail.MailPart, ...)
- func PsToPdf(destfn, srcfn string) error
- func ScanLines(data []byte, atEOF bool) (advance int, token []byte, err error)
- func SetupFilters(inch <-chan i18nmail.MailPart, resultch chan<- ArchFileItem, ...) <-chan i18nmail.MailPart
- func SlurpMail(partch chan<- i18nmail.MailPart, errch chan<- error, body io.Reader)
- func TextDecodeFilter(inch <-chan i18nmail.MailPart, outch chan<- i18nmail.MailPart, ...)
- func TextToPdf(destfn string, r io.Reader, contentType string, ctx *Context) error
- func ZipFiles(dest io.Writer, skipOnError, unsafeArchFn bool, files ...ArchFileItem) (err error)
- func ZipTree(dest io.Writer, root string, skipOnError, unsafeArchFn bool) (err error)
- type ArchFileItem
- type ArchItems
- type Context
- type Converter
- type FieldSetter
- type FileLike
- type FilterFunc
- type PortLock
- type RateLimiter
- type ScannerReader
- type Token
Constants ¶
const ErrTextFn = "ZZZ-errors.txt"
name of errors list in resulting archive
const LofficeLockPort = 27999
port for LibreOffice locking (only one instance should be running)
const SaveOriginalHTML = false
save original html (do not delete it)
Variables ¶
var ( // ConfPdftk is the path for PdfTk ConfPdftk = config.String("pdftk", "pdftk") // ConfPdfseparate is the path for pdfseparate (member of poppler-utils ConfPdfseparate = config.String("pdfseparate", "pdfseparate") // ConfLoffice is the path for LibreOffice ConfLoffice = config.String("loffice", "loffice") // ConfGm is the path for GraphicsMagick ConfGm = config.String("gm", "gm") // ConfGs is the path for GhostScript ConfGs = config.String("gs", "gs") // ConfPdfClean is the path for pdfclean ConfPdfClean = config.String("pdfclean", "pdfclean") // ConfMutool is the path for mutool ConfMutool = config.String("mutool", "mutool") // ConfSortBeforeMerge should be true if generally we should sort files by filename before merge ConfSortBeforeMerge = config.Bool("sortBeforeMerge", false) // ConfChildTimeout is the time before the child gets killed ConfChildTimeout = config.Duration("childTimeout", 1*time.Hour) // ConcLimit limits the concurrently running child processes ConcLimit = NewRateLimiter(Concurrency) // ConfWorkdir is the working directory (will be os.TempDir() if empty) ConfWorkdir = config.String("workdir", "") // ConfListenAddr is a listen address for HTTP requests ConfListenAddr = config.String("listen", ":9500") // ConfDefaultIsService decides whether start as service without args ConfDefaultIsService = config.Bool("defaultIsService", false) )
var Concurrency = int(8)
Concurrency is the default number of concurrent goroutines.
var ExtContentType = map[string]string{
"doc": "application/vnd.ms-word",
"docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
"dotx": "application/vnd.openxmlformats-officedocument.wordprocessingml.template",
"xls": "application/vnd.ms-excel",
"xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
"xltx": "application/vnd.openxmlformats-officedocument.spreadsheetml.template",
"ppt": "application/vnd.ms-powerpoint",
"pptx": "application/vnd.openxmlformats-officedocument.presentationml.presentation",
"ppsx": "application/vnd.openxmlformats-officedocument.presentationml.slideshow",
"potx": "application/vnd.openxmlformats-officedocument.presentationml.template",
"odg": "application/vnd.oasis.opendocument.graphics",
"otg": "application/vnd.oasis.opendocument.graphics-template",
"otp": "application/vnd.oasis.opendocument.presentation-template",
"odp": "application/vnd.oasis.opendocument.presentation",
"odm": "application/vnd.oasis.opendocument.text-master",
"odt": "application/vnd.oasis.opendocument.text",
"oth": "application/vnd.oasis.opendocument.text-web",
"ott": "application/vnd.oasis.opendocument.text-template",
"ods": "application/vnd.oasis.spreadsheet",
"ots": "application/vnd.oasis.spreadsheet-template",
"odc": "application/vnd.oasis.chart",
"odf": "application/vnd.oasis.formula",
"odb": "application/vnd.oasis.database",
"odi": "application/vnd.oasis.image",
"txt": "text/plain",
"msg": "application/x-ole-storage",
"jpg": "image/jpeg",
"jpeg": "image/jpeg",
"gif": "image/gif",
"png": "image/png",
}
file extension -> content-type map
var Filters = make([]FilterFunc, 0, 4)
Filters is the filter pipeline - order is application order
var LeaveTempFiles = false
LeaveTempFiles should be true only for debugging purposes (leaves temp files)
var Logger = new(log.SwapLogger)
Logger is the swappable logger.
var OtherToPdf = OfficeToPdf
OtherToPdf is the default converter
var PrependHeaders = []string{"From", "To", "Cc", "Subject", "Date"}
PrependHeaders are the headers which should be prepended to the printed mail
var Workdir = os.TempDir()
Workdir is the main working directory
Functions ¶
func ExtractingFilter ¶
func ExtractingFilter(inch <-chan i18nmail.MailPart, outch chan<- i18nmail.MailPart, files chan<- ArchFileItem, errch chan<- error, ctx *Context)
ExtractingFilter is a filter for the mail pipeline which extracts archives
func FixContentType ¶
FixContentType ensures proper content-type (uses mimemagic for "" and application/octet-stream)
func HTMLPartFilter ¶
func HTMLPartFilter(inch <-chan i18nmail.MailPart, outch chan<- i18nmail.MailPart, files chan<- ArchFileItem, errch chan<- error, ctx *Context)
HTMLPartFilter reads multipart/alternative (text/plain + text/html), preferring the html part + groups the multipart/related images which are referred in the html.
multipart/related encapsulates multipart/alternative, which contains text/plain and text/html, the related part contains images, too - at least usually.
func ImageToPdf ¶
ImageToPdf converts an image (image/...) to PDF.
func ImageToPdfGm ¶
ImageToPdfGm converts image to PDF using GraphicsMagick
func MPRelatedToPdf ¶
MPRelatedToPdf converts multipart/related to PDF
func MailToPdfZip ¶
MailToPdfZip converts mail to ZIP of PDFs
func MailToSplittedPdfZip ¶
func MailToSplittedPdfZip(destfn string, body io.Reader, contentType string, split bool, imgmime, imgsize string, ctx *Context) error
MailToSplittedPdfZip converts mail to ZIP of PDFs and images
func MailToTree ¶
MailToTree writes mail parts as files starting at outdir as root, trying to reimplement the mime hierarchy in the directory hierarchy
func NewB64QuoPriDecoder ¶
NewB64QuoPriDecoder replaces bork encoding (+base64-)
func NewCidMapper ¶
NewCidMapper returns a reader which remaps Content-Id URLs to subDir/filename, using the given cids map.
func NewEqsignStripper ¶
NewEqsignStripper returns a reader which strips equal signs from line endings
func NewOLEStorageReader ¶
func NewOLEStorageReader(r io.Reader) (io.ReadCloser, error)
NewOLEStorageReader converts Outlook .msg files to .eml RFC822 email files. For this it uses perl Email::Outlook::Message (thanks, @matijs), and returns an io.ReadCloser with the converted data.
This calls out to perl, and needs Email::Outlook::Message (can be installed with `cpan -i Email::Outlook::Message`).
func NewQuoPriDecoder ¶
NewQuoPriDecoder replaces =A0= with \n
func NewScannerReader ¶
NewScannerReader turns a bufio.Scanner into an io.Reader.
func NewTextReader ¶
NewTextReader wraps a reader with a proper charset converter
func OfficeToPdf ¶
OfficeToPdf converts other (office document) files to PDF with LibreOffice.
func PdfDumpFdf ¶
PdfDumpFdf dumps the FDF from the given PDF.
func PdfDumpFields ¶
PdfDumpFields dumps the field names from the given PDF.
func PdfFillFdf ¶
PdfFillFdf fills the FDF and generates PDF.
func PdfPageNum ¶
PdfPageNum returns the number of pages
func PdfRewrite ¶
PdfRewrite converts PDF to PDF (rewrites as PDF->PS->PDF)
func PdfToImage ¶
PdfToImage converts PDF to image using PdfToImageGm if available and the result is OK, then PdfToImageCairo.
func PdfToImageCairo ¶
PdfToImageCairo converts PDF to image using pdftocairo from poppler-utils.
func PdfToImageGm ¶
PdfToImageGm converts PDF to image using GraphicsMagick.
func PdfToImageMulti ¶
PdfToImageMulti converts PDF pages to images, using parallel threads
func PrependHeaderFilter ¶
func PrependHeaderFilter(inch <-chan i18nmail.MailPart, outch chan<- i18nmail.MailPart, files chan<- ArchFileItem, errch chan<- error, ctx *Context, )
PrependHeaderFilter writes Subject, From... headers at the beginning of the html/plain parts.
func ScanLines ¶
ScanLines is a split function for a Scanner that returns each line of text, unmodified. The returned line may be empty. The end-of-line marker is one optional carriage return followed by one mandatory newline. In regular expression notation, it is `\r?\n`. The last non-empty line of input will be returned even if it has no newline.
func SetupFilters ¶
func SetupFilters(inch <-chan i18nmail.MailPart, resultch chan<- ArchFileItem, errch chan<- error, ctx *Context, ) <-chan i18nmail.MailPart
SetupFilters applies the filters to the parts received on inch, and returns the channel on which the filtered parts are delivered.
func TextDecodeFilter ¶
func TextDecodeFilter(inch <-chan i18nmail.MailPart, outch chan<- i18nmail.MailPart, files chan<- ArchFileItem, errch chan<- error, ctx *Context)
TextDecodeFilter is a filter for the mail pipeline which decodes the text (html/plain) parts.
Types ¶
type ArchFileItem ¶
type ArchFileItem struct { File FileLike //opened file handle Filename string //name of the file Archive string //name in the archive Error error //error }
ArchFileItem groups an archive item
func MailToPdfFiles ¶
func MailToPdfFiles(r io.Reader, ctx *Context) (files []ArchFileItem, err error)
MailToPdfFiles converts an email to PDF files. Every mail part goes through all the filters in Filters, in reverse order (last first).
func (ArchFileItem) ArchiveName ¶
func (a ArchFileItem) ArchiveName() string
ArchiveName returns the archive name - Archive, Filename if set, otherwise File's name
type ArchItems ¶
type ArchItems []ArchFileItem
ArchItems is a wrapper for []ArchFileItem for sort.Sort
type Context ¶
type Context struct {
Dir string //base workdir
}
Context contains the execution context (only the workdir, at the moment).
type Converter ¶
Converter converts to Pdf (destination filename, source reader and source content-type)
func GetConverter ¶
GetConverter gets converter for the content-type
func NewTextConverter ¶
NewTextConverter converts encoded text to pdf - by decoding it
type FieldSetter ¶
type FilterFunc ¶
type FilterFunc func(<-chan i18nmail.MailPart, chan<- i18nmail.MailPart, chan<- ArchFileItem, chan<- error, *Context)
FilterFunc is the type of the pipeline filter function. It must close the out channel on finish!
type PortLock ¶
type PortLock struct {
// contains filtered or unexported fields
}
PortLock is a locker which locks by binding to a port on the loopback IPv4 interface
type RateLimiter ¶
type RateLimiter interface { //Acquire acquires a token (blocks if none accessible) Acquire() Token //Release releases the token Release(Token) }
RateLimiter is the interface for rate limiting
type ScannerReader ¶
type ScannerReader struct {
// contains filtered or unexported fields
}
ScannerReader uses a bufio.Scanner as an io.Reader