Documentation ¶
Index ¶
- Constants
- func ConvertDoc(r io.Reader) (string, map[string]string, error)
- func ConvertDocx(r io.Reader) (string, string, string, map[string]string, error)
- func ConvertGB2312toUTF8(meta map[string]string) (map[string]string, error)
- func ConvertHtml(reader io.Reader) (pageText string, err error)
- func ConvertOds(file io.ReaderAt, size int64, writer io.Writer, limit int64) (written int64, err error)
- func ConvertOdt(r io.Reader) (string, map[string]string, error)
- func ConvertPDF(path string) (string, map[string]string, error)
- func ConvertPDF_O(path string) (BodyResult, MetaResult, error)
- func ConvertPpt(r io.Reader) (string, map[string]string, error)
- func ConvertPptx(r io.Reader) (string, map[string]string, error)
- func ConvertRTF(fileName string) (string, map[string]string, error)
- func ConvertXML(r io.Reader) (string, map[string]string, error)
- func ConvertXlsO(reader io.ReadSeeker, writer io.Writer, size int64) (written int64, err error)
- func ConvertXlsx(file io.ReaderAt, size int64, writer io.Writer, limit int64, rowLimit int) (written int64, err error)
- func DocxXMLToText(r io.Reader) (string, error)
- func GenSection(sep string) string
- func GetOffice2k3Meta(r io.Reader) (map[string]string, error)
- func GetXlsxText(r io.Reader) (string, string, string, map[string]string, error)
- func HTML2TextAndLinks(reader io.Reader, baseURL string) (pageText string, links []string, err error)
- func IsFileXLS(data []byte) bool
- func IsFileXLSX(data []byte) bool
- func ODS2Cells(file io.ReaderAt, size int64) (cells []string, err error)
- func PrintFileText(header string, footer string, body string, meta map[string]string) bool
- func Tidy(r io.Reader, xmlIn bool) ([]byte, error)
- func XLS2Cells(reader io.ReadSeeker) (cells []string, err error)
- func XLSX2Cells(file io.ReaderAt, size int64, rowLimit int) (cells []string, err error)
- func XMLToMap(r io.Reader) (map[string]string, error)
- func XMLToText(r io.Reader, breaks []string, skip []string, strict bool) (string, error)
- type BodyResult
- type LocalFile
- type MetaResult
Constants ¶
const SizeLimit = 20 * 1024 * 1024
Variables ¶
This section is empty.
Functions ¶
func ConvertDoc ¶
ConvertDoc converts an MS Word .doc to text.
func ConvertDocx ¶
ConvertDocx converts an MS Word docx file to text: header, footer, body, meta
func ConvertGB2312toUTF8 ¶
func ConvertOds ¶
func ConvertPDF_O ¶
func ConvertPDF_O(path string) (BodyResult, MetaResult, error)
func ConvertPpt ¶
ConvertPpt converts an MS PowerPoint .ppt file to text.
func ConvertPptx ¶
ConvertPptx converts an MS PowerPoint pptx file to text.
func ConvertRTF ¶
ConvertRTF converts RTF files to text.
func ConvertXML ¶
ConvertXML converts an XML file to text.
func ConvertXlsO ¶
func ConvertXlsx ¶
func ConvertXlsx(file io.ReaderAt, size int64, writer io.Writer, limit int64, rowLimit int) (written int64, err error)
ConvertXlsx extracts the text of an Excel sheet. Size is the full size of the input file. Limit is the output limit in bytes. rowLimit defines how many rows per sheet to extract; -1 means unlimited. This exists as protection against some XLSX files that may use an excessive amount of memory.
func DocxXMLToText ¶
DocxXMLToText converts Docx XML into plain text.
func GenSection ¶
Reference for Unicode character U+3000 (ideographic space): https://www.fileformat.info/info/unicode/char/3000/index.htm. Its UTF-8 encoding (hex) is 0xE3 0x80 0x80 (e38080).
func HTML2TextAndLinks ¶
func HTML2TextAndLinks(reader io.Reader, baseURL string) (pageText string, links []string, err error)
HTML2TextAndLinks extracts the text from an HTML document, along with all links from its <a> and <img> tags. If the base URL is provided, relative links will be converted to absolute ones.
func IsFileXLS ¶
IsFileXLS checks if the data indicates an XLS file. XLS has a signature of D0 CF 11 E0 A1 B1 1A E1.
func IsFileXLSX ¶
IsFileXLSX checks if the data indicates an XLSX file. XLSX has a signature of 50 4B 03 04. Warning: This collides with ZIP, DOCX and other zip-based files.
func ODS2Cells ¶
ODS2Cells converts an ODS file to individual cells. Size is the full size of the input file.
func PrintFileText ¶
func XLS2Cells ¶
func XLS2Cells(reader io.ReadSeeker) (cells []string, err error)
XLS2Cells converts an XLS file to individual cells
func XLSX2Cells ¶
XLSX2Cells converts an XLSX file to individual cells. Size is the full size of the input file. rowLimit defines how many rows per sheet to extract; -1 means unlimited. This exists as protection against some XLSX files that may use an excessive amount of memory.
Types ¶
type BodyResult ¶
type BodyResult struct {
// contains filtered or unexported fields
}
type LocalFile ¶
LocalFile wraps an *os.File.
func NewLocalFile ¶
NewLocalFile ensures that there is a file which contains the data provided by r. If r is actually an instance of *os.File then this file is used, otherwise a temporary file is created and the data from r copied into it. Callers must call Done() when the LocalFile is no longer needed to ensure all resources are cleaned up.
type MetaResult ¶
type MetaResult struct {
// contains filtered or unexported fields
}
Source Files ¶
Directories ¶
Path | Synopsis |
---|---|
This package implements rudimentary support for reading Open Document Spreadsheet files.
|
This package implements rudimentary support for reading Open Document Spreadsheet files. |
Package xls is used to parse Microsoft Excel 97-2004 .xls files (".xls" suffix, NOT ".xlsx" suffix).
|
Package xls is used to parse Microsoft Excel 97-2004 .xls files (".xls" suffix, NOT ".xlsx" suffix). |