Documentation ¶
Index ¶
- Variables
- type AnalyzeExpenseOutput
- type AnalyzeIDOutput
- type BoundingBox
- func (bb *BoundingBox) Area() float64
- func (bb *BoundingBox) Bottom() float64
- func (bb *BoundingBox) Height() float64
- func (bb *BoundingBox) HorizontalCenter() float64
- func (bb *BoundingBox) Intersection(other *BoundingBox) *BoundingBox
- func (bb *BoundingBox) Left() float64
- func (bb *BoundingBox) Right() float64
- func (bb *BoundingBox) String() string
- func (bb *BoundingBox) Top() float64
- func (bb *BoundingBox) VerticalCenter() float64
- func (bb *BoundingBox) Width() float64
- type BoundingBoxAccessor
- type Cell
- type CellAtOptions
- type Document
- func (d *Document) KeyValues() []*KeyValue
- func (d *Document) Lines() []*Line
- func (d *Document) Pages() []*Page
- func (d *Document) Signatures() []*Signature
- func (d *Document) Tables() []*Table
- func (d *Document) Text(optFns ...func(*TextLinearizationOptions)) string
- func (d *Document) Words() []*Word
- type DocumentAPIOutput
- type ExpenseDocument
- type ExpenseField
- type IdentityDocument
- type IdentityDocumentField
- func (idf *IdentityDocumentField) Confidence() float64
- func (idf *IdentityDocumentField) FieldType() IdentityDocumentFieldType
- func (idf *IdentityDocumentField) IsNormalized() bool
- func (idf *IdentityDocumentField) NormalizedValue() *NormalizedIdentityDocumentFieldValue
- func (idf *IdentityDocumentField) Value() string
- type IdentityDocumentFieldType
- type IdentityDocumentType
- type Key
- func (b *Key) BlockType() types.BlockType
- func (b *Key) BoundingBox() *BoundingBox
- func (b *Key) Confidence() float64
- func (b *Key) ID() string
- func (b *Key) PageNumber() int
- func (b *Key) Polygon() Polygon
- func (b *Key) Raw() types.Block
- func (k *Key) String() string
- func (k *Key) Text() string
- func (k *Key) Words() []*Word
- type KeyValue
- func (b *KeyValue) BlockType() types.BlockType
- func (kv *KeyValue) BoundingBox() *BoundingBox
- func (kv *KeyValue) Confidence() float64
- func (b *KeyValue) ID() string
- func (kv *KeyValue) Key() *Key
- func (b *KeyValue) PageNumber() int
- func (kv *KeyValue) Polygon() Polygon
- func (b *KeyValue) Raw() types.Block
- func (kv *KeyValue) String() string
- func (kv *KeyValue) Text(optFns ...func(*TextLinearizationOptions)) string
- func (kv *KeyValue) Value() *Value
- func (kv *KeyValue) Words() []*Word
- type Layout
- func (l *Layout) AddChildren(children ...LayoutChild)
- func (b *Layout) BlockType() types.BlockType
- func (b *Layout) BoundingBox() *BoundingBox
- func (b *Layout) Confidence() float64
- func (b *Layout) ID() string
- func (b *Layout) PageNumber() int
- func (b *Layout) Polygon() Polygon
- func (b *Layout) Raw() types.Block
- func (l *Layout) Text(optFns ...func(*TextLinearizationOptions)) string
- type LayoutChild
- type Line
- func (b *Line) BlockType() types.BlockType
- func (b *Line) BoundingBox() *BoundingBox
- func (b *Line) Confidence() float64
- func (b *Line) ID() string
- func (b *Line) PageNumber() int
- func (b *Line) Polygon() Polygon
- func (b *Line) Raw() types.Block
- func (l *Line) String() string
- func (l *Line) Text(_ ...func(*TextLinearizationOptions)) string
- func (l *Line) Words() []*Word
- type NormalizedIdentityDocumentFieldValue
- type Orientation
- type Page
- func (p *Page) AddLayouts(layouts ...*Layout)
- func (p *Page) Height() float64
- func (p *Page) ID() string
- func (p *Page) KeyValues() []*KeyValue
- func (p *Page) Layouts() []*Layout
- func (p *Page) Lines() []*Line
- func (p *Page) Number() int
- func (p *Page) Queries() []*Query
- func (p *Page) SearchValueByKey(key string) []*KeyValue
- func (p *Page) Signatures() []*Signature
- func (p *Page) Tables() []*Table
- func (p *Page) Text(optFns ...func(*TextLinearizationOptions)) string
- func (p *Page) Width() float64
- func (p *Page) Words() []*Word
- type Point
- type Polygon
- type Query
- type QueryResult
- func (b *QueryResult) BlockType() types.BlockType
- func (b *QueryResult) BoundingBox() *BoundingBox
- func (b *QueryResult) Confidence() float64
- func (b *QueryResult) ID() string
- func (b *QueryResult) PageNumber() int
- func (b *QueryResult) Polygon() Polygon
- func (b *QueryResult) Raw() types.Block
- func (qr *QueryResult) Text() string
- type RowCellsAtOptions
- type RowsOptions
- type SelectionElement
- func (b *SelectionElement) BlockType() types.BlockType
- func (b *SelectionElement) BoundingBox() *BoundingBox
- func (b *SelectionElement) Confidence() float64
- func (b *SelectionElement) ID() string
- func (se *SelectionElement) IsSelected() bool
- func (b *SelectionElement) PageNumber() int
- func (b *SelectionElement) Polygon() Polygon
- func (b *SelectionElement) Raw() types.Block
- func (se *SelectionElement) Status() types.SelectionStatus
- func (se *SelectionElement) String() string
- func (se *SelectionElement) Text(optFns ...func(*TextLinearizationOptions)) string
- type Signature
- func (b *Signature) BlockType() types.BlockType
- func (b *Signature) BoundingBox() *BoundingBox
- func (b *Signature) Confidence() float64
- func (b *Signature) ID() string
- func (b *Signature) PageNumber() int
- func (b *Signature) Polygon() Polygon
- func (b *Signature) Raw() types.Block
- func (s *Signature) Text(optFns ...func(*TextLinearizationOptions)) string
- func (s *Signature) Words() []*Word
- type Table
- func (b *Table) BlockType() types.BlockType
- func (b *Table) BoundingBox() *BoundingBox
- func (t *Table) CellAt(rowIndex, columnIndex int, optFns ...func(*CellAtOptions)) Cell
- func (b *Table) Confidence() float64
- func (b *Table) ID() string
- func (b *Table) PageNumber() int
- func (b *Table) Polygon() Polygon
- func (b *Table) Raw() types.Block
- func (t *Table) RowCellsAt(rowIndex int, optFns ...func(*RowCellsAtOptions)) []Cell
- func (t *Table) RowCount() int
- func (t *Table) Rows(optFns ...func(*RowsOptions)) []*TableRow
- func (t *Table) Text(optFns ...func(*TextLinearizationOptions)) string
- func (t *Table) Words() []*Word
- type TableCell
- func (c *TableCell) IsColumnHeader() bool
- func (c *TableCell) IsMerged() bool
- func (c *TableCell) IsTableFooter() bool
- func (c *TableCell) IsTableSectionTitle() bool
- func (c *TableCell) IsTableSummary() bool
- func (c *TableCell) IsTableTitle() bool
- func (tc *TableCell) SelectionElement() *SelectionElement
- func (tc *TableCell) Text(optFns ...func(*TextLinearizationOptions)) string
- func (tc *TableCell) Words() []*Word
- type TableFooter
- func (b *TableFooter) BlockType() types.BlockType
- func (b *TableFooter) BoundingBox() *BoundingBox
- func (b *TableFooter) Confidence() float64
- func (b *TableFooter) ID() string
- func (b *TableFooter) PageNumber() int
- func (b *TableFooter) Polygon() Polygon
- func (b *TableFooter) Raw() types.Block
- func (tf *TableFooter) Text(_ ...func(*TextLinearizationOptions)) string
- func (tf *TableFooter) Words() []*Word
- type TableMergedCell
- func (c *TableMergedCell) IsColumnHeader() bool
- func (c *TableMergedCell) IsMerged() bool
- func (c *TableMergedCell) IsTableFooter() bool
- func (c *TableMergedCell) IsTableSectionTitle() bool
- func (c *TableMergedCell) IsTableSummary() bool
- func (c *TableMergedCell) IsTableTitle() bool
- func (tmc *TableMergedCell) Text(_ ...func(*TextLinearizationOptions)) string
- func (tmc *TableMergedCell) Words() []*Word
- type TableRow
- type TableTitle
- func (b *TableTitle) BlockType() types.BlockType
- func (b *TableTitle) BoundingBox() *BoundingBox
- func (b *TableTitle) Confidence() float64
- func (b *TableTitle) ID() string
- func (b *TableTitle) PageNumber() int
- func (b *TableTitle) Polygon() Polygon
- func (b *TableTitle) Raw() types.Block
- func (tt *TableTitle) Text(_ ...func(*TextLinearizationOptions)) string
- func (tt *TableTitle) Words() []*Word
- type TextLinearizationOptions
- type Value
- func (b *Value) BlockType() types.BlockType
- func (b *Value) BoundingBox() *BoundingBox
- func (b *Value) Confidence() float64
- func (b *Value) ID() string
- func (b *Value) PageNumber() int
- func (b *Value) Polygon() Polygon
- func (b *Value) Raw() types.Block
- func (v *Value) SelectionElement() *SelectionElement
- func (v *Value) String() string
- func (v *Value) Text(optFns ...func(*TextLinearizationOptions)) string
- func (v *Value) Words() []*Word
- type Word
- func (b *Word) BlockType() types.BlockType
- func (b *Word) BoundingBox() *BoundingBox
- func (b *Word) Confidence() float64
- func (b *Word) ID() string
- func (w *Word) IsHandwriting() bool
- func (w *Word) IsPrinted() bool
- func (b *Word) PageNumber() int
- func (b *Word) Polygon() Polygon
- func (b *Word) Raw() types.Block
- func (w *Word) Text() string
- func (w *Word) TextType() types.TextType
Constants ¶
This section is empty.
Variables ¶
var DefaultLinerizationOptions = TextLinearizationOptions{ MaxNumberOfConsecutiveNewLines: 2, HideHeaderLayout: false, HideFooterLayout: false, HideFigureLayout: false, HidePageNumberLayout: false, PageNumberPrefix: "", PageNumberSuffix: "", SameParagraphSeparator: " ", LayoutElementSeparator: "\n\n", ListElementSeparator: "\n", ListLayoutPrefix: "", ListLayoutSuffix: "", ListElementPrefix: "", ListElementSuffix: "", RemoveNewLinesInListElements: true, TitlePrefix: "", TitleSuffix: "", TableLayoutPrefix: "\n\n", TableLayoutSuffix: "\n", TableLinearizationFormat: "plaintext", TableMinTableWords: 0, TableColumnSeparator: "\t", TablePrefix: "", TableSuffix: "", TableRowSeparator: "\n", TableRowPrefix: "", TableRowSuffix: "", TableCellPrefix: "", TableCellSuffix: "", SectionHeaderPrefix: "", SectionHeaderSuffix: "", KeyValueLayoutPrefix: "\n\n", KeyValueLayoutSuffix: "", KeyValuePrefix: "", KeyValueSuffix: "", KeyPrefix: "", KeySuffix: "", ValuePrefix: "", ValueSuffix: "", SelectionElementSelected: "[X]", SelectionElementNotSelected: "[ ]", HeuristicHTolerance: 0.3, HeuristicOverlapRatio: 0.5, SignatureToken: "[SIGNATURE]", }
Functions ¶
This section is empty.
Types ¶
type AnalyzeExpenseOutput ¶ added in v0.0.3
type AnalyzeExpenseOutput struct { DocumentMetadata *types.DocumentMetadata `json:"DocumentMetadata"` ExpenseDocuments []types.ExpenseDocument `json:"ExpenseDocuments"` }
AnalyzeExpenseOutput represents the output of the Textract Analyze Expense API.
type AnalyzeIDOutput ¶ added in v0.0.4
type AnalyzeIDOutput struct { DocumentMetadata *types.DocumentMetadata `json:"DocumentMetadata"` IdentityDocuments []types.IdentityDocument `json:"IdentityDocuments"` }
AnalyzeIDOutput represents the output of the Textract Analyze ID API.
type BoundingBox ¶
type BoundingBox struct {
// contains filtered or unexported fields
}
func NewEnclosingBoundingBox ¶ added in v0.0.4
func NewEnclosingBoundingBox[T BoundingBoxAccessor](accessors ...T) *BoundingBox
NewEnclosingBoundingBox returns a new bounding box that represents the union of multiple bounding boxes.
func (*BoundingBox) Area ¶ added in v0.0.4
func (bb *BoundingBox) Area() float64
Area calculates and returns the area of the bounding box. If either the width or height of the bounding box is less than zero, the area is considered zero to prevent negative area values.
func (*BoundingBox) Bottom ¶
func (bb *BoundingBox) Bottom() float64
Bottom returns the bottom coordinate of the bounding box.
func (*BoundingBox) Height ¶
func (bb *BoundingBox) Height() float64
Height returns the height of the bounding box.
func (*BoundingBox) HorizontalCenter ¶
func (bb *BoundingBox) HorizontalCenter() float64
HorizontalCenter returns the horizontal center coordinate of the bounding box.
func (*BoundingBox) Intersection ¶
func (bb *BoundingBox) Intersection(other *BoundingBox) *BoundingBox
Intersection returns a new bounding box that represents the intersection of two bounding boxes.
func (*BoundingBox) Left ¶
func (bb *BoundingBox) Left() float64
Left returns the left coordinate of the bounding box.
func (*BoundingBox) Right ¶
func (bb *BoundingBox) Right() float64
Right returns the right coordinate of the bounding box.
func (*BoundingBox) String ¶ added in v0.0.2
func (bb *BoundingBox) String() string
String returns a string representation of the bounding box.
func (*BoundingBox) Top ¶
func (bb *BoundingBox) Top() float64
Top returns the top coordinate of the bounding box.
func (*BoundingBox) VerticalCenter ¶
func (bb *BoundingBox) VerticalCenter() float64
VerticalCenter returns the vertical center coordinate of the bounding box.
func (*BoundingBox) Width ¶
func (bb *BoundingBox) Width() float64
Width returns the width of the bounding box.
type BoundingBoxAccessor ¶ added in v0.0.4
type BoundingBoxAccessor interface {
BoundingBox() *BoundingBox
}
type Cell ¶
type Cell interface { Words() []*Word Text(optFns ...func(*TextLinearizationOptions)) string Confidence() float64 IsColumnHeader() bool IsTableTitle() bool IsTableSummary() bool IsTableSectionTitle() bool IsMerged() bool }
Cell defines the interface for a table cell in Textract.
type CellAtOptions ¶ added in v0.0.7
type CellAtOptions struct {
IgnoreMergedCells bool
}
type Document ¶
type Document struct {
// contains filtered or unexported fields
}
func ParseDocumentAPIOutput ¶ added in v0.0.4
func ParseDocumentAPIOutput(output *DocumentAPIOutput) (*Document, error)
ParseDocumentAPIOutput parses the Textract Document API output into a Document.
func (*Document) Signatures ¶ added in v0.0.4
func (d *Document) Signatures() []*Signature
func (*Document) Text ¶ added in v0.0.4
func (d *Document) Text(optFns ...func(*TextLinearizationOptions)) string
type DocumentAPIOutput ¶ added in v0.0.4
type DocumentAPIOutput struct { DocumentMetadata *types.DocumentMetadata `json:"DocumentMetadata"` Blocks []types.Block `json:"Blocks"` }
DocumentAPIOutput represents the output of the Textract Document API.
type ExpenseDocument ¶ added in v0.0.6
type ExpenseDocument struct {
// contains filtered or unexported fields
}
func ParseAnalyzeExpenseOutput ¶ added in v0.0.6
func ParseAnalyzeExpenseOutput(output *AnalyzeExpenseOutput) ([]*ExpenseDocument, error)
ParseAnalyzeExpenseOutput parses the Textract Analyze Expense API output into a slice of ExpenseDocument.
func (*ExpenseDocument) SummaryFields ¶ added in v0.0.7
func (ed *ExpenseDocument) SummaryFields() []*ExpenseField
type ExpenseField ¶ added in v0.0.7
type ExpenseField struct{}
type IdentityDocument ¶ added in v0.0.2
type IdentityDocument struct {
// contains filtered or unexported fields
}
func ParseAnalyzeIDOutput ¶ added in v0.0.4
func ParseAnalyzeIDOutput(output *AnalyzeIDOutput) ([]*IdentityDocument, error)
ParseAnalyzeIDOutput parses the Textract Analyze ID API output into a slice of IdentityDocument.
func (*IdentityDocument) Document ¶ added in v0.0.4
func (id *IdentityDocument) Document() *Document
func (*IdentityDocument) FieldByType ¶ added in v0.0.3
func (id *IdentityDocument) FieldByType(ft IdentityDocumentFieldType) *IdentityDocumentField
func (*IdentityDocument) Fields ¶ added in v0.0.3
func (id *IdentityDocument) Fields() []*IdentityDocumentField
func (*IdentityDocument) IdentityDocumentType ¶ added in v0.0.4
func (id *IdentityDocument) IdentityDocumentType() IdentityDocumentType
type IdentityDocumentField ¶ added in v0.0.3
type IdentityDocumentField struct {
// contains filtered or unexported fields
}
IdentityDocumentField represents a field extracted from an identity document by Textract.
func (*IdentityDocumentField) Confidence ¶ added in v0.0.3
func (idf *IdentityDocumentField) Confidence() float64
Confidence returns the confidence score associated with the field extraction.
func (*IdentityDocumentField) FieldType ¶ added in v0.0.4
func (idf *IdentityDocumentField) FieldType() IdentityDocumentFieldType
FieldType returns the type of the identity document field.
func (*IdentityDocumentField) IsNormalized ¶ added in v0.0.3
func (idf *IdentityDocumentField) IsNormalized() bool
IsNormalized checks if the field value is normalized.
func (*IdentityDocumentField) NormalizedValue ¶ added in v0.0.3
func (idf *IdentityDocumentField) NormalizedValue() *NormalizedIdentityDocumentFieldValue
NormalizedValue returns the normalized value of the identity document field.
func (*IdentityDocumentField) Value ¶ added in v0.0.3
func (idf *IdentityDocumentField) Value() string
Value returns the value of the identity document field.
type IdentityDocumentFieldType ¶ added in v0.0.3
type IdentityDocumentFieldType string
IdentityDocumentFieldType represents the type of fields in an identity document.
const ( IdentityDocumentFieldTypeFirstName IdentityDocumentFieldType = "FIRST_NAME" IdentityDocumentFieldTypeLastName IdentityDocumentFieldType = "LAST_NAME" IdentityDocumentFieldTypeMiddleName IdentityDocumentFieldType = "MIDDLE_NAME" IdentityDocumentFieldTypeSuffix IdentityDocumentFieldType = "Suffix" IdentityDocumentFieldTypeCityInAddress IdentityDocumentFieldType = "CITY_IN_ADDRESS" IdentityDocumentFieldTypeZipCodeInAddress IdentityDocumentFieldType = "ZIP_CODE_IN_ADDRESS" IdentityDocumentFieldTypeStateInAddress IdentityDocumentFieldType = "STATE_IN_ADDRESS" IdentityDocumentFieldTypeStateName IdentityDocumentFieldType = "STATE_NAME" IdentityDocumentFieldTypeDocumentNumber IdentityDocumentFieldType = "DOCUMENT_NUMBER" IdentityDocumentFieldTypeExpirationDate IdentityDocumentFieldType = "EXPIRATION_DATE" IdentityDocumentFieldTypeDateOfBirth IdentityDocumentFieldType = "DATE_OF_BIRTH" IdentityDocumentFieldTypeDateOfIssue IdentityDocumentFieldType = "DATE_OF_ISSUE" IdentityDocumentFieldTypeIDType IdentityDocumentFieldType = "ID_TYPE" IdentityDocumentFieldTypeEndorsements IdentityDocumentFieldType = "ENDORSEMENTS" IdentityDocumentFieldTypeVeteran IdentityDocumentFieldType = "VETERAN" IdentityDocumentFieldTypeRestrictions IdentityDocumentFieldType = "RESTRICTIONS" IdentityDocumentFieldTypeClass IdentityDocumentFieldType = "CLASS" IdentityDocumentFieldTypeAddress IdentityDocumentFieldType = "ADDRESS" IdentityDocumentFieldTypeCounty IdentityDocumentFieldType = "COUNTY" IdentityDocumentFieldTypePlaceOfBirth IdentityDocumentFieldType = "PLACE_OF_BIRTH" IdentityDocumentFieldTypeMRZCode IdentityDocumentFieldType = "MRZ_CODE" IdentityDocumentFieldTypeOther IdentityDocumentFieldType = "Other" )
type IdentityDocumentType ¶ added in v0.0.3
type IdentityDocumentType string
IdentityDocumentType represents the type of an identity document.
const ( IdentityDocumentTypeDriverLicenseFront IdentityDocumentType = "DRIVER LICENSE FRONT" IdentityDocumentTypePassport IdentityDocumentType = "PASSPORT" IdentityDocumentTypeOther IdentityDocumentType = "OTHER" )
type Key ¶ added in v0.0.4
type Key struct {
// contains filtered or unexported fields
}
func (*Key) BoundingBox ¶ added in v0.0.4
func (b *Key) BoundingBox() *BoundingBox
BoundingBox returns the bounding box information of the block.
func (*Key) Confidence ¶ added in v0.0.4
func (b *Key) Confidence() float64
Confidence returns the confidence of the block.
func (*Key) PageNumber ¶ added in v0.0.4
func (b *Key) PageNumber() int
PageNumber returns the page number associated with the block.
func (*Key) Polygon ¶ added in v0.0.4
func (b *Key) Polygon() Polygon
Polygon returns the polygon information of the block.
type KeyValue ¶ added in v0.0.4
type KeyValue struct {
// contains filtered or unexported fields
}
func (*KeyValue) BoundingBox ¶ added in v0.0.4
func (kv *KeyValue) BoundingBox() *BoundingBox
func (*KeyValue) Confidence ¶ added in v0.0.4
func (kv *KeyValue) Confidence() float64
Confidence calculates the confidence score for a key value.
func (*KeyValue) ID ¶ added in v0.0.4
func (b *KeyValue) ID() string
ID returns the identifier of the block.
func (*KeyValue) PageNumber ¶ added in v0.0.4
func (b *KeyValue) PageNumber() int
PageNumber returns the page number associated with the block.
func (*KeyValue) Text ¶ added in v0.0.5
func (kv *KeyValue) Text(optFns ...func(*TextLinearizationOptions)) string
type Layout ¶ added in v0.0.4
type Layout struct {
// contains filtered or unexported fields
}
func (*Layout) AddChildren ¶ added in v0.0.4
func (l *Layout) AddChildren(children ...LayoutChild)
func (*Layout) BoundingBox ¶ added in v0.0.4
func (b *Layout) BoundingBox() *BoundingBox
BoundingBox returns the bounding box information of the block.
func (*Layout) Confidence ¶ added in v0.0.4
func (b *Layout) Confidence() float64
Confidence returns the confidence of the block.
func (*Layout) ID ¶ added in v0.0.4
func (b *Layout) ID() string
ID returns the identifier of the block.
func (*Layout) PageNumber ¶ added in v0.0.4
func (b *Layout) PageNumber() int
PageNumber returns the page number associated with the block.
func (*Layout) Polygon ¶ added in v0.0.4
func (b *Layout) Polygon() Polygon
Polygon returns the polygon information of the block.
func (*Layout) Text ¶ added in v0.0.4
func (l *Layout) Text(optFns ...func(*TextLinearizationOptions)) string
type LayoutChild ¶ added in v0.0.4
type LayoutChild interface { ID() string Text(optFns ...func(*TextLinearizationOptions)) string BoundingBox() *BoundingBox }
type Line ¶
type Line struct {
// contains filtered or unexported fields
}
func (*Line) BoundingBox ¶ added in v0.0.4
func (b *Line) BoundingBox() *BoundingBox
BoundingBox returns the bounding box information of the block.
func (*Line) Confidence ¶
func (b *Line) Confidence() float64
Confidence returns the confidence of the block.
func (*Line) PageNumber ¶ added in v0.0.4
func (b *Line) PageNumber() int
PageNumber returns the page number associated with the block.
func (*Line) Polygon ¶ added in v0.0.4
func (b *Line) Polygon() Polygon
Polygon returns the polygon information of the block.
func (*Line) Text ¶
func (l *Line) Text(_ ...func(*TextLinearizationOptions)) string
type NormalizedIdentityDocumentFieldValue ¶ added in v0.0.4
type NormalizedIdentityDocumentFieldValue struct {
// contains filtered or unexported fields
}
NormalizedIdentityDocumentFieldValue represents a normalized value of an identity document field.
func (NormalizedIdentityDocumentFieldValue) DateValue ¶ added in v0.0.4
func (nidfv NormalizedIdentityDocumentFieldValue) DateValue() (time.Time, error)
DateValue returns the time representation of the normalized date value.
func (NormalizedIdentityDocumentFieldValue) Value ¶ added in v0.0.4
func (nidfv NormalizedIdentityDocumentFieldValue) Value() string
Value returns the string representation of the normalized value.
func (NormalizedIdentityDocumentFieldValue) ValueType ¶ added in v0.0.4
func (nidfv NormalizedIdentityDocumentFieldValue) ValueType() types.ValueType
ValueType returns the type of the normalized value.
type Orientation ¶
type Orientation struct {
// contains filtered or unexported fields
}
Orientation represents the orientation of a geometric element.
func (*Orientation) Degrees ¶
func (o *Orientation) Degrees() float64
Degrees returns the orientation in degrees.
func (*Orientation) Radians ¶
func (o *Orientation) Radians() float64
Radians returns the orientation in radians.
type Page ¶
type Page struct {
// contains filtered or unexported fields
}
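func (*Page) AddLayouts ¶ added in v0.0.4
func (p *Page) AddLayouts(layouts ...*Layout)
func (*Page) SearchValueByKey ¶ added in v0.0.4
func (p *Page) SearchValueByKey(key string) []*KeyValue
func (*Page) Signatures ¶ added in v0.0.2
func (p *Page) Signatures() []*Signature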
func (*Page) Text ¶
func (p *Page) Text(optFns ...func(*TextLinearizationOptions)) string
type Point ¶
type Point struct {
// contains filtered or unexported fields
}
Point represents a 2D point.
type Query ¶ added in v0.0.2
type Query struct {
// contains filtered or unexported fields
}
Query represents a query with associated information, including an identifier, text, alias, query pages, results, a page, and raw block data.
func (*Query) ResultsByConfidence ¶ added in v0.0.2
func (q *Query) ResultsByConfidence() []*QueryResult
ResultsByConfidence lists this query instance's results, sorted from most to least confident.
func (*Query) TopResult ¶ added in v0.0.2
func (q *Query) TopResult() *QueryResult
TopResult retrieves the top result by confidence score, if any are available.
type QueryResult ¶ added in v0.0.2
type QueryResult struct {
// contains filtered or unexported fields
}
QueryResult represents the result of a parsed query.
func (*QueryResult) BoundingBox ¶ added in v0.0.4
func (b *QueryResult) BoundingBox() *BoundingBox
BoundingBox returns the bounding box information of the block.
func (*QueryResult) Confidence ¶ added in v0.0.2
func (b *QueryResult) Confidence() float64
Confidence returns the confidence of the block.
func (*QueryResult) ID ¶ added in v0.0.2
func (b *QueryResult) ID() string
ID returns the identifier of the block.
func (*QueryResult) PageNumber ¶ added in v0.0.4
func (b *QueryResult) PageNumber() int
PageNumber returns the page number associated with the block.
func (*QueryResult) Polygon ¶ added in v0.0.4
func (b *QueryResult) Polygon() Polygon
Polygon returns the polygon information of the block.
func (*QueryResult) Text ¶ added in v0.0.2
func (qr *QueryResult) Text() string
Text returns the extracted text from the query result.
type RowCellsAtOptions ¶ added in v0.0.7
type RowCellsAtOptions struct {
IgnoreMergedCells bool
}
type RowsOptions ¶ added in v0.0.7
type RowsOptions struct {
IgnoreMergedCells bool
}
type SelectionElement ¶
type SelectionElement struct {
// contains filtered or unexported fields
}
SelectionElement represents an element with selection status.
func (*SelectionElement) BoundingBox ¶ added in v0.0.4
func (b *SelectionElement) BoundingBox() *BoundingBox
BoundingBox returns the bounding box information of the block.
func (*SelectionElement) Confidence ¶
func (b *SelectionElement) Confidence() float64
Confidence returns the confidence of the block.
func (*SelectionElement) ID ¶
func (b *SelectionElement) ID() string
ID returns the identifier of the block.
func (*SelectionElement) IsSelected ¶
func (se *SelectionElement) IsSelected() bool
IsSelected checks if the element is selected.
func (*SelectionElement) PageNumber ¶ added in v0.0.4
func (b *SelectionElement) PageNumber() int
PageNumber returns the page number associated with the block.
func (*SelectionElement) Polygon ¶ added in v0.0.4
func (b *SelectionElement) Polygon() Polygon
Polygon returns the polygon information of the block.
func (*SelectionElement) Status ¶
func (se *SelectionElement) Status() types.SelectionStatus
Status returns the selection status of the element.
func (*SelectionElement) String ¶ added in v0.0.7
func (se *SelectionElement) String() string
func (*SelectionElement) Text ¶ added in v0.0.5
func (se *SelectionElement) Text(optFns ...func(*TextLinearizationOptions)) string
Text returns the text representation of the selection element. It considers the selection status and applies linearization options.
type Signature ¶ added in v0.0.2
type Signature struct {
// contains filtered or unexported fields
}
func (*Signature) BoundingBox ¶ added in v0.0.4
func (b *Signature) BoundingBox() *BoundingBox
BoundingBox returns the bounding box information of the block.
func (*Signature) Confidence ¶ added in v0.0.2
func (b *Signature) Confidence() float64
Confidence returns the confidence of the block.
func (*Signature) ID ¶ added in v0.0.2
func (b *Signature) ID() string
ID returns the identifier of the block.
func (*Signature) PageNumber ¶ added in v0.0.4
func (b *Signature) PageNumber() int
PageNumber returns the page number associated with the block.
func (*Signature) Polygon ¶ added in v0.0.4
func (b *Signature) Polygon() Polygon
Polygon returns the polygon information of the block.
func (*Signature) Text ¶ added in v0.0.4
func (s *Signature) Text(optFns ...func(*TextLinearizationOptions)) string
type Table ¶
type Table struct {
// contains filtered or unexported fields
}
func (*Table) BoundingBox ¶ added in v0.0.4
func (b *Table) BoundingBox() *BoundingBox
BoundingBox returns the bounding box information of the block.
func (*Table) CellAt ¶
func (t *Table) CellAt(rowIndex, columnIndex int, optFns ...func(*CellAtOptions)) Cell
func (*Table) Confidence ¶
func (b *Table) Confidence() float64
Confidence returns the confidence of the block.
func (*Table) PageNumber ¶ added in v0.0.4
func (b *Table) PageNumber() int
PageNumber returns the page number associated with the block.
func (*Table) Polygon ¶ added in v0.0.4
func (b *Table) Polygon() Polygon
Polygon returns the polygon information of the block.
func (*Table) RowCellsAt ¶ added in v0.0.6
func (t *Table) RowCellsAt(rowIndex int, optFns ...func(*RowCellsAtOptions)) []Cell
func (*Table) Rows ¶
func (t *Table) Rows(optFns ...func(*RowsOptions)) []*TableRow
func (*Table) Text ¶ added in v0.0.5
func (t *Table) Text(optFns ...func(*TextLinearizationOptions)) string
type TableCell ¶ added in v0.0.4
type TableCell struct {
// contains filtered or unexported fields
}
TableCell represents a cell in a table.
func (*TableCell) IsColumnHeader ¶ added in v0.0.4
func (c *TableCell) IsColumnHeader() bool
IsColumnHeader checks if the cell is a column header.
func (*TableCell) IsMerged ¶ added in v0.0.7
func (c *TableCell) IsMerged() bool
IsMerged checks if the cell is part of a merged group.
func (*TableCell) IsTableFooter ¶ added in v0.0.4
func (c *TableCell) IsTableFooter() bool
IsTableFooter checks if the cell is a table footer.
func (*TableCell) IsTableSectionTitle ¶ added in v0.0.4
func (c *TableCell) IsTableSectionTitle() bool
IsTableSectionTitle checks if the cell is a table section title.
func (*TableCell) IsTableSummary ¶ added in v0.0.4
func (c *TableCell) IsTableSummary() bool
IsTableSummary checks if the cell is a table summary.
func (*TableCell) IsTableTitle ¶ added in v0.0.4
func (c *TableCell) IsTableTitle() bool
IsTableTitle checks if the cell is a table title.
func (*TableCell) SelectionElement ¶ added in v0.0.7
func (tc *TableCell) SelectionElement() *SelectionElement
SelectionElement returns the selection element associated with the table cell.
func (*TableCell) Text ¶ added in v0.0.4
func (tc *TableCell) Text(optFns ...func(*TextLinearizationOptions)) string
Text returns the text content of the table cell.
type TableFooter ¶ added in v0.0.4
type TableFooter struct {
// contains filtered or unexported fields
}
TableFooter represents the footer of a table block.
func (*TableFooter) BoundingBox ¶ added in v0.0.4
func (b *TableFooter) BoundingBox() *BoundingBox
BoundingBox returns the bounding box information of the block.
func (*TableFooter) Confidence ¶ added in v0.0.4
func (b *TableFooter) Confidence() float64
Confidence returns the confidence of the block.
func (*TableFooter) ID ¶ added in v0.0.4
func (b *TableFooter) ID() string
ID returns the identifier of the block.
func (*TableFooter) PageNumber ¶ added in v0.0.4
func (b *TableFooter) PageNumber() int
PageNumber returns the page number associated with the block.
func (*TableFooter) Polygon ¶ added in v0.0.4
func (b *TableFooter) Polygon() Polygon
Polygon returns the polygon information of the block.
func (*TableFooter) Text ¶ added in v0.0.4
func (tf *TableFooter) Text(_ ...func(*TextLinearizationOptions)) string
Text returns the concatenated text of all words in the table footer.
func (*TableFooter) Words ¶ added in v0.0.6
func (tf *TableFooter) Words() []*Word
Words returns the words within the table footer.
type TableMergedCell ¶ added in v0.0.6
type TableMergedCell struct {
// contains filtered or unexported fields
}
TableMergedCell represents a merged cell in a table.
func (*TableMergedCell) IsColumnHeader ¶ added in v0.0.6
func (c *TableMergedCell) IsColumnHeader() bool
IsColumnHeader checks if the cell is a column header.
func (*TableMergedCell) IsMerged ¶ added in v0.0.7
func (c *TableMergedCell) IsMerged() bool
IsMerged checks if the cell is part of a merged group.
func (*TableMergedCell) IsTableFooter ¶ added in v0.0.6
func (c *TableMergedCell) IsTableFooter() bool
IsTableFooter checks if the cell is a table footer.
func (*TableMergedCell) IsTableSectionTitle ¶ added in v0.0.6
func (c *TableMergedCell) IsTableSectionTitle() bool
IsTableSectionTitle checks if the cell is a table section title.
func (*TableMergedCell) IsTableSummary ¶ added in v0.0.6
func (c *TableMergedCell) IsTableSummary() bool
IsTableSummary checks if the cell is a table summary.
func (*TableMergedCell) IsTableTitle ¶ added in v0.0.6
func (c *TableMergedCell) IsTableTitle() bool
IsTableTitle checks if the cell is a table title.
func (*TableMergedCell) Text ¶ added in v0.0.6
func (tmc *TableMergedCell) Text(_ ...func(*TextLinearizationOptions)) string
Text returns the text content of the merged cell.
func (*TableMergedCell) Words ¶ added in v0.0.6
func (tmc *TableMergedCell) Words() []*Word
Words returns the words in the merged cell.
type TableTitle ¶ added in v0.0.4
type TableTitle struct {
// contains filtered or unexported fields
}
TableTitle represents the title of a table, containing a collection of words.
func (*TableTitle) BoundingBox ¶ added in v0.0.4
func (b *TableTitle) BoundingBox() *BoundingBox
BoundingBox returns the bounding box information of the block.
func (*TableTitle) Confidence ¶ added in v0.0.4
func (b *TableTitle) Confidence() float64
Confidence returns the confidence of the block.
func (*TableTitle) ID ¶ added in v0.0.4
func (b *TableTitle) ID() string
ID returns the identifier of the block.
func (*TableTitle) PageNumber ¶ added in v0.0.4
func (b *TableTitle) PageNumber() int
PageNumber returns the page number associated with the block.
func (*TableTitle) Polygon ¶ added in v0.0.4
func (b *TableTitle) Polygon() Polygon
Polygon returns the polygon information of the block.
func (*TableTitle) Text ¶ added in v0.0.4
func (tt *TableTitle) Text(_ ...func(*TextLinearizationOptions)) string
Text returns the concatenated text of the table title. The variadic linearization options parameter is unnamed (_) in the signature, so any options passed are ignored.
func (*TableTitle) Words ¶ added in v0.0.6
func (tt *TableTitle) Words() []*Word
Words returns the words constituting the table title.
type TextLinearizationOptions ¶ added in v0.0.4
type TextLinearizationOptions struct {
	// MaxNumberOfConsecutiveNewLines sets the maximum number of consecutive new lines to keep, removing extra whitespace.
	MaxNumberOfConsecutiveNewLines int
	// HideHeaderLayout hides headers in the linearized output.
	HideHeaderLayout bool
	// HideFooterLayout hides footers in the linearized output.
	HideFooterLayout bool
	// HideFigureLayout hides figures in the linearized output.
	HideFigureLayout bool
	// HidePageNumberLayout hides page numbers in the linearized output.
	HidePageNumberLayout bool
	// PageNumberPrefix is the prefix for page number layout elements.
	PageNumberPrefix string
	// PageNumberSuffix is the suffix for page number layout elements.
	PageNumberSuffix string
	// SameParagraphSeparator is the separator to use when combining elements within a text block.
	SameParagraphSeparator string
	// LayoutElementSeparator is the separator to use when combining linearized layout elements.
	LayoutElementSeparator string
	// ListElementSeparator is the separator for elements in a list layout.
	ListElementSeparator string
	// ListLayoutPrefix is the prefix for list layout elements (parent).
	ListLayoutPrefix string
	// ListLayoutSuffix is the suffix for list layout elements (parent).
	ListLayoutSuffix string
	// ListElementPrefix is the prefix for elements in a list layout (children).
	ListElementPrefix string
	// ListElementSuffix is the suffix for elements in a list layout (children).
	ListElementSuffix string
	// RemoveNewLinesInListElements removes new lines in list elements.
	RemoveNewLinesInListElements bool
	// TitlePrefix is the prefix for title layout elements.
	TitlePrefix string
	// TitleSuffix is the suffix for title layout elements.
	TitleSuffix string
	// TableLayoutPrefix is the prefix for table elements.
	TableLayoutPrefix string
	// TableLayoutSuffix is the suffix for table elements.
	TableLayoutSuffix string
	// TableLinearizationFormat sets how to represent tables in the linearized output. Choices are plaintext or markdown.
	TableLinearizationFormat string
	// TableMinTableWords is the threshold below which tables will be rendered as words instead of using table layout.
	TableMinTableWords int
	// TableColumnSeparator is the table column separator, used when linearizing layout tables, not used if AnalyzeDocument was called with the TABLES feature.
	TableColumnSeparator string
	// TablePrefix is the prefix for table layout.
	TablePrefix string
	// TableSuffix is the suffix for table layout.
	TableSuffix string
	// TableRowSeparator is the table row separator.
	TableRowSeparator string
	// TableRowPrefix is the prefix for table row.
	TableRowPrefix string
	// TableRowSuffix is the suffix for table row.
	TableRowSuffix string
	// TableCellPrefix is the prefix for table cell.
	TableCellPrefix string
	// TableCellSuffix is the suffix for table cell.
	TableCellSuffix string
	// SectionHeaderPrefix is the prefix for section header layout elements.
	SectionHeaderPrefix string
	// SectionHeaderSuffix is the suffix for section header layout elements.
	SectionHeaderSuffix string
	// KeyValueLayoutPrefix is the prefix for key_value layout elements (not for individual key-value elements).
	KeyValueLayoutPrefix string
	// KeyValueLayoutSuffix is the suffix for key_value layout elements (not for individual key-value elements).
	KeyValueLayoutSuffix string
	// KeyValuePrefix is the prefix for key-value elements.
	KeyValuePrefix string
	// KeyValueSuffix is the suffix for key-value elements.
	KeyValueSuffix string
	// KeyPrefix is the prefix for key elements.
	KeyPrefix string
	// KeySuffix is the suffix for key elements.
	KeySuffix string
	// ValuePrefix is the prefix for value elements.
	ValuePrefix string
	// ValueSuffix is the suffix for value elements.
	ValueSuffix string
	// SelectionElementSelected is the representation for selection elements when selected.
	SelectionElementSelected string
	// SelectionElementNotSelected is the representation for selection elements when not selected.
	SelectionElementNotSelected string
	// HeuristicHTolerance sets how much the line below and above the current line should differ in width to be separated.
	HeuristicHTolerance float64
	// HeuristicOverlapRatio sets how much vertical overlap is tolerated between two subsequent lines before merging them into a single line.
	HeuristicOverlapRatio float64
	// SignatureToken is the signature representation in the linearized text.
	SignatureToken string
}
TextLinearizationOptions defines how a document is linearized into a text string.
type Value ¶ added in v0.0.4
type Value struct {
// contains filtered or unexported fields
}
func (*Value) BoundingBox ¶ added in v0.0.4
func (b *Value) BoundingBox() *BoundingBox
BoundingBox returns the bounding box information of the block.
func (*Value) Confidence ¶ added in v0.0.4
func (b *Value) Confidence() float64
Confidence returns the confidence of the block.
func (*Value) ID ¶ added in v0.0.4
func (b *Value) ID() string
ID returns the identifier of the block.
func (*Value) PageNumber ¶ added in v0.0.4
func (b *Value) PageNumber() int
PageNumber returns the page number associated with the block.
func (*Value) Polygon ¶ added in v0.0.4
func (b *Value) Polygon() Polygon
Polygon returns the polygon information of the block.
func (*Value) SelectionElement ¶ added in v0.0.7
func (v *Value) SelectionElement() *SelectionElement
SelectionElement returns the selection element associated with the value.
func (*Value) Text ¶ added in v0.0.4
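func (v *Value) Text(optFns ...func(*TextLinearizationOptions)) string
Text returns the text of the value. The optional optFns can be used to adjust the TextLinearizationOptions.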
type Word ¶
type Word struct {
// contains filtered or unexported fields
}
Word represents a word extracted by Textract.
func (*Word) BoundingBox ¶ added in v0.0.4
func (b *Word) BoundingBox() *BoundingBox
BoundingBox returns the bounding box information of the block.
func (*Word) Confidence ¶
func (b *Word) Confidence() float64
Confidence returns the confidence of the block.
func (*Word) IsHandwriting ¶
IsHandwriting checks if the word is handwriting.
func (*Word) PageNumber ¶ added in v0.0.4
func (b *Word) PageNumber() int
PageNumber returns the page number associated with the block.
Source Files ¶
- base.go
- block_parser.go
- document.go
- enums.go
- expense_document.go
- expense_document_parser.go
- expense_field.go
- geometry.go
- identity_document.go
- identity_document_field.go
- identity_document_parser.go
- key_value.go
- layout.go
- line.go
- options.go
- page.go
- page_parser.go
- query.go
- selection_element.go
- signature.go
- table.go
- table_cell.go
- table_footer.go
- table_title.go
- textractor.go
- word.go