Documentation ¶
Index ¶
- Variables
- type AnalyzeExpenseOutput
- type AnalyzeIDOutput
- type BoundingBox
- func (bb *BoundingBox) Area() float64
- func (bb *BoundingBox) Bottom() float64
- func (bb *BoundingBox) Height() float64
- func (bb *BoundingBox) HorizontalCenter() float64
- func (bb *BoundingBox) Intersection(other *BoundingBox) *BoundingBox
- func (bb *BoundingBox) Left() float64
- func (bb *BoundingBox) Right() float64
- func (bb *BoundingBox) String() string
- func (bb *BoundingBox) Top() float64
- func (bb *BoundingBox) VerticalCenter() float64
- func (bb *BoundingBox) Width() float64
- type BoundingBoxAccessor
- type Cell
- type Document
- func (d *Document) KeyValues() []*KeyValue
- func (d *Document) Lines() []*Line
- func (d *Document) Pages() []*Page
- func (d *Document) Signatures() []*Signature
- func (d *Document) Tables() []*Table
- func (d *Document) Text(optFns ...func(*TextLinearizationOptions)) string
- func (d *Document) Words() []*Word
- type DocumentAPIOutput
- type ExpenseDocument
- type IdentityDocument
- type IdentityDocumentField
- func (idf *IdentityDocumentField) Confidence() float32
- func (idf *IdentityDocumentField) FieldType() IdentityDocumentFieldType
- func (idf *IdentityDocumentField) IsNormalized() bool
- func (idf *IdentityDocumentField) NormalizedValue() *NormalizedIdentityDocumentFieldValue
- func (idf *IdentityDocumentField) Value() string
- type IdentityDocumentFieldType
- type IdentityDocumentType
- type Key
- func (b *Key) BlockType() types.BlockType
- func (b *Key) BoundingBox() *BoundingBox
- func (b *Key) Confidence() float64
- func (b *Key) ID() string
- func (b *Key) PageNumber() int
- func (b *Key) Polygon() []*Point
- func (b *Key) Raw() types.Block
- func (k *Key) String() string
- func (k *Key) Text() string
- func (k *Key) Words() []*Word
- type KeyValue
- func (b *KeyValue) BlockType() types.BlockType
- func (kv *KeyValue) BoundingBox() *BoundingBox
- func (kv *KeyValue) Confidence() float64
- func (b *KeyValue) ID() string
- func (kv *KeyValue) Key() *Key
- func (b *KeyValue) PageNumber() int
- func (kv *KeyValue) Polygon() []*Point
- func (b *KeyValue) Raw() types.Block
- func (kv *KeyValue) Text(optFns ...func(*TextLinearizationOptions)) string
- func (kv *KeyValue) Value() *Value
- func (kv *KeyValue) Words() []*Word
- type Layout
- func (l *Layout) AddChildren(children ...LayoutChild)
- func (b *Layout) BlockType() types.BlockType
- func (b *Layout) BoundingBox() *BoundingBox
- func (b *Layout) Confidence() float64
- func (b *Layout) ID() string
- func (b *Layout) PageNumber() int
- func (b *Layout) Polygon() []*Point
- func (b *Layout) Raw() types.Block
- func (l *Layout) Text(optFns ...func(*TextLinearizationOptions)) string
- type LayoutChild
- type Line
- func (b *Line) BlockType() types.BlockType
- func (b *Line) BoundingBox() *BoundingBox
- func (b *Line) Confidence() float64
- func (b *Line) ID() string
- func (b *Line) PageNumber() int
- func (b *Line) Polygon() []*Point
- func (b *Line) Raw() types.Block
- func (l *Line) Text(_ ...func(*TextLinearizationOptions)) string
- func (l *Line) Words() []*Word
- type NormalizedIdentityDocumentFieldValue
- type Orientation
- type Page
- func (p *Page) AddLayouts(layouts ...*Layout)
- func (p *Page) Height() float64
- func (p *Page) ID() string
- func (p *Page) KeyValues() []*KeyValue
- func (p *Page) Layouts() []*Layout
- func (p *Page) Lines() []*Line
- func (p *Page) Number() int
- func (p *Page) Queries() []*Query
- func (p *Page) SearchValueByKey(key string) []*KeyValue
- func (p *Page) Signatures() []*Signature
- func (p *Page) Tables() []*Table
- func (p *Page) Text(optFns ...func(*TextLinearizationOptions)) string
- func (p *Page) Width() float64
- func (p *Page) Words() []*Word
- type Point
- type Query
- type QueryResult
- func (b *QueryResult) BlockType() types.BlockType
- func (b *QueryResult) BoundingBox() *BoundingBox
- func (b *QueryResult) Confidence() float64
- func (b *QueryResult) ID() string
- func (b *QueryResult) PageNumber() int
- func (b *QueryResult) Polygon() []*Point
- func (b *QueryResult) Raw() types.Block
- func (qr *QueryResult) Text() string
- type SelectionElement
- func (b *SelectionElement) BlockType() types.BlockType
- func (b *SelectionElement) BoundingBox() *BoundingBox
- func (b *SelectionElement) Confidence() float64
- func (b *SelectionElement) ID() string
- func (se *SelectionElement) IsSelected() bool
- func (b *SelectionElement) PageNumber() int
- func (b *SelectionElement) Polygon() []*Point
- func (b *SelectionElement) Raw() types.Block
- func (se *SelectionElement) Status() types.SelectionStatus
- func (se *SelectionElement) Text(optFns ...func(*TextLinearizationOptions)) string
- type Signature
- func (b *Signature) BlockType() types.BlockType
- func (b *Signature) BoundingBox() *BoundingBox
- func (b *Signature) Confidence() float64
- func (b *Signature) ID() string
- func (b *Signature) PageNumber() int
- func (b *Signature) Polygon() []*Point
- func (b *Signature) Raw() types.Block
- func (s *Signature) Text(optFns ...func(*TextLinearizationOptions)) string
- func (s *Signature) Words() []*Word
- type Table
- func (b *Table) BlockType() types.BlockType
- func (b *Table) BoundingBox() *BoundingBox
- func (t *Table) CellAt(rowIndex, columnIndex int) Cell
- func (b *Table) Confidence() float64
- func (b *Table) ID() string
- func (b *Table) PageNumber() int
- func (b *Table) Polygon() []*Point
- func (b *Table) Raw() types.Block
- func (t *Table) RowCellsAt(rowIndex int) []Cell
- func (t *Table) RowCount() int
- func (t *Table) Rows() []*TableRow
- func (t *Table) Text(optFns ...func(*TextLinearizationOptions)) string
- func (t *Table) Words() []*Word
- type TableCell
- func (c *TableCell) IsColumnHeader() bool
- func (c *TableCell) IsTableFooter() bool
- func (c *TableCell) IsTableSectionTitle() bool
- func (c *TableCell) IsTableSummary() bool
- func (c *TableCell) IsTableTitle() bool
- func (tc *TableCell) Text(optFns ...func(*TextLinearizationOptions)) string
- func (tc *TableCell) Words() []*Word
- type TableFooter
- func (b *TableFooter) BlockType() types.BlockType
- func (b *TableFooter) BoundingBox() *BoundingBox
- func (b *TableFooter) Confidence() float64
- func (b *TableFooter) ID() string
- func (b *TableFooter) PageNumber() int
- func (b *TableFooter) Polygon() []*Point
- func (b *TableFooter) Raw() types.Block
- func (tf *TableFooter) Text(_ ...func(*TextLinearizationOptions)) string
- func (tf *TableFooter) Words() []*Word
- type TableMergedCell
- func (c *TableMergedCell) IsColumnHeader() bool
- func (c *TableMergedCell) IsTableFooter() bool
- func (c *TableMergedCell) IsTableSectionTitle() bool
- func (c *TableMergedCell) IsTableSummary() bool
- func (c *TableMergedCell) IsTableTitle() bool
- func (tmc *TableMergedCell) Text(_ ...func(*TextLinearizationOptions)) string
- func (tmc *TableMergedCell) Words() []*Word
- type TableRow
- type TableTitle
- func (b *TableTitle) BlockType() types.BlockType
- func (b *TableTitle) BoundingBox() *BoundingBox
- func (b *TableTitle) Confidence() float64
- func (b *TableTitle) ID() string
- func (b *TableTitle) PageNumber() int
- func (b *TableTitle) Polygon() []*Point
- func (b *TableTitle) Raw() types.Block
- func (tt *TableTitle) Text(_ ...func(*TextLinearizationOptions)) string
- func (tt *TableTitle) Words() []*Word
- type TextLinearizationOptions
- type Value
- func (b *Value) BlockType() types.BlockType
- func (b *Value) BoundingBox() *BoundingBox
- func (b *Value) Confidence() float64
- func (b *Value) ID() string
- func (b *Value) PageNumber() int
- func (b *Value) Polygon() []*Point
- func (b *Value) Raw() types.Block
- func (v *Value) String() string
- func (v *Value) Text(optFns ...func(*TextLinearizationOptions)) string
- func (v *Value) Words() []*Word
- type Word
- func (b *Word) BlockType() types.BlockType
- func (b *Word) BoundingBox() *BoundingBox
- func (b *Word) Confidence() float64
- func (b *Word) ID() string
- func (w *Word) IsHandwriting() bool
- func (w *Word) IsPrinted() bool
- func (b *Word) PageNumber() int
- func (b *Word) Polygon() []*Point
- func (b *Word) Raw() types.Block
- func (w *Word) Text() string
- func (w *Word) TextType() types.TextType
Constants ¶
This section is empty.
Variables ¶
var DefaultLinerizationOptions = TextLinearizationOptions{ LinearizeTables: true, LinearizeKeyValues: true, RemoveNewLinesInListElements: true, MaxNumberOfConsecutiveNewLines: 2, HideHeaderLayout: false, HideFooterLayout: false, HideFigureLayout: false, HidePageNumberLayout: false, PageNumberPrefix: "", PageNumberSuffix: "", SameParagraphSeparator: " ", LayoutElementSeparator: "\n\n", ListElementSeparator: "\n", ListLayoutPrefix: "", ListLayoutSuffix: "", ListElementPrefix: "", ListElementSuffix: "", TitlePrefix: "", TitleSuffix: "", TableLayoutPrefix: "\n\n", TableLayoutSuffix: "\n", TableLinearizationFormat: "plaintext", TableMinTableWords: 0, TableColumnSeparator: "\t", TablePrefix: "", TableSuffix: "", TableRowSeparator: "\n", TableRowPrefix: "", TableRowSuffix: "", TableCellPrefix: "", TableCellSuffix: "", SectionHeaderPrefix: "", SectionHeaderSuffix: "", TextPrefix: "", TextSuffix: "", KeyValueLayoutPrefix: "\n\n", KeyValueLayoutSuffix: "", KeyValuePrefix: "", KeyValueSuffix: "", KeyPrefix: "", KeySuffix: "", ValuePrefix: "", ValueSuffix: "", SelectionElementSelected: "[X]", SelectionElementNotSelected: "[ ]", HeuristicHTolerance: 0.3, HeuristicLineBreakThreshold: 0.9, HeuristicOverlapRatio: 0.5, SignatureToken: "[SIGNATURE]", }
Functions ¶
This section is empty.
Types ¶
type AnalyzeExpenseOutput ¶ added in v0.0.3
type AnalyzeExpenseOutput struct { DocumentMetadata *types.DocumentMetadata `json:"DocumentMetadata"` ExpenseDocuments []types.ExpenseDocument `json:"ExpenseDocuments"` }
type AnalyzeIDOutput ¶ added in v0.0.4
type AnalyzeIDOutput struct { DocumentMetadata *types.DocumentMetadata `json:"DocumentMetadata"` IdentityDocuments []types.IdentityDocument `json:"IdentityDocuments"` }
type BoundingBox ¶
type BoundingBox struct {
// contains filtered or unexported fields
}
func NewEnclosingBoundingBox ¶ added in v0.0.4
func NewEnclosingBoundingBox[T BoundingBoxAccessor](accessors ...T) *BoundingBox
NewEnclosingBoundingBox returns a new bounding box that represents the union of multiple bounding boxes.
func (*BoundingBox) Area ¶ added in v0.0.4
func (bb *BoundingBox) Area() float64
Area calculates and returns the area of the bounding box. If either the width or height of the bounding box is less than zero, the area is considered zero to prevent negative area values.
func (*BoundingBox) Bottom ¶
func (bb *BoundingBox) Bottom() float64
Bottom returns the bottom coordinate of the bounding box.
func (*BoundingBox) Height ¶
func (bb *BoundingBox) Height() float64
func (*BoundingBox) HorizontalCenter ¶
func (bb *BoundingBox) HorizontalCenter() float64
HorizontalCenter returns the horizontal center coordinate of the bounding box.
func (*BoundingBox) Intersection ¶
func (bb *BoundingBox) Intersection(other *BoundingBox) *BoundingBox
Intersection returns a new bounding box that represents the intersection of two bounding boxes.
func (*BoundingBox) Left ¶
func (bb *BoundingBox) Left() float64
func (*BoundingBox) Right ¶
func (bb *BoundingBox) Right() float64
Right returns the right coordinate of the bounding box.
func (*BoundingBox) String ¶ added in v0.0.2
func (bb *BoundingBox) String() string
String returns a string representation of the bounding box.
func (*BoundingBox) Top ¶
func (bb *BoundingBox) Top() float64
func (*BoundingBox) VerticalCenter ¶
func (bb *BoundingBox) VerticalCenter() float64
VerticalCenter returns the vertical center coordinate of the bounding box.
func (*BoundingBox) Width ¶
func (bb *BoundingBox) Width() float64
type BoundingBoxAccessor ¶ added in v0.0.4
type BoundingBoxAccessor interface {
BoundingBox() *BoundingBox
}
type Cell ¶
type Cell interface { Words() []*Word Text(optFns ...func(*TextLinearizationOptions)) string Confidence() float64 }
type Document ¶
type Document struct {
// contains filtered or unexported fields
}
func ParseDocumentAPIOutput ¶ added in v0.0.4
func ParseDocumentAPIOutput(output *DocumentAPIOutput) (*Document, error)
func (*Document) Signatures ¶ added in v0.0.4
func (d *Document) Signatures() []*Signature
func (*Document) Text ¶ added in v0.0.4
func (d *Document) Text(optFns ...func(*TextLinearizationOptions)) string
type DocumentAPIOutput ¶ added in v0.0.4
type DocumentAPIOutput struct { DocumentMetadata *types.DocumentMetadata `json:"DocumentMetadata"` Blocks []types.Block `json:"Blocks"` }
type ExpenseDocument ¶ added in v0.0.6
type ExpenseDocument struct {
// contains filtered or unexported fields
}
func ParseAnalyzeExpenseOutput ¶ added in v0.0.6
func ParseAnalyzeExpenseOutput(output *AnalyzeExpenseOutput) ([]*ExpenseDocument, error)
type IdentityDocument ¶ added in v0.0.2
type IdentityDocument struct {
// contains filtered or unexported fields
}
func ParseAnalyzeIDOutput ¶ added in v0.0.4
func ParseAnalyzeIDOutput(output *AnalyzeIDOutput) ([]*IdentityDocument, error)
func (*IdentityDocument) Document ¶ added in v0.0.4
func (id *IdentityDocument) Document() *Document
func (*IdentityDocument) FieldByType ¶ added in v0.0.3
func (id *IdentityDocument) FieldByType(ft IdentityDocumentFieldType) *IdentityDocumentField
func (*IdentityDocument) Fields ¶ added in v0.0.3
func (id *IdentityDocument) Fields() []*IdentityDocumentField
func (*IdentityDocument) IdentityDocumentType ¶ added in v0.0.4
func (id *IdentityDocument) IdentityDocumentType() IdentityDocumentType
type IdentityDocumentField ¶ added in v0.0.3
type IdentityDocumentField struct {
// contains filtered or unexported fields
}
IdentityDocumentField represents a field extracted from an identity document by Textract.
func (*IdentityDocumentField) Confidence ¶ added in v0.0.3
func (idf *IdentityDocumentField) Confidence() float32
Confidence returns the confidence score associated with the field extraction.
func (*IdentityDocumentField) FieldType ¶ added in v0.0.4
func (idf *IdentityDocumentField) FieldType() IdentityDocumentFieldType
FieldType returns the type of the identity document field.
func (*IdentityDocumentField) IsNormalized ¶ added in v0.0.3
func (idf *IdentityDocumentField) IsNormalized() bool
IsNormalized checks if the field value is normalized.
func (*IdentityDocumentField) NormalizedValue ¶ added in v0.0.3
func (idf *IdentityDocumentField) NormalizedValue() *NormalizedIdentityDocumentFieldValue
NormalizedValue returns the normalized value of the identity document field.
func (*IdentityDocumentField) Value ¶ added in v0.0.3
func (idf *IdentityDocumentField) Value() string
Value returns the value of the identity document field.
type IdentityDocumentFieldType ¶ added in v0.0.3
type IdentityDocumentFieldType string
IdentityDocumentFieldType represents the type of fields in an identity document.
const ( IdentityDocumentFieldTypeFirstName IdentityDocumentFieldType = "FIRST_NAME" IdentityDocumentFieldTypeLastName IdentityDocumentFieldType = "LAST_NAME" IdentityDocumentFieldTypeMiddleName IdentityDocumentFieldType = "MIDDLE_NAME" IdentityDocumentFieldTypeSuffix IdentityDocumentFieldType = "Suffix" IdentityDocumentFieldTypeCityInAddress IdentityDocumentFieldType = "CITY_IN_ADDRESS" IdentityDocumentFieldTypeZipCodeInAddress IdentityDocumentFieldType = "ZIP_CODE_IN_ADDRESS" IdentityDocumentFieldTypeStateInAddress IdentityDocumentFieldType = "STATE_IN_ADDRESS" IdentityDocumentFieldTypeStateName IdentityDocumentFieldType = "STATE_NAME" IdentityDocumentFieldTypeDocumentNumber IdentityDocumentFieldType = "DOCUMENT_NUMBER" IdentityDocumentFieldTypeExpirationDate IdentityDocumentFieldType = "EXPIRATION_DATE" IdentityDocumentFieldTypeDateOfBirth IdentityDocumentFieldType = "DATE_OF_BIRTH" IdentityDocumentFieldTypeDateOfIssue IdentityDocumentFieldType = "DATE_OF_ISSUE" IdentityDocumentFieldTypeIDType IdentityDocumentFieldType = "ID_TYPE" IdentityDocumentFieldTypeEndorsements IdentityDocumentFieldType = "ENDORSEMENTS" IdentityDocumentFieldTypeVeteran IdentityDocumentFieldType = "VETERAN" IdentityDocumentFieldTypeRestrictions IdentityDocumentFieldType = "RESTRICTIONS" IdentityDocumentFieldTypeClass IdentityDocumentFieldType = "CLASS" IdentityDocumentFieldTypeAddress IdentityDocumentFieldType = "ADDRESS" IdentityDocumentFieldTypeCounty IdentityDocumentFieldType = "COUNTY" IdentityDocumentFieldTypePlaceOfBirth IdentityDocumentFieldType = "PLACE_OF_BIRTH" IdentityDocumentFieldTypeMRZCode IdentityDocumentFieldType = "MRZ_CODE" IdentityDocumentFieldTypeOther IdentityDocumentFieldType = "Other" )
type IdentityDocumentType ¶ added in v0.0.3
type IdentityDocumentType string
IdentityDocumentType represents the type of an identity document.
const ( IdentityDocumentTypeDriverLicenseFront IdentityDocumentType = "DRIVER LICENSE FRONT" IdentityDocumentTypePassport IdentityDocumentType = "PASSPORT" IdentityDocumentTypeOther IdentityDocumentType = "OTHER" )
type Key ¶ added in v0.0.4
type Key struct {
// contains filtered or unexported fields
}
func (*Key) BoundingBox ¶ added in v0.0.4
func (b *Key) BoundingBox() *BoundingBox
BoundingBox returns the bounding box information of the block.
func (*Key) Confidence ¶ added in v0.0.4
func (b *Key) Confidence() float64
Confidence returns the confidence of the block.
func (*Key) PageNumber ¶ added in v0.0.4
func (b *Key) PageNumber() int
PageNumber returns the page number associated with the block.
func (*Key) Polygon ¶ added in v0.0.4
func (b *Key) Polygon() []*Point
Polygon returns the polygon information of the block.
type KeyValue ¶ added in v0.0.4
type KeyValue struct {
// contains filtered or unexported fields
}
func (*KeyValue) BoundingBox ¶ added in v0.0.4
func (kv *KeyValue) BoundingBox() *BoundingBox
func (*KeyValue) Confidence ¶ added in v0.0.4
func (kv *KeyValue) Confidence() float64
Confidence calculates the confidence score for a key value.
func (*KeyValue) ID ¶ added in v0.0.4
func (b *KeyValue) ID() string
ID returns the identifier of the block.
func (*KeyValue) PageNumber ¶ added in v0.0.4
func (b *KeyValue) PageNumber() int
PageNumber returns the page number associated with the block.
func (*KeyValue) Text ¶ added in v0.0.5
func (kv *KeyValue) Text(optFns ...func(*TextLinearizationOptions)) string
type Layout ¶ added in v0.0.4
type Layout struct {
// contains filtered or unexported fields
}
func (*Layout) AddChildren ¶ added in v0.0.4
func (l *Layout) AddChildren(children ...LayoutChild)
func (*Layout) BoundingBox ¶ added in v0.0.4
func (b *Layout) BoundingBox() *BoundingBox
BoundingBox returns the bounding box information of the block.
func (*Layout) Confidence ¶ added in v0.0.4
func (b *Layout) Confidence() float64
Confidence returns the confidence of the block.
func (*Layout) ID ¶ added in v0.0.4
func (b *Layout) ID() string
ID returns the identifier of the block.
func (*Layout) PageNumber ¶ added in v0.0.4
func (b *Layout) PageNumber() int
PageNumber returns the page number associated with the block.
func (*Layout) Polygon ¶ added in v0.0.4
func (b *Layout) Polygon() []*Point
Polygon returns the polygon information of the block.
func (*Layout) Text ¶ added in v0.0.4
func (l *Layout) Text(optFns ...func(*TextLinearizationOptions)) string
type LayoutChild ¶ added in v0.0.4
type LayoutChild interface { ID() string Text(optFns ...func(*TextLinearizationOptions)) string BoundingBox() *BoundingBox }
type Line ¶
type Line struct {
// contains filtered or unexported fields
}
func (*Line) BoundingBox ¶ added in v0.0.4
func (b *Line) BoundingBox() *BoundingBox
BoundingBox returns the bounding box information of the block.
func (*Line) Confidence ¶
func (b *Line) Confidence() float64
Confidence returns the confidence of the block.
func (*Line) PageNumber ¶ added in v0.0.4
func (b *Line) PageNumber() int
PageNumber returns the page number associated with the block.
func (*Line) Polygon ¶ added in v0.0.4
func (b *Line) Polygon() []*Point
Polygon returns the polygon information of the block.
func (*Line) Text ¶
func (l *Line) Text(_ ...func(*TextLinearizationOptions)) string
type NormalizedIdentityDocumentFieldValue ¶ added in v0.0.4
type NormalizedIdentityDocumentFieldValue struct {
// contains filtered or unexported fields
}
NormalizedIdentityDocumentFieldValue represents a normalized value of an identity document field.
func (NormalizedIdentityDocumentFieldValue) DateValue ¶ added in v0.0.4
func (nidfv NormalizedIdentityDocumentFieldValue) DateValue() (time.Time, error)
DateValue returns the time representation of the normalized date value.
func (NormalizedIdentityDocumentFieldValue) Value ¶ added in v0.0.4
func (nidfv NormalizedIdentityDocumentFieldValue) Value() string
Value returns the string representation of the normalized value.
func (NormalizedIdentityDocumentFieldValue) ValueType ¶ added in v0.0.4
func (nidfv NormalizedIdentityDocumentFieldValue) ValueType() types.ValueType
ValueType returns the type of the normalized value.
type Orientation ¶
type Orientation struct {
// contains filtered or unexported fields
}
Orientation represents the orientation of a geometric element.
func (*Orientation) Degrees ¶
func (o *Orientation) Degrees() float64
Degrees returns the orientation in degrees.
func (*Orientation) Radians ¶
func (o *Orientation) Radians() float64
Radians returns the orientation in radians.
type Page ¶
type Page struct {
// contains filtered or unexported fields
}
func (*Page) AddLayouts ¶ added in v0.0.4
func (p *Page) AddLayouts(layouts ...*Layout)
func (*Page) SearchValueByKey ¶ added in v0.0.4
func (p *Page) SearchValueByKey(key string) []*KeyValue
func (*Page) Signatures ¶ added in v0.0.2
func (p *Page) Signatures() []*Signature
func (*Page) Text ¶
func (p *Page) Text(optFns ...func(*TextLinearizationOptions)) string
type Point ¶
type Point struct {
// contains filtered or unexported fields
}
Point represents a 2D point.
type Query ¶ added in v0.0.2
type Query struct {
// contains filtered or unexported fields
}
Query represents a query with associated information, including an identifier, text, alias, query pages, results, a page, and raw block data.
func (*Query) ResultsByConfidence ¶ added in v0.0.2
func (q *Query) ResultsByConfidence() []*QueryResult
ResultsByConfidence lists this query instance's results, sorted from most to least confident.
func (*Query) TopResult ¶ added in v0.0.2
func (q *Query) TopResult() *QueryResult
TopResult retrieves the top result by confidence score, if any are available.
type QueryResult ¶ added in v0.0.2
type QueryResult struct {
// contains filtered or unexported fields
}
QueryResult represents the result of a parsed query.
func (*QueryResult) BoundingBox ¶ added in v0.0.4
func (b *QueryResult) BoundingBox() *BoundingBox
BoundingBox returns the bounding box information of the block.
func (*QueryResult) Confidence ¶ added in v0.0.2
func (b *QueryResult) Confidence() float64
Confidence returns the confidence of the block.
func (*QueryResult) ID ¶ added in v0.0.2
func (b *QueryResult) ID() string
ID returns the identifier of the block.
func (*QueryResult) PageNumber ¶ added in v0.0.4
func (b *QueryResult) PageNumber() int
PageNumber returns the page number associated with the block.
func (*QueryResult) Polygon ¶ added in v0.0.4
func (b *QueryResult) Polygon() []*Point
Polygon returns the polygon information of the block.
func (*QueryResult) Text ¶ added in v0.0.2
func (qr *QueryResult) Text() string
Text returns the extracted text from the query result.
type SelectionElement ¶
type SelectionElement struct {
// contains filtered or unexported fields
}
SelectionElement represents an element with selection status.
func (*SelectionElement) BoundingBox ¶ added in v0.0.4
func (b *SelectionElement) BoundingBox() *BoundingBox
BoundingBox returns the bounding box information of the block.
func (*SelectionElement) Confidence ¶
func (b *SelectionElement) Confidence() float64
Confidence returns the confidence of the block.
func (*SelectionElement) ID ¶
func (b *SelectionElement) ID() string
ID returns the identifier of the block.
func (*SelectionElement) IsSelected ¶
func (se *SelectionElement) IsSelected() bool
IsSelected checks if the element is selected.
func (*SelectionElement) PageNumber ¶ added in v0.0.4
func (b *SelectionElement) PageNumber() int
PageNumber returns the page number associated with the block.
func (*SelectionElement) Polygon ¶ added in v0.0.4
func (b *SelectionElement) Polygon() []*Point
Polygon returns the polygon information of the block.
func (*SelectionElement) Status ¶
func (se *SelectionElement) Status() types.SelectionStatus
Status returns the selection status of the element.
func (*SelectionElement) Text ¶ added in v0.0.5
func (se *SelectionElement) Text(optFns ...func(*TextLinearizationOptions)) string
Text returns the text representation of the selection element. It considers the selection status and applies linearization options.
type Signature ¶ added in v0.0.2
type Signature struct {
// contains filtered or unexported fields
}
func (*Signature) BoundingBox ¶ added in v0.0.4
func (b *Signature) BoundingBox() *BoundingBox
BoundingBox returns the bounding box information of the block.
func (*Signature) Confidence ¶ added in v0.0.2
func (b *Signature) Confidence() float64
Confidence returns the confidence of the block.
func (*Signature) ID ¶ added in v0.0.2
func (b *Signature) ID() string
ID returns the identifier of the block.
func (*Signature) PageNumber ¶ added in v0.0.4
func (b *Signature) PageNumber() int
PageNumber returns the page number associated with the block.
func (*Signature) Polygon ¶ added in v0.0.4
func (b *Signature) Polygon() []*Point
Polygon returns the polygon information of the block.
func (*Signature) Text ¶ added in v0.0.4
func (s *Signature) Text(optFns ...func(*TextLinearizationOptions)) string
type Table ¶
type Table struct {
// contains filtered or unexported fields
}
func (*Table) BoundingBox ¶ added in v0.0.4
func (b *Table) BoundingBox() *BoundingBox
BoundingBox returns the bounding box information of the block.
func (*Table) Confidence ¶
func (b *Table) Confidence() float64
Confidence returns the confidence of the block.
func (*Table) PageNumber ¶ added in v0.0.4
func (b *Table) PageNumber() int
PageNumber returns the page number associated with the block.
func (*Table) Polygon ¶ added in v0.0.4
func (b *Table) Polygon() []*Point
Polygon returns the polygon information of the block.
func (*Table) RowCellsAt ¶ added in v0.0.6
func (t *Table) RowCellsAt(rowIndex int) []Cell
func (*Table) Text ¶ added in v0.0.5
func (t *Table) Text(optFns ...func(*TextLinearizationOptions)) string
type TableCell ¶ added in v0.0.4
type TableCell struct {
// contains filtered or unexported fields
}
func (*TableCell) IsColumnHeader ¶ added in v0.0.4
func (c *TableCell) IsColumnHeader() bool
func (*TableCell) IsTableFooter ¶ added in v0.0.4
func (c *TableCell) IsTableFooter() bool
func (*TableCell) IsTableSectionTitle ¶ added in v0.0.4
func (c *TableCell) IsTableSectionTitle() bool
func (*TableCell) IsTableSummary ¶ added in v0.0.4
func (c *TableCell) IsTableSummary() bool
func (*TableCell) IsTableTitle ¶ added in v0.0.4
func (c *TableCell) IsTableTitle() bool
func (*TableCell) Text ¶ added in v0.0.4
func (tc *TableCell) Text(optFns ...func(*TextLinearizationOptions)) string
type TableFooter ¶ added in v0.0.4
type TableFooter struct {
// contains filtered or unexported fields
}
TableFooter represents the footer of a table block.
func (*TableFooter) BoundingBox ¶ added in v0.0.4
func (b *TableFooter) BoundingBox() *BoundingBox
BoundingBox returns the bounding box information of the block.
func (*TableFooter) Confidence ¶ added in v0.0.4
func (b *TableFooter) Confidence() float64
Confidence returns the confidence of the block.
func (*TableFooter) ID ¶ added in v0.0.4
func (b *TableFooter) ID() string
ID returns the identifier of the block.
func (*TableFooter) PageNumber ¶ added in v0.0.4
func (b *TableFooter) PageNumber() int
PageNumber returns the page number associated with the block.
func (*TableFooter) Polygon ¶ added in v0.0.4
func (b *TableFooter) Polygon() []*Point
Polygon returns the polygon information of the block.
func (*TableFooter) Text ¶ added in v0.0.4
func (tf *TableFooter) Text(_ ...func(*TextLinearizationOptions)) string
Text returns the concatenated text of all words in the table footer.
func (*TableFooter) Words ¶ added in v0.0.6
func (tf *TableFooter) Words() []*Word
Words returns the words within the table footer.
type TableMergedCell ¶ added in v0.0.6
type TableMergedCell struct {
// contains filtered or unexported fields
}
func (*TableMergedCell) IsColumnHeader ¶ added in v0.0.6
func (c *TableMergedCell) IsColumnHeader() bool
func (*TableMergedCell) IsTableFooter ¶ added in v0.0.6
func (c *TableMergedCell) IsTableFooter() bool
func (*TableMergedCell) IsTableSectionTitle ¶ added in v0.0.6
func (c *TableMergedCell) IsTableSectionTitle() bool
func (*TableMergedCell) IsTableSummary ¶ added in v0.0.6
func (c *TableMergedCell) IsTableSummary() bool
func (*TableMergedCell) IsTableTitle ¶ added in v0.0.6
func (c *TableMergedCell) IsTableTitle() bool
func (*TableMergedCell) Text ¶ added in v0.0.6
func (tmc *TableMergedCell) Text(_ ...func(*TextLinearizationOptions)) string
func (*TableMergedCell) Words ¶ added in v0.0.6
func (tmc *TableMergedCell) Words() []*Word
type TableTitle ¶ added in v0.0.4
type TableTitle struct {
// contains filtered or unexported fields
}
TableTitle represents the title of a table, containing a collection of words.
func (*TableTitle) BoundingBox ¶ added in v0.0.4
func (b *TableTitle) BoundingBox() *BoundingBox
BoundingBox returns the bounding box information of the block.
func (*TableTitle) Confidence ¶ added in v0.0.4
func (b *TableTitle) Confidence() float64
Confidence returns the confidence of the block.
func (*TableTitle) ID ¶ added in v0.0.4
func (b *TableTitle) ID() string
ID returns the identifier of the block.
func (*TableTitle) PageNumber ¶ added in v0.0.4
func (b *TableTitle) PageNumber() int
PageNumber returns the page number associated with the block.
func (*TableTitle) Polygon ¶ added in v0.0.4
func (b *TableTitle) Polygon() []*Point
Polygon returns the polygon information of the block.
func (*TableTitle) Text ¶ added in v0.0.4
func (tt *TableTitle) Text(_ ...func(*TextLinearizationOptions)) string
Text returns the concatenated text of the table title. Any linearization option functions passed in are ignored, since the parameter is bound to the blank identifier.
func (*TableTitle) Words ¶ added in v0.0.6
func (tt *TableTitle) Words() []*Word
Words returns the words constituting the table title.
type TextLinearizationOptions ¶ added in v0.0.4
type TextLinearizationOptions struct {
	// LinearizeTables includes tables in the linearized output.
	LinearizeTables bool
	// LinearizeKeyValues includes form keys and values in the linearized output.
	LinearizeKeyValues bool
	// RemoveNewLinesInListElements removes new lines in list elements.
	RemoveNewLinesInListElements bool
	// MaxNumberOfConsecutiveNewLines sets the maximum number of consecutive new lines to keep, removing extra whitespace.
	MaxNumberOfConsecutiveNewLines int
	// HideHeaderLayout hides headers in the linearized output.
	HideHeaderLayout bool
	// HideFooterLayout hides footers in the linearized output.
	HideFooterLayout bool
	// HideFigureLayout hides figures in the linearized output.
	HideFigureLayout bool
	// HidePageNumberLayout hides page numbers in the linearized output.
	HidePageNumberLayout bool
	// PageNumberPrefix is the prefix for page number layout elements.
	PageNumberPrefix string
	// PageNumberSuffix is the suffix for page number layout elements.
	PageNumberSuffix string
	// SameParagraphSeparator is the separator to use when combining elements within a text block.
	SameParagraphSeparator string
	// LayoutElementSeparator is the separator to use when combining linearized layout elements.
	LayoutElementSeparator string
	// ListElementSeparator is the separator for elements in a list layout.
	ListElementSeparator string
	// ListLayoutPrefix is the prefix for list layout elements (parent).
	ListLayoutPrefix string
	// ListLayoutSuffix is the suffix for list layout elements (parent).
	ListLayoutSuffix string
	// ListElementPrefix is the prefix for elements in a list layout (children).
	ListElementPrefix string
	// ListElementSuffix is the suffix for elements in a list layout (children).
	ListElementSuffix string
	// TitlePrefix is the prefix for title layout elements.
	TitlePrefix string
	// TitleSuffix is the suffix for title layout elements.
	TitleSuffix string
	// TableLayoutPrefix is the prefix for table elements.
	TableLayoutPrefix string
	// TableLayoutSuffix is the suffix for table elements.
	TableLayoutSuffix string
	// TableLinearizationFormat sets how to represent tables in the linearized output. Choices are plaintext or markdown.
	TableLinearizationFormat string
	// TableMinTableWords is the threshold below which tables will be rendered as words instead of using table layout.
	TableMinTableWords int
	// TableColumnSeparator is the table column separator, used when linearizing layout tables, not used if AnalyzeDocument was called with the TABLES feature.
	TableColumnSeparator string
	// TablePrefix is the prefix for table layout.
	TablePrefix string
	// TableSuffix is the suffix for table layout.
	TableSuffix string
	// TableRowSeparator is the table row separator.
	TableRowSeparator string
	// TableRowPrefix is the prefix for table row.
	TableRowPrefix string
	// TableRowSuffix is the suffix for table row.
	TableRowSuffix string
	// TableCellPrefix is the prefix for table cell.
	TableCellPrefix string
	// TableCellSuffix is the suffix for table cell.
	TableCellSuffix string
	// SectionHeaderPrefix is the prefix for section header layout elements.
	SectionHeaderPrefix string
	// SectionHeaderSuffix is the suffix for section header layout elements.
	SectionHeaderSuffix string
	// TextPrefix is the prefix for text layout elements.
	TextPrefix string
	// TextSuffix is the suffix for text layout elements.
	TextSuffix string
	// KeyValueLayoutPrefix is the prefix for key_value layout elements (not for individual key-value elements).
	KeyValueLayoutPrefix string
	// KeyValueLayoutSuffix is the suffix for key_value layout elements (not for individual key-value elements).
	KeyValueLayoutSuffix string
	// KeyValuePrefix is the prefix for key-value elements.
	KeyValuePrefix string
	// KeyValueSuffix is the suffix for key-value elements.
	KeyValueSuffix string
	// KeyPrefix is the prefix for key elements.
	KeyPrefix string
	// KeySuffix is the suffix for key elements.
	KeySuffix string
	// ValuePrefix is the prefix for value elements.
	ValuePrefix string
	// ValueSuffix is the suffix for value elements.
	ValueSuffix string
	// SelectionElementSelected is the representation for selection elements when selected.
	SelectionElementSelected string
	// SelectionElementNotSelected is the representation for selection elements when not selected.
	SelectionElementNotSelected string
	// HeuristicHTolerance sets how much the line below and above the current line should differ in width to be separated.
	HeuristicHTolerance float64
	// HeuristicLineBreakThreshold sets how much space is acceptable between two lines before splitting them. Expressed in multiples of the minimum height.
	HeuristicLineBreakThreshold float64
	// HeuristicOverlapRatio sets how much vertical overlap is tolerated between two subsequent lines before merging them into a single line.
	HeuristicOverlapRatio float64
	// SignatureToken is the signature representation in the linearized text.
	SignatureToken string
}
TextLinearizationOptions defines how a document is linearized into a text string.
type Value ¶ added in v0.0.4
type Value struct {
// contains filtered or unexported fields
}
func (*Value) BoundingBox ¶ added in v0.0.4
func (b *Value) BoundingBox() *BoundingBox
BoundingBox returns the bounding box information of the block.
func (*Value) Confidence ¶ added in v0.0.4
func (b *Value) Confidence() float64
Confidence returns the confidence of the block.
func (*Value) ID ¶ added in v0.0.4
func (b *Value) ID() string
ID returns the identifier of the block.
func (*Value) PageNumber ¶ added in v0.0.4
func (b *Value) PageNumber() int
PageNumber returns the page number associated with the block.
func (*Value) Polygon ¶ added in v0.0.4
func (b *Value) Polygon() []*Point
Polygon returns the polygon information of the block.
func (*Value) Text ¶ added in v0.0.4
func (v *Value) Text(optFns ...func(*TextLinearizationOptions)) string
type Word ¶
type Word struct {
// contains filtered or unexported fields
}
Word represents a word extracted by Textract.
func (*Word) BoundingBox ¶ added in v0.0.4
func (b *Word) BoundingBox() *BoundingBox
BoundingBox returns the bounding box information of the block.
func (*Word) Confidence ¶
func (b *Word) Confidence() float64
Confidence returns the confidence of the block.
func (*Word) IsHandwriting ¶
func (w *Word) IsHandwriting() bool
IsHandwriting reports whether the word is handwritten.
func (*Word) PageNumber ¶ added in v0.0.4
func (b *Word) PageNumber() int
PageNumber returns the page number associated with the block.
Source Files ¶
- base.go
- block_parser.go
- document.go
- enums.go
- expense_document.go
- expense_document_parser.go
- geometry.go
- identity_document.go
- identity_document_field.go
- identity_document_parser.go
- key_value.go
- layout.go
- line.go
- options.go
- page.go
- page_parser.go
- query.go
- selection_element.go
- signature.go
- table.go
- table_cell.go
- table_footer.go
- table_title.go
- textractor.go
- word.go