Documentation ¶
Index ¶
- Variables
- type AnalyzeIDOutput
- type BoundingBox
- func (bb *BoundingBox) Area() float32
- func (bb *BoundingBox) Bottom() float32
- func (bb *BoundingBox) Height() float32
- func (bb *BoundingBox) HorizontalCenter() float32
- func (bb *BoundingBox) Intersection(other *BoundingBox) *BoundingBox
- func (bb *BoundingBox) Left() float32
- func (bb *BoundingBox) Right() float32
- func (bb *BoundingBox) String() string
- func (bb *BoundingBox) Top() float32
- func (bb *BoundingBox) VerticalCenter() float32
- func (bb *BoundingBox) Width() float32
- type BoundingBoxAccessor
- type Document
- func (d *Document) KeyValues() []*KeyValue
- func (d *Document) Lines() []*Line
- func (d *Document) Pages() []*Page
- func (d *Document) Signatures() []*Signature
- func (d *Document) Tables() []*Table
- func (d *Document) Text(optFns ...func(*TextLinearizationOptions)) string
- func (d *Document) Words() []*Word
- type DocumentAPIOutput
- type IdentityDocument
- type IdentityDocumentField
- func (idf *IdentityDocumentField) Confidence() float32
- func (idf *IdentityDocumentField) FieldType() IdentityDocumentFieldType
- func (idf *IdentityDocumentField) IsNormalized() bool
- func (idf *IdentityDocumentField) NormalizedValue() *NormalizedIdentityDocumentFieldValue
- func (idf *IdentityDocumentField) Value() string
- type IdentityDocumentFieldType
- type IdentityDocumentType
- type Key
- func (b *Key) BlockType() types.BlockType
- func (b *Key) BoundingBox() *BoundingBox
- func (b *Key) Confidence() float32
- func (b *Key) ID() string
- func (b *Key) PageNumber() int
- func (b *Key) Polygon() []*Point
- func (b *Key) Raw() types.Block
- func (k *Key) String() string
- func (k *Key) Text() string
- func (k *Key) Words() []*Word
- type KeyValue
- func (b *KeyValue) BlockType() types.BlockType
- func (kv *KeyValue) BoundingBox() *BoundingBox
- func (kv *KeyValue) Confidence() float32
- func (b *KeyValue) ID() string
- func (kv *KeyValue) Key() *Key
- func (b *KeyValue) PageNumber() int
- func (kv *KeyValue) Polygon() []*Point
- func (b *KeyValue) Raw() types.Block
- func (kv *KeyValue) TextAndWords(optFns ...func(*TextLinearizationOptions)) (string, []*Word)
- func (kv *KeyValue) Value() *Value
- func (kv *KeyValue) Words() []*Word
- type Layout
- func (l *Layout) AddChildren(children ...LayoutChild)
- func (b *Layout) BlockType() types.BlockType
- func (b *Layout) BoundingBox() *BoundingBox
- func (b *Layout) Confidence() float32
- func (b *Layout) ID() string
- func (b *Layout) PageNumber() int
- func (b *Layout) Polygon() []*Point
- func (b *Layout) Raw() types.Block
- func (l *Layout) Text(optFns ...func(*TextLinearizationOptions)) string
- func (l *Layout) TextAndWords(optFns ...func(*TextLinearizationOptions)) (string, []*Word)
- type LayoutChild
- type Line
- func (b *Line) BlockType() types.BlockType
- func (b *Line) BoundingBox() *BoundingBox
- func (b *Line) Confidence() float32
- func (b *Line) ID() string
- func (b *Line) PageNumber() int
- func (b *Line) Polygon() []*Point
- func (b *Line) Raw() types.Block
- func (l *Line) Text(_ ...func(*TextLinearizationOptions)) string
- func (l *Line) TextAndWords(optFns ...func(*TextLinearizationOptions)) (string, []*Word)
- func (l *Line) Words() []*Word
- type NormalizedIdentityDocumentFieldValue
- type Orientation
- type Page
- func (p *Page) AddLayouts(layouts ...*Layout)
- func (p *Page) Height() float32
- func (p *Page) ID() string
- func (p *Page) KeyValues() []*KeyValue
- func (p *Page) Layouts() []*Layout
- func (p *Page) Lines() []*Line
- func (p *Page) Number() int
- func (p *Page) Queries() []*Query
- func (p *Page) SearchValueByKey(key string) []*KeyValue
- func (p *Page) Signatures() []*Signature
- func (p *Page) Tables() []*Table
- func (p *Page) Text(optFns ...func(*TextLinearizationOptions)) string
- func (p *Page) Width() float32
- func (p *Page) Words() []*Word
- type Point
- type Query
- type QueryResult
- func (b *QueryResult) BlockType() types.BlockType
- func (b *QueryResult) BoundingBox() *BoundingBox
- func (qr *QueryResult) Confidence() float32
- func (b *QueryResult) ID() string
- func (b *QueryResult) PageNumber() int
- func (b *QueryResult) Polygon() []*Point
- func (b *QueryResult) Raw() types.Block
- func (qr *QueryResult) Text() string
- type SelectionElement
- func (b *SelectionElement) BlockType() types.BlockType
- func (b *SelectionElement) BoundingBox() *BoundingBox
- func (b *SelectionElement) Confidence() float32
- func (b *SelectionElement) ID() string
- func (se *SelectionElement) IsSelected() bool
- func (b *SelectionElement) PageNumber() int
- func (b *SelectionElement) Polygon() []*Point
- func (b *SelectionElement) Raw() types.Block
- func (se *SelectionElement) Status() types.SelectionStatus
- func (se *SelectionElement) TextAndWords(optFns ...func(*TextLinearizationOptions)) (string, []*Word)
- func (se *SelectionElement) Words() []*Word
- type Signature
- func (b *Signature) BlockType() types.BlockType
- func (b *Signature) BoundingBox() *BoundingBox
- func (b *Signature) Confidence() float32
- func (b *Signature) ID() string
- func (b *Signature) PageNumber() int
- func (b *Signature) Polygon() []*Point
- func (b *Signature) Raw() types.Block
- func (s *Signature) Text(optFns ...func(*TextLinearizationOptions)) string
- func (s *Signature) TextAndWords(optFns ...func(*TextLinearizationOptions)) (string, []*Word)
- func (s *Signature) Words() []*Word
- type Table
- type TableCell
- func (b *TableCell) BlockType() types.BlockType
- func (b *TableCell) BoundingBox() *BoundingBox
- func (tc *TableCell) Confidence() float32
- func (b *TableCell) ID() string
- func (tc *TableCell) IsColumnHeader() bool
- func (tc *TableCell) IsTableFooter() bool
- func (tc *TableCell) IsTableSectionTitle() bool
- func (tc *TableCell) IsTableSummary() bool
- func (tc *TableCell) IsTableTitle() bool
- func (b *TableCell) PageNumber() int
- func (b *TableCell) Polygon() []*Point
- func (b *TableCell) Raw() types.Block
- func (tc *TableCell) Text() string
- type TableFooter
- func (b *TableFooter) BlockType() types.BlockType
- func (b *TableFooter) BoundingBox() *BoundingBox
- func (b *TableFooter) Confidence() float32
- func (b *TableFooter) ID() string
- func (b *TableFooter) PageNumber() int
- func (b *TableFooter) Polygon() []*Point
- func (b *TableFooter) Raw() types.Block
- func (tf *TableFooter) Text() string
- type TableRow
- type TableTitle
- func (b *TableTitle) BlockType() types.BlockType
- func (b *TableTitle) BoundingBox() *BoundingBox
- func (b *TableTitle) Confidence() float32
- func (b *TableTitle) ID() string
- func (b *TableTitle) PageNumber() int
- func (b *TableTitle) Polygon() []*Point
- func (b *TableTitle) Raw() types.Block
- func (tt *TableTitle) Text() string
- type TextLinearizationOptions
- type Value
- func (b *Value) BlockType() types.BlockType
- func (b *Value) BoundingBox() *BoundingBox
- func (b *Value) Confidence() float32
- func (b *Value) ID() string
- func (b *Value) PageNumber() int
- func (b *Value) Polygon() []*Point
- func (b *Value) Raw() types.Block
- func (v *Value) String() string
- func (v *Value) Text() string
- func (v *Value) TextAndWords(optFns ...func(*TextLinearizationOptions)) (string, []*Word)
- func (v *Value) Words() []*Word
- type Word
- func (b *Word) BlockType() types.BlockType
- func (b *Word) BoundingBox() *BoundingBox
- func (b *Word) Confidence() float32
- func (b *Word) ID() string
- func (w *Word) IsHandwriting() bool
- func (w *Word) IsPrinted() bool
- func (b *Word) PageNumber() int
- func (b *Word) Polygon() []*Point
- func (b *Word) Raw() types.Block
- func (w *Word) Text() string
- func (w *Word) TextType() types.TextType
Constants ¶
This section is empty.
Variables ¶
var DefaultLinerizationOptions = TextLinearizationOptions{ LinearizeTables: true, LinearizeKeyValues: true, RemoveNewLinesInLeafElements: true, MaxNumberOfConsecutiveNewLines: 2, HideHeaderLayout: false, HideFooterLayout: false, HideFigureLayout: false, HidePageNumberLayout: false, PageNumberPrefix: "", PageNumberSuffix: "", SameParagraphSeparator: " ", LayoutElementSeparator: "\n\n", ListElementSeparator: "\n", ListLayoutPrefix: "", ListLayoutSuffix: "", ListElementPrefix: "", ListElementSuffix: "", TitlePrefix: "", TitleSuffix: "", TableLayoutPrefix: "\n\n", TableLayoutSuffix: "\n", TableRemoveColumnHeaders: false, TableLinearizationFormat: "plaintext", TableTabulateFormat: "github", TableMinTableWords: 0, TableColumnSeparator: "\t", TablePrefix: "", TableSuffix: "", TableRowSeparator: "\n", TableRowPrefix: "", TableRowSuffix: "", TableCellPrefix: "", TableCellSuffix: "", SectionHeaderPrefix: "", SectionHeaderSuffix: "", TextPrefix: "", TextSuffix: "", KeyValueLayoutPrefix: "\n\n", KeyValueLayoutSuffix: "", KeyValuePrefix: "", KeyValueSuffix: "", ValuePrefix: "", ValueSuffix: "", SelectionElementSelected: "[X]", SelectionElementNotSelected: "[ ]", HeuristicHTolerance: 0.3, HeuristicLineBreakThreshold: 0.9, HeuristicOverlapRatio: 0.5, SignatureToken: "[SIGNATURE]", AddPrefixesAndSuffixesAsWords: false, AddPrefixesAndSuffixesInText: true, }
Functions ¶
This section is empty.
Types ¶
type AnalyzeIDOutput ¶ added in v0.0.4
type AnalyzeIDOutput struct { DocumentMetadata *types.DocumentMetadata `json:"DocumentMetadata"` IdentityDocuments []types.IdentityDocument `json:"IdentityDocuments"` }
type BoundingBox ¶
type BoundingBox struct {
// contains filtered or unexported fields
}
func NewEnclosingBoundingBox ¶ added in v0.0.4
func NewEnclosingBoundingBox[T BoundingBoxAccessor](accessors ...T) *BoundingBox
NewEnclosingBoundingBox returns a new bounding box that represents the union of multiple bounding boxes.
func (*BoundingBox) Area ¶ added in v0.0.4
func (bb *BoundingBox) Area() float32
Area calculates and returns the area of the bounding box. If either the width or height of the bounding box is less than zero, the area is considered zero to prevent negative area values.
func (*BoundingBox) Bottom ¶
func (bb *BoundingBox) Bottom() float32
Bottom returns the bottom coordinate of the bounding box.
func (*BoundingBox) Height ¶
func (bb *BoundingBox) Height() float32
func (*BoundingBox) HorizontalCenter ¶
func (bb *BoundingBox) HorizontalCenter() float32
HorizontalCenter returns the horizontal center coordinate of the bounding box.
func (*BoundingBox) Intersection ¶
func (bb *BoundingBox) Intersection(other *BoundingBox) *BoundingBox
Intersection returns a new bounding box that represents the intersection of two bounding boxes.
func (*BoundingBox) Left ¶
func (bb *BoundingBox) Left() float32
func (*BoundingBox) Right ¶
func (bb *BoundingBox) Right() float32
Right returns the right coordinate of the bounding box.
func (*BoundingBox) String ¶ added in v0.0.2
func (bb *BoundingBox) String() string
String returns a string representation of the bounding box.
func (*BoundingBox) Top ¶
func (bb *BoundingBox) Top() float32
func (*BoundingBox) VerticalCenter ¶
func (bb *BoundingBox) VerticalCenter() float32
VerticalCenter returns the vertical center coordinate of the bounding box.
func (*BoundingBox) Width ¶
func (bb *BoundingBox) Width() float32
type BoundingBoxAccessor ¶ added in v0.0.4
type BoundingBoxAccessor interface {
BoundingBox() *BoundingBox
}
type Document ¶
type Document struct {
// contains filtered or unexported fields
}
func ParseDocumentAPIOutput ¶ added in v0.0.4
func ParseDocumentAPIOutput(output *DocumentAPIOutput) (*Document, error)
func (*Document) Signatures ¶ added in v0.0.4
func (d *Document) Signatures() []*Signature
func (*Document) Text ¶ added in v0.0.4
func (d *Document) Text(optFns ...func(*TextLinearizationOptions)) string
type DocumentAPIOutput ¶ added in v0.0.4
type DocumentAPIOutput struct { DocumentMetadata *types.DocumentMetadata `json:"DocumentMetadata"` Blocks []types.Block `json:"Blocks"` }
type IdentityDocument ¶ added in v0.0.2
type IdentityDocument struct {
// contains filtered or unexported fields
}
func ParseAnalyzeIDOutput ¶ added in v0.0.4
func ParseAnalyzeIDOutput(output *AnalyzeIDOutput) ([]*IdentityDocument, error)
func (*IdentityDocument) Document ¶ added in v0.0.4
func (id *IdentityDocument) Document() *Document
func (*IdentityDocument) FieldByType ¶ added in v0.0.3
func (id *IdentityDocument) FieldByType(ft IdentityDocumentFieldType) *IdentityDocumentField
func (*IdentityDocument) Fields ¶ added in v0.0.3
func (id *IdentityDocument) Fields() []*IdentityDocumentField
func (*IdentityDocument) IdentityDocumentType ¶ added in v0.0.4
func (id *IdentityDocument) IdentityDocumentType() IdentityDocumentType
type IdentityDocumentField ¶ added in v0.0.3
type IdentityDocumentField struct {
// contains filtered or unexported fields
}
IdentityDocumentField represents a field extracted from an identity document by Textract.
func (*IdentityDocumentField) Confidence ¶ added in v0.0.3
func (idf *IdentityDocumentField) Confidence() float32
Confidence returns the confidence score associated with the field extraction.
func (*IdentityDocumentField) FieldType ¶ added in v0.0.4
func (idf *IdentityDocumentField) FieldType() IdentityDocumentFieldType
FieldType returns the type of the identity document field.
func (*IdentityDocumentField) IsNormalized ¶ added in v0.0.3
func (idf *IdentityDocumentField) IsNormalized() bool
IsNormalized checks if the field value is normalized.
func (*IdentityDocumentField) NormalizedValue ¶ added in v0.0.3
func (idf *IdentityDocumentField) NormalizedValue() *NormalizedIdentityDocumentFieldValue
NormalizedValue returns the normalized value of the identity document field.
func (*IdentityDocumentField) Value ¶ added in v0.0.3
func (idf *IdentityDocumentField) Value() string
Value returns the value of the identity document field.
type IdentityDocumentFieldType ¶ added in v0.0.3
type IdentityDocumentFieldType string
IdentityDocumentFieldType represents the type of fields in an identity document.
const ( IdentityDocumentFieldTypeFirstName IdentityDocumentFieldType = "FIRST_NAME" IdentityDocumentFieldTypeLastName IdentityDocumentFieldType = "LAST_NAME" IdentityDocumentFieldTypeMiddleName IdentityDocumentFieldType = "MIDDLE_NAME" IdentityDocumentFieldTypeSuffix IdentityDocumentFieldType = "Suffix" IdentityDocumentFieldTypeCityInAddress IdentityDocumentFieldType = "CITY_IN_ADDRESS" IdentityDocumentFieldTypeZipCodeInAddress IdentityDocumentFieldType = "ZIP_CODE_IN_ADDRESS" IdentityDocumentFieldTypeStateInAddress IdentityDocumentFieldType = "STATE_IN_ADDRESS" IdentityDocumentFieldTypeStateName IdentityDocumentFieldType = "STATE_NAME" IdentityDocumentFieldTypeDocumentNumber IdentityDocumentFieldType = "DOCUMENT_NUMBER" IdentityDocumentFieldTypeExpirationDate IdentityDocumentFieldType = "EXPIRATION_DATE" IdentityDocumentFieldTypeDateOfBirth IdentityDocumentFieldType = "DATE_OF_BIRTH" IdentityDocumentFieldTypeDateOfIssue IdentityDocumentFieldType = "DATE_OF_ISSUE" IdentityDocumentFieldTypeIDType IdentityDocumentFieldType = "ID_TYPE" IdentityDocumentFieldTypeEndorsements IdentityDocumentFieldType = "ENDORSEMENTS" IdentityDocumentFieldTypeVeteran IdentityDocumentFieldType = "VETERAN" IdentityDocumentFieldTypeRestrictions IdentityDocumentFieldType = "RESTRICTIONS" IdentityDocumentFieldTypeClass IdentityDocumentFieldType = "CLASS" IdentityDocumentFieldTypeAddress IdentityDocumentFieldType = "ADDRESS" IdentityDocumentFieldTypeCounty IdentityDocumentFieldType = "COUNTY" IdentityDocumentFieldTypePlaceOfBirth IdentityDocumentFieldType = "PLACE_OF_BIRTH" IdentityDocumentFieldTypeOther IdentityDocumentFieldType = "Other" )
type IdentityDocumentType ¶ added in v0.0.3
type IdentityDocumentType string
IdentityDocumentType represents the type of an identity document.
const ( IdentityDocumentTypeDrivingLicense IdentityDocumentType = "DRIVER LICENSE FRONT" IdentityDocumentTypePassport IdentityDocumentType = "PASSPORT" IdentityDocumentTypeOther IdentityDocumentType = "OTHER" )
type Key ¶ added in v0.0.4
type Key struct {
// contains filtered or unexported fields
}
func (*Key) BoundingBox ¶ added in v0.0.4
func (b *Key) BoundingBox() *BoundingBox
BoundingBox returns the bounding box information of the block.
func (*Key) Confidence ¶ added in v0.0.4
func (b *Key) Confidence() float32
Confidence returns the confidence of the block.
func (*Key) PageNumber ¶ added in v0.0.4
func (b *Key) PageNumber() int
PageNumber returns the page number associated with the block.
func (*Key) Polygon ¶ added in v0.0.4
func (b *Key) Polygon() []*Point
Polygon returns the polygon information of the block.
type KeyValue ¶ added in v0.0.4
type KeyValue struct {
// contains filtered or unexported fields
}
func (*KeyValue) BoundingBox ¶ added in v0.0.4
func (kv *KeyValue) BoundingBox() *BoundingBox
func (*KeyValue) Confidence ¶ added in v0.0.4
func (kv *KeyValue) Confidence() float32
Confidence calculates the confidence score for a key value.
func (*KeyValue) ID ¶ added in v0.0.4
func (b *KeyValue) ID() string
ID returns the identifier of the block.
func (*KeyValue) PageNumber ¶ added in v0.0.4
func (b *KeyValue) PageNumber() int
PageNumber returns the page number associated with the block.
func (*KeyValue) TextAndWords ¶ added in v0.0.4
func (kv *KeyValue) TextAndWords(optFns ...func(*TextLinearizationOptions)) (string, []*Word)
type Layout ¶ added in v0.0.4
type Layout struct {
// contains filtered or unexported fields
}
func (*Layout) AddChildren ¶ added in v0.0.4
func (l *Layout) AddChildren(children ...LayoutChild)
func (*Layout) BoundingBox ¶ added in v0.0.4
func (b *Layout) BoundingBox() *BoundingBox
BoundingBox returns the bounding box information of the block.
func (*Layout) Confidence ¶ added in v0.0.4
func (b *Layout) Confidence() float32
Confidence returns the confidence of the block.
func (*Layout) ID ¶ added in v0.0.4
func (b *Layout) ID() string
ID returns the identifier of the block.
func (*Layout) PageNumber ¶ added in v0.0.4
func (b *Layout) PageNumber() int
PageNumber returns the page number associated with the block.
func (*Layout) Polygon ¶ added in v0.0.4
func (b *Layout) Polygon() []*Point
Polygon returns the polygon information of the block.
func (*Layout) Text ¶ added in v0.0.4
func (l *Layout) Text(optFns ...func(*TextLinearizationOptions)) string
func (*Layout) TextAndWords ¶ added in v0.0.4
func (l *Layout) TextAndWords(optFns ...func(*TextLinearizationOptions)) (string, []*Word)
type LayoutChild ¶ added in v0.0.4
type LayoutChild interface { ID() string TextAndWords(optFns ...func(*TextLinearizationOptions)) (string, []*Word) BoundingBox() *BoundingBox }
type Line ¶
type Line struct {
// contains filtered or unexported fields
}
func (*Line) BoundingBox ¶ added in v0.0.4
func (b *Line) BoundingBox() *BoundingBox
BoundingBox returns the bounding box information of the block.
func (*Line) Confidence ¶
func (b *Line) Confidence() float32
Confidence returns the confidence of the block.
func (*Line) PageNumber ¶ added in v0.0.4
func (b *Line) PageNumber() int
PageNumber returns the page number associated with the block.
func (*Line) Polygon ¶ added in v0.0.4
func (b *Line) Polygon() []*Point
Polygon returns the polygon information of the block.
func (*Line) Text ¶
func (l *Line) Text(_ ...func(*TextLinearizationOptions)) string
func (*Line) TextAndWords ¶ added in v0.0.4
func (l *Line) TextAndWords(optFns ...func(*TextLinearizationOptions)) (string, []*Word)
type NormalizedIdentityDocumentFieldValue ¶ added in v0.0.4
type NormalizedIdentityDocumentFieldValue struct {
// contains filtered or unexported fields
}
NormalizedIdentityDocumentFieldValue represents a normalized value of an identity document field.
func (NormalizedIdentityDocumentFieldValue) DateValue ¶ added in v0.0.4
func (nidfv NormalizedIdentityDocumentFieldValue) DateValue() (time.Time, error)
DateValue returns the time representation of the normalized date value.
func (NormalizedIdentityDocumentFieldValue) Value ¶ added in v0.0.4
func (nidfv NormalizedIdentityDocumentFieldValue) Value() string
Value returns the string representation of the normalized value.
func (NormalizedIdentityDocumentFieldValue) ValueType ¶ added in v0.0.4
func (nidfv NormalizedIdentityDocumentFieldValue) ValueType() types.ValueType
ValueType returns the type of the normalized value.
type Orientation ¶
type Orientation struct {
// contains filtered or unexported fields
}
Orientation represents the orientation of a geometric element.
func (*Orientation) Degrees ¶
func (o *Orientation) Degrees() float32
Degrees returns the orientation in degrees.
func (*Orientation) Radians ¶
func (o *Orientation) Radians() float32
Radians returns the orientation in radians.
type Page ¶
type Page struct {
// contains filtered or unexported fields
}
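func (*Page) AddLayouts ¶ added in v0.0.4
func (p *Page) AddLayouts(layouts ...*Layout)
func (*Page) SearchValueByKey ¶ added in v0.0.4
func (p *Page) SearchValueByKey(key string) []*KeyValue
func (*Page) Signatures ¶ added in v0.0.2
func (p *Page) Signatures() []*Signature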
func (*Page) Text ¶
func (p *Page) Text(optFns ...func(*TextLinearizationOptions)) string
type Point ¶
type Point struct {
// contains filtered or unexported fields
}
Point represents a 2D point.
type Query ¶ added in v0.0.2
type Query struct {
// contains filtered or unexported fields
}
Query represents a query with associated information, including an identifier, text, alias, query pages, results, a page, and raw block data.
func (*Query) ResultsByConfidence ¶ added in v0.0.2
func (q *Query) ResultsByConfidence() []*QueryResult
ResultsByConfidence lists this query instance's results, sorted from most to least confident.
func (*Query) TopResult ¶ added in v0.0.2
func (q *Query) TopResult() *QueryResult
TopResult retrieves the top result by confidence score, if any are available.
type QueryResult ¶ added in v0.0.2
type QueryResult struct {
// contains filtered or unexported fields
}
QueryResult represents the result of a parsed query.
func (*QueryResult) BoundingBox ¶ added in v0.0.4
func (b *QueryResult) BoundingBox() *BoundingBox
BoundingBox returns the bounding box information of the block.
func (*QueryResult) Confidence ¶ added in v0.0.2
func (qr *QueryResult) Confidence() float32
Confidence returns the confidence level of the query result.
func (*QueryResult) ID ¶ added in v0.0.2
func (b *QueryResult) ID() string
ID returns the identifier of the block.
func (*QueryResult) PageNumber ¶ added in v0.0.4
func (b *QueryResult) PageNumber() int
PageNumber returns the page number associated with the block.
func (*QueryResult) Polygon ¶ added in v0.0.4
func (b *QueryResult) Polygon() []*Point
Polygon returns the polygon information of the block.
func (*QueryResult) Text ¶ added in v0.0.2
func (qr *QueryResult) Text() string
Text returns the extracted text from the query result.
type SelectionElement ¶
type SelectionElement struct {
// contains filtered or unexported fields
}
func (*SelectionElement) BoundingBox ¶ added in v0.0.4
func (b *SelectionElement) BoundingBox() *BoundingBox
BoundingBox returns the bounding box information of the block.
func (*SelectionElement) Confidence ¶
func (b *SelectionElement) Confidence() float32
Confidence returns the confidence of the block.
func (*SelectionElement) ID ¶
func (b *SelectionElement) ID() string
ID returns the identifier of the block.
func (*SelectionElement) IsSelected ¶
func (se *SelectionElement) IsSelected() bool
IsSelected checks if the element is selected.
func (*SelectionElement) PageNumber ¶ added in v0.0.4
func (b *SelectionElement) PageNumber() int
PageNumber returns the page number associated with the block.
func (*SelectionElement) Polygon ¶ added in v0.0.4
func (b *SelectionElement) Polygon() []*Point
Polygon returns the polygon information of the block.
func (*SelectionElement) Status ¶
func (se *SelectionElement) Status() types.SelectionStatus
Status returns the selection status of the element.
func (*SelectionElement) TextAndWords ¶ added in v0.0.4
func (se *SelectionElement) TextAndWords(optFns ...func(*TextLinearizationOptions)) (string, []*Word)
func (*SelectionElement) Words ¶ added in v0.0.4
func (se *SelectionElement) Words() []*Word
type Signature ¶ added in v0.0.2
type Signature struct {
// contains filtered or unexported fields
}
func (*Signature) BoundingBox ¶ added in v0.0.4
func (b *Signature) BoundingBox() *BoundingBox
BoundingBox returns the bounding box information of the block.
func (*Signature) Confidence ¶ added in v0.0.2
func (b *Signature) Confidence() float32
Confidence returns the confidence of the block.
func (*Signature) ID ¶ added in v0.0.2
func (b *Signature) ID() string
ID returns the identifier of the block.
func (*Signature) PageNumber ¶ added in v0.0.4
func (b *Signature) PageNumber() int
PageNumber returns the page number associated with the block.
func (*Signature) Polygon ¶ added in v0.0.4
func (b *Signature) Polygon() []*Point
Polygon returns the polygon information of the block.
func (*Signature) Text ¶ added in v0.0.4
func (s *Signature) Text(optFns ...func(*TextLinearizationOptions)) string
func (*Signature) TextAndWords ¶ added in v0.0.4
func (s *Signature) TextAndWords(optFns ...func(*TextLinearizationOptions)) (string, []*Word)
type Table ¶
type Table struct {
// contains filtered or unexported fields
}
func (*Table) BoundingBox ¶ added in v0.0.4
func (b *Table) BoundingBox() *BoundingBox
BoundingBox returns the bounding box information of the block.
func (*Table) Confidence ¶
func (b *Table) Confidence() float32
Confidence returns the confidence of the block.
func (*Table) PageNumber ¶ added in v0.0.4
func (b *Table) PageNumber() int
PageNumber returns the page number associated with the block.
func (*Table) Polygon ¶ added in v0.0.4
func (b *Table) Polygon() []*Point
Polygon returns the polygon information of the block.
type TableCell ¶ added in v0.0.4
type TableCell struct {
// contains filtered or unexported fields
}
func (*TableCell) BoundingBox ¶ added in v0.0.4
func (b *TableCell) BoundingBox() *BoundingBox
BoundingBox returns the bounding box information of the block.
func (*TableCell) Confidence ¶ added in v0.0.4
func (tc *TableCell) Confidence() float32
func (*TableCell) ID ¶ added in v0.0.4
func (b *TableCell) ID() string
ID returns the identifier of the block.
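func (*TableCell) IsColumnHeader ¶ added in v0.0.4
func (tc *TableCell) IsColumnHeader() bool
func (*TableCell) IsTableFooter ¶ added in v0.0.4
func (tc *TableCell) IsTableFooter() bool
func (*TableCell) IsTableSectionTitle ¶ added in v0.0.4
func (tc *TableCell) IsTableSectionTitle() bool
func (*TableCell) IsTableSummary ¶ added in v0.0.4
func (tc *TableCell) IsTableSummary() bool
func (*TableCell) IsTableTitle ¶ added in v0.0.4
func (tc *TableCell) IsTableTitle() bool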
func (*TableCell) PageNumber ¶ added in v0.0.4
func (b *TableCell) PageNumber() int
PageNumber returns the page number associated with the block.
func (*TableCell) Polygon ¶ added in v0.0.4
func (b *TableCell) Polygon() []*Point
Polygon returns the polygon information of the block.
type TableFooter ¶ added in v0.0.4
type TableFooter struct {
// contains filtered or unexported fields
}
func (*TableFooter) BoundingBox ¶ added in v0.0.4
func (b *TableFooter) BoundingBox() *BoundingBox
BoundingBox returns the bounding box information of the block.
func (*TableFooter) Confidence ¶ added in v0.0.4
func (b *TableFooter) Confidence() float32
Confidence returns the confidence of the block.
func (*TableFooter) ID ¶ added in v0.0.4
func (b *TableFooter) ID() string
ID returns the identifier of the block.
func (*TableFooter) PageNumber ¶ added in v0.0.4
func (b *TableFooter) PageNumber() int
PageNumber returns the page number associated with the block.
func (*TableFooter) Polygon ¶ added in v0.0.4
func (b *TableFooter) Polygon() []*Point
Polygon returns the polygon information of the block.
func (*TableFooter) Text ¶ added in v0.0.4
func (tf *TableFooter) Text() string
type TableTitle ¶ added in v0.0.4
type TableTitle struct {
// contains filtered or unexported fields
}
func (*TableTitle) BoundingBox ¶ added in v0.0.4
func (b *TableTitle) BoundingBox() *BoundingBox
BoundingBox returns the bounding box information of the block.
func (*TableTitle) Confidence ¶ added in v0.0.4
func (b *TableTitle) Confidence() float32
Confidence returns the confidence of the block.
func (*TableTitle) ID ¶ added in v0.0.4
func (b *TableTitle) ID() string
ID returns the identifier of the block.
func (*TableTitle) PageNumber ¶ added in v0.0.4
func (b *TableTitle) PageNumber() int
PageNumber returns the page number associated with the block.
func (*TableTitle) Polygon ¶ added in v0.0.4
func (b *TableTitle) Polygon() []*Point
Polygon returns the polygon information of the block.
func (*TableTitle) Text ¶ added in v0.0.4
func (tt *TableTitle) Text() string
type TextLinearizationOptions ¶ added in v0.0.4
type TextLinearizationOptions struct { // LinearizeTables includes tables in the linearized output. LinearizeTables bool // LinearizeKeyValues includes form key and values in the linearized output. LinearizeKeyValues bool // RemoveNewLinesInLeafElements removes new lines in leaf layout elements, removing extra whitespace. RemoveNewLinesInLeafElements bool // MaxNumberOfConsecutiveNewLines sets the maximum number of consecutive new lines to keep, removing extra whitespace. MaxNumberOfConsecutiveNewLines int // HideHeaderLayout hides headers in the linearized output. HideHeaderLayout bool // HideFooterLayout hides footers in the linearized output. HideFooterLayout bool // HideFigureLayout hides figures in the linearized output. HideFigureLayout bool // HidePageNumberLayout hides page numbers in the linearized output. HidePageNumberLayout bool // PageNumberPrefix is the prefix for page number layout elements. PageNumberPrefix string // PageNumberSuffix is the suffix for page number layout elements. PageNumberSuffix string // SameParagraphSeparator is the separator to use when combining elements within a text block. SameParagraphSeparator string // LayoutElementSeparator is the separator to use when combining linearized layout elements. LayoutElementSeparator string // ListElementSeparator is the separator for elements in a list layout. ListElementSeparator string // ListLayoutPrefix is the prefix for list layout elements (parent). ListLayoutPrefix string // ListLayoutSuffix is the suffix for list layout elements (parent). ListLayoutSuffix string // ListElementPrefix is the prefix for elements in a list layout (children). ListElementPrefix string // ListElementSuffix is the suffix for elements in a list layout (children). ListElementSuffix string // TitlePrefix is the prefix for title layout elements. TitlePrefix string // TitleSuffix is the suffix for title layout elements. TitleSuffix string // TableLayoutPrefix is the prefix for table elements. TableLayoutPrefix string // TableLayoutSuffix is the suffix for table elements. 
TableLayoutSuffix string // TableRemoveColumnHeaders removes column headers from tables. TableRemoveColumnHeaders bool // TableLinearizationFormat sets how to represent tables in the linearized output. Choices are plaintext or markdown. TableLinearizationFormat string // TableTabulateFormat is the markdown tabulate format to use when tables are linearized as markdown. TableTabulateFormat string // TableMinTableWords is the threshold below which tables will be rendered as words instead of using table layout. TableMinTableWords int // TableColumnSeparator is the table column separator, used when linearizing layout tables, not used if AnalyzeDocument was called with the TABLES feature. TableColumnSeparator string // TablePrefix is the prefix for table layout. TablePrefix string // TableSuffix is the suffix for table layout. TableSuffix string // TableRowSeparator is the table row separator. TableRowSeparator string // TableRowPrefix is the prefix for table row. TableRowPrefix string // TableRowSuffix is the suffix for table row. TableRowSuffix string // TableCellPrefix is the prefix for table cell. TableCellPrefix string // TableCellSuffix is the suffix for table cell. TableCellSuffix string // SectionHeaderPrefix is the prefix for section header layout elements. SectionHeaderPrefix string // SectionHeaderSuffix is the suffix for section header layout elements. SectionHeaderSuffix string // TextPrefix is the prefix for text layout elements. TextPrefix string // TextSuffix is the suffix for text layout elements. TextSuffix string // KeyValueLayoutPrefix is the prefix for key_value layout elements (not for individual key-value elements). KeyValueLayoutPrefix string // KeyValueLayoutSuffix is the suffix for key_value layout elements (not for individual key-value elements). KeyValueLayoutSuffix string // KeyValuePrefix is the prefix for key-value elements. KeyValuePrefix string // KeyValueSuffix is the suffix for key-value elements. 
KeyValueSuffix string // KeyPrefix is the prefix for key elements. KeyPrefix string // KeySuffix is the suffix for key elements. KeySuffix string // ValuePrefix is the prefix for value elements. ValuePrefix string // ValueSuffix is the suffix for value elements. ValueSuffix string // SelectionElementSelected is the representation for selection elements when selected. SelectionElementSelected string // SelectionElementNotSelected is the representation for selection elements when not selected. SelectionElementNotSelected string // HeuristicHTolerance sets how much the line below and above the current line should differ in width to be separated. HeuristicHTolerance float32 // HeuristicLineBreakThreshold sets how much space is acceptable between two lines before splitting them. Expressed in multiple of min heights. HeuristicLineBreakThreshold float32 // HeuristicOverlapRatio sets how much vertical overlap is tolerated between two subsequent lines before merging them into a single line. HeuristicOverlapRatio float32 // SignatureToken is the signature representation in the linearized text. SignatureToken string // AddPrefixesAndSuffixesAsWords controls whether the prefixes/suffixes will be inserted in the words returned by `TextAndWords`. AddPrefixesAndSuffixesAsWords bool // AddPrefixesAndSuffixesInText controls whether the prefixes/suffixes will be added to the linearized text. AddPrefixesAndSuffixesInText bool }
TextLinearizationOptions defines how a document is linearized into a text string.
type Value ¶ added in v0.0.4
type Value struct {
// contains filtered or unexported fields
}
func (*Value) BoundingBox ¶ added in v0.0.4
func (b *Value) BoundingBox() *BoundingBox
BoundingBox returns the bounding box information of the block.
func (*Value) Confidence ¶ added in v0.0.4
func (b *Value) Confidence() float32
Confidence returns the confidence of the block.
func (*Value) ID ¶ added in v0.0.4
func (b *Value) ID() string
ID returns the identifier of the block.
func (*Value) PageNumber ¶ added in v0.0.4
func (b *Value) PageNumber() int
PageNumber returns the page number associated with the block.
func (*Value) Polygon ¶ added in v0.0.4
func (b *Value) Polygon() []*Point
Polygon returns the polygon information of the block.
func (*Value) TextAndWords ¶ added in v0.0.4
func (v *Value) TextAndWords(optFns ...func(*TextLinearizationOptions)) (string, []*Word)
type Word ¶
type Word struct {
// contains filtered or unexported fields
}
Word represents a word extracted by Textract.
func (*Word) BoundingBox ¶ added in v0.0.4
func (b *Word) BoundingBox() *BoundingBox
BoundingBox returns the bounding box information of the block.
func (*Word) Confidence ¶
func (b *Word) Confidence() float32
Confidence returns the confidence of the block.
func (*Word) IsHandwriting ¶
func (w *Word) IsHandwriting() bool
IsHandwriting checks if the word is handwriting.
func (*Word) PageNumber ¶ added in v0.0.4
func (b *Word) PageNumber() int
PageNumber returns the page number associated with the block.
Source Files ¶
- base.go
- block_parser.go
- document.go
- enums.go
- geometry.go
- identity_document.go
- identity_document_field.go
- identity_document_parser.go
- key_value.go
- layout.go
- line.go
- options.go
- page.go
- page_parser.go
- query.go
- selection_element.go
- signature.go
- table.go
- table_cell.go
- table_footer.go
- table_title.go
- textractor.go
- word.go