Documentation ¶
Overview ¶
Package convert extends the interface for the character encodings that transform text to and from Unicode UTF-8.
Index ¶
- Constants
- Variables
- func EncodeAlias(name string) string
- func EncodeUTF32(name string) encoding.Encoding
- func Encoder(name string) (encoding.Encoding, error)
- func EqualLB(r, nl [2]rune) bool
- func Humanize(name string) string
- func Picture(b byte) rune
- func Shorten(name string) string
- func Swap(code rune) rune
- type Convert
- func (c *Convert) ANSI(b ...byte) ([]rune, error)
- func (c *Convert) ANSIControls() *Convert
- func (c *Convert) Chars(b ...byte) ([]rune, error)
- func (c *Convert) Dump(b ...byte) ([]rune, error)
- func (c *Convert) FixJISTable()
- func (c *Convert) LineBreak()
- func (c *Convert) RunesControls()
- func (c *Convert) RunesControlsEBCDIC()
- func (c *Convert) RunesDOS()
- func (c *Convert) RunesEBCDIC()
- func (c *Convert) RunesKOI8()
- func (c *Convert) RunesLatin()
- func (c *Convert) RunesMacintosh()
- func (c *Convert) RunesShiftJIS()
- func (c *Convert) RunesUTF8()
- func (c *Convert) RunesWindows()
- func (c *Convert) RunesXRows()
- func (c *Convert) SkipCode() *Convert
- func (c *Convert) SkipIgnore(i int) bool
- func (c *Convert) Swap() (*Convert, error)
- func (c *Convert) Swaps() (*Convert, error)
- func (c *Convert) Text(b ...byte) ([]rune, error)
- func (c *Convert) Transform() error
- type Flag
Examples ¶
Constants ¶
const ( // NUL Null control code. NUL = iota // SOH Start of heading. SOH // STX Start of text. STX // ETX End of text. ETX // EOT End of transmission. EOT // ENQ Enquiry. ENQ // ACK Acknowledge. ACK // BEL Bell or alert. BEL // BS Backspace. BS // HT Horizontal tabulation. HT // LF Line feed. LF // VT Vertical tabulation. VT // FF Form feed. FF // CR Carriage return. CR // SO Shift out. SO // SI Shift in. SI // DLE Data Link Escape. DLE // DC1 Device control one. DC1 // DC2 Device control two. DC2 // DC3 Device control three. DC3 // DC4 Device control four. DC4 // NAK Negative acknowledge. NAK // SYN Synchronous idle. SYN // ETB End of transmission block. ETB // CAN Cancel. CAN // EM End of medium. EM // SUB Substitute. SUB // ESC Escape. ESC // FS File separator. FS // GS Group separator. GS // RS Record separator. RS // US Unit separator. US // SP Space. SP )
The common ASCII and Unicode control decimal values.
const ( // LeftSquareBracket [. LeftSquareBracket = 91 // VerticalBar |. VerticalBar = 124 // DEL Delete. DEL = 127 // Dash Hyphen -. Dash = 150 // Nbsp Non-breaking space. Nbsp = 160 // InvertedExclamation ¡. InvertedExclamation = 161 // Cent ¢. Cent = 162 // BrokenBar ¦. BrokenBar = 166 // Negation ¬. Negation = 172 // PlusMinus ±. PlusMinus = 177 // LightVertical light vertical │. LightVertical = 179 // SquareRoot Square root √. SquareRoot = 251 // NBSP Non-breaking space. NBSP = 255 // Delta Δ. Delta = 916 // LeftwardsArrow ←. LeftwardsArrow = 8592 // SquareRootU Unicode square root √. SquareRootU = 8730 // House ⌂. House = 8962 // IntegralExtension ⎮. IntegralExtension = 9134 // SymbolNUL ␀. SymbolNUL = 9216 // SymbolESC ␛. SymbolESC = 9243 // SymbolDEL ␡. SymbolDEL = 9249 // LightVerticalU Box drawing light vertical │. LightVerticalU = 9474 // CheckMark ✓. CheckMark = 10003 // Replacement character �. Replacement = 65533 // Open Box ␣. OpenBox = 9251 )
Special ASCII and Unicode character codes and symbols.
const (
Row8 = 128 // the first cell after the end of a 7-bit row.
)
Variables ¶
var ( ErrANSI = errors.New("ansi controls must be chained to c.swap") ErrBytes = errors.New("cannot transform an empty byte slice") ErrEncode = errors.New("no input encoding provided") ErrName = errors.New("unknown or unsupported code page name or alias") ErrOutput = errors.New("nothing to output") ErrWidth = errors.New("cannot find the number columns from using line break") ErrWrap = errors.New("wrap width must be chained to c.swap") )
Functions ¶
func EncodeAlias ¶
EncodeAlias returns a valid IANA index encoding name from a shortened name or alias.
Example ¶
package main import ( "fmt" "github.com/bengarrett/retrotxtgo/convert" ) func main() { s := convert.EncodeAlias("437") fmt.Println(s) s = convert.EncodeAlias("koi8u") fmt.Println(s) }
Output: IBM437 KOI8-U
func EncodeUTF32 ¶
EncodeUTF32 initializes common UTF-32 encodings.
Example ¶
package main import ( "fmt" "github.com/bengarrett/retrotxtgo/convert" ) func main() { s := convert.EncodeUTF32("utf-32") fmt.Println(s) }
Output: UTF-32LE (Use BOM)
func Encoder ¶
Encoder returns the named character set encoder.
Example ¶
package main import ( "fmt" "github.com/bengarrett/retrotxtgo/convert" ) func main() { e, _ := convert.Encoder("cp437") fmt.Println(e) e, _ = convert.Encoder("1252") fmt.Println(e) }
Output: IBM Code Page 437 Windows 1252
func EqualLB ¶
EqualLB reports whether r matches the single or multi-byte, line break character runes.
func Humanize ¶
Humanize the encoding by using a shorter, less formal name.
Example ¶
package main import ( "fmt" "github.com/bengarrett/retrotxtgo/convert" ) func main() { s := convert.Humanize("cp437") fmt.Println(s) s = convert.Humanize("cp1252") fmt.Println(s) }
Output: IBM437 Windows-1252
func Picture ¶
Picture converts a byte value to a Unicode Control Picture rune.
Example ¶
package main import ( "fmt" "github.com/bengarrett/retrotxtgo/convert" ) func main() { const lf = byte(138) r := convert.Picture(lf) fmt.Printf("%U %s\n", r, string(r)) const esc = byte(155) r = convert.Picture(esc) fmt.Printf("%U %s\n", r, string(r)) }
Output: U+240A ␊ U+241B ␛
Types ¶
type Convert ¶
type Convert struct { Args Flag // Args are the cmd supplied flag arguments. Input struct { Encoding encoding.Encoding // Encoding are the encoding of the input text. Input []byte // Bytes are the input text as bytes. Ignore []rune // Ignore these runes. LineBreak [2]rune // Line break controls used by the text. UseBreaks bool // UseBreaks uses the line break controls as new lines. Table bool // Table flags this text as a code page table. } Output []rune // Output are the transformed UTF-8 runes. }
Convert 8-bit code page text encodings or Unicode byte array text to UTF-8 runes.
func (*Convert) ANSI ¶
ANSI transforms legacy encoded ANSI into modern UTF-8 text. It displays ASCII control codes as characters. It obeys the DOS end of file marker.
func (*Convert) ANSIControls ¶
ANSIControls replaces all ←[ and ␛[ character matches with functional ANSI escape controls.
func (*Convert) Chars ¶
Chars transforms legacy encoded characters and text control codes into UTF-8 characters. It displays both ASCII and ANSI control codes as characters. It ignores the DOS end of file marker.
func (*Convert) Dump ¶
Dump transforms legacy encoded text or ANSI into modern UTF-8 text. It obeys common ASCII control codes. It ignores the DOS end of file marker.
func (*Convert) FixJISTable ¶
func (c *Convert) FixJISTable()
FixJISTable blanks invalid ShiftJIS characters while printing 8-bit tables.
func (*Convert) LineBreak ¶
func (c *Convert) LineBreak()
LineBreak will try to guess the line break representation as a 2 byte value. A guess of Unix will return [10, 0], Windows [13, 10], otherwise a [0, 0] value is returned.
func (*Convert) RunesControls ¶
func (c *Convert) RunesControls()
RunesControls switches out C0 and C1 ASCII controls with Unicode Control Picture representations.
func (*Convert) RunesControlsEBCDIC ¶
func (c *Convert) RunesControlsEBCDIC()
RunesControlsEBCDIC switches out EBCDIC controls with Unicode Control Picture representations.
func (*Convert) RunesDOS ¶
func (c *Convert) RunesDOS()
RunesDOS switches out C0, C1 and other controls with PC/MS-DOS picture glyphs.
func (*Convert) RunesEBCDIC ¶
func (c *Convert) RunesEBCDIC()
RunesEBCDIC switches out EBCDIC IBM mainframe controls with Unicode picture representations. Where no appropriate picture exists a space placeholder is used.
func (*Convert) RunesKOI8 ¶
func (c *Convert) RunesKOI8()
RunesKOI8 blanks out unused C0, C1 and other controls spaces for Russian sets.
func (*Convert) RunesLatin ¶
func (c *Convert) RunesLatin()
RunesLatin blanks out unused C0, C1 and other controls spaces for ISO Latin sets.
func (*Convert) RunesMacintosh ¶
func (c *Convert) RunesMacintosh()
RunesMacintosh replaces specific Mac OS Roman characters with Unicode picture representations.
func (*Convert) RunesShiftJIS ¶
func (c *Convert) RunesShiftJIS()
RunesShiftJIS tweaks some Unicode picture representations for Shift-JIS.
func (*Convert) RunesUTF8 ¶
func (c *Convert) RunesUTF8()
RunesUTF8 tweaks some Unicode picture representations for UTF-8 Basic Latin.
func (*Convert) RunesWindows ¶
func (c *Convert) RunesWindows()
RunesWindows tweaks some Unicode picture representations for Windows-125x sets.
func (*Convert) RunesXRows ¶
func (c *Convert) RunesXRows()
RunesXRows blanks out rows 8x and 9x with spaces.
func (*Convert) SkipCode ¶
SkipCode marks control characters to be ignored. It needs to be applied before Convert.Transform().
func (*Convert) SkipIgnore ¶
SkipIgnore reports whether the rune should be skipped.