convert

package
v1.0.3 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Jan 3, 2025 License: LGPL-3.0 Imports: 23 Imported by: 0

Documentation

Overview

Package convert extends the interface for the character encodings that transform text to and from Unicode UTF-8.

Index

Examples

Constants

View Source
// ASCII control codes, numbered consecutively by iota from NUL (0) to SP (32).
const (
	// NUL Null control code (0).
	NUL = iota
	// SOH Start of heading (1).
	SOH
	// STX Start of text (2).
	STX
	// ETX End of text (3).
	ETX
	// EOT End of transmission (4).
	EOT
	// ENQ Enquiry (5).
	ENQ
	// ACK Acknowledge (6).
	ACK
	// BEL Bell or alert (7).
	BEL
	// BS Backspace (8).
	BS
	// HT Horizontal tabulation (9).
	HT
	// LF Line feed (10).
	LF
	// VT Vertical tabulation (11).
	VT
	// FF Form feed (12).
	FF
	// CR Carriage return (13).
	CR
	// SO Shift out (14).
	SO
	// SI Shift in (15).
	SI
	// DLE Data Link Escape (16).
	DLE
	// DC1 Device control one (17).
	DC1
	// DC2 Device control two (18).
	DC2
	// DC3 Device control three (19).
	DC3
	// DC4 Device control four (20).
	DC4
	// NAK Negative acknowledge (21).
	NAK
	// SYN Synchronous idle (22).
	SYN
	// ETB End of transmission block (23).
	ETB
	// CAN Cancel (24).
	CAN
	// EM End of medium (25).
	EM
	// SUB Substitute (26).
	SUB
	// ESC Escape (27).
	ESC
	// FS File separator (28).
	FS
	// GS Group separator (29).
	GS
	// RS Record separator (30).
	RS
	// US Unit separator (31).
	US
	// SP Space (32).
	SP
)

The common ASCII and Unicode control decimal values.

View Source
// Named character codes and symbols, listed in ascending numeric order.
const (
	// LeftSquareBracket is the [ character.
	LeftSquareBracket = 91
	// VerticalBar is the | character.
	VerticalBar = 124
	// DEL is the delete control code.
	DEL = 127
	// Dash is a hyphen -.
	Dash = 150
	// Nbsp is a non-breaking space.
	Nbsp = 160
	// InvertedExclamation is the ¡ character.
	InvertedExclamation = 161
	// Cent is the ¢ character.
	Cent = 162
	// BrokenBar is the ¦ character.
	BrokenBar = 166
	// Negation is the ¬ character.
	Negation = 172
	// PlusMinus is the ± character.
	PlusMinus = 177
	// LightVertical is the light vertical │ character.
	LightVertical = 179
	// SquareRoot is the square root √ character.
	SquareRoot = 251
	// NBSP is a non-breaking space.
	NBSP = 255
	// Delta is the Δ character.
	Delta = 916
	// LeftwardsArrow is the ← character.
	LeftwardsArrow = 8592
	// SquareRootU is the Unicode square root √ character.
	SquareRootU = 8730
	// House is the ⌂ character.
	House = 8962
	// IntegralExtension is the ⎮ character.
	IntegralExtension = 9134
	// SymbolNUL is the ␀ symbol.
	SymbolNUL = 9216
	// SymbolESC is the ␛ symbol.
	SymbolESC = 9243
	// SymbolDEL is the ␡ symbol.
	SymbolDEL = 9249
	// OpenBox is the open box ␣ character.
	OpenBox = 9251
	// LightVerticalU is the box drawing light vertical │ character.
	LightVerticalU = 9474
	// CheckMark is the ✓ character.
	CheckMark = 10003
	// Replacement is the replacement character �.
	Replacement = 65533
)

Special ASCII and Unicode character codes and symbols.

View Source
// Row8 is the first cell after the end of a 7-bit row.
const Row8 = 128

Variables

View Source
// Exported error values created with errors.New. The message text of each
// value describes the condition it reports.
var (
	ErrANSI   = errors.New("ansi controls must be chained to c.swap")
	ErrBytes  = errors.New("cannot transform an empty byte slice")
	ErrEncode = errors.New("no input encoding provided")
	ErrName   = errors.New("unknown or unsupported code page name or alias")
	ErrOutput = errors.New("nothing to output")
	ErrWidth  = errors.New("cannot find the number columns from using line break")
	ErrWrap   = errors.New("wrap width must be chained to c.swap")
)

Functions

func EncodeAlias

func EncodeAlias(name string) string

EncodeAlias returns a valid IANA index encoding name from a shortened name or alias.

Example
package main

import (
	"fmt"

	"github.com/bengarrett/retrotxtgo/convert"
)

func main() {
	// Print the encoding name returned for each alias, one per line.
	for _, alias := range []string{"437", "koi8u"} {
		fmt.Println(convert.EncodeAlias(alias))
	}
}
Output:

IBM437
KOI8-U

func EncodeUTF32

func EncodeUTF32(name string) encoding.Encoding

EncodeUTF32 initializes common UTF-32 encodings.

Example
package main

import (
	"fmt"

	"github.com/bengarrett/retrotxtgo/convert"
)

func main() {
	// Print the encoding returned for the "utf-32" name.
	fmt.Println(convert.EncodeUTF32("utf-32"))
}
Output:

UTF-32LE (Use BOM)

func Encoder

func Encoder(name string) (encoding.Encoding, error)

Encoder returns the named character set encoder.

Example
package main

import (
	"fmt"

	"github.com/bengarrett/retrotxtgo/convert"
)

func main() {
	// Look up an encoder by a "cp" prefixed name and by a bare code page number.
	for _, name := range []string{"cp437", "1252"} {
		e, err := convert.Encoder(name)
		if err != nil {
			// Report the lookup failure rather than discarding it.
			fmt.Println(err)
			continue
		}
		fmt.Println(e)
	}
}
Output:

IBM Code Page 437
Windows 1252

func EqualLB

func EqualLB(r, nl [2]rune) bool

EqualLB reports whether r matches the single or multi-byte line break character runes.

func Humanize

func Humanize(name string) string

Humanize the encoding by using a shorter, less formal name.

Example
package main

import (
	"fmt"

	"github.com/bengarrett/retrotxtgo/convert"
)

func main() {
	// Print the humanized form of each code page name, one per line.
	for _, name := range []string{"cp437", "cp1252"} {
		fmt.Println(convert.Humanize(name))
	}
}
Output:

IBM437
Windows-1252

func Picture

func Picture(b byte) rune

Picture converts a byte value to a Unicode Control Picture rune.

Example
package main

import (
	"fmt"

	"github.com/bengarrett/retrotxtgo/convert"
)

func main() {
	// 138 is the line feed value and 155 the escape value passed to Picture.
	for _, code := range []byte{138, 155} {
		pic := convert.Picture(code)
		fmt.Printf("%U %s\n", pic, string(pic))
	}
}
Output:

U+240A ␊
U+241B ␛

func Shorten

func Shorten(name string) string

Shorten the name to a custom name, a common name or an alias.

Example
package main

import (
	"fmt"

	"github.com/bengarrett/retrotxtgo/convert"
)

func main() {
	// Print the shortened form of each name, one per line.
	for _, name := range []string{"cp437", "IBM-437"} {
		fmt.Println(convert.Shorten(name))
	}
}
Output:

437
437

func Swap

func Swap(code rune) rune
Example
package main

import (
	"fmt"

	"github.com/bengarrett/retrotxtgo/convert"
)

func main() {
	// Print the rune that Swap returns for each character code.
	for _, code := range []rune{convert.DEL, convert.SquareRoot} {
		fmt.Println(string(convert.Swap(code)))
	}
}
Output:

Δ
✓

Types

type Convert

// Convert holds the flag arguments, the input text with its settings, and the
// transformed output runes.
type Convert struct {
	Args  Flag // Args are the cmd supplied flag arguments.
	Input struct {
		Encoding  encoding.Encoding // Encoding is the encoding of the input text.
		Input     []byte            // Input is the input text as bytes.
		Ignore    []rune            // Ignore these runes.
		LineBreak [2]rune           // LineBreak are the line break controls used by the text.
		UseBreaks bool              // UseBreaks uses the line break controls as new lines.
		Table     bool              // Table flags this text as a code page table.
	}
	Output []rune // Output are the transformed UTF-8 runes.
}

Convert 8-bit code page text encodings or Unicode byte array text to UTF-8 runes.

func (*Convert) ANSI

func (c *Convert) ANSI(b ...byte) ([]rune, error)

ANSI transforms legacy encoded ANSI into modern UTF-8 text. It displays ASCII control codes as characters. It obeys the DOS end of file marker.

func (*Convert) ANSIControls

func (c *Convert) ANSIControls() *Convert

ANSIControls replaces all ←[ and ␛[ character matches with functional ANSI escape controls.

func (*Convert) Chars

func (c *Convert) Chars(b ...byte) ([]rune, error)

Chars transforms legacy encoded characters and text control codes into UTF-8 characters. It displays both ASCII and ANSI control codes as characters. It ignores the DOS end of file marker.

func (*Convert) Dump

func (c *Convert) Dump(b ...byte) ([]rune, error)

Dump transforms legacy encoded text or ANSI into modern UTF-8 text. It obeys common ASCII control codes. It ignores the DOS end of file marker.

func (*Convert) FixJISTable

func (c *Convert) FixJISTable()

FixJISTable blanks invalid ShiftJIS characters while printing 8-bit tables.

func (*Convert) LineBreak

func (c *Convert) LineBreak()

LineBreak will try to guess the line break representation as a 2 byte value. A guess of Unix will return [10, 0], Windows [13, 10], otherwise a [0, 0] value is returned.

func (*Convert) RunesControls

func (c *Convert) RunesControls()

RunesControls switches out C0 and C1 ASCII controls with Unicode Control Picture representations.

func (*Convert) RunesControlsEBCDIC

func (c *Convert) RunesControlsEBCDIC()

RunesControlsEBCDIC switches out EBCDIC controls with Unicode Control Picture representations.

func (*Convert) RunesDOS

func (c *Convert) RunesDOS()

RunesDOS switches out C0, C1 and other controls with PC/MS-DOS picture glyphs.

func (*Convert) RunesEBCDIC

func (c *Convert) RunesEBCDIC()

RunesEBCDIC switches out EBCDIC IBM mainframe controls with Unicode picture representations. Where no appropriate picture exists a space placeholder is used.

func (*Convert) RunesKOI8

func (c *Convert) RunesKOI8()

RunesKOI8 blanks out unused C0, C1 and other controls spaces for Russian sets.

func (*Convert) RunesLatin

func (c *Convert) RunesLatin()

RunesLatin blanks out unused C0, C1 and other controls spaces for ISO Latin sets.

func (*Convert) RunesMacintosh

func (c *Convert) RunesMacintosh()

RunesMacintosh replaces specific Mac OS Roman characters with Unicode picture representations.

func (*Convert) RunesShiftJIS

func (c *Convert) RunesShiftJIS()

RunesShiftJIS tweaks some Unicode picture representations for Shift-JIS.

func (*Convert) RunesUTF8

func (c *Convert) RunesUTF8()

RunesUTF8 tweaks some Unicode picture representations for UTF-8 Basic Latin.

func (*Convert) RunesWindows

func (c *Convert) RunesWindows()

RunesWindows tweaks some Unicode picture representations for Windows-125x sets.

func (*Convert) RunesXRows

func (c *Convert) RunesXRows()

RunesXRows blanks out rows 8x and 9x with spaces.

func (*Convert) SkipCode

func (c *Convert) SkipCode() *Convert

SkipCode marks control characters to be ignored. It needs to be applied before Convert.transform().

func (*Convert) SkipIgnore

func (c *Convert) SkipIgnore(i int) bool

SkipIgnore reports whether the rune should be skipped.

func (*Convert) Swap

func (c *Convert) Swap() (*Convert, error)

Swap transforms character map and control codes into UTF-8 Unicode runes.

func (*Convert) Swaps

func (c *Convert) Swaps() (*Convert, error)

func (*Convert) Text

func (c *Convert) Text(b ...byte) ([]rune, error)

Text transforms legacy encoded text or ANSI into modern UTF-8 text. It obeys common ASCII control codes. It obeys the DOS end of file marker.

func (*Convert) Transform

func (c *Convert) Transform() error

Transform byte data from named character map encoded text into UTF-8.

type Flag

// Flag holds the user supplied values.
type Flag struct {
	Controls  []string // Controls are the control codes to always use.
	SwapChars []string // SwapChars are the characters to swap out with common alternatives.
	MaxWidth  int      // MaxWidth is the maximum text width per-line.
}

Flag are the user supplied values.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL