xcharset

package
v1.4.2 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Oct 10, 2020 License: MIT Imports: 10 Imported by: 0

README

xcharset

Dependencies
  • golang.org/x/text
  • github.com/saintfish
  • xtesting*
Functions
  • type DetectResult struct {}
  • DetectCharsetBest(bs []byte) (*DetectResult, error)
  • DetectCharsetAll(bs []byte) ([]*DetectResult, error)
  • EncodeString(encode encoding.Encoding, src string) (string, error)
  • DecodeString(encode encoding.Encoding, src string) (string, error)
  • EncodeBytes(encode encoding.Encoding, src []byte) ([]byte, error)
  • DecodeBytes(encode encoding.Encoding, src []byte) ([]byte, error)
  • TrimBomString(str string) string
  • TrimBomBytes(bs []byte) []byte
  • GetEncoding(iana string) (encode encoding.Encoding, existed bool)

Documentation

Index

Constants

View Source
// Byte-order-mark byte sequences, written as raw UTF-8 bytes.
const (
	BOM  = "\xef\xbb\xbf" // bytes EF BB BF: the UTF-8 encoding of U+FEFF
	BOM2 = "\xef\xbf\xbe" // bytes EF BF BE: the UTF-8 encoding of U+FFFE (byte-swapped form of U+FEFF)
)
View Source
// Charset name constants declared by this package. The trailing comment on
// each line lists the language code(s) associated with that charset.
//
// NOTE(review): IANA_IBM424RTL and IANA_IBM424LTR are named "IBM424" but hold
// "IBM420_*" values — confirm which of IBM420/IBM424 is intended.
// NOTE(review): the IANA registry spells the GB 18030 charset "GB18030";
// confirm the hyphenated "GB-18030" is the form callers expect.
const (
	IANA_UTF8    = "UTF-8"    // *
	IANA_UTF16BE = "UTF-16BE" // *
	IANA_UTF16LE = "UTF-16LE" // *
	IANA_UTF32BE = "UTF-32BE" // *
	IANA_UTF32LE = "UTF-32LE" // *

	IANA_ISO88591    = "ISO-8859-1"   // Latin-1, 1: en, da, de, es, fr, it, nl, no, pt, sv; 2: cs, hu, pl, ro
	IANA_ISO88595    = "ISO-8859-5"   // ru
	IANA_ISO88596    = "ISO-8859-6"   // ar
	IANA_ISO88597    = "ISO-8859-7"   // el
	IANA_ISO88598    = "ISO-8859-8"   // he
	IANA_ISO88598I   = "ISO-8859-8-I" // he
	IANA_ISO88599    = "ISO-8859-9"   // tr
	IANA_WINDOWS1251 = "windows-1251" // ru (Cyrillic code page)
	IANA_WINDOWS1256 = "windows-1256" // ar
	IANA_KOI8R       = "KOI8-R"       // ru

	IANA_SHIFTJIS  = "Shift_JIS"   // ja
	IANA_GB18030   = "GB-18030"    // zh
	IANA_EUCJP     = "EUC-JP"      // ja
	IANA_EUCKR     = "EUC-KR"      // ko
	IANA_BIG5      = "Big5"        // zh
	IANA_ISO2022JP = "ISO-2022-JP" // ja
	IANA_ISO2022KR = "ISO-2022-KR" // ko
	IANA_ISO2022CN = "ISO-2022-CN" // zh

	IANA_IBM424RTL = "IBM420_rtl" // he, ar
	IANA_IBM424LTR = "IBM420_ltr" // he, ar
)

Variables

This section is empty.

Functions

func DecodeBytes

func DecodeBytes(encode encoding.Encoding, src []byte) ([]byte, error)

DecodeBytes decodes a byte slice using the given encoding.

func DecodeString

func DecodeString(encode encoding.Encoding, src string) (string, error)

DecodeString decodes a string in a specific encoding.

func EncodeBytes

func EncodeBytes(encode encoding.Encoding, src []byte) ([]byte, error)

EncodeBytes encodes a byte slice using the given encoding.

func EncodeString

func EncodeString(encode encoding.Encoding, src string) (string, error)

EncodeString encodes a string in a specific encoding.

func GetEncoding

func GetEncoding(iana string) (encode encoding.Encoding, existed bool)

GetEncoding returns the encoding.Encoding for the given IANA charset name.

func TrimBomBytes

func TrimBomBytes(bs []byte) []byte

TrimBomBytes removes the BOM from a byte slice.

func TrimBomString

func TrimBomString(str string) string

TrimBomString removes BOM from a string.

Types

type DetectResult

type DetectResult struct {
	// Charset is the IANA name of the detected charset.
	Charset string

	// Language is the name of the detected language. It may be empty for
	// some charsets.
	Language string

	// Confidence is the confidence of the result, on a scale from 1 to 100.
	Confidence int
}

DetectResult contains all the information that charset detector gives.

func DetectCharsetAll

func DetectCharsetAll(bs []byte) ([]*DetectResult, error)

DetectCharsetAll returns all Results which have non-zero Confidence. The Results are sorted by Confidence in descending order.

func DetectCharsetBest

func DetectCharsetBest(bs []byte) (*DetectResult, error)

DetectCharsetBest returns the Result with highest Confidence.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL