Documentation ¶
Index ¶
- Constants
- Variables
- func CharDescription(ch rune) string
- func Escape(input string) string
- func IsECMAWordChar(r rune) bool
- func IsWordChar(r rune) bool
- func Unescape(input string) (string, error)
- type AnchorLoc
- type BmPrefix
- type CharSet
- func (c CharSet) CharIn(ch rune) bool
- func (c CharSet) Copy() CharSet
- func (c CharSet) HasSubtraction() bool
- func (c CharSet) IsEmpty() bool
- func (c CharSet) IsMergeable() bool
- func (c CharSet) IsNegated() bool
- func (c CharSet) IsSingleton() bool
- func (c CharSet) IsSingletonInverse() bool
- func (c CharSet) SingletonChar() rune
- func (c CharSet) String() string
- type Code
- type Error
- type ErrorCode
- type InstOp
- type Prefix
- type RegexOptions
- type RegexTree
- type ReplacerData
Constants ¶
const ( LowercaseSet = 0 // Set to arg. LowercaseAdd = 1 // Add arg. LowercaseBor = 2 // Bitwise or with 1. LowercaseBad = 3 // Bitwise and with 1 and add original. )
const ( Onerep InstOp = 0 // lef,back char,min,max a {n} Notonerep = 1 // lef,back char,min,max .{n} Setrep = 2 // lef,back set,min,max [\d]{n} Oneloop = 3 // lef,back char,min,max a {,n} Notoneloop = 4 // lef,back char,min,max .{,n} Setloop = 5 // lef,back set,min,max [\d]{,n} Onelazy = 6 // lef,back char,min,max a {,n}? Notonelazy = 7 // lef,back char,min,max .{,n}? Setlazy = 8 // lef,back set,min,max [\d]{,n}? One = 9 // lef char a Notone = 10 // lef char [^a] Set = 11 // lef set [a-z\s] \w \s \d Multi = 12 // lef string abcd Ref = 13 // lef group \# Bol = 14 // ^ Eol = 15 // $ Boundary = 16 // \b Nonboundary = 17 // \B Beginning = 18 // \A Start = 19 // \G EndZ = 20 // \Z End = 21 // \z Nothing = 22 // Reject! Lazybranch = 23 // back jump straight first Branchmark = 24 // back jump branch first for loop Lazybranchmark = 25 // back jump straight first for loop Nullcount = 26 // back val set counter, null mark Setcount = 27 // back val set counter, make mark Branchcount = 28 // back jump,limit branch++ if zero<=c<limit Lazybranchcount = 29 // back jump,limit same, but straight first Nullmark = 30 // back save position Setmark = 31 // back save position Capturemark = 32 // back group define group Getmark = 33 // back recall position Setjump = 34 // back save backtrack state Backjump = 35 // zap back to saved state Forejump = 36 // zap backtracking state Testref = 37 // backtrack if ref undefined Goto = 38 // jump just go Prune = 39 // prune it baby Stop = 40 // done! ECMABoundary = 41 // \b NonECMABoundary = 42 // \B Mask = 63 // Mask to get unmodified ordinary operator Rtl = 64 // bit to indicate that we're reverse scanning. Back = 128 // bit to indicate that we're backtracking. Back2 = 256 // bit to indicate that we're backtracking on a second branch. Ci = 512 // bit to indicate that we're case-insensitive. )
const ( IgnoreCase RegexOptions = 0x0001 // "i" Multiline = 0x0002 // "m" ExplicitCapture = 0x0004 // "n" Compiled = 0x0008 // "c" Singleline = 0x0010 // "s" IgnorePatternWhitespace = 0x0020 // "x" RightToLeft = 0x0040 // "r" Debug = 0x0080 // "d" ECMAScript = 0x0100 // "e" RE2 = 0x0200 // RE2 compat mode Unicode = 0x0400 // "u" )
const ( // internal issue ErrInternalError ErrorCode = "regexp/syntax: internal error" // Parser errors ErrUnterminatedComment = "unterminated comment" ErrInvalidCharRange = "invalid character class range" ErrInvalidRepeatSize = "invalid repeat count" ErrInvalidUTF8 = "invalid UTF-8" ErrCaptureGroupOutOfRange = "capture group number out of range" ErrUnexpectedParen = "unexpected )" ErrMissingParen = "missing closing )" ErrMissingBrace = "missing closing }" ErrInvalidRepeatOp = "invalid nested repetition operator" ErrMissingRepeatArgument = "missing argument to repetition operator" ErrConditionalExpression = "illegal conditional (?(...)) expression" ErrTooManyAlternates = "too many | in (?()|)" ErrUnrecognizedGrouping = "unrecognized grouping construct: (%v" ErrInvalidGroupName = "invalid group name: group names must begin with a word character and have a matching terminator" ErrCapNumNotZero = "capture number cannot be zero" ErrUndefinedBackRef = "reference to undefined group number %v" ErrUndefinedNameRef = "reference to undefined group name %v" ErrAlternationCantCapture = "alternation conditions do not capture and cannot be named" ErrAlternationCantHaveComment = "alternation conditions cannot be comments" ErrMalformedReference = "(?(%v) ) malformed" ErrUndefinedReference = "(?(%v) ) reference to undefined group" ErrIllegalEndEscape = "illegal \\ at end of pattern" ErrMalformedSlashP = "malformed \\p{X} character escape" ErrIncompleteSlashP = "incomplete \\p{X} character escape" ErrUnknownSlashP = "unknown unicode category, script, or property '%v'" ErrUnrecognizedEscape = "unrecognized escape sequence \\%v" ErrMissingControl = "missing control character" ErrUnrecognizedControl = "unrecognized control character" ErrTooFewHex = "insufficient hexadecimal digits" ErrInvalidHex = "hex values may not be larger than 0x10FFFF" ErrMalformedNameRef = "malformed \\k<...> named back reference" ErrBadClassInCharRange = "cannot include class \\%v in character range" 
ErrUnterminatedBracket = "unterminated [] set" ErrSubtractionMustBeLast = "a subtraction must be the last element in a character class" ErrReversedCharRange = "[%c-%c] range in reverse order" )
const ( Q byte = 5 // quantifier S = 4 // ordinary stopper Z = 3 // ScanBlank stopper X = 2 // whitespace E = 1 // should be escaped )
const ( AnchorBeginning AnchorLoc = 0x0001 AnchorBol = 0x0002 AnchorStart = 0x0004 AnchorEol = 0x0008 AnchorEndZ = 0x0010 AnchorEnd = 0x0020 AnchorBoundary = 0x0040 AnchorECMABoundary = 0x0080 )
The AnchorLoc constants describe where the regex can be pegged (anchored).
const (
// MaxPrefixSize is the largest number of runes we'll use for a Boyer-Moore prefix
MaxPrefixSize = 50
)
Variables ¶
var ( AnyClass = getCharSetFromOldString([]rune{0}, false) ECMAAnyClass = getCharSetFromOldString([]rune{0, 0x000a, 0x000b, 0x000d, 0x000e}, false) NoneClass = getCharSetFromOldString(nil, false) ECMAWordClass = getCharSetFromOldString(ecmaWord, false) NotECMAWordClass = getCharSetFromOldString(ecmaWord, true) ECMASpaceClass = getCharSetFromOldString(ecmaSpace, false) NotECMASpaceClass = getCharSetFromOldString(ecmaSpace, true) ECMADigitClass = getCharSetFromOldString(ecmaDigit, false) NotECMADigitClass = getCharSetFromOldString(ecmaDigit, true) WordClass = getCharSetFromCategoryString(false, false, wordCategoryText) NotWordClass = getCharSetFromCategoryString(true, false, wordCategoryText) SpaceClass = getCharSetFromCategoryString(false, false, spaceCategoryText) NotSpaceClass = getCharSetFromCategoryString(true, false, spaceCategoryText) DigitClass = getCharSetFromCategoryString(false, false, "Nd") NotDigitClass = getCharSetFromCategoryString(false, true, "Nd") RE2SpaceClass = getCharSetFromOldString(re2Space, false) NotRE2SpaceClass = getCharSetFromOldString(re2Space, true) )
var ErrReplacementError = errors.New("Replacement pattern error.")
ErrReplacementError is a general error returned when parsing the replacement text fails.
Functions ¶
func CharDescription ¶
CharDescription produces a human-readable description for a single character.
func IsECMAWordChar ¶
func IsWordChar ¶
According to UTS#18, Unicode Regular Expressions (http://www.unicode.org/reports/tr18/), section RL1.4 "Simple Word Boundaries": the class of <word_character> includes all Alphabetic values from the Unicode character database, from UnicodeData.txt [UData], plus the U+200C ZERO WIDTH NON-JOINER and U+200D ZERO WIDTH JOINER.
Types ¶
type BmPrefix ¶
type BmPrefix struct {
// contains filtered or unexported fields
}
BmPrefix precomputes the Boyer-Moore tables for fast string scanning. These tables allow you to scan for the first occurrence of a string within a large body of text without examining every character. The performance of the heuristic depends on the actual string and the text being searched, but usually, the longer the string that is being searched for, the fewer characters need to be examined.
func (*BmPrefix) IsMatch ¶
When a regex is anchored, we can do a quick IsMatch test instead of a Scan
func (*BmPrefix) Scan ¶
Scan uses the Boyer-Moore algorithm to find the first occurrence of the specified string within text, beginning at index, and constrained within beglimit and endlimit.
The direction and case-sensitivity of the match are determined by the arguments to the RegexBoyerMoore constructor.
type CharSet ¶
type CharSet struct {
// contains filtered or unexported fields
}
CharSet combines start-end rune ranges and unicode categories representing a set of characters
func (CharSet) CharIn ¶
CharIn returns true if the rune is in our character set (either ranges or categories). It handles negations and subtracted sub-charsets.
func (CharSet) HasSubtraction ¶
func (CharSet) IsMergeable ¶
func (CharSet) IsSingleton ¶
func (CharSet) IsSingletonInverse ¶
func (CharSet) SingletonChar ¶
SingletonChar will return the char from the first range without validation. It assumes you have checked for IsSingleton or IsSingletonInverse, and it will panic given bad input.
type Code ¶
type Code struct { Codes []int // the code Strings [][]rune // string table Sets []*CharSet //character set table TrackCount int // how many instructions use backtracking Caps map[int]int // mapping of user group numbers -> impl group slots Capsize int // number of impl group slots FcPrefix *Prefix // the set of candidate first characters (may be null) BmPrefix *BmPrefix // the fixed prefix string as a Boyer-Moore machine (may be null) Anchors AnchorLoc // the set of zero-length start anchors (RegexFCD.Bol, etc) RightToLeft bool // true if right to left }
func (*Code) OpcodeDescription ¶
OpcodeDescription is a human-readable string describing the opcode at the specified offset.
type Error ¶
An Error describes a failure to parse a regular expression and gives the offending expression.
type ErrorCode ¶
type ErrorCode string
An ErrorCode describes a failure to parse a regular expression.
type RegexOptions ¶
type RegexOptions int32
type RegexTree ¶
type RegexTree struct { Capnames map[string]int Caplist []string // contains filtered or unexported fields }
type ReplacerData ¶
func NewReplacerData ¶
func NewReplacerData(rep string, caps map[int]int, capsize int, capnames map[string]int, op RegexOptions) (*ReplacerData, error)
NewReplacerData will populate a reusable replacer data struct based on the given replacement string and the capture group data from a regexp