Documentation ¶
Overview ¶
Package rewrite modifies the contents of HTML and other web-related content types. It is primarily used as a tool to maintain the functionality of a web resource within the context of an archive.
Package rewrite modifies the contents of HTML and other web-related content types. It is primarily used as a tool to maintain the functionality of a web resource within the context of an archive.
Index ¶
- Variables
- func ReplaceAllSubmatchFunc(re *regexp.Regexp, b []byte, f func(s []byte) []byte) []byte
- type Buffer
- type Config
- type CookieRewriter
- type CssRewriter
- type HeaderRewriter
- type HtmlRewriter
- type PrefixRewriter
- type RegexRewriter
- type RewriteRule
- type Rewriter
- type RewriterType
- type UrlRewriter
- type WarcRecordRewriter
Constants ¶
This section is empty.
Variables ¶
var ( CharsetRegex = regexp.MustCompile(`<meta[^>]*?[\s;"\']charset\s*=[\s"\']*([^\s"\'/>]*)`) CssUrlRegex = regexp.MustCompile(`(?m)url\s*\(\s*(?:[\"']|(?:&.{1,4};))*\s*([^)'\"]+)\s*(?:["']|(?:&.{1,4};))*\s*\)`) // CssImportNoUrlRegex = regexp.MustCompile(`@import\\s+(?!url)\\(?\\s*['\"]?(?!url[\\s\\(])([\w.:/\\\\-]+)`) CssImportNoUrlRegex = regexp.MustCompile(``) HttpxMatchString = regexp.MustCompile(`https?:\\?/\\?/[A-Za-z0-9:_@.-]+`) JsHttpx = regexp.MustCompile(``) )
var DefaultHeaderRewriters = map[string]RewriteRule{ "Access-Control-Allow-Origin": PrefixIfUrlRewrite, "Access-Control-Allow-Credentials": PrefixIfUrlRewrite, "Access-Control-Expose-Headers": PrefixIfUrlRewrite, "Access-Control-Max-Age": PrefixIfUrlRewrite, "Access-Control-Allow-Methods": PrefixIfUrlRewrite, "Access-Control-Allow-Headers": PrefixIfUrlRewrite, "Accept-Patch": Keep, "Accept-Ranges": Keep, "Age": Prefix, "Allow": Keep, "Alt-Svc": Prefix, "Cache-Control": Prefix, "Connection": Prefix, "Content-Base": UrlRewrite, "Content-Disposition": Keep, "Content-Encoding": PrefixIfContentRewrite, "Content-Language": Keep, "Content-Length": ContentLength, "Content-Location": UrlRewrite, "Content-Md5": Prefix, "Content-Range": Keep, "Content-Security-Policy": Prefix, "Content-Security-Policy-Report-Only": Prefix, "Content-Type": Keep, "Date": Keep, "Etag": Prefix, "Expires": Prefix, "Last-Modified": Prefix, "Link": Keep, "Location": UrlRewrite, "P3p": Prefix, "Pragma": Prefix, "Proxy-Authenticate": Keep, "Public-Key-Pins": Prefix, "Retry-After": Prefix, "Server": Prefix, "Set-Cookie": Cookie, "Strict-Transport-Security": Prefix, "Trailer": Prefix, "Transfer-Encoding": Prefix, "Tk": Prefix, "Upgrade": Prefix, "Upgrade-Insecure-Requests": Prefix, "Vary": Prefix, "Via": Prefix, "Warning": Prefix, "Www-Authenticate": Keep, "X-Frame-Options": Prefix, "X-Xss-Protection": Prefix, }
var DefaultWarcRecordRewriters = map[string]Rewriter{ "header": NoopRewriter, "cookie": NoopRewriter, "html": NoopRewriter, "html-banner-only": NoopRewriter, "css": NoopRewriter, "js": NoopRewriter, "js-proxy": NoopRewriter, "json": NoopRewriter, "xml": NoopRewriter, "dash": NoopRewriter, "hls": NoopRewriter, "amf": NoopRewriter, }
var ErrNotFinished = errors.New("not finished")
var NoopRewriter = PrefixRewriter{}
var RewriteTypes = map[string]string{
"text/html": "html",
"application/xhtml": "html",
"application/xhtml+xml": "html",
"text/css": "css",
"text/javascript": "js",
"application/javascript": "js",
"application/x-javascript": "js",
"application/json": "json",
"application/x-mpegURL": "hls",
"application/vnd.apple.mpegurl": "hls",
"application/dash+xml": "dash",
"application/x-amf": "amf",
"text/plain": "guess-text",
"": "guess-text",
"application/octet-stream": "guess-bin",
}
Functions ¶
func ReplaceAllSubmatchFunc ¶
ReplaceAllSubmatchFunc is copied from a Stack Overflow answer: https://stackoverflow.com/questions/28000832/how-to-access-a-capturing-group-from-regexp-replaceallfunc
Types ¶
type Buffer ¶
Buffer behaves just like a bytes.Buffer, but uses a rewriter to adjust any bytes written with its Write method.
type Config ¶
type Config struct { DestUrl string Defmod Rewriter Rewriters []RewriterType HeaderPrefix string HeaderRules map[string]RewriteRule }
func DefaultConfig ¶
func DefaultConfig() *Config
type CookieRewriter ¶
type CookieRewriter struct { }
func NewCookieRewriter ¶
func NewCookieRewriter(configs ...func(*Config)) *CookieRewriter
func (*CookieRewriter) Rewrite ¶
func (crw *CookieRewriter) Rewrite(p []byte) []byte
type CssRewriter ¶
type CssRewriter struct {
Rw *UrlRewriter
}
func NewCssRewriter ¶
func NewCssRewriter(urlrw *UrlRewriter) *CssRewriter
func (*CssRewriter) Rewrite ¶
func (rerw *CssRewriter) Rewrite(p []byte) []byte
type HeaderRewriter ¶
type HeaderRewriter struct { Prefix string Rules map[string]RewriteRule Urlrw Rewriter Cookierw Rewriter RewritingContent bool }
func NewHeaderRewriter ¶
func NewHeaderRewriter(configs ...func(cfg *Config)) *HeaderRewriter
func (HeaderRewriter) RewriteHeaders ¶
func (hrw HeaderRewriter) RewriteHeaders(headers http.Header) http.Header
type HtmlRewriter ¶
type HtmlRewriter struct {
// contains filtered or unexported fields
}
func NewHtmlRewriter ¶
func NewHtmlRewriter(urlrw Rewriter, configs ...func(*Config)) *HtmlRewriter
func (*HtmlRewriter) Rewrite ¶
func (hrw *HtmlRewriter) Rewrite(p []byte) []byte
type PrefixRewriter ¶
type PrefixRewriter struct {
Prefix []byte
}
PrefixRewriter adds a prefix to its input if the prefix is not already present.
func (PrefixRewriter) Rewrite ¶
func (prw PrefixRewriter) Rewrite(p []byte) []byte
type RegexRewriter ¶
func (*RegexRewriter) Rewrite ¶
func (rerw *RegexRewriter) Rewrite(p []byte) []byte
type RewriteRule ¶
type RewriteRule int
const ( Keep RewriteRule = iota PrefixIfUrlRewrite Prefix UrlRewrite PrefixIfContentRewrite ContentLength Cookie )
type Rewriter ¶
Rewriter takes an input byte slice and returns an output slice of rewritten bytes. The lengths of the input and output will not necessarily match, and implementations *may* alter the input bytes.
type RewriterType ¶
type RewriterType int
RewriterType enumerates rewriters that operate on different types of content
const ( RwTypeUnknown RewriterType = iota RwTypeUrl RwTypeHeader RwTypeContent RwTypeCookie RwTypeHtml RwTypeJavascript RwTypeCss )
func (RewriterType) String ¶
func (rwt RewriterType) String() string
type UrlRewriter ¶
type UrlRewriter struct {
// contains filtered or unexported fields
}
func NewHostRelativeUrlRewriter ¶
func NewHostRelativeUrlRewriter(from string) *UrlRewriter
func NewRelativeUrlRewriter ¶
func NewRelativeUrlRewriter(from string) *UrlRewriter
NewRelativeUrlRewriter returns a UrlRewriter that turns URLs matching from's hostname into relative URLs.
func NewUrlRewriter ¶
func NewUrlRewriter(from, to string) *UrlRewriter
func (*UrlRewriter) Rewrite ¶
func (urw *UrlRewriter) Rewrite(p []byte) []byte
func (*UrlRewriter) RewriteString ¶
func (urw *UrlRewriter) RewriteString(p string) string
type WarcRecordRewriter ¶
type WarcRecordRewriter struct { Index cdxj.Writer Urlrw *UrlRewriter Cookierw *CookieRewriter // contains filtered or unexported fields }
func NewWarcRecordRewriter ¶
func NewWarcRecordRewriter(urlstr string, config ...func(o *Config)) *WarcRecordRewriter
NewWarcRecordRewriter allocates a Rewriter; config funcs are optional. NewWarcRecordRewriter(urlstr) will return a default rewriter that rewrites content URLs that match the domain of urlstr to relative URLs.
func (*WarcRecordRewriter) Rewrite ¶
func (wrr *WarcRecordRewriter) Rewrite(in []byte) (out []byte)
Rewrite exists to conform WarcRecordRewriter to the Rewriter interface, but doesn't handle malformed data very well. If you're confident that the supplied bytes represent a valid WARC record, this will work just fine. For better error reporting, use RewriteRecord.
func (*WarcRecordRewriter) RewriteRecord ¶
RewriteRecord takes a record and rewrites it according to rules defined on the Rewriter.