crawler

package
v0.0.0-...-0486d4a Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Mar 30, 2020 License: MIT Imports: 17 Imported by: 0

Documentation

Index

Constants

View Source
const IndexPageTemplate = `` /* 325-byte string literal not displayed */
View Source
const LandingPageTemplate = `` /* 448-byte string literal not displayed */

Variables

This section is empty.

Functions

This section is empty.

Types

type AssetFilter

type AssetFilter struct {
	CSSPaths        []string
	JavaScriptPaths []string
}

func (AssetFilter) Process

func (f AssetFilter) Process(entry blog.Entry, root *html.Node) error

type CategoryFilter

type CategoryFilter struct{}

CategoryFilter presents a filter to add categories into the <head> tag. The categories are provided by <meta> tags like the following:

<meta property="hatena:category" content="Games" />
<meta property="hatena:category" content="Hobby" />

func (CategoryFilter) Process

func (f CategoryFilter) Process(entry blog.Entry, root *html.Node) error

type CodeFilter

type CodeFilter struct{}

CodeFilter presents a filter to convert styled code in the <pre> tags to plain text.

func (CodeFilter) Process

func (f CodeFilter) Process(entry blog.Entry, root *html.Node) error

type Crawler

type Crawler struct {
	BlogClient *blog.Client
	DataStore  *DataStore
	Path       *Path
	CSSPath    string

	HatenaID string
	BlogID   string

	Filters []Filter
}

func (Crawler) RenderArchiveIndex

func (c Crawler) RenderArchiveIndex(w io.Writer, year int, entries []blog.Entry) error

func (Crawler) RenderCategoryIndex

func (c Crawler) RenderCategoryIndex(w io.Writer, category string, entries []blog.Entry) error

func (Crawler) RenderLanding

func (c Crawler) RenderLanding(w io.Writer, title string, categories []string, years []int) error

func (Crawler) Start

func (c Crawler) Start(ctx context.Context) error

type DataStore

type DataStore struct {
	Directory string
}

func (DataStore) Writer

func (d DataStore) Writer(path string) (io.WriteCloser, error)

type DateTimeFilter

type DateTimeFilter struct{}

DateTimeFilter presents a filter to add the timestamps at which the entry was edited, published, and updated as a meta tag.

func (DateTimeFilter) Process

func (f DateTimeFilter) Process(entry blog.Entry, root *html.Node) error

type Downloader

type Downloader struct {
	HTTPClient *http.Client
}

func (*Downloader) Download

func (c *Downloader) Download(ctx context.Context, url string) (io.ReadCloser, error)

type DraftFilter

type DraftFilter struct{}

DraftFilter presents a filter to add draft information as a meta tag.

func (DraftFilter) Process

func (f DraftFilter) Process(entry blog.Entry, root *html.Node) error

type EncodingFilter

type EncodingFilter struct{}

EncodingFilter presents a filter to provide a charset attribute (UTF-8) via a <meta> tag.

func (EncodingFilter) Process

func (f EncodingFilter) Process(entry blog.Entry, root *html.Node) error

type Filter

type Filter interface {
	Process(entry blog.Entry, root *html.Node) error
}

type HatenaKeywordFilter

type HatenaKeywordFilter struct{}

HatenaKeywordFilter presents a filter to remove Hatena keyword links from the entry's HTML.

func (HatenaKeywordFilter) Process

func (f HatenaKeywordFilter) Process(entry blog.Entry, root *html.Node) error

type ImagePathFilter

type ImagePathFilter struct{}

ImagePathFilter presents a filter to rewrite an image's URL to a relative path consisting of only its base name.

It converts a src attribute in the <img> tag:

<img src="https://my-cdn.example.com/2020/03/01/foobar.png" />

to:

<img src="foobar.png" />

func (ImagePathFilter) Process

func (f ImagePathFilter) Process(entry blog.Entry, root *html.Node) error

type ImageURLExtractor

type ImageURLExtractor struct{}

func (*ImageURLExtractor) ExtractImageURLs

func (e *ImageURLExtractor) ExtractImageURLs(root *html.Node) []string

type IndexPageValue

type IndexPageValue struct {
	Title   string
	CSSPath string
	Entries []struct {
		Title string
		Link  string
	}
}

type LandingValue

type LandingValue struct {
	Title      string
	CSSPath    string
	Categories []struct {
		Name string
		Path string
	}
	Archives []struct {
		Name string
		Path string
	}
}

type LinkFilter

type LinkFilter struct{}

func (LinkFilter) Process

func (f LinkFilter) Process(entry blog.Entry, root *html.Node) error

type Path

type Path struct {
	URLPrefix string
}

func (Path) ArchiveFilePath

func (p Path) ArchiveFilePath(year int) string

func (Path) ArchiveUrlPath

func (p Path) ArchiveUrlPath(year int) string

func (Path) CategoryFilePath

func (p Path) CategoryFilePath(name string) string

func (Path) CategoryUrlPath

func (p Path) CategoryUrlPath(name string) string

func (Path) EntryFilePath

func (p Path) EntryFilePath(entry blog.Entry) string

func (Path) EntryURLPath

func (p Path) EntryURLPath(entry blog.Entry) string

func (Path) ImageFilePath

func (p Path) ImageFilePath(entry blog.Entry, name string) string

func (Path) ImageURLPath

func (p Path) ImageURLPath(entry blog.Entry, name string) string

func (Path) LandingFilePath

func (p Path) LandingFilePath() string

func (Path) LandingURLPath

func (p Path) LandingURLPath() string

type TitleFilter

type TitleFilter struct{}

TitleFilter presents a filter to add the entry's title as a <title> tag in <head> and as an <h1> tag in the body.

func (TitleFilter) Process

func (f TitleFilter) Process(entry blog.Entry, root *html.Node) error

type TransformFunc

type TransformFunc func(node *html.Node) (*html.Node, error)

type Transformer

type Transformer struct {
	Func TransformFunc
}

func (Transformer) WalkTransform

func (w Transformer) WalkTransform(root *html.Node) error

type WalkFunc

type WalkFunc func(node *html.Node) error

type Walker

type Walker struct {
	Func WalkFunc
}

func (Walker) Walk

func (w Walker) Walk(root *html.Node) error

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL