warc

package
v3.0.0-beta.4 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Oct 31, 2024 License: Apache-2.0 Imports: 8 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

func Checksum

func Checksum(wr gowarc.WarcRecord) string

func Date

func Date(wr gowarc.WarcRecord) (time.Time, error)

func FileName

func FileName(wr gowarc.WarcRecord) string

func Hostname

func Hostname(wr gowarc.WarcRecord) string

func IpAddress

func IpAddress(wr gowarc.WarcRecord) string

func MimeType

func MimeType(wr gowarc.WarcRecord) string

func NewIterator

func NewIterator(ctx context.Context, reader RecordIterator, filter *filter.RecordFilter, nth, limit int) <-chan Record

func RecordId

func RecordId(wr gowarc.WarcRecord) string

func StatusCode

func StatusCode(wr gowarc.WarcRecord) int

func Url

func Url(wr gowarc.WarcRecord) string

Types

type Iterator

type Iterator struct {
	// WarcFileReader is the source the WARC records are read from.
	WarcFileReader RecordIterator

	// Nth selects only the Nth record among those that pass the filter;
	// 0 returns all records.
	Nth int

	// Limit is the maximum number of records returned, counted after the
	// filter has been applied; 0 returns all records.
	Limit int

	// Filter restricts the output to records that match it.
	Filter *filter.RecordFilter

	// Records is the send-only channel that records are delivered on.
	Records chan<- Record
}

Iterator is a WARC record iterator.

type Metadata

// Metadata is a flat set of descriptive fields (URL, date, record ID,
// checksum, size, offset, ...) tagged for JSON encoding. Keys are camelCase
// except "filename", which is all lower-case.
//
// NOTE(review): every field is tagged omitempty, but encoding/json never
// treats a struct value as empty, so a zero Date is still emitted when this
// type is marshalled with encoding/json.
//
// NOTE(review): the fields Url through StatusCode share their names with this
// package's accessor functions; presumably they hold those functions'
// results for one record — confirm against the code that fills this struct.
type Metadata struct {
	Url        string    `json:"url,omitempty"`
	Date       time.Time `json:"date,omitempty"`
	IpAddress  string    `json:"ipAddress,omitempty"`
	FileName   string    `json:"filename,omitempty"`
	Hostname   string    `json:"hostname,omitempty"`
	RecordId   string    `json:"recordId,omitempty"`
	Checksum   string    `json:"checksum,omitempty"`
	MimeType   string    `json:"mimeType,omitempty"`
	StatusCode int       `json:"statusCode,omitempty"`
	Size       int64     `json:"size,omitempty"`
	Type       string    `json:"type,omitempty"`
	Offset     int64     `json:"offset,omitempty"`
}

type Record

// Record bundles a gowarc.WarcRecord with an offset, a size, a validation
// result and an error value.
type Record struct {
	// Offset is presumably the position of the record within its WARC
	// file — TODO confirm against the RecordIterator implementation.
	Offset     int64
	// Size is presumably the size of the record in bytes — TODO confirm.
	Size       int64
	// Err carries an error associated with this record, if any.
	Err        error
	// WarcRecord is the underlying record.
	WarcRecord gowarc.WarcRecord
	// Validation is the gowarc validation result associated with the record.
	Validation *gowarc.Validation
}

Record represents a WARC record with additional metadata

func (Record) Close

func (r Record) Close() error

type RecordError

// RecordError pairs an error with a Record. The type declares an
// Error() string method, so a RecordError value can itself be used as an error.
type RecordError struct {
	// Record is the record paired with Err.
	Record Record
	// Err is the underlying error.
	Err    error
}

func Error

func Error(record Record, err error) RecordError

func (RecordError) Error

func (e RecordError) Error() string

type RecordIterator

// RecordIterator is the record source accepted by NewIterator and held in
// Iterator.WarcFileReader.
type RecordIterator interface {
	// Next returns a WARC record, an int64, the record's validation result
	// and an error.
	// NOTE(review): the int64 is presumably the record's offset in the
	// file, and the end-of-input signal is not visible here — confirm both
	// against the implementation.
	Next() (gowarc.WarcRecord, int64, *gowarc.Validation, error)
}

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL