Documentation ¶
Index ¶
- Constants
- Variables
- func CollectCrawlLog(crawlLog *logV1.CrawlLog)
- func CollectPageLog(pageLog *logV1.PageLog)
- type CrawlLog
- type ErrorUDT
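- type LogServer
- func New(session gocqlx.Session, readPoolSize int, writePoolSize int, readConsistency gocql.Consistency) *LogServer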
- func (l *LogServer) Close()
- func (l *LogServer) ListCrawlLogs(req *logV1.CrawlLogListRequest, stream logV1.Log_ListCrawlLogsServer) error
- func (l *LogServer) ListPageLogs(req *logV1.PageLogListRequest, stream logV1.Log_ListPageLogsServer) error
- func (l *LogServer) WriteCrawlLog(stream logV1.Log_WriteCrawlLogServer) error
- func (l *LogServer) WritePageLog(stream logV1.Log_WritePageLogServer) error
- type PageLog
- type Pool
- type Resource
Constants ¶
View Source
const (
	TableCrawlLog = "crawl_log"
	TablePageLog  = "page_log"
	TableResource = "resource"
)
View Source
const Namespace = "veidemann"
Variables ¶
View Source
var (
	UriRequests = promauto.NewCounter(prometheus.CounterOpts{
		Namespace: Namespace,
		Subsystem: "uri",
		Name:      "requests_total",
		Help:      "The total number of uris requested",
	})
	UriRequestsFailed = promauto.NewCounter(prometheus.CounterOpts{
		Namespace: Namespace,
		Subsystem: "uri",
		Name:      "requests_failed_total",
		Help:      "The total number of failed uri requests",
	})
	UriStatusCode = promauto.NewCounterVec(prometheus.CounterOpts{
		Namespace: Namespace,
		Subsystem: "uri",
		Name:      "statuscode_total",
		Help:      "The total number of responses for each status code",
	}, []string{"code"})
	UriRecordType = promauto.NewCounterVec(prometheus.CounterOpts{
		Namespace: Namespace,
		Subsystem: "uri",
		Name:      "record_type_total",
		Help:      "The total number of responses for each record type",
	}, []string{"type"})
	UriMimeType = promauto.NewCounterVec(prometheus.CounterOpts{
		Namespace: Namespace,
		Subsystem: "uri",
		Name:      "mime_type_total",
		Help:      "The total number of responses for each mime type",
	}, []string{"mime"})
	UriFetchTimeSeconds = promauto.NewSummary(prometheus.SummaryOpts{
		Namespace: Namespace,
		Subsystem: "uri",
		Name:      "fetch_time_seconds",
		Help:      "The time used for fetching the uri in seconds",
	})
	UriSizeBytes = promauto.NewSummary(prometheus.SummaryOpts{
		Namespace: Namespace,
		Subsystem: "uri",
		Name:      "size_bytes",
		Help:      "Fetched content size in bytes",
	})
	PageRequests = promauto.NewCounter(prometheus.CounterOpts{
		Namespace: Namespace,
		Subsystem: "page",
		Name:      "requests_total",
		Help:      "The total number of pages requested"})
	PageOutlinks = promauto.NewSummary(prometheus.SummaryOpts{
		Namespace: Namespace,
		Subsystem: "page",
		Name:      "outlinks_total",
		Help:      "Outlinks per page",
	})
	PageResources = promauto.NewSummary(prometheus.SummaryOpts{
		Namespace: Namespace,
		Subsystem: "page",
		Name:      "resources_total",
		Help:      "Resources loaded per page",
	})
	PageResourcesCacheHit = promauto.NewSummary(prometheus.SummaryOpts{
		Namespace: Namespace,
		Subsystem: "page",
		Name:      "resources_cache_hit_total",
		Help:      "Resources loaded from cache per page",
	})
	PageResourcesCacheMiss = promauto.NewSummary(prometheus.SummaryOpts{
		Namespace: Namespace,
		Subsystem: "page",
		Name:      "resources_cache_miss_total",
		Help:      "Resources loaded from origin server per page",
	})
	PageLinks = promauto.NewCounterVec(prometheus.CounterOpts{
		Namespace: Namespace,
		Subsystem: "page",
		Name:      "links_total",
		Help:      "Total number of outlinks and resources",
	}, []string{"type"})
)
Functions ¶
func CollectCrawlLog ¶
func CollectCrawlLog(crawlLog *logV1.CrawlLog)
func CollectPageLog ¶
func CollectPageLog(pageLog *logV1.PageLog)
Types ¶
type CrawlLog ¶
type CrawlLog struct {
	WarcId              string    `cql:"warc_id"`
	TimeStamp           time.Time `cql:"time_stamp"`
	StatusCode          int32     `cql:"status_code"`
	Size                int64     `cql:"size"`
	RequestedUri        string    `cql:"requested_uri"`
	ResponseUri         string    `cql:"response_uri"`
	DiscoveryPath       string    `cql:"discovery_path"`
	Referrer            string    `cql:"referrer"`
	ContentType         string    `cql:"content_type"`
	FetchTimeStamp      time.Time `cql:"fetch_time_stamp"`
	FetchTimeMs         int64     `cql:"fetch_time_ms"`
	BlockDigest         string    `cql:"block_digest"`
	PayloadDigest       string    `cql:"payload_digest"`
	StorageRef          string    `cql:"storage_ref"`
	RecordType          string    `cql:"record_type"`
	WarcRefersTo        string    `cql:"warc_refers_to"`
	IpAddress           string    `cql:"ip_address"`
	ExecutionId         string    `cql:"execution_id"`
	Retries             int32     `cql:"retries"`
	Error               *ErrorUDT `cql:"error"`
	JobExecutionId      string    `cql:"job_execution_id"`
	CollectionFinalName string    `cql:"collection_final_name"`
	Method              string    `cql:"method"`
}
type LogServer ¶
type LogServer struct {
	logV1.UnimplementedLogServer
	// contains filtered or unexported fields
}
func New ¶
func New(session gocqlx.Session, readPoolSize int, writePoolSize int, readConsistency gocql.Consistency) *LogServer
func (*LogServer) ListCrawlLogs ¶
func (l *LogServer) ListCrawlLogs(req *logV1.CrawlLogListRequest, stream logV1.Log_ListCrawlLogsServer) error
func (*LogServer) ListPageLogs ¶
func (l *LogServer) ListPageLogs(req *logV1.PageLogListRequest, stream logV1.Log_ListPageLogsServer) error
func (*LogServer) WriteCrawlLog ¶
func (l *LogServer) WriteCrawlLog(stream logV1.Log_WriteCrawlLogServer) error
func (*LogServer) WritePageLog ¶
func (l *LogServer) WritePageLog(stream logV1.Log_WritePageLogServer) error
type PageLog ¶
type PageLog struct {
	WarcId              string   `cql:"warc_id"`
	Uri                 string   `cql:"uri"`
	ExecutionId         string   `cql:"execution_id"`
	Referrer            string   `cql:"referrer"`
	JobExecutionId      string   `cql:"job_execution_id"`
	CollectionFinalName string   `cql:"collection_final_name"`
	Method              string   `cql:"method"`
	Outlink             []string `cql:"outlink"`
}
type Pool ¶
type Pool struct {
// contains filtered or unexported fields
}
Pool holds instances of *gocqlx.Queryx.
type Resource ¶ added in v0.3.0
type Resource struct {
	Uri           string    `cql:"uri" json:"uri,omitempty"`
	FromCache     bool      `cql:"from_cache" json:"from_cache,omitempty"`
	Renderable    bool      `cql:"renderable" json:"renderable,omitempty"`
	ResourceType  string    `cql:"resource_type" json:"resource_type,omitempty"`
	ContentType   string    `cql:"content_type" json:"content_type,omitempty"`
	StatusCode    int32     `cql:"status_code" json:"status_code,omitempty"`
	DiscoveryPath string    `cql:"discovery_path" json:"discovery_path,omitempty"`
	WarcId        string    `cql:"warc_id" json:"warc_id,omitempty"`
	PageId        string    `cql:"page_id" json:"page_id,omitempty"`
	Referrer      string    `cql:"referrer" json:"referrer,omitempty"`
	Error         *ErrorUDT `cql:"error" json:"error,omitempty"`
	Method        string    `cql:"method" json:"method,omitempty"`
}
Click to show internal directories.
Click to hide internal directories.