services

package
v0.0.0-...-1ec42cb Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Nov 26, 2024 License: MIT Imports: 46 Imported by: 0

Documentation

Overview

The report_manager takes care of running the issue reporters against the crawled pages. There are two different types of issue reporters. On one hand there are the PageIssueReporters, which are run against single pages as they are crawled. These checks can detect issues in the headers and body of the PageReport, such as wrong headers or missing tags. On the other hand there are the MultipageIssueReporters, which can run checks that affect multiple pages, such as duplicated titles.

Index

Constants

View Source
const (
	UserKey     contextKey = "user"
	SessionName string     = "SESSION_ID"
)
View Source
const (
	CrawlLimit      = 20000 // Max number of page reports that will be created
	LastCrawlsLimit = 5     // Max number returned by GetLastCrawls
	ClientTimeout   = 10    // HTTP client timeout in seconds.
)
View Source
const (
	Critical = iota + 1
	Alert
	Warning
)

Variables

View Source
var (
	// Error returned when the email is not a valid email.
	ErrInvalidEmail = errors.New("user service: invalid email")

	// Error returned when the password does not follow the password criteria.
	ErrInvalidPassword = errors.New("user service: invalid password")

	// Error returned when the user we are authenticating does not exist.
	ErrUnexistingUser = errors.New("user service: user does not exist")

	// Error returned when the password is incorrect for the user we are authenticating.
	ErrIncorrectPassword = errors.New("user service: incorrect password")

	// Error returned when trying to create a user that is already signed up.
	ErrUserExists = errors.New("user service: user already exists")
)

Functions

func NewFromHTTPResponse

func NewFromHTTPResponse(r *http.Response) (*models.PageReport, *html.Node, error)

Create a new PageReport from an http.Response.

func NewHTMLParser

func NewHTMLParser(u *url.URL, status int, headers *http.Header, body []byte, contentLength int64) (*models.PageReport, *html.Node, error)

Return a new PageReport.

Types

type ArchiveRemover

type ArchiveRemover interface {
	DeleteArchive(*models.Project)
}

type ArchiveService

type ArchiveService struct {
	ArchiveDir string
}

func NewArchiveService

func NewArchiveService(ad string) *ArchiveService

func (*ArchiveService) ArchiveExists

func (s *ArchiveService) ArchiveExists(p *models.Project) bool

ArchiveExists checks if a wacz file exists for the current project. It returns true if it exists, otherwise it returns false.

func (*ArchiveService) DeleteArchive

func (s *ArchiveService) DeleteArchive(p *models.Project)

DeleteArchive removes the wacz archive file for a given project. It checks if the file exists before removing it.

func (*ArchiveService) GetArchiveFilePath

func (s *ArchiveService) GetArchiveFilePath(p *models.Project) (string, error)

GetArchiveFilePath returns the project's wacz file path if it exists, otherwise it returns an error.

func (*ArchiveService) GetArchiveWriter

func (s *ArchiveService) GetArchiveWriter(p *models.Project) (*archiver.Writer, error)

GetArchiveWriter returns an archiver for the specified project. It returns an error if the archiver couldn't be created.

func (*ArchiveService) ReadArchiveRecord

func (s *ArchiveService) ReadArchiveRecord(p *models.Project, urlStr string) *models.ArchiveRecord

ReadArchiveRecord reads a URL's WACZ record from a project's archive.

type Archiver

type Archiver interface {
	AddRecord(*http.Response)
}

type Broker

type Broker struct {
	// contains filtered or unexported fields
}

PubSub broker service struct keeps a map of subscribers.

func NewPubSubBroker

func NewPubSubBroker() *Broker

func (*Broker) NewSubscriber

func (b *Broker) NewSubscriber(topic string, c func(*models.Message) error) *subscriber

Returns a new subscriber to the topic.

func (*Broker) Publish

func (b *Broker) Publish(topic string, m *models.Message)

Publishes a message to all subscribers of a topic.

func (*Broker) Unsubscribe

func (b *Broker) Unsubscribe(s *subscriber)

Unsubscribes a subscriber.

type CSVWriter

type CSVWriter struct {
	// contains filtered or unexported fields
}

func NewCSVWriter

func NewCSVWriter(f io.Writer) *CSVWriter

func (*CSVWriter) Write

func (cw *CSVWriter) Write(r *models.PageReport)

type Container

type Container struct {
	Config             *config.Config
	PubSubBroker       *Broker
	IssueService       *IssueService
	ReportService      *ReportService
	ReportManager      *ReportManager
	UserService        *UserService
	DashboardService   *DashboardService
	ProjectService     *ProjectService
	ProjectViewService *ProjectViewService
	ExportService      *Exporter
	CrawlerService     *CrawlerService
	Renderer           *Renderer
	CookieSession      *CookieSession
	ArchiveService     *ArchiveService
	// contains filtered or unexported fields
}

func NewContainer

func NewContainer(configFile string) *Container

func (*Container) InitArchiveService

func (c *Container) InitArchiveService()

func (*Container) InitConfig

func (c *Container) InitConfig(configFile string)

Load config file using the parameters in configFile.

func (*Container) InitCookieSession

func (c *Container) InitCookieSession()

Create cookie session handler

func (*Container) InitCrawlerService

func (c *Container) InitCrawlerService()

Create Crawler service.

func (*Container) InitDB

func (c *Container) InitDB()

Create the sql database connection.

func (*Container) InitDashboardService

func (c *Container) InitDashboardService()

Create the dashboard service.

func (*Container) InitExportService

func (c *Container) InitExportService()

Create the Export service.

func (*Container) InitIssueService

func (c *Container) InitIssueService()

Create the issue service.

func (*Container) InitProjectService

func (c *Container) InitProjectService()

Create the Project service.

func (*Container) InitProjectViewService

func (c *Container) InitProjectViewService()

Create the ProjectView service.

func (*Container) InitPubSubBroker

func (c *Container) InitPubSubBroker()

Create the PubSub broker.

func (*Container) InitRenderer

func (c *Container) InitRenderer()

Create html renderer.

func (*Container) InitReportManager

func (c *Container) InitReportManager()

Create the report manager and add all the available reporters.

func (*Container) InitReportService

func (c *Container) InitReportService()

Create the report service.

func (*Container) InitRepositories

func (c *Container) InitRepositories()

Create the data repositories.

func (*Container) InitUserService

func (c *Container) InitUserService()

Create the user service.

type CookieSession

type CookieSession struct {
	// contains filtered or unexported fields
}

func NewCookieSession

func NewCookieSession(r CookieSessionRepository) *CookieSession

func (*CookieSession) Auth

func (s *CookieSession) Auth(f func(w http.ResponseWriter, r *http.Request)) http.HandlerFunc

Auth is a middleware function that wraps the provided handler function and enforces authentication. It checks if the user is authenticated based on the session data.

func (*CookieSession) DestroySession

func (s *CookieSession) DestroySession(w http.ResponseWriter, r *http.Request) error

Destroys a user authentication session to deauthenticate a user.

func (*CookieSession) GetUser

func (s *CookieSession) GetUser(c context.Context) (*models.User, bool)

GetUser takes a context as input and retrieves the associated User value from it, if present.

func (*CookieSession) SetSession

func (s *CookieSession) SetSession(user *models.User, w http.ResponseWriter, r *http.Request) error

Sets a user authentication session with the user Id.

type CookieSessionRepository

type CookieSessionRepository interface {
	FindUserByEmail(email string) (*models.User, error)
}

type CrawlerHandler

type CrawlerHandler struct {
	// contains filtered or unexported fields
}

type CrawlerHandlerRepository

type CrawlerHandlerRepository interface {
	SavePageReport(*models.PageReport, int64) (*models.PageReport, error)
}

type CrawlerService

type CrawlerService struct {
	ArchiveService *ArchiveService
	// contains filtered or unexported fields
}

func (*CrawlerService) GetLastCrawls

func (s *CrawlerService) GetLastCrawls(p models.Project) []models.Crawl

Get a slice with 'LastCrawlsLimit' number of the crawls

func (*CrawlerService) StartCrawler

func (s *CrawlerService) StartCrawler(p models.Project, b models.BasicAuth) error

StartCrawler creates a new crawler and crawls the project's URL. It adds a new crawler for the project, it returns an error if there's one already running or if there's an error creating it. Finally the previous crawl's data is removed and the crawl is returned.

func (*CrawlerService) StopCrawler

func (s *CrawlerService) StopCrawler(p models.Project)

StopCrawler stops a crawler. If the crawler does not exist it will just return.

type CrawlerServiceRepository

type CrawlerServiceRepository interface {
	SaveCrawl(models.Project) (*models.Crawl, error)
	GetLastCrawl(p *models.Project) models.Crawl
	GetLastCrawls(models.Project, int) []models.Crawl
	DeleteCrawlData(c *models.Crawl)

	CountIssuesByPriority(int64, int) int
	UpdateCrawl(*models.Crawl)
}

type CrawlerServicesContainer

type CrawlerServicesContainer struct {
	Broker         *Broker
	ReportManager  *ReportManager
	CrawlerHandler *CrawlerHandler
	ArchiveService *ArchiveService
	Config         *config.CrawlerConfig
}

type DashboardService

type DashboardService struct {
	// contains filtered or unexported fields
}

func (*DashboardService) GetCanonicalCount

func (s *DashboardService) GetCanonicalCount(crawlId int64) *models.CanonicalCount

Returns a count of PageReports that are canonical or not.

func (*DashboardService) GetImageAltCount

func (s *DashboardService) GetImageAltCount(crawlId int64) *models.AltCount

Returns the count of images with and without the alt attribute.

func (*DashboardService) GetMediaCount

func (s *DashboardService) GetMediaCount(crawlId int64) *models.Chart

Returns a Chart with the PageReport's media type chart data.

func (*DashboardService) GetSchemeCount

func (s *DashboardService) GetSchemeCount(crawlId int64) *models.SchemeCount

Returns the count of PageReports with and without https.

func (*DashboardService) GetStatusCodeByDepth

func (s *DashboardService) GetStatusCodeByDepth(crawlId int64) []models.StatusCodeByDepth

GetStatusCodeByDepth returns a slice of StatusCodeByDepth models with the total number of pagereports by depth and status code.

func (*DashboardService) GetStatusCount

func (s *DashboardService) GetStatusCount(crawlId int64) *models.Chart

Returns a Chart with the PageReport's status code chart data.

type DashboardServiceRepository

type DashboardServiceRepository interface {
	CountByMediaType(int64) *models.CountList
	CountByStatusCode(int64) *models.CountList

	CountByCanonical(int64) int
	CountImagesAlt(int64) *models.AltCount
	CountScheme(int64) *models.SchemeCount
	CountByNonCanonical(int64) int
	GetStatusCodeByDepth(crawlId int64) []models.StatusCodeByDepth
}

type DeleteHook

type DeleteHook func(user *models.User)

type ExportRepository

type ExportRepository interface {
	ExportLinks(*models.Crawl) <-chan *models.ExportLink
	ExportExternalLinks(*models.Crawl) <-chan *models.ExportLink
	ExportImages(crawl *models.Crawl) <-chan *models.ExportImage
	ExportScripts(crawl *models.Crawl) <-chan *models.Script
	ExportStyles(crawl *models.Crawl) <-chan *models.Style
	ExportIframes(crawl *models.Crawl) <-chan *models.Iframe
	ExportAudios(crawl *models.Crawl) <-chan *models.Audio
	ExportVideos(crawl *models.Crawl) <-chan *models.ExportVideo
	ExportHreflangs(crawl *models.Crawl) <-chan *models.ExportHreflang
}

type Exporter

type Exporter struct {
	// contains filtered or unexported fields
}

func NewExporter

func NewExporter(r ExportRepository) *Exporter

func (*Exporter) ExportAudios

func (e *Exporter) ExportAudios(f io.Writer, crawl *models.Crawl)

Export all audio as a CSV file

func (e *Exporter) ExportExternalLinks(f io.Writer, crawl *models.Crawl)

Export external links as a CSV file

func (*Exporter) ExportHreflangs

func (e *Exporter) ExportHreflangs(f io.Writer, crawl *models.Crawl)

Export all hreflangs as a CSV file

func (*Exporter) ExportIframes

func (e *Exporter) ExportIframes(f io.Writer, crawl *models.Crawl)

Export all iframes as a CSV file

func (*Exporter) ExportImages

func (e *Exporter) ExportImages(f io.Writer, crawl *models.Crawl)

Export all images as a CSV file

func (e *Exporter) ExportLinks(f io.Writer, crawl *models.Crawl)

Export internal links as a CSV file

func (*Exporter) ExportScripts

func (e *Exporter) ExportScripts(f io.Writer, crawl *models.Crawl)

Export all scripts as a CSV file

func (*Exporter) ExportStyles

func (e *Exporter) ExportStyles(f io.Writer, crawl *models.Crawl)

Export all CSS styles as a CSV file

func (*Exporter) ExportVideos

func (e *Exporter) ExportVideos(f io.Writer, crawl *models.Crawl)

Export all video as a CSV file

type IssueService

type IssueService struct {
	// contains filtered or unexported fields
}

func NewIssueService

func NewIssueService(r IssueServiceRepository) *IssueService

func (*IssueService) GetIssuesCount

func (s *IssueService) GetIssuesCount(crawlID int64) *models.IssueCount

GetIssuesCount returns an IssueCount with the number of issues by type.

func (*IssueService) GetPaginatedReportsByIssue

func (s *IssueService) GetPaginatedReportsByIssue(crawlId int64, currentPage int, issueId string) (models.PaginatorView, error)

Returns a PaginatorView with the corresponding page reports.

type IssueServiceRepository

type IssueServiceRepository interface {
	GetNumberOfPagesForIssues(int64, string) int
	FindPageReportIssues(int64, int, string) []models.PageReport
	FindIssuesByTypeAndPriority(int64, int) []models.IssueGroup
}

type Parser

type Parser struct {
	ParsedURL *url.URL
	Headers   *http.Header
	// contains filtered or unexported fields
}

type ProjectService

type ProjectService struct {
	// contains filtered or unexported fields
}

func (*ProjectService) DeleteAllUserProjects

func (s *ProjectService) DeleteAllUserProjects(user *models.User)

Delete all user projects and crawl data.

func (*ProjectService) DeleteProject

func (s *ProjectService) DeleteProject(p *models.Project)

Delete a project and its related data.

func (*ProjectService) FindProject

func (s *ProjectService) FindProject(id, uid int) (models.Project, error)

Return a project specified by id and user. It populates the Host field from the project's URL.

func (*ProjectService) SaveProject

func (s *ProjectService) SaveProject(project *models.Project, userId int) error

SaveProject stores a new project. It trims the spaces in the project's URL field and checks the scheme to make sure it is http or https.

func (*ProjectService) UpdateProject

func (s *ProjectService) UpdateProject(p *models.Project) error

Update project details.

type ProjectServiceRepository

type ProjectServiceRepository interface {
	SaveProject(*models.Project, int)
	DeleteProject(*models.Project)
	DisableProject(*models.Project)
	UpdateProject(p *models.Project) error
	FindProjectById(id int, uid int) (models.Project, error)
	FindProjectsByUser(userId int) []models.Project

	DeleteProjectCrawls(*models.Project)
}

type ProjectViewService

type ProjectViewService struct {
	// contains filtered or unexported fields
}

func (*ProjectViewService) GetProjectView

func (s *ProjectViewService) GetProjectView(id, uid int) (*models.ProjectView, error)

GetProjectView returns a new ProjectView with the specified project and the project's last crawl.

func (*ProjectViewService) GetProjectViews

func (s *ProjectViewService) GetProjectViews(uid int) []models.ProjectView

GetProjectViews returns a slice of ProjectViews with all of the user's projects and its last crawls.

func (*ProjectViewService) UserIsCrawling

func (s *ProjectViewService) UserIsCrawling(uid int) bool

UserIsCrawling returns true if the user has any project that is currently crawling. Otherwise it returns false.

func (*ProjectViewService) UserIsProcessingProjects

func (s *ProjectViewService) UserIsProcessingProjects(uid int) bool

Returns true if the user is crawling or deleting projects. Otherwise it returns false.

type ProjectViewServiceRepository

type ProjectViewServiceRepository interface {
	FindProjectsByUser(int) []models.Project
	FindProjectById(id int, uid int) (models.Project, error)

	GetLastCrawl(*models.Project) models.Crawl
}

type Renderer

type Renderer struct {
	// contains filtered or unexported fields
}

func NewRenderer

func NewRenderer(config *RendererConfig) (*Renderer, error)

NewRenderer will load a translation file and return a new template renderer.

func (*Renderer) RenderTemplate

func (r *Renderer) RenderTemplate(w io.Writer, t string, v interface{})

Render a template with the specified PageView data.

func (*Renderer) ToKByte

func (r *Renderer) ToKByte(b int64) string

Returns an int formatted as KB.

type RendererConfig

type RendererConfig struct {
	TemplatesFolder  string
	TranslationsFile string
}

type ReportManager

type ReportManager struct {
	// contains filtered or unexported fields
}

func NewReportManager

func NewReportManager(r ReportManagerRepository) *ReportManager

Create a new ReportManager with no issue reporters.

func (*ReportManager) AddMultipageReporter

func (rm *ReportManager) AddMultipageReporter(reporter models.MultipageCallback)

Add a multi-page issue reporter to the ReportManager. Multi-page reporters are used to detect issues that affect multiple pages. It will be used when creating the multi page issues once all the pages have been crawled.

func (*ReportManager) AddPageReporter

func (rm *ReportManager) AddPageReporter(reporter *models.PageIssueReporter)

Add a page issue reporter to the ReportManager. It will be used to create issues on each crawled page.

func (*ReportManager) CreateMultipageIssues

func (r *ReportManager) CreateMultipageIssues(crawl *models.Crawl)

CreateMultipageIssues uses the Reporters to create and save issues found in a crawl.

func (*ReportManager) CreatePageIssues

func (r *ReportManager) CreatePageIssues(p *models.PageReport, htmlNode *html.Node, header *http.Header, crawl *models.Crawl)

CreatePageIssues loops the page reporters calling the callback function and creating the issues found in the PageReport.

type ReportManagerRepository

type ReportManagerRepository interface {
	SaveIssues(<-chan *models.Issue)
}

type ReportService

type ReportService struct {
	// contains filtered or unexported fields
}

func NewReportService

func NewReportService(r ReportServiceRepository) *ReportService

func (*ReportService) GetPageReporsByIssueType

func (s *ReportService) GetPageReporsByIssueType(crawlId int64, eid string) <-chan *models.PageReport

Return channel of PageReports by error type.

func (*ReportService) GetPageReport

func (s *ReportService) GetPageReport(rid int, crawlId int64, tab string, page int) *models.PageReportView

Returns a PageReportView by PageReport Id and Crawl Id. It also loads the data specified in the tab parameter.

func (*ReportService) GetPaginatedReports

func (s *ReportService) GetPaginatedReports(crawlId int64, currentPage int, term string) (models.PaginatorView, error)

Returns a PaginatorView with the corresponding page reports.

func (*ReportService) GetSitemapPageReports

func (s *ReportService) GetSitemapPageReports(crawlId int64) <-chan *models.PageReport

Returns a channel of crawlable PageReports that can be included in a sitemap.

type ReportServiceRepository

type ReportServiceRepository interface {
	FindPageReportById(int) models.PageReport
	FindErrorTypesByPage(int, int64) []string
	FindInLinks(string, int64, int) []models.InternalLink
	FindPageReportsRedirectingToURL(string, int64, int) []models.PageReport
	FindAllPageReportsByCrawlIdAndErrorType(int64, string) <-chan *models.PageReport
	FindAllPageReportsByCrawlId(int64) <-chan *models.PageReport
	FindSitemapPageReports(int64) <-chan *models.PageReport
	FindLinks(pageReport *models.PageReport, cid int64, page int) []models.InternalLink
	FindExternalLinks(pageReport *models.PageReport, cid int64, p int) []models.Link
	FindPaginatedPageReports(cid int64, p int, term string) []models.PageReport

	FindPageReportStyles(pageReport *models.PageReport, cid int64) []string
	FindPageReportScripts(pageReport *models.PageReport, cid int64) []string
	FindPageReportVideos(pageReport *models.PageReport, cid int64) []models.Video
	FindPageReportAudios(pageReport *models.PageReport, cid int64) []string
	FindPageReportIframes(pageReport *models.PageReport, cid int64) []string
	FindPageReportImages(pageReport *models.PageReport, cid int64) []models.Image
	FindPageReportHreflangs(pageReport *models.PageReport, cid int64) []models.Hreflang

	GetNumberOfPagesForPageReport(cid int64, term string) int
	GetNumberOfPagesForInlinks(*models.PageReport, int64) int
	GetNumberOfPagesForRedirecting(*models.PageReport, int64) int
	GetNumberOfPagesForLinks(*models.PageReport, int64) int
	GetNumberOfPagesForExternalLinks(pageReport *models.PageReport, cid int64) int
}

type UserService

type UserService struct {
	// contains filtered or unexported fields
}

func NewUserService

func NewUserService(r UserServiceRepository) *UserService

func (*UserService) AddDeleteHook

func (s *UserService) AddDeleteHook(hook DeleteHook)

AddDeleteHook adds a new hook function that will be called when the user is deleted. This is used for user data clean up.

func (*UserService) DeleteUser

func (s *UserService) DeleteUser(user *models.User)

Delete a User and all its associated projects and crawl data. Deleting the user data may take a while, and it's deleted in a goroutine. To avoid blocking the execution the user is first disabled, and once the data has been deleted, the user is finally deleted.

func (*UserService) SignIn

func (s *UserService) SignIn(email, password string) (*models.User, error)

SignIn validates the provided email and password combination for user authentication. It compares the provided password with the user's hashed password. If the passwords do not match, it returns an error.

func (*UserService) SignUp

func (s *UserService) SignUp(email, password string) (*models.User, error)

SignUp validates the user email and password; if they are both valid, it creates a password hash before storing it. If successful, it returns the new user, otherwise an error is returned.

func (*UserService) UpdatePassword

func (s *UserService) UpdatePassword(user *models.User, currentPassword, newPassword string) error

UpdatePassword updates the password for the user with the given email. It validates the new password and generates a hashed password using bcrypt before storing it.

type UserServiceRepository

type UserServiceRepository interface {
	UserSignup(email, hashedPassword string) (*models.User, error)
	FindUserByEmail(email string) (*models.User, error)
	UserUpdatePassword(email, hashedPassword string) error
	DeleteUser(user *models.User) error
	DisableUser(user *models.User) error
}

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL