Documentation ¶
Overview ¶
The report_manager takes care of running the issue reporters against the crawled pages. There are two different types of issue reporters. On one hand there are the PageIssueReporters, which are run against single pages as they are crawled. These checks can detect issues in the headers and body of the PageReport, such as wrong headers or missing tags. On the other hand there are the MultipageIssueReporters, which can run checks that affect multiple pages, such as duplicated titles.
Index ¶
- Constants
- Variables
- func NewFromHTTPResponse(r *http.Response) (*models.PageReport, *html.Node, error)
- func NewHTMLParser(u *url.URL, status int, headers *http.Header, body []byte, contentLength int64) (*models.PageReport, *html.Node, error)
- type ArchiveRemover
- type ArchiveService
- func (s *ArchiveService) ArchiveExists(p *models.Project) bool
- func (s *ArchiveService) DeleteArchive(p *models.Project)
- func (s *ArchiveService) GetArchiveFilePath(p *models.Project) (string, error)
- func (s *ArchiveService) GetArchiveWriter(p *models.Project) (*archiver.Writer, error)
- func (s *ArchiveService) ReadArchiveRecord(p *models.Project, urlStr string) *models.ArchiveRecord
- type Archiver
- type Broker
- type CSVWriter
- type Container
- func (c *Container) InitArchiveService()
- func (c *Container) InitConfig(configFile string)
- func (c *Container) InitCookieSession()
- func (c *Container) InitCrawlerService()
- func (c *Container) InitDB()
- func (c *Container) InitDashboardService()
- func (c *Container) InitExportService()
- func (c *Container) InitIssueService()
- func (c *Container) InitProjectService()
- func (c *Container) InitProjectViewService()
- func (c *Container) InitPubSubBroker()
- func (c *Container) InitRenderer()
- func (c *Container) InitReportManager()
- func (c *Container) InitReportService()
- func (c *Container) InitRepositories()
- func (c *Container) InitUserService()
- type CookieSession
- func (s *CookieSession) Auth(f func(w http.ResponseWriter, r *http.Request)) http.HandlerFunc
- func (s *CookieSession) DestroySession(w http.ResponseWriter, r *http.Request) error
- func (s *CookieSession) GetUser(c context.Context) (*models.User, bool)
- func (s *CookieSession) SetSession(user *models.User, w http.ResponseWriter, r *http.Request) error
- type CookieSessionRepository
- type CrawlerHandler
- type CrawlerHandlerRepository
- type CrawlerService
- type CrawlerServiceRepository
- type CrawlerServicesContainer
- type DashboardService
- func (s *DashboardService) GetCanonicalCount(crawlId int64) *models.CanonicalCount
- func (s *DashboardService) GetImageAltCount(crawlId int64) *models.AltCount
- func (s *DashboardService) GetMediaCount(crawlId int64) *models.Chart
- func (s *DashboardService) GetSchemeCount(crawlId int64) *models.SchemeCount
- func (s *DashboardService) GetStatusCodeByDepth(crawlId int64) []models.StatusCodeByDepth
- func (s *DashboardService) GetStatusCount(crawlId int64) *models.Chart
- type DashboardServiceRepository
- type DeleteHook
- type ExportRepository
- type Exporter
- func (e *Exporter) ExportAudios(f io.Writer, crawl *models.Crawl)
- func (e *Exporter) ExportExternalLinks(f io.Writer, crawl *models.Crawl)
- func (e *Exporter) ExportHreflangs(f io.Writer, crawl *models.Crawl)
- func (e *Exporter) ExportIframes(f io.Writer, crawl *models.Crawl)
- func (e *Exporter) ExportImages(f io.Writer, crawl *models.Crawl)
- func (e *Exporter) ExportLinks(f io.Writer, crawl *models.Crawl)
- func (e *Exporter) ExportScripts(f io.Writer, crawl *models.Crawl)
- func (e *Exporter) ExportStyles(f io.Writer, crawl *models.Crawl)
- func (e *Exporter) ExportVideos(f io.Writer, crawl *models.Crawl)
- type IssueService
- type IssueServiceRepository
- type Parser
- type ProjectService
- func (s *ProjectService) DeleteAllUserProjects(user *models.User)
- func (s *ProjectService) DeleteProject(p *models.Project)
- func (s *ProjectService) FindProject(id, uid int) (models.Project, error)
- func (s *ProjectService) SaveProject(project *models.Project, userId int) error
- func (s *ProjectService) UpdateProject(p *models.Project) error
- type ProjectServiceRepository
- type ProjectViewService
- type ProjectViewServiceRepository
- type Renderer
- type RendererConfig
- type ReportManager
- func (rm *ReportManager) AddMultipageReporter(reporter models.MultipageCallback)
- func (rm *ReportManager) AddPageReporter(reporter *models.PageIssueReporter)
- func (r *ReportManager) CreateMultipageIssues(crawl *models.Crawl)
- func (r *ReportManager) CreatePageIssues(p *models.PageReport, htmlNode *html.Node, header *http.Header, ...)
- type ReportManagerRepository
- type ReportService
- func (s *ReportService) GetPageReporsByIssueType(crawlId int64, eid string) <-chan *models.PageReport
- func (s *ReportService) GetPageReport(rid int, crawlId int64, tab string, page int) *models.PageReportView
- func (s *ReportService) GetPaginatedReports(crawlId int64, currentPage int, term string) (models.PaginatorView, error)
- func (s *ReportService) GetSitemapPageReports(crawlId int64) <-chan *models.PageReport
- type ReportServiceRepository
- type UserService
- func (s *UserService) AddDeleteHook(hook DeleteHook)
- func (s *UserService) DeleteUser(user *models.User)
- func (s *UserService) SignIn(email, password string) (*models.User, error)
- func (s *UserService) SignUp(email, password string) (*models.User, error)
- func (s *UserService) UpdatePassword(user *models.User, currentPassword, newPassword string) error
- type UserServiceRepository
Constants ¶
const ( UserKey contextKey = "user" SessionName string = "SESSION_ID" )
const ( CrawlLimit = 20000 // Max number of page reports that will be created LastCrawlsLimit = 5 // Max number returned by GetLastCrawls ClientTimeout = 10 // HTTP client timeout in seconds. )
const ( Critical = iota + 1 Alert Warning )
Variables ¶
var ( // Error returned when the email is not a valid email. ErrInvalidEmail = errors.New("user service: invalid email") // Error returned when the password does not follow the password criteria. ErrInvalidPassword = errors.New("user service: invalid password") // Error returned when the user we are authenticating does not exist. ErrUnexistingUser = errors.New("user service: user does not exist") // Error returned when the password is incorrect for the user we are authenticating. ErrIncorrectPassword = errors.New("user service: incorrect password") // Error returned when trying to create a user that is already signed up. ErrUserExists = errors.New("user service: user already exists") )
Functions ¶
func NewFromHTTPResponse ¶
Create a new PageReport from an http.Response.
Types ¶
type ArchiveRemover ¶
type ArchiveService ¶
type ArchiveService struct {
ArchiveDir string
}
func NewArchiveService ¶
func NewArchiveService(ad string) *ArchiveService
func (*ArchiveService) ArchiveExists ¶
func (s *ArchiveService) ArchiveExists(p *models.Project) bool
ArchiveExists checks if a wacz file exists for the current project. It returns true if it exists, otherwise it returns false.
func (*ArchiveService) DeleteArchive ¶
func (s *ArchiveService) DeleteArchive(p *models.Project)
DeleteArchive removes the wacz archive file for a given project. It checks if the file exists before removing it.
func (*ArchiveService) GetArchiveFilePath ¶
func (s *ArchiveService) GetArchiveFilePath(p *models.Project) (string, error)
GetArchiveFilePath returns the project's wacz file path if it exists, otherwise it returns an error.
func (*ArchiveService) GetArchiveWriter ¶
GetArchiveWriter returns an archiver for the specified project. It returns an error if the archiver couldn't be created.
func (*ArchiveService) ReadArchiveRecord ¶
func (s *ArchiveService) ReadArchiveRecord(p *models.Project, urlStr string) *models.ArchiveRecord
ReadArchiveRecord reads a URL's WACZ record from a project's archive.
type Broker ¶
type Broker struct {
// contains filtered or unexported fields
}
PubSub broker service struct keeps a map of subscribers.
func NewPubSubBroker ¶
func NewPubSubBroker() *Broker
func (*Broker) NewSubscriber ¶
Returns a new subscriber to the topic.
type CSVWriter ¶
type CSVWriter struct {
// contains filtered or unexported fields
}
func NewCSVWriter ¶
func (*CSVWriter) Write ¶
func (cw *CSVWriter) Write(r *models.PageReport)
type Container ¶
type Container struct { Config *config.Config PubSubBroker *Broker IssueService *IssueService ReportService *ReportService ReportManager *ReportManager UserService *UserService DashboardService *DashboardService ProjectService *ProjectService ProjectViewService *ProjectViewService ExportService *Exporter CrawlerService *CrawlerService Renderer *Renderer CookieSession *CookieSession ArchiveService *ArchiveService // contains filtered or unexported fields }
func NewContainer ¶
func (*Container) InitArchiveService ¶
func (c *Container) InitArchiveService()
func (*Container) InitConfig ¶
Load config file using the parameters in configFile.
func (*Container) InitCookieSession ¶
func (c *Container) InitCookieSession()
Create cookie session handler
func (*Container) InitCrawlerService ¶
func (c *Container) InitCrawlerService()
Create Crawler service.
func (*Container) InitDashboardService ¶
func (c *Container) InitDashboardService()
Create the dashboard service.
func (*Container) InitExportService ¶
func (c *Container) InitExportService()
Create the Export service.
func (*Container) InitIssueService ¶
func (c *Container) InitIssueService()
Create the issue service.
func (*Container) InitProjectService ¶
func (c *Container) InitProjectService()
Create the Project service.
func (*Container) InitProjectViewService ¶
func (c *Container) InitProjectViewService()
Create the ProjectView service.
func (*Container) InitPubSubBroker ¶
func (c *Container) InitPubSubBroker()
Create the PubSub broker.
func (*Container) InitReportManager ¶
func (c *Container) InitReportManager()
Create the report manager and add all the available reporters.
func (*Container) InitReportService ¶
func (c *Container) InitReportService()
Create the report service.
func (*Container) InitRepositories ¶
func (c *Container) InitRepositories()
Create the data repositories.
type CookieSession ¶
type CookieSession struct {
// contains filtered or unexported fields
}
func NewCookieSession ¶
func NewCookieSession(r CookieSessionRepository) *CookieSession
func (*CookieSession) Auth ¶
func (s *CookieSession) Auth(f func(w http.ResponseWriter, r *http.Request)) http.HandlerFunc
Auth is a middleware function that wraps the provided handler function and enforces authentication. It checks if the user is authenticated based on the session data.
func (*CookieSession) DestroySession ¶
func (s *CookieSession) DestroySession(w http.ResponseWriter, r *http.Request) error
Destroys a user authentication session to deauthenticate a user.
func (*CookieSession) GetUser ¶
GetUser takes a context as input and retrieves the associated User value from it, if present.
func (*CookieSession) SetSession ¶
func (s *CookieSession) SetSession(user *models.User, w http.ResponseWriter, r *http.Request) error
Sets a user authentication session with the user Id.
type CookieSessionRepository ¶
type CrawlerHandler ¶
type CrawlerHandler struct {
// contains filtered or unexported fields
}
func NewCrawlerHandler ¶
func NewCrawlerHandler(r CrawlerHandlerRepository, b *Broker, m *ReportManager) *CrawlerHandler
type CrawlerHandlerRepository ¶
type CrawlerHandlerRepository interface {
SavePageReport(*models.PageReport, int64) (*models.PageReport, error)
}
type CrawlerService ¶
type CrawlerService struct { ArchiveService *ArchiveService // contains filtered or unexported fields }
func NewCrawlerService ¶
func NewCrawlerService(r CrawlerServiceRepository, s CrawlerServicesContainer) *CrawlerService
func (*CrawlerService) GetLastCrawls ¶
func (s *CrawlerService) GetLastCrawls(p models.Project) []models.Crawl
Get a slice with 'LastCrawlsLimit' number of the crawls
func (*CrawlerService) StartCrawler ¶
StartCrawler creates a new crawler and crawls the project's URL. It adds a new crawler for the project, it returns an error if there's one already running or if there's an error creating it. Finally the previous crawl's data is removed and the crawl is returned.
func (*CrawlerService) StopCrawler ¶
func (s *CrawlerService) StopCrawler(p models.Project)
StopCrawler stops a crawler. If the crawler does not exist it will just return.
type CrawlerServicesContainer ¶
type CrawlerServicesContainer struct { Broker *Broker ReportManager *ReportManager CrawlerHandler *CrawlerHandler ArchiveService *ArchiveService Config *config.CrawlerConfig }
type DashboardService ¶
type DashboardService struct {
// contains filtered or unexported fields
}
func NewDashboardService ¶
func NewDashboardService(r DashboardServiceRepository) *DashboardService
func (*DashboardService) GetCanonicalCount ¶
func (s *DashboardService) GetCanonicalCount(crawlId int64) *models.CanonicalCount
Returns a count of PageReports that are canonical or not.
func (*DashboardService) GetImageAltCount ¶
func (s *DashboardService) GetImageAltCount(crawlId int64) *models.AltCount
Returns the count of images with and without the alt attribute.
func (*DashboardService) GetMediaCount ¶
func (s *DashboardService) GetMediaCount(crawlId int64) *models.Chart
Returns a Chart with the PageReport's media type chart data.
func (*DashboardService) GetSchemeCount ¶
func (s *DashboardService) GetSchemeCount(crawlId int64) *models.SchemeCount
Returns the count of PageReports with and without https.
func (*DashboardService) GetStatusCodeByDepth ¶
func (s *DashboardService) GetStatusCodeByDepth(crawlId int64) []models.StatusCodeByDepth
GetStatusCodeByDepth returns a slice of StatusCodeByDepth models with the total number of pagereports by depth and status code.
func (*DashboardService) GetStatusCount ¶
func (s *DashboardService) GetStatusCount(crawlId int64) *models.Chart
Returns a Chart with the PageReport's status code chart data.
type DashboardServiceRepository ¶
type DashboardServiceRepository interface { CountByMediaType(int64) *models.CountList CountByStatusCode(int64) *models.CountList CountByCanonical(int64) int CountImagesAlt(int64) *models.AltCount CountScheme(int64) *models.SchemeCount CountByNonCanonical(int64) int GetStatusCodeByDepth(crawlId int64) []models.StatusCodeByDepth }
type DeleteHook ¶
type ExportRepository ¶
type ExportRepository interface { ExportLinks(*models.Crawl) <-chan *models.ExportLink ExportExternalLinks(*models.Crawl) <-chan *models.ExportLink ExportImages(crawl *models.Crawl) <-chan *models.ExportImage ExportScripts(crawl *models.Crawl) <-chan *models.Script ExportStyles(crawl *models.Crawl) <-chan *models.Style ExportIframes(crawl *models.Crawl) <-chan *models.Iframe ExportAudios(crawl *models.Crawl) <-chan *models.Audio ExportVideos(crawl *models.Crawl) <-chan *models.ExportVideo ExportHreflangs(crawl *models.Crawl) <-chan *models.ExportHreflang }
type Exporter ¶
type Exporter struct {
// contains filtered or unexported fields
}
func NewExporter ¶
func NewExporter(r ExportRepository) *Exporter
func (*Exporter) ExportAudios ¶
Export all audio as a CSV file
func (*Exporter) ExportExternalLinks ¶
Export external links as a CSV file
func (*Exporter) ExportHreflangs ¶
Export all hreflangs as a CSV file
func (*Exporter) ExportIframes ¶
Export all iframes as a CSV file
func (*Exporter) ExportImages ¶
Export all images as a CSV file
func (*Exporter) ExportLinks ¶
Export internal links as a CSV file
func (*Exporter) ExportScripts ¶
Export all scripts as a CSV file
func (*Exporter) ExportStyles ¶
Export all CSS styles as a CSV file
type IssueService ¶
type IssueService struct {
// contains filtered or unexported fields
}
func NewIssueService ¶
func NewIssueService(r IssueServiceRepository) *IssueService
func (*IssueService) GetIssuesCount ¶
func (s *IssueService) GetIssuesCount(crawlID int64) *models.IssueCount
GetIssuesCount returns an IssueCount with the number of issues by type.
func (*IssueService) GetPaginatedReportsByIssue ¶
func (s *IssueService) GetPaginatedReportsByIssue(crawlId int64, currentPage int, issueId string) (models.PaginatorView, error)
Returns a PaginatorView with the corresponding page reports.
type IssueServiceRepository ¶
type ProjectService ¶
type ProjectService struct {
// contains filtered or unexported fields
}
func NewProjectService ¶
func NewProjectService(r ProjectServiceRepository, a ArchiveRemover) *ProjectService
func (*ProjectService) DeleteAllUserProjects ¶
func (s *ProjectService) DeleteAllUserProjects(user *models.User)
Delete all user projects and crawl data.
func (*ProjectService) DeleteProject ¶
func (s *ProjectService) DeleteProject(p *models.Project)
Delete a project and its related data.
func (*ProjectService) FindProject ¶
func (s *ProjectService) FindProject(id, uid int) (models.Project, error)
Return a project specified by id and user. It populates the Host field from the project's URL.
func (*ProjectService) SaveProject ¶
func (s *ProjectService) SaveProject(project *models.Project, userId int) error
SaveProject stores a new project. It trims the spaces in the project's URL field and checks the scheme to make sure it is http or https.
func (*ProjectService) UpdateProject ¶
func (s *ProjectService) UpdateProject(p *models.Project) error
Update project details.
type ProjectServiceRepository ¶
type ProjectServiceRepository interface { SaveProject(*models.Project, int) DeleteProject(*models.Project) DisableProject(*models.Project) UpdateProject(p *models.Project) error FindProjectById(id int, uid int) (models.Project, error) FindProjectsByUser(userId int) []models.Project DeleteProjectCrawls(*models.Project) }
type ProjectViewService ¶
type ProjectViewService struct {
// contains filtered or unexported fields
}
func NewProjectViewService ¶
func NewProjectViewService(r ProjectViewServiceRepository) *ProjectViewService
func (*ProjectViewService) GetProjectView ¶
func (s *ProjectViewService) GetProjectView(id, uid int) (*models.ProjectView, error)
GetProjectView returns a new ProjectView with the specified project and the project's last crawl.
func (*ProjectViewService) GetProjectViews ¶
func (s *ProjectViewService) GetProjectViews(uid int) []models.ProjectView
GetProjectViews returns a slice of ProjectViews with all of the user's projects and their last crawls.
func (*ProjectViewService) UserIsCrawling ¶
func (s *ProjectViewService) UserIsCrawling(uid int) bool
UserIsCrawling returns true if the user has any project that is currently crawling. Otherwise it returns false.
func (*ProjectViewService) UserIsProcessingProjects ¶
func (s *ProjectViewService) UserIsProcessingProjects(uid int) bool
Returns true if the user is crawling or deleting projects. Otherwise it returns false.
type Renderer ¶
type Renderer struct {
// contains filtered or unexported fields
}
func NewRenderer ¶
func NewRenderer(config *RendererConfig) (*Renderer, error)
NewRenderer will load a translation file and return a new template renderer.
func (*Renderer) RenderTemplate ¶
Render a template with the specified PageView data.
type RendererConfig ¶
type ReportManager ¶
type ReportManager struct {
// contains filtered or unexported fields
}
func NewReportManager ¶
func NewReportManager(r ReportManagerRepository) *ReportManager
Create a new ReportManager with no issue reporters.
func (*ReportManager) AddMultipageReporter ¶
func (rm *ReportManager) AddMultipageReporter(reporter models.MultipageCallback)
Add a multi-page issue reporter to the ReportManager. Multi-page reporters are used to detect issues that affect multiple pages. It will be used when creating the multi page issues once all the pages have been crawled.
func (*ReportManager) AddPageReporter ¶
func (rm *ReportManager) AddPageReporter(reporter *models.PageIssueReporter)
Add a page issue reporter to the ReportManager. It will be used to create issues on each crawled page.
func (*ReportManager) CreateMultipageIssues ¶
func (r *ReportManager) CreateMultipageIssues(crawl *models.Crawl)
CreateMultipageIssues uses the Reporters to create and save issues found in a crawl.
func (*ReportManager) CreatePageIssues ¶
func (r *ReportManager) CreatePageIssues(p *models.PageReport, htmlNode *html.Node, header *http.Header, crawl *models.Crawl)
CreatePageIssues loops the page reporters calling the callback function and creating the issues found in the PageReport.
type ReportManagerRepository ¶
type ReportService ¶
type ReportService struct {
// contains filtered or unexported fields
}
func NewReportService ¶
func NewReportService(r ReportServiceRepository) *ReportService
func (*ReportService) GetPageReporsByIssueType ¶
func (s *ReportService) GetPageReporsByIssueType(crawlId int64, eid string) <-chan *models.PageReport
Return channel of PageReports by error type.
func (*ReportService) GetPageReport ¶
func (s *ReportService) GetPageReport(rid int, crawlId int64, tab string, page int) *models.PageReportView
Returns a PageReportView by PageReport Id and Crawl Id. It also loads the data specified in the tab parameter.
func (*ReportService) GetPaginatedReports ¶
func (s *ReportService) GetPaginatedReports(crawlId int64, currentPage int, term string) (models.PaginatorView, error)
Returns a PaginatorView with the corresponding page reports.
func (*ReportService) GetSitemapPageReports ¶
func (s *ReportService) GetSitemapPageReports(crawlId int64) <-chan *models.PageReport
Returns a channel of crawlable PageReports that can be included in a sitemap.
type ReportServiceRepository ¶
type ReportServiceRepository interface { FindPageReportById(int) models.PageReport FindErrorTypesByPage(int, int64) []string FindInLinks(string, int64, int) []models.InternalLink FindPageReportsRedirectingToURL(string, int64, int) []models.PageReport FindAllPageReportsByCrawlIdAndErrorType(int64, string) <-chan *models.PageReport FindAllPageReportsByCrawlId(int64) <-chan *models.PageReport FindSitemapPageReports(int64) <-chan *models.PageReport FindLinks(pageReport *models.PageReport, cid int64, page int) []models.InternalLink FindExternalLinks(pageReport *models.PageReport, cid int64, p int) []models.Link FindPaginatedPageReports(cid int64, p int, term string) []models.PageReport FindPageReportStyles(pageReport *models.PageReport, cid int64) []string FindPageReportScripts(pageReport *models.PageReport, cid int64) []string FindPageReportVideos(pageReport *models.PageReport, cid int64) []models.Video FindPageReportAudios(pageReport *models.PageReport, cid int64) []string FindPageReportIframes(pageReport *models.PageReport, cid int64) []string FindPageReportImages(pageReport *models.PageReport, cid int64) []models.Image FindPageReportHreflangs(pageReport *models.PageReport, cid int64) []models.Hreflang GetNumberOfPagesForPageReport(cid int64, term string) int GetNumberOfPagesForInlinks(*models.PageReport, int64) int GetNumberOfPagesForRedirecting(*models.PageReport, int64) int GetNumberOfPagesForLinks(*models.PageReport, int64) int GetNumberOfPagesForExternalLinks(pageReport *models.PageReport, cid int64) int }
type UserService ¶
type UserService struct {
// contains filtered or unexported fields
}
func NewUserService ¶
func NewUserService(r UserServiceRepository) *UserService
func (*UserService) AddDeleteHook ¶
func (s *UserService) AddDeleteHook(hook DeleteHook)
AddDeleteHook adds a new hook function that will be called when the user is deleted. This is used for user data clean up.
func (*UserService) DeleteUser ¶
func (s *UserService) DeleteUser(user *models.User)
Delete a User and all its associated projects and crawl data. Deleting the user data may take a while, and it's deleted in a go routine. To avoid blocking the execution the user is first disabled, and once the data has been deleted, the user is finally deleted.
func (*UserService) SignIn ¶
func (s *UserService) SignIn(email, password string) (*models.User, error)
SignIn validates the provided email and password combination for user authentication. It compares the provided password with the user's hashed password. If the passwords do not match, it returns an error.
func (*UserService) SignUp ¶
func (s *UserService) SignUp(email, password string) (*models.User, error)
SignUp validates the user email and password. If they are both valid, it creates a password hash before storing it. If successful, it returns the new user, otherwise an error is returned.
func (*UserService) UpdatePassword ¶
func (s *UserService) UpdatePassword(user *models.User, currentPassword, newPassword string) error
UpdatePassword updates the password for the user with the given email. It validates the new password and generates a hashed password using bcrypt before storing it.