Documentation ¶
Overview ¶
Package scraper provides interfaces to interact with the scraper subsystem. The Cache type is the main entry point to the scraper subsystem.
Index ¶
- Constants
- Variables
- type Cache
- func (c Cache) GetScraper(scraperID string) *Scraper
- func (c Cache) ListScrapers(tys []ScrapeContentType) []*Scraper
- func (c *Cache) ReloadScrapers()
- func (c Cache) ScrapeFragment(ctx context.Context, id string, input Input) (ScrapedContent, error)
- func (c Cache) ScrapeID(ctx context.Context, scraperID string, id int, ty ScrapeContentType) (ScrapedContent, error)
- func (c Cache) ScrapeName(ctx context.Context, id, query string, ty ScrapeContentType) ([]ScrapedContent, error)
- func (c Cache) ScrapeURL(ctx context.Context, url string, ty ScrapeContentType) (ScrapedContent, error)
- type GalleryFinder
- type GlobalConfig
- type Input
- type PerformerFinder
- type QueryType
- type Repository
- type SceneFinder
- type ScrapeContentType
- type ScrapeType
- type ScrapedContent
- type ScrapedGallery
- type ScrapedGalleryInput
- type ScrapedMovieInput
- type ScrapedPerformerInput
- type ScrapedScene
- type ScrapedSceneInput
- type Scraper
- type ScraperSpec
- type Source
- type StudioFinder
- type TagFinder
Constants ¶
const FreeonesScraperID = "builtin_freeones"
FreeonesScraperID is the scraper ID for the built-in Freeones scraper
Variables ¶
var ( // ErrMaxRedirects is returned if the max number of HTTP redirects is reached. ErrMaxRedirects = errors.New("maximum number of HTTP redirects reached") // ErrNotFound is returned when an entity isn't found ErrNotFound = errors.New("scraper not found") // ErrNotSupported is returned when a given invocation isn't supported, and there // is a guard function which should be able to guard against it. ErrNotSupported = errors.New("scraper operation not supported") )
var AllScrapeContentType = []ScrapeContentType{ ScrapeContentTypeGallery, ScrapeContentTypeMovie, ScrapeContentTypeGroup, ScrapeContentTypePerformer, ScrapeContentTypeScene, }
var AllScrapeType = []ScrapeType{ ScrapeTypeName, ScrapeTypeFragment, ScrapeTypeURL, }
var ErrScraperScript = errors.New("scraper script error")
Functions ¶
This section is empty.
Types ¶
type Cache ¶ added in v0.3.0
type Cache struct {
// contains filtered or unexported fields
}
Cache stores the database of scrapers
func NewCache ¶ added in v0.3.0
func NewCache(globalConfig GlobalConfig, repo Repository) *Cache
NewCache returns a new Cache.
Scraper configurations are loaded from yml files in the scrapers directory set in the config, including any subdirectories.
NewCache itself does not load scrapers; they must be loaded explicitly using ReloadScrapers.
func (Cache) GetScraper ¶ added in v0.11.0
GetScraper returns the scraper matching the provided id.
func (Cache) ListScrapers ¶ added in v0.12.0
func (c Cache) ListScrapers(tys []ScrapeContentType) []*Scraper
ListScrapers lists scrapers matching one of the given types. Returns a list of scrapers, sorted by their name.
func (*Cache) ReloadScrapers ¶ added in v0.3.0
func (c *Cache) ReloadScrapers()
ReloadScrapers clears the scraper cache and reloads from the scraper path. If a scraper cannot be loaded, an error is logged and the scraper is skipped.
func (Cache) ScrapeFragment ¶ added in v0.12.0
ScrapeFragment uses the given fragment input to scrape
func (Cache) ScrapeID ¶ added in v0.12.0
func (c Cache) ScrapeID(ctx context.Context, scraperID string, id int, ty ScrapeContentType) (ScrapedContent, error)
func (Cache) ScrapeName ¶ added in v0.12.0
func (c Cache) ScrapeName(ctx context.Context, id, query string, ty ScrapeContentType) ([]ScrapedContent, error)
func (Cache) ScrapeURL ¶ added in v0.12.0
func (c Cache) ScrapeURL(ctx context.Context, url string, ty ScrapeContentType) (ScrapedContent, error)
ScrapeURL scrapes a given url for the given content. Searches the scraper cache and picks the first scraper capable of scraping the given url into the desired content. Returns the scraped content or an error if the scrape fails.
type GalleryFinder ¶ added in v0.17.0
type GalleryFinder interface { models.GalleryGetter models.FileLoader models.URLLoader }
type GlobalConfig ¶ added in v0.3.0
type GlobalConfig interface { GetScraperUserAgent() string GetScrapersPath() string GetScraperCDPPath() string GetScraperCertCheck() bool GetPythonPath() string GetProxy() string }
GlobalConfig contains the global scraper options.
type Input ¶ added in v0.12.0
type Input struct { Performer *ScrapedPerformerInput Scene *ScrapedSceneInput Gallery *ScrapedGalleryInput }
Input coalesces inputs of different types into a single structure. The system expects exactly one of these fields to be set, and the remaining fields to be nil.
type PerformerFinder ¶ added in v0.17.0
type PerformerFinder interface { models.PerformerAutoTagQueryer match.PerformerFinder }
type QueryType ¶ added in v0.12.0
type QueryType int
simple type definitions that can help customize actions per query
type Repository ¶ added in v0.17.0
type Repository struct { TxnManager models.TxnManager SceneFinder SceneFinder GalleryFinder GalleryFinder TagFinder TagFinder PerformerFinder PerformerFinder GroupFinder match.GroupNamesFinder StudioFinder StudioFinder }
func NewRepository ¶ added in v0.24.0
func NewRepository(repo models.Repository) Repository
func (*Repository) WithReadTxn ¶ added in v0.24.0
type SceneFinder ¶ added in v0.22.0
type SceneFinder interface { models.SceneGetter models.URLLoader models.VideoFileLoader }
type ScrapeContentType ¶ added in v0.17.0
type ScrapeContentType string
Type of the content a scraper generates
const ( ScrapeContentTypeGallery ScrapeContentType = "GALLERY" ScrapeContentTypeMovie ScrapeContentType = "MOVIE" ScrapeContentTypeGroup ScrapeContentType = "GROUP" ScrapeContentTypePerformer ScrapeContentType = "PERFORMER" ScrapeContentTypeScene ScrapeContentType = "SCENE" )
func (ScrapeContentType) IsValid ¶ added in v0.17.0
func (e ScrapeContentType) IsValid() bool
func (ScrapeContentType) MarshalGQL ¶ added in v0.17.0
func (e ScrapeContentType) MarshalGQL(w io.Writer)
func (ScrapeContentType) String ¶ added in v0.17.0
func (e ScrapeContentType) String() string
func (*ScrapeContentType) UnmarshalGQL ¶ added in v0.17.0
func (e *ScrapeContentType) UnmarshalGQL(v interface{}) error
type ScrapeType ¶ added in v0.17.0
type ScrapeType string
const ( // From text query ScrapeTypeName ScrapeType = "NAME" // From existing object ScrapeTypeFragment ScrapeType = "FRAGMENT" // From URL ScrapeTypeURL ScrapeType = "URL" )
func (ScrapeType) IsValid ¶ added in v0.17.0
func (e ScrapeType) IsValid() bool
func (ScrapeType) MarshalGQL ¶ added in v0.17.0
func (e ScrapeType) MarshalGQL(w io.Writer)
func (ScrapeType) String ¶ added in v0.17.0
func (e ScrapeType) String() string
func (*ScrapeType) UnmarshalGQL ¶ added in v0.17.0
func (e *ScrapeType) UnmarshalGQL(v interface{}) error
type ScrapedContent ¶ added in v0.17.0
type ScrapedContent interface {
IsScrapedContent()
}
ScrapedContent is the union formed over the different scrapers
type ScrapedGallery ¶ added in v0.17.0
type ScrapedGallery struct { Title *string `json:"title"` Code *string `json:"code"` Details *string `json:"details"` Photographer *string `json:"photographer"` URLs []string `json:"urls"` Date *string `json:"date"` Studio *models.ScrapedStudio `json:"studio"` Tags []*models.ScrapedTag `json:"tags"` Performers []*models.ScrapedPerformer `json:"performers"` // deprecated URL *string `json:"url"` }
func (ScrapedGallery) IsScrapedContent ¶ added in v0.17.0
func (ScrapedGallery) IsScrapedContent()
type ScrapedGalleryInput ¶ added in v0.17.0
type ScrapedMovieInput ¶ added in v0.17.0
type ScrapedMovieInput struct { Name *string `json:"name"` Aliases *string `json:"aliases"` Duration *string `json:"duration"` Date *string `json:"date"` Rating *string `json:"rating"` Director *string `json:"director"` URLs []string `json:"urls"` Synopsis *string `json:"synopsis"` // deprecated URL *string `json:"url"` }
type ScrapedPerformerInput ¶ added in v0.17.0
type ScrapedPerformerInput struct { // Set if performer matched StoredID *string `json:"stored_id"` Name *string `json:"name"` Disambiguation *string `json:"disambiguation"` Gender *string `json:"gender"` URLs []string `json:"urls"` URL *string `json:"url"` // deprecated Twitter *string `json:"twitter"` // deprecated Instagram *string `json:"instagram"` // deprecated Birthdate *string `json:"birthdate"` Ethnicity *string `json:"ethnicity"` Country *string `json:"country"` EyeColor *string `json:"eye_color"` Height *string `json:"height"` Measurements *string `json:"measurements"` FakeTits *string `json:"fake_tits"` PenisLength *string `json:"penis_length"` Circumcised *string `json:"circumcised"` CareerLength *string `json:"career_length"` Tattoos *string `json:"tattoos"` Piercings *string `json:"piercings"` Aliases *string `json:"aliases"` Details *string `json:"details"` DeathDate *string `json:"death_date"` HairColor *string `json:"hair_color"` Weight *string `json:"weight"` RemoteSiteID *string `json:"remote_site_id"` }
type ScrapedScene ¶ added in v0.17.0
type ScrapedScene struct { Title *string `json:"title"` Code *string `json:"code"` Details *string `json:"details"` Director *string `json:"director"` URL *string `json:"url"` URLs []string `json:"urls"` Date *string `json:"date"` // This should be a base64 encoded data URL Image *string `json:"image"` File *models.SceneFileType `json:"file"` Studio *models.ScrapedStudio `json:"studio"` Tags []*models.ScrapedTag `json:"tags"` Performers []*models.ScrapedPerformer `json:"performers"` Groups []*models.ScrapedGroup `json:"groups"` Movies []*models.ScrapedMovie `json:"movies"` RemoteSiteID *string `json:"remote_site_id"` Duration *int `json:"duration"` Fingerprints []*models.StashBoxFingerprint `json:"fingerprints"` }
func (ScrapedScene) IsScrapedContent ¶ added in v0.17.0
func (ScrapedScene) IsScrapedContent()
type ScrapedSceneInput ¶ added in v0.17.0
type Scraper ¶ added in v0.17.0
type Scraper struct { ID string `json:"id"` Name string `json:"name"` // Details for performer scraper Performer *ScraperSpec `json:"performer"` // Details for scene scraper Scene *ScraperSpec `json:"scene"` // Details for gallery scraper Gallery *ScraperSpec `json:"gallery"` // Details for group scraper Group *ScraperSpec `json:"group"` // Details for movie scraper Movie *ScraperSpec `json:"movie"` }
type ScraperSpec ¶ added in v0.17.0
type ScraperSpec struct { // URLs matching these can be scraped with Urls []string `json:"urls"` SupportedScrapes []ScrapeType `json:"supported_scrapes"` }
type Source ¶ added in v0.17.0
type Source struct { // Index of the configured stash-box instance to use. Should be unset if scraper_id is set StashBoxIndex *int `json:"stash_box_index"` // Stash-box endpoint StashBoxEndpoint *string `json:"stash_box_endpoint"` // Scraper ID to scrape with. Should be unset if stash_box_index is set ScraperID *string `json:"scraper_id"` }