scraper

package module
v0.0.1 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Sep 12, 2024 License: GPL-3.0 Imports: 14 Imported by: 0

README

youtube_scraper

Youtube metadata scraping library for golang

Features

  • Search videos
  • View playlist videos
  • Fetch basic info about channel and videos from videos and livestreams tabs
  • Fetch videos from homepage
  • Fetch video metadata
  • Fetch video sidebar recommendations (video, playlist, radio)
  • Fetch comments and their reply threads
  • Fetch non DRM video media URLs
  • Export pagination and scraper state to continue later

Example

For more examples please look into the "examples" folder

package main

import (
	"encoding/json"
	"github.com/0x090909/youtube_scraper"
	"log"
)

// main prints basic information about a channel followed by the ID, title and
// view count of each of its videos, fetching page after page until the
// scraper returns an empty page.
func main() {
	// NewChannelScraper returns (ChannelScraper, error); both results have to
	// be received, and the error checked, before the scraper is used.
	c, err := scraper.NewChannelScraper("@TomScottGo")
	if err != nil {
		log.Fatal(err)
	}

	var printedChannel bool
	for {
		videos, err := c.NextVideosPage()
		if err != nil {
			log.Fatal(err)
		}
		if len(videos) == 0 {
			// An empty page marks the end of the listing.
			break
		}

		// Channel info only becomes available after the first page has been
		// fetched, so it is printed once, inside the loop.
		if !printedChannel {
			if available, channel := c.GetChannelInfo(); available {
				bs, err := json.MarshalIndent(channel, "", "	")
				if err != nil {
					log.Fatal(err)
				}
				log.Println(string(bs))
			}

			printedChannel = true
		}

		for _, video := range videos {
			log.Println(video.VideoID, video.Title, video.Views)
		}
	}
}

Documentation

Index

Constants

View Source
const ChannelBadgeVerified = "Verified"
View Source
const ChannelBadgeVerifiedArtistChannel = "Official Artist Channel"
View Source
const DebugDiscriminatorLength = 20

Configure the maximum length that the discriminator should display

View Source
const VideoBadge4k = "4K"
View Source
const VideoBadgeCC = "CC"
View Source
const VideoBadgeNew = "New"

Variables

View Source
var Debug = false

Enables debug json outputs

Functions

func DebugFileOutput

func DebugFileOutput(body []byte, format string, discriminator ...string)

Helper function for debug mode that outputs data to a file with specified name

func ExtractInitialData

func ExtractInitialData(url string) (rawJson string, err error)

Helper function that scrapes JSON from an HTML page

func ExtractInitialDataBytes

func ExtractInitialDataBytes(body []byte) (rawJson string, err error)

func FixUnit

func FixUnit(s string) string

The humanize library doesn't seem to understand that "10K" and "10k" are the same thing.

func GetVideoThumbnail

func GetVideoThumbnail(id string) string

func ParseViews

func ParseViews(rawViews string) (views float64, err error)

Parses views from youtube outputs

Types

type Channel

// Channel holds the channel metadata returned by ChannelScraper.GetChannelInfo.
type Channel struct {
	Subscribers int
	// NOTE(review): the two flags below presumably mirror the
	// ChannelBadgeVerified and ChannelBadgeVerifiedArtistChannel badges — confirm.
	IsVerified       bool
	IsVerifiedArtist bool
	ChannelID        string
	// NOTE(review): presumably the "@username" style handle — confirm.
	NewChannelID string
	Username     string
	Description  string
	VideosAmount int

	// Image variants of the channel avatar and banner.
	Avatars []YoutubeImage
	Banners []YoutubeImage
}

type ChannelScraper

type ChannelScraper struct {
	// contains filtered or unexported fields
}

func ChannelScraperFromExport

func ChannelScraperFromExport(export ChannelScraperExport) (c ChannelScraper, err error)

func NewChannelScraper

func NewChannelScraper(id string) (c ChannelScraper, err error)

NewChannelScraper accepts normal id or @username

func (*ChannelScraper) Export

func (*ChannelScraper) GetChannelInfo

func (c *ChannelScraper) GetChannelInfo() (available bool, channel Channel)

GetChannelInfo will output the internal channel struct which will become available after the first call to NextVideosPage() or NextStreamsPage()

func (*ChannelScraper) NextShortsPage

func (c *ChannelScraper) NextShortsPage() (videos []Video, err error)

NextShortsPage scrapes pages of the `/shorts` endpoint on channel page

func (*ChannelScraper) NextStreamsPage

func (c *ChannelScraper) NextStreamsPage() (videos []Video, err error)

NextStreamsPage scrapes pages of the `/streams` endpoint on channel page

func (*ChannelScraper) NextVideosPage

func (c *ChannelScraper) NextVideosPage() (videos []Video, err error)

NextVideosPage scrapes pages of the `/videos` endpoint on channel page

type ChannelScraperExport

// ChannelScraperExport is the saved scraper state that ChannelScraperFromExport
// accepts to rebuild a ChannelScraper.
type ChannelScraperExport struct {
	// URLs of the channel tabs the scraper reads from.
	StreamsUrl string
	VideosUrl  string
	ShortsUrl  string

	// Pagination state for the videos tab.
	// NOTE(review): "InitialComplete" presumably means the first (HTML) page
	// has already been consumed — confirm.
	VideosInitialComplete bool
	VideosContinueToken   string

	// Pagination state for the streams tab.
	// NOTE(review): there are no matching fields for shorts, so shorts
	// pagination may not be restorable from an export — confirm.
	StreamsInitialComplete bool
	StreamsContinueToken   string
}

type ContinueInput

// ContinueInput is the JSON input sent to the YouTube API. Fields tagged
// omitempty are left out of the encoded JSON when they hold their zero value.
type ContinueInput struct {
	Context struct {
		Client struct {
			Hl            string `json:"hl"`           // language the data should be returned in, e.g. "en" for English
			Gl            string `json:"gl,omitempty"` // data region
			VisitorData   string `json:"visitorData,omitempty"`
			ClientName    string `json:"clientName"`
			ClientVersion string `json:"clientVersion"`
		} `json:"client"`
	} `json:"context"`
	VideoID string `json:"videoId,omitempty"`
	// Continuation has no omitempty option, so it is always present in the
	// encoded JSON, even when empty.
	Continuation        string `json:"continuation"`
	BrowseId            string `json:"browseId,omitempty"`
	InlineSettingStatus string `json:"inlineSettingStatus,omitempty"`
}

Youtube api input json

func (ContinueInput) Construct

func (ci ContinueInput) Construct() ([]byte, error)

func (ContinueInput) FillGenericInfo

func (ci ContinueInput) FillGenericInfo() ContinueInput

type HomeVideosExport

// HomeVideosExport is the saved state returned by HomeVideosScraper.Export and
// accepted by HomeVideosScraperFromExport.
type HomeVideosExport struct {
	ContinueToken string
	VisitorData   string
	// NOTE(review): presumably true once the first page has been fetched — confirm.
	InitialComplete bool
}

type HomeVideosScraper

type HomeVideosScraper struct {
	// contains filtered or unexported fields
}

func HomeVideosScraperFromExport

func HomeVideosScraperFromExport(export HomeVideosExport) (h HomeVideosScraper, err error)

func NewHomeVideosScraper

func NewHomeVideosScraper() (h HomeVideosScraper)

func (*HomeVideosScraper) Export

func (h *HomeVideosScraper) Export() HomeVideosExport

func (*HomeVideosScraper) NextPage

func (h *HomeVideosScraper) NextPage() (videos []Video, err error)

type PlaylistContinueOutput

// PlaylistContinueOutput is filled through rjson struct tags: Videos collects
// the playlistVideoRenderer entries of the first onResponseReceivedActions
// item's appended continuation items.
// NOTE(review): presumably the decoded response of a playlist continuation
// request — confirm.
type PlaylistContinueOutput struct {
	ContinuationToken string                  `` /* 158-byte string literal not displayed */
	Videos            []playlistVideoRenderer `rjson:"onResponseReceivedActions[0]appendContinuationItemsAction.continuationItems[].playlistVideoRenderer"`
}

type PlaylistInfo

// PlaylistInfo is the playlist metadata returned by
// PlaylistScraper.GetPlaylistInfo.
type PlaylistInfo struct {
	Title        string
	Description  string
	ChannelName  string
	ChannelID    string
	NewChannelID string
	VideosCount  int
	Views        int
	UpdateStatus string // example: "Updated today"

	// NOTE(review): presumably the token for the page after Videos and the
	// videos found on the initial page — confirm.
	ContinuationToken string
	Videos            []PlaylistVideo
}

type PlaylistScraper

type PlaylistScraper struct {
	// contains filtered or unexported fields
}

func NewPlaylistScraper

func NewPlaylistScraper(playlistId string) (p PlaylistScraper, err error)

func (*PlaylistScraper) GetPlaylistInfo

func (p *PlaylistScraper) GetPlaylistInfo() (info PlaylistInfo, err error)

GetPlaylistInfo returns the initial info from the page

func (*PlaylistScraper) NextPage

func (p *PlaylistScraper) NextPage() (videos []PlaylistVideo, err error)

type PlaylistVideo

// PlaylistVideo is one playlist entry, as returned by PlaylistScraper.NextPage
// and listed in PlaylistInfo.Videos.
type PlaylistVideo struct {
	VideoID              string
	Title                string
	PlaylistPosition     int
	ChannelName          string
	ChannelID            string
	VideoLengthInSeconds int
	Views                int
	Date                 string // relative date text, example: "8 years ago"
	Thumbnails           []YoutubeImage
}

youtube json type playlistVideoRenderer

type Video

// Video is one video or livestream entry, as returned by the ChannelScraper
// Next*Page methods and HomeVideosScraper.NextPage.
type Video struct {
	VideoID string
	Title   string

	// Will be empty if it's a livestream.
	// Example value: 7:03
	Length string `json:"Length,omitempty"`

	Views   int `json:"Views,omitempty"`   // Will be empty if it's a livestream
	Viewers int `json:"Viewers,omitempty"` // Will be empty if it's not a livestream

	/*
		Will be empty if it's a livestream.

		Relative date text; observed forms:

		Years
			- 2-11 years ago
			- 1 year ago

		Months
			- 2-11 months ago
			- 1 month ago

		Weeks
			- 2-4 weeks ago

		Days
			- 2-13 days ago
			- 1 day ago

		Hours
			- 2-23 hours ago
			- 1 hour ago

		Minutes
			- 2-59 minutes ago
			- 1 minute ago

		Seconds
			- 2-59 seconds ago
			- 1 second ago
	*/
	Date string `json:"Date,omitempty"`

	Thumbnails []YoutubeImage

	Username      string
	ChannelID     string
	NewChannelID  string // @username
	ChannelAvatar string

	IsLive                 bool
	WasLive                bool
	AuthorIsVerified       bool
	AuthorIsVerifiedArtist bool
}

type YoutubeImage

// YoutubeImage is one image variant (URL plus width and height), used for
// channel avatars, channel banners and video thumbnails. Its fields are filled
// from the "url", "width" and "height" keys through rjson struct tags.
type YoutubeImage struct {
	Url    string `rjson:"url"`
	Width  int    `rjson:"width"`
	Height int    `rjson:"height"`
}

Directories

Path Synopsis

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL