ytt

package
v0.0.0-...-8219c2e Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Oct 20, 2024 License: MIT Imports: 16 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

View Source
// Command is the "ytt" cobra command. Given a single YouTube URL it fetches the
// English transcript, writes the raw caption text to a file and, unless the
// "raw" flag is set, passes that text through the selected model and writes the
// cleaned result to a second file.
//
// It reads the flags "raw", "model", "path" and "suffix"; they are not
// registered in this declaration (presumably done elsewhere in the package).
var Command = &cobra.Command{
	Use:   "ytt <youtube_url>",
	Short: "Generate cleaned up transcript from YouTube autogenerated captions using an LLM",
	Args:  cobra.ExactArgs(1),
	// PreRunE validates the "model" flag up front so that no network work is
	// done for an unsupported model. Validation is skipped in raw mode because
	// no model is used there.
	PreRunE: func(cmd *cobra.Command, args []string) error {
		// Flag lookup errors are deliberately ignored: a missing flag yields
		// the zero value, which falls through to the checks below.
		raw, _ := cmd.Flags().GetBool("raw")
		if raw {
			return nil
		}

		model, _ := cmd.Flags().GetString("model")
		// Single list used for both the check and the error message so the
		// two cannot drift apart.
		supported := []string{
			string(ChatGPT4o),
			string(ChatGpt4oMini),
			string(Claude3Dot5Sonnet20240620),
			string(GroqLlama3170B),
		}
		for _, name := range supported {
			if model == name {
				return nil
			}
		}
		return fmt.Errorf("invalid model %q: must be one of %s", model, strings.Join(supported, ", "))
	},
	// RunE performs the download, writes the raw transcript and optionally the
	// cleaned transcript. Every failure is returned wrapped with context.
	RunE: func(cmd *cobra.Command, args []string) error {
		// Flag lookup errors are deliberately ignored; zero values mean
		// "not raw", "current directory" and "no suffix" respectively.
		raw, _ := cmd.Flags().GetBool("raw")

		folder, _ := cmd.Flags().GetString("path")
		suffix, _ := cmd.Flags().GetString("suffix")
		if folder != "" {
			// Fail early, before any network call, when the output
			// directory is missing or is not a directory.
			fi, err := os.Stat(folder)
			if err != nil || !fi.IsDir() {
				return fmt.Errorf("path not found: %s", folder)
			}
		}

		// Both output files share one timestamp (plus the optional suffix).
		filenameSuffix := time.Now().Format("2006-01-02-150405")
		if suffix != "" {
			filenameSuffix = fmt.Sprintf("%s_%s", filenameSuffix, suffix)
		}

		videoID, err := ytt.ExtractVideoID(args[0])
		if err != nil {
			return fmt.Errorf("failed to extract video ID: %w", err)
		}

		transcriptList, err := ytt.ListTranscripts(videoID)
		if err != nil {
			return fmt.Errorf("failed to list transcripts: %w", err)
		}

		transcript, err := transcriptList.FindTranscript("en")
		if err != nil {
			return fmt.Errorf("failed to find English transcript: %w", err)
		}

		entries, err := transcript.Fetch()
		if err != nil {
			return fmt.Errorf("failed to fetch transcript: %w", err)
		}

		// Concatenate all caption entries, each preceded by a single space.
		var transcriptTxt strings.Builder
		for _, entry := range entries {
			transcriptTxt.WriteString(" " + entry.Text)
		}

		// NOTE(review): path.Join always uses forward slashes; filepath.Join
		// would be the OS-correct choice but needs an import this block
		// cannot add on its own.
		rawTranscriptFilename := path.Join(folder, fmt.Sprintf("raw_transcript_%s.txt", filenameSuffix))
		if err = os.WriteFile(rawTranscriptFilename, []byte(transcriptTxt.String()), 0644); err != nil {
			return fmt.Errorf("failed to write raw transcript: %w", err)
		}
		fmt.Printf("wrote raw autogenerated captions to %s\n", rawTranscriptFilename)

		if raw {
			return nil
		}

		m, _ := cmd.Flags().GetString("model")
		model := Model(m)
		tc, err := newTranscriptCleaner(model)
		if err != nil {
			// %w (not %v) so callers can inspect the cause with
			// errors.Is / errors.As, matching every other error here.
			return fmt.Errorf("failed to initialize model %s: %w", model, err)
		}

		cleanedTranscriptTxt, err := tc.cleanupTranscript(transcriptTxt.String())
		if err != nil {
			return fmt.Errorf("failed to transcribe: %w", err)
		}

		cleanedTranscriptFilename := path.Join(folder, fmt.Sprintf("cleaned_transcript_%s.txt", filenameSuffix))
		if err = os.WriteFile(cleanedTranscriptFilename, []byte(cleanedTranscriptTxt), 0644); err != nil {
			return fmt.Errorf("failed to write cleaned transcript: %w", err)
		}
		fmt.Printf("wrote cleaned up transcripts to %s\n", cleanedTranscriptFilename)
		return nil
	},
}

Functions

This section is empty.

Types

type Model

// Model is the string identifier of a language model that can be selected
// through the command's "model" flag.
type Model string

// Model identifiers accepted by Command's "model" flag validation.
const (
	// ChatGPT4o selects the "gpt-4o" model.
	ChatGPT4o                 Model = "gpt-4o"
	// ChatGpt4oMini selects the "gpt-4o-mini" model.
	ChatGpt4oMini             Model = "gpt-4o-mini"
	// Claude3Dot5Sonnet20240620 selects the "claude-3-5-sonnet-20240620" model.
	Claude3Dot5Sonnet20240620 Model = "claude-3-5-sonnet-20240620"
	// GroqLlama3170B selects the "llama-3.1-70b-versatile" model.
	GroqLlama3170B            Model = "llama-3.1-70b-versatile"
)

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL