client

package
v1.3.9 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Feb 27, 2017 License: Apache-2.0 Imports: 16 Imported by: 0

Documentation

Overview

Example (Pfs)
package main

import (
	"bytes"
	"strings"

	"github.com/pachyderm/pachyderm/src/client"
)

func main() {
	c, err := client.NewFromAddress("0.0.0.0:30650")
	if err != nil {
		return // handle error
	}
	// Create a repo called "repo"
	if err := c.CreateRepo("repo"); err != nil {
		return // handle error
	}
	// Start a commit in our new repo on the "master" branch
	commit1, err := c.StartCommit("repo", "master")
	if err != nil {
		return // handle error
	}
	// Put a file called "file" in the newly created commit with the content "foo\n".
	if _, err := c.PutFile("repo", commit1.ID, "file", strings.NewReader("foo\n")); err != nil {
		return // handle error
	}
	// Finish the commit.
	if err := c.FinishCommit("repo", commit1.ID); err != nil {
		return //handle error
	}
	// Read what we wrote.
	var buffer bytes.Buffer
	if err := c.GetFile("repo", commit1.ID, "file", 0, 0, "", false, nil, &buffer); err != nil {
		return //handle error
	}
	// buffer now contains "foo\n"

	// Start another commit with the previous commit as the parent.
	commit2, err := c.StartCommit("repo", commit1.ID)
	if err != nil {
		return //handle error
	}
	// Extend "file" in the newly created commit with the content "bar\n".
	if _, err := c.PutFile("repo", commit2.ID, "file", strings.NewReader("bar\n")); err != nil {
		return // handle error
	}
	// Finish the commit.
	if err := c.FinishCommit("repo", commit2.ID); err != nil {
		return //handle error
	}
	// Read what we wrote.
	buffer.Reset()
	if err := c.GetFile("repo", commit2.ID, "file", 0, 0, "", false, nil, &buffer); err != nil {
		return //handle error
	}
	// buffer now contains "foo\nbar\n"

	// We can still read the old version of the file though:
	buffer.Reset()
	if err := c.GetFile("repo", commit1.ID, "file", 0, 0, "", false, nil, &buffer); err != nil {
		return //handle error
	}
	// buffer now contains "foo\n"

	// We can also see the Diff between the most recent commit and the first one:
	buffer.Reset()
	if err := c.GetFile("repo", commit2.ID, "file", 0, 0, commit1.ID, false, nil, &buffer); err != nil {
		return //handle error
	}
}
Output:

Example (Pps)
package main

import (
	"bytes"

	"github.com/pachyderm/pachyderm/src/client"
	"github.com/pachyderm/pachyderm/src/client/pps"
)

func main() {
	c, err := client.NewFromAddress("0.0.0.0:30650")
	if err != nil {
		return // handle error
	}

	// we assume there's already a repo called "repo"
	// and that it already has some data in it
	// take a look at src/client/pfs_test.go for an example of how to get there.

	// Create a map pipeline
	if err := c.CreatePipeline(
		"map",                  // the name of the pipeline
		"pachyderm/test_image", // your docker image
		[]string{"map"},        // the command run in your docker image
		nil,                    // no stdin
		nil,                    // let pachyderm decide the parallelism
		[]*pps.PipelineInput{
			// map over "repo"
			client.NewPipelineInput("repo", client.MapMethod),
		},
		false, // not an update
	); err != nil {
		return // handle error
	}

	if err := c.CreatePipeline(
		"reduce",               // the name of the pipeline
		"pachyderm/test_image", // your docker image
		[]string{"reduce"},     // the command run in your docker image
		nil,                    // no stdin
		nil,                    // let pachyderm decide the parallelism
		[]*pps.PipelineInput{
			// reduce over "map"
			client.NewPipelineInput("map", client.ReduceMethod),
		},
		false, // not an update
	); err != nil {
		return // handle error
	}

	commits, err := c.ListCommitByRepo( // List commits that are...
		[]string{"reduce"},        // from the "reduce" repo (which the "reduce" pipeline outputs)
		nil,                       // no provenance
		client.CommitTypeRead,     // are readable
		client.CommitStatusNormal, // ignore cancelled commits
		true,                      // block until commits are available
	)
	if err != nil {
		return // handle error
	}
	for _, commitInfo := range commits {
		// Read output from the pipeline
		var buffer bytes.Buffer
		if err := c.GetFile("reduce", commitInfo.Commit.ID, "file", 0, 0, "", false, nil, &buffer); err != nil {
			return //handle error
		}
	}
}
Output:

Index

Examples

Constants

View Source
const (
	CommitTypeNone  = pfs.CommitType_COMMIT_TYPE_NONE
	CommitTypeRead  = pfs.CommitType_COMMIT_TYPE_READ
	CommitTypeWrite = pfs.CommitType_COMMIT_TYPE_WRITE
)

CommitTypes alias pfs.CommitType_*

View Source
const (
	CommitStatusNormal    = pfs.CommitStatus_NORMAL
	CommitStatusArchived  = pfs.CommitStatus_ARCHIVED
	CommitStatusCancelled = pfs.CommitStatus_CANCELLED
	CommitStatusAll       = pfs.CommitStatus_ALL
)

CommitStatus alias pfs.CommitStatus_*

View Source
const (
	// PPSPodNameEnv is the environment variable that a pod can use to
	// see its own name.  The pod name is made available through the Kubernetes
	// downward API.
	PPSPodNameEnv = "PPS_POD_NAME"
	// PPSLeasePeriod is the amount of time for a lease on a chunk to expire.
	// That is, a pod needs to send ContinueJob to PPS at lease once every this
	// amount of time in order to keep owning a chunk.  In reality, pods send
	// ContinueJob more often than that because they need to account for network
	// latency.
	PPSLeasePeriod = 30 * time.Second
)
View Source
const DefaultMaxConcurrentStreams uint = 100

DefaultMaxConcurrentStreams defines the max number of Putfiles or Getfiles happening simultaneously

Variables

View Source
var (
	// MapMethod defines a pps.Method for mapper pipelines.
	MapMethod = &pps.Method{
		Partition:   pps.Partition_BLOCK,
		Incremental: pps.Incremental_DIFF,
	}
	// ReduceMethod defines a pps.Method for non-incremental reducer pipelines.
	ReduceMethod = &pps.Method{
		Partition:   pps.Partition_FILE,
		Incremental: pps.Incremental_NONE,
	}
	// IncrementalReduceMethod defines a pps.Method for incremental reducer pipelines.
	IncrementalReduceMethod = &pps.Method{
		Partition:   pps.Partition_FILE,
		Incremental: pps.Incremental_DIFF,
	}
	// GlobalMethod defines a pps.Method for non-incremental, non-partitioned pipelines.
	GlobalMethod = &pps.Method{
		Partition:   pps.Partition_REPO,
		Incremental: pps.Incremental_NONE,
	}
	// DefaultMethod defines the default pps.Method for a pipeline.
	DefaultMethod = MapMethod
	// MethodAliasMap maps a string to a pps.Method for JSON decoding.
	MethodAliasMap = map[string]*pps.Method{
		"map":                MapMethod,
		"reduce":             ReduceMethod,
		"incremental_reduce": IncrementalReduceMethod,
		"global":             GlobalMethod,
	}
	// ReservedRepoNames defines a set of reserved repo names for internal use.
	ReservedRepoNames = map[string]bool{
		"out":  true,
		"prev": true,
	}
)
View Source
var (
	// MaxMsgSize is used to define the GRPC frame size
	MaxMsgSize = 20 * 1024 * 1024
)

Functions

func NewBlock

func NewBlock(hash string) *pfs.Block

NewBlock creates a pfs.Block.

func NewCommit

func NewCommit(repoName string, commitID string) *pfs.Commit

NewCommit creates a pfs.Commit.

func NewFile

func NewFile(repoName string, commitID string, path string) *pfs.File

NewFile creates a pfs.File.

func NewJob

func NewJob(jobID string) *pps.Job

NewJob creates a pps.Job.

func NewJobInput

func NewJobInput(repoName string, commitID string, method *pps.Method) *pps.JobInput

NewJobInput creates a pps.JobInput.

func NewPipeline

func NewPipeline(pipelineName string) *pps.Pipeline

NewPipeline creates a pps.Pipeline.

func NewPipelineInput

func NewPipelineInput(repoName string, method *pps.Method) *pps.PipelineInput

NewPipelineInput creates a new pps.PipelineInput

func NewRepo

func NewRepo(repoName string) *pfs.Repo

NewRepo creates a pfs.Repo.

Types

type APIClient

type APIClient struct {
	PfsAPIClient
	PpsAPIClient
	BlockAPIClient
	ObjectAPIClient
	// contains filtered or unexported fields
}

An APIClient is a wrapper around pfs, pps and block APIClients.

func NewFromAddress

func NewFromAddress(addr string) (*APIClient, error)

NewFromAddress constructs a new APIClient for the server at addr.

func NewFromAddressWithConcurrency added in v1.3.6

func NewFromAddressWithConcurrency(addr string, maxConcurrentStreams uint) (*APIClient, error)

NewFromAddressWithConcurrency constructs a new APIClient and sets the max concurrency of streaming requests (GetFile / PutFile)

func NewInCluster

func NewInCluster() (*APIClient, error)

NewInCluster constructs a new APIClient using env vars that Kubernetes creates. This should be used to access Pachyderm from within a Kubernetes cluster with Pachyderm running on it.

func NewMetricsClientFromAddress added in v1.2.4

func NewMetricsClientFromAddress(addr string, metrics bool, prefix string) (*APIClient, error)

NewMetricsClientFromAddress Creates a client that will report a user's Metrics

func NewMetricsClientFromAddressWithConcurrency added in v1.3.6

func NewMetricsClientFromAddressWithConcurrency(addr string, metrics bool, prefix string, maxConcurrentStreams uint) (*APIClient, error)

NewMetricsClientFromAddressWithConcurrency Creates a client that will report a user's Metrics, and sets the max concurrency of streaming requests (GetFile / PutFile)

func (APIClient) ArchiveAll added in v1.2.0

func (c APIClient) ArchiveAll() error

ArchiveAll archives all commits in all repos.

func (APIClient) ArchiveCommit added in v1.2.0

func (c APIClient) ArchiveCommit(repoName string, commitID string) error

ArchiveCommit marks a commit as archived. Archived commits are not listed in ListCommit unless commit status is set to Archived or All. Archived commits are not considered by FlushCommit either.

func (APIClient) CancelCommit

func (c APIClient) CancelCommit(repoName string, commitID string) error

CancelCommit ends the process of committing data to a repo. It differs from FinishCommit in that the Commit will not be used as a source for downstream pipelines. CancelCommit is used primarily by PPS for the output commits of errant jobs.

func (*APIClient) Close added in v1.2.0

func (c *APIClient) Close() error

Close the connection to gRPC

func (APIClient) Compact added in v1.3.9

func (c APIClient) Compact() error

Compact forces compaction of objects.

func (APIClient) CreateJob

func (c APIClient) CreateJob(
	image string,
	cmd []string,
	stdin []string,
	parallelismSpec *pps.ParallelismSpec,
	inputs []*pps.JobInput,
	parentJobID string,
	internalPort int32,
	externalPort int32,
) (*pps.Job, error)

CreateJob creates and runs a job in PPS. image is the Docker image to run the job in. cmd is the command passed to the Docker run invocation. NOTE as with Docker cmd is not run inside a shell that means that things like wildcard globbing (*), pipes (|) and file redirects (> and >>) will not work. To get that behavior you should have your command be a shell of your choice and pass a shell script to stdin. stdin is a slice of lines that are sent to your command on stdin. Lines need not end in newline characters. parallelism is how many copies of your container should run in parallel. You may pass 0 for parallelism in which case PPS will set the parallelism based on availabe resources. inputs specifies a set of Commits that will be visible to the job during runtime. parentJobID specifies the a job to use as a parent, it may be left empty in which case there is no parent job. If not left empty your job will use the parent Job's output commit as the parent of its output commit.

func (APIClient) CreatePipeline

func (c APIClient) CreatePipeline(
	name string,
	image string,
	cmd []string,
	stdin []string,
	parallelismSpec *pps.ParallelismSpec,
	inputs []*pps.PipelineInput,
	update bool,
) error

CreatePipeline creates a new pipeline, pipelines are the main computation object in PPS they create a flow of data from a set of input Repos to an output Repo (which has the same name as the pipeline). Whenever new data is committed to one of the input repos the pipelines will create jobs to bring the output Repo up to data. image is the Docker image to run the jobs in. cmd is the command passed to the Docker run invocation. NOTE as with Docker cmd is not run inside a shell that means that things like wildcard globbing (*), pipes (|) and file redirects (> and >>) will not work. To get that behavior you should have your command be a shell of your choice and pass a shell script to stdin. stdin is a slice of lines that are sent to your command on stdin. Lines need not end in newline characters. parallelism is how many copies of your container should run in parallel. You may pass 0 for parallelism in which case PPS will set the parallelism based on availabe resources. inputs specifies a set of Repos that will be visible to the jobs during runtime. commits to these repos will cause the pipeline to create new jobs to process them. update indicates that you want to update an existing pipeline

func (APIClient) CreateRepo

func (c APIClient) CreateRepo(repoName string) error

CreateRepo creates a new Repo object in pfs with the given name. Repos are the top level data object in pfs and should be used to store data of a similar type. For example rather than having a single Repo for an entire project you might have seperate Repos for logs, metrics, database dumps etc.

func (APIClient) DeleteAll

func (c APIClient) DeleteAll() error

DeleteAll deletes everything in the cluster. Use with caution, there is no undo.

func (APIClient) DeleteBlock

func (c APIClient) DeleteBlock(block *pfs.Block) error

DeleteBlock deletes a block from the block store. NOTE: this is lower level function that's used internally and might not be useful to users.

func (APIClient) DeleteCommit

func (c APIClient) DeleteCommit(repoName string, commitID string) error

DeleteCommit deletes a commit. Note it is currently not implemented.

func (APIClient) DeleteFile

func (c APIClient) DeleteFile(repoName string, commitID string, path string) error

DeleteFile deletes a file from a Commit. DeleteFile leaves a tombstone in the Commit, assuming the file isn't written to later attempting to get the file from the finished commit will result in not found error. The file will of course remain intact in the Commit's parent.

func (APIClient) DeleteJob added in v1.3.0

func (c APIClient) DeleteJob(jobID string) error

DeleteJob deletes a job along with its output Repo

func (APIClient) DeletePipeline

func (c APIClient) DeletePipeline(name string) error

DeletePipeline deletes a pipeline along with its output Repo.

func (APIClient) DeleteRepo

func (c APIClient) DeleteRepo(repoName string, force bool) error

DeleteRepo deletes a repo and reclaims the storage space it was using. Note that as of 1.0 we do not reclaim the blocks that the Repo was referencing, this is because they may also be referenced by other Repos and deleting them would make those Repos inaccessible. This will be resolved in later versions. If "force" is set to true, the repo will be removed regardless of errors. This argument should be used with care.

func (APIClient) FinishCommit

func (c APIClient) FinishCommit(repoName string, commitID string) error

FinishCommit ends the process of committing data to a Repo and persists the Commit. Once a Commit is finished the data becomes immutable and future attempts to write to it with PutFile will error.

func (APIClient) FlushCommit

func (c APIClient) FlushCommit(commits []*pfs.Commit, toRepos []*pfs.Repo) ([]*pfs.CommitInfo, error)

FlushCommit blocks until all of the commits which have a set of commits as provenance have finished. For commits to be considered they must have all of the specified commits as provenance. This in effect waits for all of the jobs that are triggered by a set of commits to complete. It returns an error if any of the commits it's waiting on are cancelled due to one of the jobs encountering an error during runtime. If toRepos is not nil then only the commits up to and including those repos will be considered, otherwise all repos are considered. Note that it's never necessary to call FlushCommit to run jobs, they'll run no matter what, FlushCommit just allows you to wait for them to complete and see their output once they do.

func (APIClient) ForkCommit added in v1.2.0

func (c APIClient) ForkCommit(repoName string, parentCommit string, branch string) (*pfs.Commit, error)

ForkCommit is the same as StartCommit except that the commit is created on a new branch.

func (APIClient) GetBlock

func (c APIClient) GetBlock(hash string, offset uint64, size uint64) (io.Reader, error)

GetBlock returns the content of a block using it's hash. offset specifies a number of bytes that should be skipped in the beginning of the block. size limits the total amount of data returned, note you will get fewer bytes than size if you pass a value larger than the size of the block. If size is set to 0 then all of the data will be returned. NOTE: this is lower level function that's used internally and might not be useful to users.

func (APIClient) GetFile

func (c APIClient) GetFile(repoName string, commitID string, path string, offset int64,
	size int64, fromCommitID string, fullFile bool, shard *pfs.Shard, writer io.Writer) error

GetFile returns the contents of a file at a specific Commit. offset specifies a number of bytes that should be skipped in the beginning of the file. size limits the total amount of data returned, note you will get fewer bytes than size if you pass a value larger than the size of the file. If size is set to 0 then all of the data will be returned. fromCommitID lets you get only the data which was added after this Commit. shard allows you to downsample the data, returning only a subset of the blocks in the file. shard may be left nil in which case the entire file will be returned

func (APIClient) GetLogs

func (c APIClient) GetLogs(
	jobID string,
	writer io.Writer,
) error

GetLogs gets logs from a job (logs includes stdout and stderr).

func (APIClient) GetObject added in v1.3.9

func (c APIClient) GetObject(hash string, writer io.Writer) error

GetObject gets an object out of the object store by hash.

func (APIClient) GetTag added in v1.3.9

func (c APIClient) GetTag(tag string, writer io.Writer) error

GetTag gets an object out of the object store by tag.

func (APIClient) InspectBlock

func (c APIClient) InspectBlock(hash string) (*pfs.BlockInfo, error)

InspectBlock returns info about a specific Block.

func (APIClient) InspectCommit

func (c APIClient) InspectCommit(repoName string, commitID string) (*pfs.CommitInfo, error)

InspectCommit returns info about a specific Commit.

func (APIClient) InspectFile

func (c APIClient) InspectFile(repoName string, commitID string, path string,
	fromCommitID string, fullFile bool, shard *pfs.Shard) (*pfs.FileInfo, error)

InspectFile returns info about a specific file. fromCommitID lets you get only info which was added after this Commit. shard allows you to downsample the data, returning info about only a subset of the blocks in the file. shard may be left nil in which case info about the entire file will be returned

func (APIClient) InspectJob

func (c APIClient) InspectJob(jobID string, blockState bool) (*pps.JobInfo, error)

InspectJob returns info about a specific job. blockOutput will cause the call to block until the job has been assigned an output commit. blockState will cause the call to block until the job reaches a terminal state (failure or success).

func (APIClient) InspectObject added in v1.3.9

func (c APIClient) InspectObject(hash string) (*pfs.ObjectInfo, error)

InspectObject returns info about an Object.

func (APIClient) InspectPipeline

func (c APIClient) InspectPipeline(pipelineName string) (*pps.PipelineInfo, error)

InspectPipeline returns info about a specific pipeline.

func (APIClient) InspectRepo

func (c APIClient) InspectRepo(repoName string) (*pfs.RepoInfo, error)

InspectRepo returns info about a specific Repo.

func (*APIClient) KeepConnected added in v1.2.0

func (c *APIClient) KeepConnected(cancel chan bool)

KeepConnected periodically health checks the connection and attempts to reconnect if it becomes unhealthy.

func (APIClient) ListBlock

func (c APIClient) ListBlock() ([]*pfs.BlockInfo, error)

ListBlock returns info about all Blocks.

func (APIClient) ListBranch

func (c APIClient) ListBranch(repoName string, status pfs.CommitStatus) ([]string, error)

ListBranch lists the active branches on a Repo.

func (APIClient) ListCommit

func (c APIClient) ListCommit(exclude []*pfs.Commit, include []*pfs.Commit,
	provenance []*pfs.Commit, commitType pfs.CommitType, status pfs.CommitStatus,
	block bool) ([]*pfs.CommitInfo, error)

ListCommit lists commits.

exclude and include are filters that either include or exclude the ancestors of the given commits. A commit is considered the ancestor of itself. For instance, ListCommit(include("foo/2")) returns commits foo/0, foo/1, and foo/2, if they exist. In contrast, ListCommit(exclude("foo/2")) returns commits that are *not* foo/0, foo/1, or foo/2.

To get all commits on a given branch, simply include a commit whose ID is the branch name: ListCommit(include("foo"))

To get all commits in a repo, use ListCommitByRepo.

To get all commits, simply don't provide include or exclude.

provenance specifies a set of provenance commits, only commits which have ALL of the specified commits as provenance will be returned unless provenance is nil in which case it is ignored.

commitType specifies the type of commit you want returned, normally CommitTypeRead is the most useful option

status specifies the status of commit you want returned. By default, cancelled or archived commits are not returned.

block, when set to true, will cause ListCommit to block until at least 1 new CommitInfo is available. Using fromCommits and block you can get subscription semantics from ListCommit. commitStatus, controls the statuses of the returned commits. The default value `Normal` will filter out archived and cancelled commits.

func (APIClient) ListCommitByRepo added in v1.2.4

func (c APIClient) ListCommitByRepo(repoNames []string, provenance []*pfs.Commit,
	commitType pfs.CommitType, status pfs.CommitStatus, block bool) ([]*pfs.CommitInfo, error)

ListCommitByRepo lists commits in the given repos.

repoNames defines a set of Repos to consider commits from, if repoNames is left nil or empty then the result will be empty.

provenance specifies a set of provenance commits, only commits which have ALL of the specified commits as provenance will be returned unless provenance is nil in which case it is ignored.

commitType specifies the type of commit you want returned, normally CommitTypeRead is the most useful option

status specifies the status of commit you want returned. By default, cancelled or archived commits are not returned.

block, when set to true, will cause ListCommit to block until at least 1 new CommitInfo is available. Using repoNames and block you can get subscription semantics from ListCommit. commitStatus, controls the statuses of the returned commits. The default value `Normal` will filter out archived and cancelled commits.

func (APIClient) ListFile

func (c APIClient) ListFile(repoName string, commitID string, path string, fromCommitID string, fullFile bool, shard *pfs.Shard, recurse bool) ([]*pfs.FileInfo, error)

ListFile returns info about all files in a Commit. fromCommitID lets you get only info which was added after this Commit. shard allows you to downsample the data, returning info about only a subset of the blocks in the files or only a subset of files. shard may be left nil in which case info about all the files and all the blocks in those files will be returned. recurse causes ListFile to accurately report the size of data stored in directories, it makes the call more expensive

func (APIClient) ListFileFast added in v1.2.3

func (c APIClient) ListFileFast(repoName string, commitID string, path string, fromCommitID string, fullFile bool, shard *pfs.Shard) ([]*pfs.FileInfo, error)

ListFileFast is the same as ListFile except that it doesn't compute the sizes of the files. As a result it's faster than ListFile.

func (APIClient) ListJob

func (c APIClient) ListJob(pipelineName string, inputCommit []*pfs.Commit) ([]*pps.JobInfo, error)

ListJob returns info about all jobs. If pipelineName is non empty then only jobs that were started by the named pipeline will be returned If inputCommit is non-nil then only jobs which took the specific commits as inputs will be returned. The order of the inputCommits doesn't matter.

func (APIClient) ListPipeline

func (c APIClient) ListPipeline() ([]*pps.PipelineInfo, error)

ListPipeline returns info about all pipelines.

func (APIClient) ListRepo

func (c APIClient) ListRepo(provenance []string) ([]*pfs.RepoInfo, error)

ListRepo returns info about all Repos. provenance specifies a set of provenance repos, only repos which have ALL of the specified repos as provenance will be returned unless provenance is nil in which case it is ignored.

func (APIClient) MakeDirectory

func (c APIClient) MakeDirectory(repoName string, commitID string, path string) (retErr error)

MakeDirectory creates a directory in PFS. Note directories are created implicitly by PutFile, so you technically never need this function unless you want to create an empty directory.

func (APIClient) PutBlock

func (c APIClient) PutBlock(delimiter pfs.Delimiter, reader io.Reader) (blockRefs *pfs.BlockRefs, retErr error)

PutBlock takes a reader and splits the data in it into blocks. Blocks are guaranteed to be new line delimited. Blocks are content addressed and are thus identified by hashes of the content. NOTE: this is lower level function that's used internally and might not be useful to users.

func (APIClient) PutFile

func (c APIClient) PutFile(repoName string, commitID string, path string, reader io.Reader) (_ int, retErr error)

PutFile writes a file to PFS from a reader.

func (APIClient) PutFileURL added in v1.2.0

func (c APIClient) PutFileURL(repoName string, commitID string, path string, url string, recursive bool) (retErr error)

PutFileURL puts a file using the content found at a URL. The URL is sent to the server which performs the request. recursive allow for recursive scraping of some types URLs for example on s3:// urls.

func (APIClient) PutFileWithDelimiter

func (c APIClient) PutFileWithDelimiter(repoName string, commitID string, path string, delimiter pfs.Delimiter, reader io.Reader) (_ int, retErr error)

PutFileWithDelimiter writes a file to PFS from a reader delimiter is used to tell PFS how to break the input into blocks

func (APIClient) PutFileWriter

func (c APIClient) PutFileWriter(repoName string, commitID string, path string, delimiter pfs.Delimiter) (io.WriteCloser, error)

PutFileWriter writes a file to PFS. NOTE: PutFileWriter returns an io.WriteCloser you must call Close on it when you are done writing.

func (APIClient) PutObject added in v1.3.9

func (c APIClient) PutObject(r io.Reader, tags ...string) (object *pfs.Object, retErr error)

PutObject puts a value into the object store and tags it with 0 or more tags.

func (APIClient) ReadObject added in v1.3.9

func (c APIClient) ReadObject(hash string) ([]byte, error)

ReadObject gets an object by hash and returns it directly as []byte.

func (APIClient) ReadTag added in v1.3.9

func (c APIClient) ReadTag(tag string) ([]byte, error)

ReadTag gets an object by tag and returns it directly as []byte.

func (APIClient) ReplayCommit added in v1.2.0

func (c APIClient) ReplayCommit(repo string, fromCommits []string, to string) ([]*pfs.Commit, error)

ReplayCommit replays a series of commits on top of commit "to". The replayed commits are the ancestors of the commits in "fromCommits", with no duplicates in case of common ancestors.

func (APIClient) RerunPipeline added in v1.3.6

func (c APIClient) RerunPipeline(name string, include []*pfs.Commit, exclude []*pfs.Commit) error

RerunPipeline reruns a pipeline over a given set of commits. Exclude and include are filters that either include or exclude the ancestors of the given commits. A commit is considered the ancestor of itself. The behavior is the same as that of ListCommit.

func (APIClient) SquashCommit added in v1.2.0

func (c APIClient) SquashCommit(repo string, fromCommits []string, to string) error

SquashCommit copies the content of `fromCommits` to `to`, which needs to be an open commit.

func (APIClient) StartCommit

func (c APIClient) StartCommit(repoName string, parentCommit string) (*pfs.Commit, error)

StartCommit begins the process of committing data to a Repo. Once started you can write to the Commit with PutFile and when all the data has been written you must finish the Commit with FinishCommit. NOTE, data is not persisted until FinishCommit is called. parentCommit specifies the parent Commit, upon creation the new Commit will appear identical to the parent Commit, data can safely be added to the new commit without affecting the contents of the parent Commit. You may pass "" as parentCommit in which case the new Commit will have no parent and will initially appear empty. branch is a more convenient way to build linear chains of commits. When a commit is started with a non empty branch the value of branch becomes an alias for the created Commit. This enables a more intuitive access pattern. When the commit is started on a branch the previous head of the branch is used as the parent of the commit.

func (APIClient) StartPipeline added in v1.2.0

func (c APIClient) StartPipeline(name string) error

StartPipeline restarts a stopped pipeline.

func (APIClient) StopPipeline added in v1.2.0

func (c APIClient) StopPipeline(name string) error

StopPipeline prevents a pipeline from processing things, it can be restarted with StartPipeline.

func (APIClient) TagObject added in v1.3.9

func (c APIClient) TagObject(hash string, tags ...string) error

TagObject applies a tag to an existing object.

func (APIClient) Walk added in v1.3.2

func (c APIClient) Walk(repoName string, commitID string, path string, fromCommitID string, fullFile bool, shard *pfs.Shard, walkFn WalkFn) error

Walk walks the pfs filesystem rooted at path. walkFn will be called for each file found under path, this includes both regular files and directories.

type BlockAPIClient

type BlockAPIClient pfs.BlockAPIClient

BlockAPIClient is an alias for pfs.BlockAPIClient.

type ObjectAPIClient added in v1.3.9

type ObjectAPIClient pfs.ObjectAPIClient

ObjectAPIClient is an alias for pfs.ObjectAPIClient

type PfsAPIClient

type PfsAPIClient pfs.APIClient

PfsAPIClient is an alias for pfs.APIClient.

type PpsAPIClient

type PpsAPIClient pps.APIClient

PpsAPIClient is an alias for pps.APIClient.

type WalkFn added in v1.3.2

type WalkFn func(*pfs.FileInfo) error

WalkFn is the type of the function called for each file in Walk. Returning a non-nil error from WalkFn will result in Walk aborting and returning said error.

Directories

Path Synopsis
Package health is a generated protocol buffer package.
Package health is a generated protocol buffer package.
Package pfs is a generated protocol buffer package.
Package pfs is a generated protocol buffer package.
pkg
config
Package config is a generated protocol buffer package.
Package config is a generated protocol buffer package.
shard
Package shard is a generated protocol buffer package.
Package shard is a generated protocol buffer package.
Package pps is a generated protocol buffer package.
Package pps is a generated protocol buffer package.
versionpb
Package versionpb is a generated protocol buffer package.
Package versionpb is a generated protocol buffer package.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL