Documentation ¶
Overview ¶
Example (Pfs) ¶
package main import ( "bytes" "strings" "github.com/pachyderm/pachyderm/src/client" ) func main() { c, err := client.NewFromAddress("0.0.0.0:30650") if err != nil { return // handle error } // Create a repo called "repo" if err := c.CreateRepo("repo"); err != nil { return // handle error } // Start a commit in our new repo on the "master" branch commit1, err := c.StartCommit("repo", "master") if err != nil { return // handle error } // Put a file called "file" in the newly created commit with the content "foo\n". if _, err := c.PutFile("repo", "master", "file", strings.NewReader("foo\n")); err != nil { return // handle error } // Finish the commit. if err := c.FinishCommit("repo", "master"); err != nil { return //handle error } // Read what we wrote. var buffer bytes.Buffer if err := c.GetFile("repo", "master", "file", 0, 0, &buffer); err != nil { return //handle error } // buffer now contains "foo\n" // Start another commit with the previous commit as the parent. commit2, err := c.StartCommit("repo", "master") if err != nil { return //handle error } // Extend "file" in the newly created commit with the content "bar\n". if _, err := c.PutFile("repo", "master", "file", strings.NewReader("bar\n")); err != nil { return // handle error } // Finish the commit. if err := c.FinishCommit("repo", "master"); err != nil { return //handle error } // Read what we wrote. buffer.Reset() if err := c.GetFile("repo", "master", "file", 0, 0, &buffer); err != nil { return //handle error } // buffer now contains "foo\nbar\n" // We can still read the old version of the file though: buffer.Reset() if err := c.GetFile("repo", commit1.ID, "file", 0, 0, &buffer); err != nil { return //handle error } // buffer now contains "foo\n" }
Output:
Example (Pps) ¶
package main import ( "bytes" "github.com/pachyderm/pachyderm/src/client" "github.com/pachyderm/pachyderm/src/client/pps" ) func main() { c, err := client.NewFromAddress("0.0.0.0:30650") if err != nil { return // handle error } // we assume there's already a repo called "repo" // and that it already has some data in it // take a look at src/client/pfs_test.go for an example of how to get there. // Create a map pipeline if err := c.CreatePipeline( "map", // the name of the pipeline "pachyderm/test_image", // your docker image []string{"map"}, // the command run in your docker image nil, // no stdin nil, // let pachyderm decide the parallelism []*pps.PipelineInput{ // map over "repo" client.NewPipelineInput("repo", client.MapMethod), }, false, // not an update ); err != nil { return // handle error } if err := c.CreatePipeline( "reduce", // the name of the pipeline "pachyderm/test_image", // your docker image []string{"reduce"}, // the command run in your docker image nil, // no stdin nil, // let pachyderm decide the parallelism []*pps.PipelineInput{ // reduce over "map" client.NewPipelineInput("map", client.ReduceMethod), }, false, // not an update ); err != nil { return // handle error } commits, err := c.ListCommitByRepo( // List commits that are... []string{"reduce"}, // from the "reduce" repo (which the "reduce" pipeline outputs) nil, // no provenance client.CommitTypeRead, // are readable client.CommitStatusNormal, // ignore cancelled commits true, // block until commits are available ) if err != nil { return // handle error } for _, commitInfo := range commits { // Read output from the pipeline var buffer bytes.Buffer if err := c.GetFile("reduce", commitInfo.Commit.ID, "file", 0, 0, "", false, nil, &buffer); err != nil { return //handle error } } }
Output:
Index ¶
- Constants
- func DatumTagPrefix(salt string) string
- func EtcdDialOptions() []grpc.DialOption
- func GetAddressFromUserMachine(cfg *config.Config) string
- func GetDatumTotalTime(s *pps.ProcessStats) time.Duration
- func NewAtomInput(repo string, glob string) *pps.Input
- func NewAtomInputOpts(name string, repo string, branch string, glob string, lazy bool) *pps.Input
- func NewBlock(hash string) *pfs.Block
- func NewBranch(repoName string, branchName string) *pfs.Branch
- func NewCommit(repoName string, commitID string) *pfs.Commit
- func NewCronInput(name string, spec string) *pps.Input
- func NewCrossInput(input ...*pps.Input) *pps.Input
- func NewFile(repoName string, commitID string, path string) *pfs.File
- func NewJob(jobID string) *pps.Job
- func NewJobInput(repoName string, commitID string, glob string) *pps.JobInput
- func NewPipeline(pipelineName string) *pps.Pipeline
- func NewPipelineInput(repoName string, glob string) *pps.PipelineInput
- func NewRepo(repoName string) *pfs.Repo
- func NewUnionInput(input ...*pps.Input) *pps.Input
- func PachDialOptions() []grpc.DialOption
- type APIClient
- func NewFromAddress(addr string) (*APIClient, error)
- func NewFromAddressWithConcurrency(addr string, maxConcurrentStreams uint) (*APIClient, error)
- func NewInCluster() (*APIClient, error)
- func NewOnUserMachine(reportMetrics bool, prefix string) (*APIClient, error)
- func NewOnUserMachineWithConcurrency(reportMetrics bool, prefix string, maxConcurrentStreams uint) (*APIClient, error)
- func (c *APIClient) AddMetadata(ctx context.Context) context.Context
- func (c APIClient) BlockCommit(repoName string, commitID string) (*pfs.CommitInfo, error)
- func (c APIClient) BuildCommit(repoName string, branch string, parent string, treeObject string) (*pfs.Commit, error)
- func (c *APIClient) Close() error
- func (c APIClient) Compact() error
- func (c APIClient) CopyFile(srcRepo, srcCommit, srcPath, dstRepo, dstCommit, dstPath string, ...) error
- func (c APIClient) CreateBranch(repoName string, branch string, commit string, provenance []*pfs.Branch) error
- func (c APIClient) CreateJob(pipeline string, outputCommit *pfs.Commit) (*pps.Job, error)
- func (c APIClient) CreatePipeline(name string, image string, cmd []string, stdin []string, ...) error
- func (c APIClient) CreatePipelineService(name string, image string, cmd []string, stdin []string, ...) error
- func (c APIClient) CreateRepo(repoName string) error
- func (c *APIClient) Ctx() context.Context
- func (c APIClient) DeleteAll() error
- func (c APIClient) DeleteBranch(repoName string, branch string) error
- func (c APIClient) DeleteCommit(repoName string, commitID string) error
- func (c APIClient) DeleteFile(repoName string, commitID string, path string) error
- func (c APIClient) DeleteJob(jobID string) error
- func (c APIClient) DeletePipeline(name string) error
- func (c APIClient) DeleteRepo(repoName string, force bool) error
- func (c APIClient) DiffFile(newRepoName, newCommitID, newPath, oldRepoName, oldCommitID, oldPath string, ...) ([]*pfs.FileInfo, []*pfs.FileInfo, error)
- func (c APIClient) Extract(objects bool, f func(op *admin.Op) error) error
- func (c APIClient) ExtractAll(objects bool) ([]*admin.Op, error)
- func (c APIClient) ExtractPipeline(pipelineName string) (*pps.CreatePipelineRequest, error)
- func (c APIClient) ExtractURL(url string) error
- func (c APIClient) ExtractWriter(objects bool, w io.Writer) error
- func (c APIClient) FinishCommit(repoName string, commitID string) error
- func (c APIClient) FlushCommit(commits []*pfs.Commit, toRepos []*pfs.Repo) (CommitInfoIterator, error)
- func (c APIClient) FlushCommitF(commits []*pfs.Commit, toRepos []*pfs.Repo, f func(*pfs.CommitInfo) error) error
- func (c APIClient) FlushJob(commits []*pfs.Commit, toPipelines []string, f func(*pps.JobInfo) error) error
- func (c APIClient) FlushJobAll(commits []*pfs.Commit, toPipelines []string) ([]*pps.JobInfo, error)
- func (c APIClient) GarbageCollect() error
- func (c *APIClient) GetAddress() string
- func (c APIClient) GetFile(repoName string, commitID string, path string, offset int64, size int64, ...) error
- func (c APIClient) GetFileReadSeeker(repoName string, commitID string, path string) (io.ReadSeeker, error)
- func (c APIClient) GetFileReader(repoName string, commitID string, path string, offset int64, size int64) (io.Reader, error)
- func (c APIClient) GetLogs(pipelineName string, jobID string, data []string, datumID string, master bool, ...) *LogsIter
- func (c APIClient) GetObject(hash string, writer io.Writer) error
- func (c APIClient) GetObjects(hashes []string, offset uint64, size uint64, totalSize uint64, ...) error
- func (c APIClient) GetTag(tag string, writer io.Writer) error
- func (c APIClient) GlobFile(repoName string, commitID string, pattern string) ([]*pfs.FileInfo, error)
- func (c APIClient) Health() error
- func (c APIClient) InspectBranch(repoName string, branch string) (*pfs.BranchInfo, error)
- func (c APIClient) InspectCluster() (*admin.ClusterInfo, error)
- func (c APIClient) InspectCommit(repoName string, commitID string) (*pfs.CommitInfo, error)
- func (c APIClient) InspectDatum(jobID string, datumID string) (*pps.DatumInfo, error)
- func (c APIClient) InspectFile(repoName string, commitID string, path string) (*pfs.FileInfo, error)
- func (c APIClient) InspectJob(jobID string, blockState bool) (*pps.JobInfo, error)
- func (c APIClient) InspectJobOutputCommit(repoName, commitID string, blockState bool) (*pps.JobInfo, error)
- func (c APIClient) InspectObject(hash string) (*pfs.ObjectInfo, error)
- func (c APIClient) InspectPipeline(pipelineName string) (*pps.PipelineInfo, error)
- func (c APIClient) InspectRepo(repoName string) (*pfs.RepoInfo, error)
- func (c APIClient) ListBranch(repoName string) ([]*pfs.BranchInfo, error)
- func (c APIClient) ListCommit(repoName string, to string, from string, number uint64) ([]*pfs.CommitInfo, error)
- func (c APIClient) ListCommitByRepo(repoName string) ([]*pfs.CommitInfo, error)
- func (c APIClient) ListDatum(jobID string, pageSize int64, page int64) (*pps.ListDatumResponse, error)
- func (c APIClient) ListFile(repoName string, commitID string, path string) ([]*pfs.FileInfo, error)
- func (c APIClient) ListJob(pipelineName string, inputCommit []*pfs.Commit, outputCommit *pfs.Commit) ([]*pps.JobInfo, error)
- func (c APIClient) ListObject(f func(*pfs.Object) error) error
- func (c APIClient) ListPipeline() ([]*pps.PipelineInfo, error)
- func (c APIClient) ListRepo() ([]*pfs.RepoInfo, error)
- func (c APIClient) ListTag(f func(*pfs.ListTagsResponse) error) error
- func (c APIClient) PutFile(repoName string, commitID string, path string, reader io.Reader) (_ int, retErr error)
- func (c APIClient) PutFileOverwrite(repoName string, commitID string, path string, reader io.Reader, ...) (_ int, retErr error)
- func (c APIClient) PutFileSplit(repoName string, commitID string, path string, delimiter pfs.Delimiter, ...) (_ int, retErr error)
- func (c APIClient) PutFileSplitWriter(repoName string, commitID string, path string, delimiter pfs.Delimiter, ...) (io.WriteCloser, error)
- func (c APIClient) PutFileURL(repoName string, commitID string, path string, url string, recursive bool, ...) (retErr error)
- func (c APIClient) PutFileWriter(repoName string, commitID string, path string) (io.WriteCloser, error)
- func (c APIClient) PutObject(_r io.Reader, tags ...string) (object *pfs.Object, _ int64, retErr error)
- func (c APIClient) PutObjectSplit(_r io.Reader) (objects []*pfs.Object, _ int64, retErr error)
- func (c APIClient) ReadObject(hash string) ([]byte, error)
- func (c APIClient) ReadObjects(hashes []string, offset uint64, size uint64) ([]byte, error)
- func (c APIClient) ReadTag(tag string) ([]byte, error)
- func (c APIClient) RerunPipeline(name string, include []*pfs.Commit, exclude []*pfs.Commit) error
- func (c APIClient) RestartDatum(jobID string, datumFilter []string) error
- func (c APIClient) Restore(ops []*admin.Op) (retErr error)
- func (c APIClient) RestoreFrom(objects bool, otherC *APIClient) (retErr error)
- func (c APIClient) RestoreReader(r io.Reader) (retErr error)
- func (c APIClient) RestoreURL(url string) (retErr error)
- func (c *APIClient) SetAuthToken(token string)
- func (c APIClient) SetBranch(repoName string, commit string, branch string) error
- func (c APIClient) SetMaxConcurrentStreams(n int)
- func (c APIClient) StartCommit(repoName string, branch string) (*pfs.Commit, error)
- func (c APIClient) StartCommitParent(repoName string, branch string, parentCommit string) (*pfs.Commit, error)
- func (c APIClient) StartPipeline(name string) error
- func (c APIClient) StopJob(jobID string) error
- func (c APIClient) StopPipeline(name string) error
- func (c APIClient) SubscribeCommit(repo string, branch string, from string, state pfs.CommitState) (CommitInfoIterator, error)
- func (c APIClient) SubscribeCommitF(repo, branch, from string, state pfs.CommitState, ...) error
- func (c APIClient) TagObject(hash string, tags ...string) error
- func (c APIClient) Version() (string, error)
- func (c APIClient) Walk(repoName string, commitID string, path string, walkFn WalkFn) error
- func (c *APIClient) WithCtx(ctx context.Context) *APIClient
- type AdminAPIClient
- type AuthAPIClient
- type CommitInfoIterator
- type DeployAPIClient
- type LogsIter
- type ObjectAPIClient
- type PfsAPIClient
- type PpsAPIClient
- type VersionAPIClient
- type WalkFn
Examples ¶
Constants ¶
const ( // MaxListItemsLog specifies the maximum number of items we log in response to a List* API MaxListItemsLog = 10 // StorageSecretName is the name of the Kubernetes secret in which // storage credentials are stored. StorageSecretName = "pachyderm-storage-secret" )
const ( // PPSEtcdPrefixEnv is the environment variable that specifies the etcd // prefix that PPS uses. PPSEtcdPrefixEnv = "PPS_ETCD_PREFIX" // PPSWorkerIPEnv is the environment variable that a worker can use to // see its own IP. The IP address is made available through the // Kubernetes downward API. PPSWorkerIPEnv = "PPS_WORKER_IP" // PPSPodNameEnv is the environment variable that a pod can use to // see its own name. The pod name is made available through the // Kubernetes downward API. PPSPodNameEnv = "PPS_POD_NAME" // PPSPipelineNameEnv is the env var that sets the name of the pipeline // that the workers are running. PPSPipelineNameEnv = "PPS_PIPELINE_NAME" // PPSNamespaceEnv is the namespace in which pachyderm is deployed PPSNamespaceEnv = "PPS_NAMESPACE" // PPSJobIDEnv is the env var that sets the ID of the job that the // workers are running (if the workers belong to an orphan job, rather than a // pipeline). PPSJobIDEnv = "PPS_JOB_ID" // PPSSpecCommitEnv is the namespace in which pachyderm is deployed PPSSpecCommitEnv = "PPS_SPEC_COMMIT" // PPSInputPrefix is the prefix of the path where datums are downloaded // to. A datum of an input named `XXX` is downloaded to `/pfs/XXX/`. PPSInputPrefix = "/pfs" // PPSScratchSpace is where pps workers store data while it's waiting to be // processed. PPSScratchSpace = "/scratch" // PPSWorkerPort is the port that workers use for their gRPC server PPSWorkerPort = 80 // PPSWorkerVolume is the name of the volume in which workers store // data. PPSWorkerVolume = "pachyderm-worker" // PPSWorkerUserContainerName is the name of the container that runs // the user code to process data. PPSWorkerUserContainerName = "user" // PPSWorkerSidecarContainerName is the name of the sidecar container // that runs alongside of each worker container. PPSWorkerSidecarContainerName = "storage" // GCGenerationKey is the etcd key that stores a counter that the // GC utility increments when it runs, so as to invalidate all cache. GCGenerationKey = "gc-generation" )
const DefaultMaxConcurrentStreams uint = 100
DefaultMaxConcurrentStreams defines the max number of Putfiles or Getfiles happening simultaneously
Variables ¶
This section is empty.
Functions ¶
func DatumTagPrefix ¶ added in v1.5.1
DatumTagPrefix hashes a pipeline salt to a string of a fixed size for use as the prefix for datum output trees. This prefix allows us to do garbage collection correctly.
func EtcdDialOptions ¶ added in v1.4.5
func EtcdDialOptions() []grpc.DialOption
EtcdDialOptions is a helper returning a slice of grpc.Dial options such that grpc.Dial() is synchronous: the call doesn't return until the connection has been established and it's safe to send RPCs
func GetAddressFromUserMachine ¶ added in v1.5.0
GetAddressFromUserMachine interprets the Pachyderm config in 'cfg' in the context of local environment variables and returns a "host:port" string pointing at a Pachd target.
func GetDatumTotalTime ¶ added in v1.5.2
func GetDatumTotalTime(s *pps.ProcessStats) time.Duration
GetDatumTotalTime sums the timing stats from a DatumInfo
func NewAtomInput ¶ added in v1.4.6
NewAtomInput returns a new atom input. It only includes required options.
func NewAtomInputOpts ¶ added in v1.4.6
NewAtomInputOpts returns a new atom input. It includes all options.
func NewCronInput ¶ added in v1.5.3
NewCronInput returns an input which will trigger based on a timed schedule. It uses cron syntax to specify the schedule. The input will be exposed to jobs as `/pfs/<name>/time` which will contain a timestamp.
func NewCrossInput ¶ added in v1.4.6
NewCrossInput returns an input which is the cross product of other inputs. That means that all combination of datums will be seen by the job / pipeline.
func NewJobInput ¶
NewJobInput creates a pps.JobInput.
func NewPipeline ¶
NewPipeline creates a pps.Pipeline.
func NewPipelineInput ¶
func NewPipelineInput(repoName string, glob string) *pps.PipelineInput
NewPipelineInput creates a new pps.PipelineInput
func NewUnionInput ¶ added in v1.4.6
NewUnionInput returns an input which is the union of other inputs. That means that all datums from any of the inputs will be seen individually by the job / pipeline.
func PachDialOptions ¶ added in v1.4.5
func PachDialOptions() []grpc.DialOption
PachDialOptions is a helper returning a slice of grpc.Dial options such that
- TLS is disabled
- Dial is synchronous: the call doesn't return until the connection has been established and it's safe to send RPCs
This is primarily useful for Pachd and Worker clients
Types ¶
type APIClient ¶
type APIClient struct { PfsAPIClient PpsAPIClient ObjectAPIClient AuthAPIClient DeployAPIClient VersionAPIClient AdminAPIClient Enterprise enterprise.APIClient // not embedded--method name conflicts with AuthAPIClient // contains filtered or unexported fields }
An APIClient is a wrapper around pfs, pps and block APIClients.
func NewFromAddress ¶
NewFromAddress constructs a new APIClient for the server at addr.
func NewFromAddressWithConcurrency ¶ added in v1.3.6
NewFromAddressWithConcurrency constructs a new APIClient and sets the max concurrency of streaming requests (GetFile / PutFile)
func NewInCluster ¶
NewInCluster constructs a new APIClient using env vars that Kubernetes creates. This should be used to access Pachyderm from within a Kubernetes cluster with Pachyderm running on it.
func NewOnUserMachine ¶ added in v1.5.0
NewOnUserMachine constructs a new APIClient using env vars that may be set on a user's machine (i.e. ADDRESS), as well as $HOME/.pachyderm/config if it exists. This is primarily intended to be used with the pachctl binary, but may also be useful in tests.
TODO(msteffen) this logic is fairly linux/unix specific, and makes the pachyderm client library incompatible with Windows. We may want to move this (and similar) logic into src/server and have it call a NewFromOptions() constructor.
func NewOnUserMachineWithConcurrency ¶ added in v1.5.0
func NewOnUserMachineWithConcurrency(reportMetrics bool, prefix string, maxConcurrentStreams uint) (*APIClient, error)
NewOnUserMachineWithConcurrency is identical to NewOnUserMachine, but explicitly sets a limit on the number of RPC streams that may be open simultaneously
func (*APIClient) AddMetadata ¶ added in v1.5.1
AddMetadata adds necessary metadata (including authentication credentials) to the context 'ctx', preserving any metadata that is present in either the incoming or outgoing metadata of 'ctx'.
func (APIClient) BlockCommit ¶ added in v1.7.0
BlockCommit returns info about a specific Commit, but blocks until that commit has been finished.
func (APIClient) BuildCommit ¶ added in v1.6.0
func (c APIClient) BuildCommit(repoName string, branch string, parent string, treeObject string) (*pfs.Commit, error)
BuildCommit builds a commit in a single call from an existing HashTree that has already been written to the object store. Note this is a more advanced pattern for creating commits that's mostly used internally.
func (APIClient) CopyFile ¶ added in v1.6.0
func (c APIClient) CopyFile(srcRepo, srcCommit, srcPath, dstRepo, dstCommit, dstPath string, overwrite bool) error
CopyFile copys a file from one pfs location to another. It can be used on directories or regular files.
func (APIClient) CreateBranch ¶ added in v1.7.0
func (c APIClient) CreateBranch(repoName string, branch string, commit string, provenance []*pfs.Branch) error
CreateBranch creates a new branch
func (APIClient) CreateJob ¶
CreateJob creates and runs a job in PPS. This function is mostly useful internally, users should generally run work by creating pipelines as well.
func (APIClient) CreatePipeline ¶
func (c APIClient) CreatePipeline( name string, image string, cmd []string, stdin []string, parallelismSpec *pps.ParallelismSpec, input *pps.Input, outputBranch string, update bool, ) error
CreatePipeline creates a new pipeline, pipelines are the main computation object in PPS they create a flow of data from a set of input Repos to an output Repo (which has the same name as the pipeline). Whenever new data is committed to one of the input repos the pipelines will create jobs to bring the output Repo up to data. image is the Docker image to run the jobs in. cmd is the command passed to the Docker run invocation. NOTE as with Docker cmd is not run inside a shell that means that things like wildcard globbing (*), pipes (|) and file redirects (> and >>) will not work. To get that behavior you should have your command be a shell of your choice and pass a shell script to stdin. stdin is a slice of lines that are sent to your command on stdin. Lines need not end in newline characters. parallelism is how many copies of your container should run in parallel. You may pass 0 for parallelism in which case PPS will set the parallelism based on available resources. input specifies a set of Repos that will be visible to the jobs during runtime. commits to these repos will cause the pipeline to create new jobs to process them. update indicates that you want to update an existing pipeline
func (APIClient) CreatePipelineService ¶ added in v1.6.0
func (c APIClient) CreatePipelineService( name string, image string, cmd []string, stdin []string, parallelismSpec *pps.ParallelismSpec, input *pps.Input, update bool, internalPort int32, externalPort int32, ) error
CreatePipelineService creates a new pipeline service.
func (APIClient) CreateRepo ¶
CreateRepo creates a new Repo object in pfs with the given name. Repos are the top level data object in pfs and should be used to store data of a similar type. For example rather than having a single Repo for an entire project you might have separate Repos for logs, metrics, database dumps etc.
func (*APIClient) Ctx ¶ added in v1.5.1
Ctx is a convenience function that returns adds Pachyderm authn metadata to context.Background().
func (APIClient) DeleteAll ¶
DeleteAll deletes everything in the cluster. Use with caution, there is no undo.
func (APIClient) DeleteBranch ¶ added in v1.3.19
DeleteBranch deletes a branch, but leaves the commits themselves intact. In other words, those commits can still be accessed via commit IDs and other branches they happen to be on.
func (APIClient) DeleteCommit ¶
DeleteCommit deletes a commit. Note it is currently not implemented.
func (APIClient) DeleteFile ¶
DeleteFile deletes a file from a Commit. DeleteFile leaves a tombstone in the Commit, assuming the file isn't written to later attempting to get the file from the finished commit will result in not found error. The file will of course remain intact in the Commit's parent.
func (APIClient) DeletePipeline ¶
DeletePipeline deletes a pipeline along with its output Repo.
func (APIClient) DeleteRepo ¶
DeleteRepo deletes a repo and reclaims the storage space it was using. Note that as of 1.0 we do not reclaim the blocks that the Repo was referencing, this is because they may also be referenced by other Repos and deleting them would make those Repos inaccessible. This will be resolved in later versions. If "force" is set to true, the repo will be removed regardless of errors. This argument should be used with care.
func (APIClient) DiffFile ¶ added in v1.4.8
func (c APIClient) DiffFile(newRepoName, newCommitID, newPath, oldRepoName, oldCommitID, oldPath string, shallow bool) ([]*pfs.FileInfo, []*pfs.FileInfo, error)
DiffFile returns the difference between 2 paths, old path may be omitted in which case the parent of the new path will be used. DiffFile return 2 values (unless it returns an error) the first value is files present under new path, the second is files present under old path, files which are under both paths and have identical content are omitted.
func (APIClient) ExtractAll ¶ added in v1.6.9
ExtractAll cluster state as a slice of operations.
func (APIClient) ExtractPipeline ¶ added in v1.7.0
func (c APIClient) ExtractPipeline(pipelineName string) (*pps.CreatePipelineRequest, error)
ExtractPipeline extracts a single pipeline.
func (APIClient) ExtractURL ¶ added in v1.6.9
ExtractURL extracts all cluster state and marshalls it to object storage.
func (APIClient) ExtractWriter ¶ added in v1.6.9
ExtractWriter extracts all cluster state and marshals it to w.
func (APIClient) FinishCommit ¶
FinishCommit ends the process of committing data to a Repo and persists the Commit. Once a Commit is finished the data becomes immutable and future attempts to write to it with PutFile will error.
func (APIClient) FlushCommit ¶
func (c APIClient) FlushCommit(commits []*pfs.Commit, toRepos []*pfs.Repo) (CommitInfoIterator, error)
FlushCommit returns an iterator that returns commits that have the specified `commits` as provenance. Note that the iterator can block if jobs have not successfully completed. This in effect waits for all of the jobs that are triggered by a set of commits to complete.
If toRepos is not nil then only the commits up to and including those repos will be considered, otherwise all repos are considered.
Note that it's never necessary to call FlushCommit to run jobs, they'll run no matter what, FlushCommit just allows you to wait for them to complete and see their output once they do.
func (APIClient) FlushCommitF ¶ added in v1.7.0
func (c APIClient) FlushCommitF(commits []*pfs.Commit, toRepos []*pfs.Repo, f func(*pfs.CommitInfo) error) error
FlushCommitF calls f with commits that have the specified `commits` as provenance. Note that it can block if jobs have not successfully completed. This in effect waits for all of the jobs that are triggered by a set of commits to complete.
If toRepos is not nil then only the commits up to and including those repos will be considered, otherwise all repos are considered.
Note that it's never necessary to call FlushCommit to run jobs, they'll run no matter what, FlushCommit just allows you to wait for them to complete and see their output once they do.
func (APIClient) FlushJob ¶ added in v1.7.0
func (c APIClient) FlushJob(commits []*pfs.Commit, toPipelines []string, f func(*pps.JobInfo) error) error
FlushJob calls f with all the jobs which were triggered by commits. If toPipelines is non-nil then only the jobs between commits and those pipelines in the DAG will be returned.
func (APIClient) FlushJobAll ¶ added in v1.7.0
FlushJobAll returns all the jobs which were triggered by commits. If toPipelines is non-nil then only the jobs between commits and those pipelines in the DAG will be returned.
func (APIClient) GarbageCollect ¶ added in v1.4.7
GarbageCollect garbage collects unused data. Currently GC needs to be run while no data is being added or removed (which, among other things, implies that there shouldn't be jobs actively running).
func (*APIClient) GetAddress ¶ added in v1.5.0
GetAddress returns the pachd host:port with which 'c' is communicating. If 'c' was created using NewInCluster or NewOnUserMachine then this is how the address may be retrieved from the environment.
func (APIClient) GetFile ¶
func (c APIClient) GetFile(repoName string, commitID string, path string, offset int64, size int64, writer io.Writer) error
GetFile returns the contents of a file at a specific Commit. offset specifies a number of bytes that should be skipped in the beginning of the file. size limits the total amount of data returned, note you will get fewer bytes than size if you pass a value larger than the size of the file. If size is set to 0 then all of the data will be returned.
func (APIClient) GetFileReadSeeker ¶ added in v1.7.0
func (c APIClient) GetFileReadSeeker(repoName string, commitID string, path string) (io.ReadSeeker, error)
GetFileReadSeeker returns a reader for the contents of a file at a specific Commit that permits Seeking to different points in the file.
func (APIClient) GetFileReader ¶ added in v1.3.19
func (c APIClient) GetFileReader(repoName string, commitID string, path string, offset int64, size int64) (io.Reader, error)
GetFileReader returns a reader for the contents of a file at a specific Commit. offset specifies a number of bytes that should be skipped in the beginning of the file. size limits the total amount of data returned, note you will get fewer bytes than size if you pass a value larger than the size of the file. If size is set to 0 then all of the data will be returned.
func (APIClient) GetLogs ¶
func (c APIClient) GetLogs( pipelineName string, jobID string, data []string, datumID string, master bool, follow bool, tail int64, ) *LogsIter
GetLogs gets logs from a job (logs includes stdout and stderr). 'pipelineName', 'jobID', 'data', and 'datumID', are all filters. To forego any filter, simply pass an empty value, though one of 'pipelineName' and 'jobID' must be set. Responses are written to 'messages'
func (APIClient) GetObject ¶ added in v1.3.9
GetObject gets an object out of the object store by hash.
func (APIClient) GetObjects ¶ added in v1.3.19
func (c APIClient) GetObjects(hashes []string, offset uint64, size uint64, totalSize uint64, writer io.Writer) error
GetObjects gets several objects out of the object store by hash.
func (APIClient) GlobFile ¶ added in v1.4.6
func (c APIClient) GlobFile(repoName string, commitID string, pattern string) ([]*pfs.FileInfo, error)
GlobFile returns files that match a given glob pattern in a given commit. The pattern is documented here: https://golang.org/pkg/path/filepath/#Match
func (APIClient) Health ¶ added in v1.7.0
Health health checks pachd, it returns an error if pachd isn't healthy.
func (APIClient) InspectBranch ¶ added in v1.7.0
InspectBranch returns information on a specific PFS branch
func (APIClient) InspectCluster ¶ added in v1.7.0
func (c APIClient) InspectCluster() (*admin.ClusterInfo, error)
InspectCluster retrieves cluster state
func (APIClient) InspectCommit ¶
InspectCommit returns info about a specific Commit.
func (APIClient) InspectDatum ¶ added in v1.5.2
InspectDatum returns info about a single datum
func (APIClient) InspectFile ¶
func (c APIClient) InspectFile(repoName string, commitID string, path string) (*pfs.FileInfo, error)
InspectFile returns info about a specific file.
func (APIClient) InspectJob ¶
InspectJob returns info about a specific job. blockState will cause the call to block until the job reaches a terminal state (failure or success).
func (APIClient) InspectJobOutputCommit ¶ added in v1.7.1
func (c APIClient) InspectJobOutputCommit(repoName, commitID string, blockState bool) (*pps.JobInfo, error)
InspectJobOutputCommit returns info about a job that created a commit. blockState will cause the call to block until the job reaches a terminal state (failure or success).
func (APIClient) InspectObject ¶ added in v1.3.9
func (c APIClient) InspectObject(hash string) (*pfs.ObjectInfo, error)
InspectObject returns info about an Object.
func (APIClient) InspectPipeline ¶
func (c APIClient) InspectPipeline(pipelineName string) (*pps.PipelineInfo, error)
InspectPipeline returns info about a specific pipeline.
func (APIClient) InspectRepo ¶
InspectRepo returns info about a specific Repo.
func (APIClient) ListBranch ¶
func (c APIClient) ListBranch(repoName string) ([]*pfs.BranchInfo, error)
ListBranch lists the active branches on a Repo.
func (APIClient) ListCommit ¶
func (c APIClient) ListCommit(repoName string, to string, from string, number uint64) ([]*pfs.CommitInfo, error)
ListCommit lists commits. If only `repo` is given, all commits in the repo are returned. If `to` is given, only the ancestors of `to`, including `to` itself, are considered. If `from` is given, only the descendents of `from`, including `from` itself, are considered. `number` determines how many commits are returned. If `number` is 0, all commits that match the aforementioned criteria are returned.
func (APIClient) ListCommitByRepo ¶ added in v1.2.4
func (c APIClient) ListCommitByRepo(repoName string) ([]*pfs.CommitInfo, error)
ListCommitByRepo lists all commits in a repo.
func (APIClient) ListDatum ¶ added in v1.5.2
func (c APIClient) ListDatum(jobID string, pageSize int64, page int64) (*pps.ListDatumResponse, error)
ListDatum returns info about all datums in a Job
func (APIClient) ListJob ¶
func (c APIClient) ListJob(pipelineName string, inputCommit []*pfs.Commit, outputCommit *pfs.Commit) ([]*pps.JobInfo, error)
ListJob returns info about all jobs. If pipelineName is non empty then only jobs that were started by the named pipeline will be returned If inputCommit is non-nil then only jobs which took the specific commits as inputs will be returned. The order of the inputCommits doesn't matter. If outputCommit is non-nil then only the job which created that commit as output will be returned.
func (APIClient) ListObject ¶ added in v1.6.9
ListObject lists objects stored in pfs.
func (APIClient) ListPipeline ¶
func (c APIClient) ListPipeline() ([]*pps.PipelineInfo, error)
ListPipeline returns info about all pipelines.
func (APIClient) ListRepo ¶
ListRepo returns info about all Repos. provenance specifies a set of provenance repos, only repos which have ALL of the specified repos as provenance will be returned unless provenance is nil in which case it is ignored.
func (APIClient) ListTag ¶ added in v1.6.9
func (c APIClient) ListTag(f func(*pfs.ListTagsResponse) error) error
ListTag lists tags stored in pfs.
func (APIClient) PutFile ¶
func (c APIClient) PutFile(repoName string, commitID string, path string, reader io.Reader) (_ int, retErr error)
PutFile writes a file to PFS from a reader.
func (APIClient) PutFileOverwrite ¶ added in v1.5.3
func (c APIClient) PutFileOverwrite(repoName string, commitID string, path string, reader io.Reader, overwriteIndex int64) (_ int, retErr error)
PutFileOverwrite is like PutFile but it overwrites the file rather than appending to it. overwriteIndex allows you to specify the index of the object starting from which you'd like to overwrite. If you want to overwrite the entire file, specify an index of 0.
func (APIClient) PutFileSplit ¶ added in v1.3.19
func (c APIClient) PutFileSplit(repoName string, commitID string, path string, delimiter pfs.Delimiter, targetFileDatums int64, targetFileBytes int64, overwrite bool, reader io.Reader) (_ int, retErr error)
PutFileSplit writes a file to PFS from a reader delimiter is used to tell PFS how to break the input into blocks
func (APIClient) PutFileSplitWriter ¶ added in v1.3.19
func (c APIClient) PutFileSplitWriter(repoName string, commitID string, path string, delimiter pfs.Delimiter, targetFileDatums int64, targetFileBytes int64, overwrite bool) (io.WriteCloser, error)
PutFileSplitWriter writes a multiple files to PFS by splitting up the data that is written to it. NOTE: PutFileSplitWriter returns an io.WriteCloser you must call Close on it when you are done writing.
func (APIClient) PutFileURL ¶ added in v1.2.0
func (c APIClient) PutFileURL(repoName string, commitID string, path string, url string, recursive bool, overwrite bool) (retErr error)
PutFileURL puts a file using the content found at a URL. The URL is sent to the server which performs the request. recursive allow for recursive scraping of some types URLs for example on s3:// urls.
func (APIClient) PutFileWriter ¶
func (c APIClient) PutFileWriter(repoName string, commitID string, path string) (io.WriteCloser, error)
PutFileWriter writes a file to PFS. NOTE: PutFileWriter returns an io.WriteCloser you must call Close on it when you are done writing.
func (APIClient) PutObject ¶ added in v1.3.9
func (c APIClient) PutObject(_r io.Reader, tags ...string) (object *pfs.Object, _ int64, retErr error)
PutObject puts a value into the object store and tags it with 0 or more tags.
func (APIClient) PutObjectSplit ¶ added in v1.6.0
PutObjectSplit is the same as PutObject except that the data is splitted into several smaller objects. This is primarily useful if you'd like to be able to resume upload.
func (APIClient) ReadObject ¶ added in v1.3.9
ReadObject gets an object by hash and returns it directly as []byte.
func (APIClient) ReadObjects ¶ added in v1.3.19
ReadObjects gets several objects by hash and returns them directly as []byte.
func (APIClient) ReadTag ¶ added in v1.3.9
ReadTag gets an object by tag and returns it directly as []byte.
func (APIClient) RerunPipeline ¶ added in v1.3.6
RerunPipeline reruns a pipeline over a given set of commits. Exclude and include are filters that either include or exclude the ancestors of the given commits. A commit is considered the ancestor of itself. The behavior is the same as that of ListCommit.
func (APIClient) RestartDatum ¶ added in v1.4.4
RestartDatum restarts a datum that's being processed as part of a job. datumFilter is a slice of strings which are matched against either the Path or Hash of the datum, the order of the strings in datumFilter is irrelevant.
func (APIClient) Restore ¶ added in v1.6.9
Restore cluster state from an extract series of operations.
func (APIClient) RestoreFrom ¶ added in v1.7.0
RestoreFrom restores state from another cluster which can be access through otherC.
func (APIClient) RestoreReader ¶ added in v1.6.9
RestoreReader restores cluster state from a reader containing marshaled ops. Such as those written by ExtractWriter.
func (APIClient) RestoreURL ¶ added in v1.6.9
RestoreURL restures cluster state from object storage.
func (*APIClient) SetAuthToken ¶ added in v1.5.1
SetAuthToken sets the authentication token that will be used for all API calls for this client.
func (APIClient) SetBranch ¶ added in v1.3.19
SetBranch sets a commit and its ancestors as a branch. SetBranch is deprecated in favor of CommitBranch.
func (APIClient) SetMaxConcurrentStreams ¶ added in v1.3.15
SetMaxConcurrentStreams Sets the maximum number of concurrent streams the client can have. It is not safe to call this operations while operations are outstanding.
func (APIClient) StartCommit ¶
StartCommit begins the process of committing data to a Repo. Once started you can write to the Commit with PutFile and when all the data has been written you must finish the Commit with FinishCommit. NOTE, data is not persisted until FinishCommit is called. branch is a more convenient way to build linear chains of commits. When a commit is started with a non empty branch the value of branch becomes an alias for the created Commit. This enables a more intuitive access pattern. When the commit is started on a branch the previous head of the branch is used as the parent of the commit.
func (APIClient) StartCommitParent ¶ added in v1.3.19
func (c APIClient) StartCommitParent(repoName string, branch string, parentCommit string) (*pfs.Commit, error)
StartCommitParent begins the process of committing data to a Repo. Once started you can write to the Commit with PutFile and when all the data has been written you must finish the Commit with FinishCommit. NOTE, data is not persisted until FinishCommit is called. branch is a more convenient way to build linear chains of commits. When a commit is started with a non empty branch the value of branch becomes an alias for the created Commit. This enables a more intuitive access pattern. When the commit is started on a branch the previous head of the branch is used as the parent of the commit. parentCommit specifies the parent Commit, upon creation the new Commit will appear identical to the parent Commit, data can safely be added to the new commit without affecting the contents of the parent Commit. You may pass "" as parentCommit in which case the new Commit will have no parent and will initially appear empty.
func (APIClient) StartPipeline ¶ added in v1.2.0
StartPipeline restarts a stopped pipeline.
func (APIClient) StopPipeline ¶ added in v1.2.0
StopPipeline prevents a pipeline from processing things, it can be restarted with StartPipeline.
func (APIClient) SubscribeCommit ¶ added in v1.3.19
func (c APIClient) SubscribeCommit(repo string, branch string, from string, state pfs.CommitState) (CommitInfoIterator, error)
SubscribeCommit is like ListCommit but it keeps listening for commits as they come in.
func (APIClient) SubscribeCommitF ¶ added in v1.7.0
func (c APIClient) SubscribeCommitF(repo, branch, from string, state pfs.CommitState, f func(*pfs.CommitInfo) error) error
SubscribeCommitF is like ListCommit but it calls a callback function with the results rather than returning an iterator.
func (APIClient) Walk ¶ added in v1.3.2
Walk walks the pfs filesystem rooted at path. walkFn will be called for each file found under path, this includes both regular files and directories.
type AdminAPIClient ¶ added in v1.6.9
AdminAPIClient is an alias of admin.APIClient
type AuthAPIClient ¶ added in v1.5.1
AuthAPIClient is an alias of auth.APIClient
type CommitInfoIterator ¶ added in v1.3.19
type CommitInfoIterator interface { Next() (*pfs.CommitInfo, error) Close() }
CommitInfoIterator wraps a stream of commits and makes them easy to iterate.
type DeployAPIClient ¶ added in v1.6.0
DeployAPIClient is an alias of auth.APIClient
type LogsIter ¶ added in v1.3.19
type LogsIter struct {
// contains filtered or unexported fields
}
LogsIter iterates through log messages returned from pps.GetLogs. Logs can be fetched with 'Next()'. The log message received can be examined with 'Message()', and any errors can be examined with 'Err()'.
func (*LogsIter) Err ¶ added in v1.3.19
Err retrieves any errors encountered in the course of calling 'Next()'.
func (*LogsIter) Message ¶ added in v1.3.19
func (l *LogsIter) Message() *pps.LogMessage
Message returns the most recently retrieve log message (as an annotated log line, in the form of a pps.LogMessage)
type ObjectAPIClient ¶ added in v1.3.9
type ObjectAPIClient pfs.ObjectAPIClient
ObjectAPIClient is an alias for pfs.ObjectAPIClient
type VersionAPIClient ¶ added in v1.6.9
VersionAPIClient is an alias of versionpb.APIClient
Directories ¶
Path | Synopsis |
---|---|
Package admin is a generated protocol buffer package.
|
Package admin is a generated protocol buffer package. |
Package auth is a generated protocol buffer package.
|
Package auth is a generated protocol buffer package. |
Package deploy is a generated protocol buffer package.
|
Package deploy is a generated protocol buffer package. |
Package enterprise is a generated protocol buffer package.
|
Package enterprise is a generated protocol buffer package. |
Package health is a generated protocol buffer package.
|
Package health is a generated protocol buffer package. |
Package limit provides primitives to limit concurrency.
|
Package limit provides primitives to limit concurrency. |
Package pfs is a generated protocol buffer package.
|
Package pfs is a generated protocol buffer package. |
pkg
|
|
config
Package config is a generated protocol buffer package.
|
Package config is a generated protocol buffer package. |
shard
Package shard is a generated protocol buffer package.
|
Package shard is a generated protocol buffer package. |
Package pps is a generated protocol buffer package.
|
Package pps is a generated protocol buffer package. |
versionpb
Package versionpb is a generated protocol buffer package.
|
Package versionpb is a generated protocol buffer package. |