package dataflow

v4.7.0
Warning

This package is not in the latest version of its module.

Published: Jan 12, 2021 License: Apache-2.0 Imports: 4 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

This section is empty.

Types

type FlexTemplateJob

type FlexTemplateJob struct {
	pulumi.CustomResourceState

	// The GCS path to the Dataflow job Flex
	// Template.
	ContainerSpecGcsPath pulumi.StringOutput `pulumi:"containerSpecGcsPath"`
	// The unique ID of this job.
	JobId pulumi.StringOutput `pulumi:"jobId"`
	// Deprecated: This field is not yet supported by the API.
	Labels pulumi.MapOutput `pulumi:"labels"`
	// A unique name for the resource, required by Dataflow.
	Name pulumi.StringOutput `pulumi:"name"`
	// One of "drain" or "cancel". Specifies behavior of
	// deletion during `pulumi destroy`.  See above note.
	OnDelete pulumi.StringPtrOutput `pulumi:"onDelete"`
	// Key/Value pairs to be passed to the Dataflow job (as
	// used in the template). Additional [pipeline options](https://cloud.google.com/dataflow/docs/guides/specifying-exec-params#setting-other-cloud-dataflow-pipeline-options)
	// such as `serviceAccount`, `workerMachineType`, etc can be specified here.
	Parameters pulumi.MapOutput `pulumi:"parameters"`
	// The project in which the resource belongs. If it is not
	// provided, the provider project is used.
	Project pulumi.StringOutput `pulumi:"project"`
	// The region in which the created job should run.
	Region pulumi.StringOutput `pulumi:"region"`
	// The current state of the resource, selected from the [JobState enum](https://cloud.google.com/dataflow/docs/reference/rest/v1b3/projects.jobs#Job.JobState)
	State pulumi.StringOutput `pulumi:"state"`
}

Creates a [Flex Template](https://cloud.google.com/dataflow/docs/guides/templates/using-flex-templates) job on Dataflow, which is an implementation of Apache Beam running on Google Compute Engine. For more information see the official documentation for [Beam](https://beam.apache.org) and [Dataflow](https://cloud.google.com/dataflow/).

## Example Usage

```go
package main

import (
	"github.com/pulumi/pulumi-gcp/sdk/v4/go/gcp/dataflow"
	"github.com/pulumi/pulumi/sdk/v2/go/pulumi"
)

func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		_, err := dataflow.NewFlexTemplateJob(ctx, "bigDataJob", &dataflow.FlexTemplateJobArgs{
			ContainerSpecGcsPath: pulumi.String("gs://my-bucket/templates/template.json"),
			Parameters: pulumi.StringMap{
				"inputSubscription": pulumi.String("messages"),
			},
		}) // optionally pass pulumi.Provider(...) here, e.g. an explicit google-beta provider instance
		if err != nil {
			return err
		}
		return nil
	})
}

```

## Note on "destroy" / "apply"

There are many types of Dataflow jobs. Some Dataflow jobs run constantly, getting new data from (e.g.) a GCS bucket, and outputting data continuously. Some jobs process a set amount of data then terminate. All jobs can fail while running due to programming errors or other issues. In this way, Dataflow jobs are different from most other resources managed by this provider.

The Dataflow resource is considered 'existing' while it is in a nonterminal state. If it reaches a terminal state (e.g. 'FAILED', 'COMPLETE', 'CANCELLED'), it will be recreated on the next 'apply'. This is as expected for jobs which run continuously, but may surprise users who use this resource for other kinds of Dataflow jobs.

A Dataflow job which is 'destroyed' may be "cancelled" or "drained". If "cancelled", the job terminates - any data written remains where it is, but no new data will be processed. If "drained", no new data will enter the pipeline, but any data currently in the pipeline will finish being processed. The default is "cancelled", but if you set `onDelete` to `"drain"` in the configuration, `pulumi destroy` may take a long time to complete.
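
For example, a minimal sketch (the bucket and template paths are hypothetical) of opting a Flex Template job into draining on destroy:

```go
package main

import (
	"github.com/pulumi/pulumi-gcp/sdk/v4/go/gcp/dataflow"
	"github.com/pulumi/pulumi/sdk/v2/go/pulumi"
)

func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		// "drain" lets in-flight data finish processing before the job
		// stops; the default behavior cancels the job immediately.
		_, err := dataflow.NewFlexTemplateJob(ctx, "drainingJob", &dataflow.FlexTemplateJobArgs{
			ContainerSpecGcsPath: pulumi.String("gs://my-bucket/templates/template.json"),
			OnDelete:             pulumi.String("drain"),
		})
		return err
	})
}
```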

## Import

This resource does not support import.

func GetFlexTemplateJob

func GetFlexTemplateJob(ctx *pulumi.Context,
	name string, id pulumi.IDInput, state *FlexTemplateJobState, opts ...pulumi.ResourceOption) (*FlexTemplateJob, error)

GetFlexTemplateJob gets an existing FlexTemplateJob resource's state with the given name, ID, and optional state properties that are used to uniquely qualify the lookup (nil if not required).
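
As a sketch (the job ID below is hypothetical), an existing job can be looked up by ID and its state exported:

```go
package main

import (
	"github.com/pulumi/pulumi-gcp/sdk/v4/go/gcp/dataflow"
	"github.com/pulumi/pulumi/sdk/v2/go/pulumi"
)

func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		// Pass nil state, since no extra properties are needed to
		// qualify the lookup.
		job, err := dataflow.GetFlexTemplateJob(ctx, "existingJob",
			pulumi.ID("2021-01-01_00_00_00-1234567890123456789"), nil)
		if err != nil {
			return err
		}
		ctx.Export("jobState", job.State)
		return nil
	})
}
```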

func NewFlexTemplateJob

func NewFlexTemplateJob(ctx *pulumi.Context,
	name string, args *FlexTemplateJobArgs, opts ...pulumi.ResourceOption) (*FlexTemplateJob, error)

NewFlexTemplateJob registers a new resource with the given unique name, arguments, and options.

func (*FlexTemplateJob) ElementType added in v4.4.0

func (*FlexTemplateJob) ElementType() reflect.Type

func (*FlexTemplateJob) ToFlexTemplateJobOutput added in v4.4.0

func (i *FlexTemplateJob) ToFlexTemplateJobOutput() FlexTemplateJobOutput

func (*FlexTemplateJob) ToFlexTemplateJobOutputWithContext added in v4.4.0

func (i *FlexTemplateJob) ToFlexTemplateJobOutputWithContext(ctx context.Context) FlexTemplateJobOutput

type FlexTemplateJobArgs

type FlexTemplateJobArgs struct {
	// The GCS path to the Dataflow job Flex
	// Template.
	ContainerSpecGcsPath pulumi.StringInput
	// Deprecated: This field is not yet supported by the API.
	Labels pulumi.MapInput
	// A unique name for the resource, required by Dataflow.
	Name pulumi.StringPtrInput
	// One of "drain" or "cancel". Specifies behavior of
	// deletion during `pulumi destroy`.  See above note.
	OnDelete pulumi.StringPtrInput
	// Key/Value pairs to be passed to the Dataflow job (as
	// used in the template). Additional [pipeline options](https://cloud.google.com/dataflow/docs/guides/specifying-exec-params#setting-other-cloud-dataflow-pipeline-options)
	// such as `serviceAccount`, `workerMachineType`, etc can be specified here.
	Parameters pulumi.MapInput
	// The project in which the resource belongs. If it is not
	// provided, the provider project is used.
	Project pulumi.StringPtrInput
	// The region in which the created job should run.
	Region pulumi.StringPtrInput
}

The set of arguments for constructing a FlexTemplateJob resource.
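
As noted on `Parameters`, pipeline options such as `serviceAccount` and `workerMachineType` can be passed alongside template parameters. A sketch, with hypothetical bucket, subscription, and service-account values:

```go
package main

import (
	"github.com/pulumi/pulumi-gcp/sdk/v4/go/gcp/dataflow"
	"github.com/pulumi/pulumi/sdk/v2/go/pulumi"
)

func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		_, err := dataflow.NewFlexTemplateJob(ctx, "tunedJob", &dataflow.FlexTemplateJobArgs{
			ContainerSpecGcsPath: pulumi.String("gs://my-bucket/templates/template.json"),
			Parameters: pulumi.StringMap{
				// Template parameter.
				"inputSubscription": pulumi.String("messages"),
				// Pipeline options, passed the same way.
				"serviceAccount":    pulumi.String("runner@my-project.iam.gserviceaccount.com"),
				"workerMachineType": pulumi.String("n1-standard-2"),
			},
		})
		return err
	})
}
```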

func (FlexTemplateJobArgs) ElementType

func (FlexTemplateJobArgs) ElementType() reflect.Type

type FlexTemplateJobInput added in v4.4.0

type FlexTemplateJobInput interface {
	pulumi.Input

	ToFlexTemplateJobOutput() FlexTemplateJobOutput
	ToFlexTemplateJobOutputWithContext(ctx context.Context) FlexTemplateJobOutput
}

type FlexTemplateJobOutput added in v4.4.0

type FlexTemplateJobOutput struct {
	*pulumi.OutputState
}

func (FlexTemplateJobOutput) ElementType added in v4.4.0

func (FlexTemplateJobOutput) ElementType() reflect.Type

func (FlexTemplateJobOutput) ToFlexTemplateJobOutput added in v4.4.0

func (o FlexTemplateJobOutput) ToFlexTemplateJobOutput() FlexTemplateJobOutput

func (FlexTemplateJobOutput) ToFlexTemplateJobOutputWithContext added in v4.4.0

func (o FlexTemplateJobOutput) ToFlexTemplateJobOutputWithContext(ctx context.Context) FlexTemplateJobOutput

type FlexTemplateJobState

type FlexTemplateJobState struct {
	// The GCS path to the Dataflow job Flex
	// Template.
	ContainerSpecGcsPath pulumi.StringPtrInput
	// The unique ID of this job.
	JobId pulumi.StringPtrInput
	// Deprecated: This field is not yet supported by the API.
	Labels pulumi.MapInput
	// A unique name for the resource, required by Dataflow.
	Name pulumi.StringPtrInput
	// One of "drain" or "cancel". Specifies behavior of
	// deletion during `pulumi destroy`.  See above note.
	OnDelete pulumi.StringPtrInput
	// Key/Value pairs to be passed to the Dataflow job (as
	// used in the template). Additional [pipeline options](https://cloud.google.com/dataflow/docs/guides/specifying-exec-params#setting-other-cloud-dataflow-pipeline-options)
	// such as `serviceAccount`, `workerMachineType`, etc can be specified here.
	Parameters pulumi.MapInput
	// The project in which the resource belongs. If it is not
	// provided, the provider project is used.
	Project pulumi.StringPtrInput
	// The region in which the created job should run.
	Region pulumi.StringPtrInput
	// The current state of the resource, selected from the [JobState enum](https://cloud.google.com/dataflow/docs/reference/rest/v1b3/projects.jobs#Job.JobState)
	State pulumi.StringPtrInput
}

func (FlexTemplateJobState) ElementType

func (FlexTemplateJobState) ElementType() reflect.Type

type Job

type Job struct {
	pulumi.CustomResourceState

	// List of experiments that should be used by the job. An example value is `["enableStackdriverAgentMetrics"]`.
	AdditionalExperiments pulumi.StringArrayOutput `pulumi:"additionalExperiments"`
	// The configuration for VM IPs.  Options are `"WORKER_IP_PUBLIC"` or `"WORKER_IP_PRIVATE"`.
	IpConfiguration pulumi.StringPtrOutput `pulumi:"ipConfiguration"`
	// The unique ID of this job.
	JobId pulumi.StringOutput `pulumi:"jobId"`
	// The name for the Cloud KMS key for the job. Key format is: `projects/PROJECT_ID/locations/LOCATION/keyRings/KEY_RING/cryptoKeys/KEY`
	KmsKeyName pulumi.StringPtrOutput `pulumi:"kmsKeyName"`
	// User labels to be specified for the job. Keys and values should follow the restrictions
	// specified in the [labeling restrictions](https://cloud.google.com/compute/docs/labeling-resources#restrictions) page.
	// **NOTE**: Google-provided Dataflow templates often provide default labels that begin with `goog-dataflow-provided`.
	// Unless explicitly set in config, these labels will be ignored to prevent diffs on re-apply.
	Labels pulumi.MapOutput `pulumi:"labels"`
	// The machine type to use for the job.
	MachineType pulumi.StringPtrOutput `pulumi:"machineType"`
	// The number of workers permitted to work on the job.  More workers may improve processing speed at additional cost.
	MaxWorkers pulumi.IntPtrOutput `pulumi:"maxWorkers"`
	// A unique name for the resource, required by Dataflow.
	Name pulumi.StringOutput `pulumi:"name"`
	// The network to which VMs will be assigned. If it is not provided, "default" will be used.
	Network pulumi.StringPtrOutput `pulumi:"network"`
	// One of "drain" or "cancel".  Specifies behavior of deletion during `pulumi destroy`.  See above note.
	OnDelete pulumi.StringPtrOutput `pulumi:"onDelete"`
	// Key/Value pairs to be passed to the Dataflow job (as used in the template).
	Parameters pulumi.MapOutput `pulumi:"parameters"`
	// The project in which the resource belongs. If it is not provided, the provider project is used.
	Project pulumi.StringOutput `pulumi:"project"`
	// The region in which the created job should run.
	Region pulumi.StringPtrOutput `pulumi:"region"`
	// The Service Account email used to create the job.
	ServiceAccountEmail pulumi.StringPtrOutput `pulumi:"serviceAccountEmail"`
	// The current state of the resource, selected from the [JobState enum](https://cloud.google.com/dataflow/docs/reference/rest/v1b3/projects.jobs#Job.JobState)
	State pulumi.StringOutput `pulumi:"state"`
	// The subnetwork to which VMs will be assigned. Should be of the form "regions/REGION/subnetworks/SUBNETWORK".
	Subnetwork pulumi.StringPtrOutput `pulumi:"subnetwork"`
	// A writeable location on GCS for the Dataflow job to dump its temporary data.
	TempGcsLocation pulumi.StringOutput `pulumi:"tempGcsLocation"`
	// The GCS path to the Dataflow job template.
	TemplateGcsPath pulumi.StringOutput `pulumi:"templateGcsPath"`
	// Only applicable when updating a pipeline. Map of transform name prefixes of the job to be replaced with the corresponding name prefixes of the new job. This field is not used outside of update.
	TransformNameMapping pulumi.MapOutput `pulumi:"transformNameMapping"`
	// The type of this job, selected from the [JobType enum](https://cloud.google.com/dataflow/docs/reference/rest/v1b3/projects.jobs#Job.JobType)
	Type pulumi.StringOutput `pulumi:"type"`
	// The zone in which the created job should run. If it is not provided, the provider zone is used.
	Zone pulumi.StringPtrOutput `pulumi:"zone"`
}

Creates a job on Dataflow, which is an implementation of Apache Beam running on Google Compute Engine. For more information see the official documentation for [Beam](https://beam.apache.org) and [Dataflow](https://cloud.google.com/dataflow/).

## Example Usage

```go
package main

import (
	"github.com/pulumi/pulumi-gcp/sdk/v4/go/gcp/dataflow"
	"github.com/pulumi/pulumi/sdk/v2/go/pulumi"
)

func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		_, err := dataflow.NewJob(ctx, "bigDataJob", &dataflow.JobArgs{
			Parameters: pulumi.StringMap{
				"baz": pulumi.String("qux"),
				"foo": pulumi.String("bar"),
			},
			TempGcsLocation: pulumi.String("gs://my-bucket/tmp_dir"),
			TemplateGcsPath: pulumi.String("gs://my-bucket/templates/template_file"),
		})
		if err != nil {
			return err
		}
		return nil
	})
}

```

### Streaming Job

```go
package main

import (
	"fmt"

	"github.com/pulumi/pulumi-gcp/sdk/v4/go/gcp/dataflow"
	"github.com/pulumi/pulumi-gcp/sdk/v4/go/gcp/pubsub"
	"github.com/pulumi/pulumi-gcp/sdk/v4/go/gcp/storage"
	"github.com/pulumi/pulumi/sdk/v2/go/pulumi"
)

func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		topic, err := pubsub.NewTopic(ctx, "topic", nil)
		if err != nil {
			return err
		}
		bucket1, err := storage.NewBucket(ctx, "bucket1", &storage.BucketArgs{
			ForceDestroy: pulumi.Bool(true),
		})
		if err != nil {
			return err
		}
		_, err = storage.NewBucket(ctx, "bucket2", &storage.BucketArgs{
			ForceDestroy: pulumi.Bool(true),
		})
		if err != nil {
			return err
		}
		_, err = dataflow.NewJob(ctx, "pubsubStream", &dataflow.JobArgs{
			TemplateGcsPath: pulumi.String("gs://my-bucket/templates/template_file"),
			TempGcsLocation: pulumi.String("gs://my-bucket/tmp_dir"),
			Parameters: pulumi.StringMap{
				"inputFilePattern": bucket1.Url.ApplyT(func(url string) (string, error) {
					return fmt.Sprintf("%v%v", url, "/*.json"), nil
				}).(pulumi.StringOutput),
				"outputTopic": topic.ID(),
			},
			TransformNameMapping: pulumi.StringMap{
				"name": pulumi.String("test_job"),
				"env":  pulumi.String("test"),
			},
			OnDelete: pulumi.String("cancel"),
		})
		if err != nil {
			return err
		}
		return nil
	})
}

```

## Note on "destroy" / "apply"

There are many types of Dataflow jobs. Some Dataflow jobs run constantly, getting new data from (e.g.) a GCS bucket, and outputting data continuously. Some jobs process a set amount of data then terminate. All jobs can fail while running due to programming errors or other issues. In this way, Dataflow jobs are different from most other Google resources.

The Dataflow resource is considered 'existing' while it is in a nonterminal state. If it reaches a terminal state (e.g. 'FAILED', 'COMPLETE', 'CANCELLED'), it will be recreated on the next 'apply'. This is as expected for jobs which run continuously, but may surprise users who use this resource for other kinds of Dataflow jobs.

A Dataflow job which is 'destroyed' may be "cancelled" or "drained". If "cancelled", the job terminates - any data written remains where it is, but no new data will be processed. If "drained", no new data will enter the pipeline, but any data currently in the pipeline will finish being processed. The default is "cancelled", but if you set `onDelete` to `"drain"` in the configuration, `pulumi destroy` may take a long time to complete.

## Import

This resource does not support import.

func GetJob

func GetJob(ctx *pulumi.Context,
	name string, id pulumi.IDInput, state *JobState, opts ...pulumi.ResourceOption) (*Job, error)

GetJob gets an existing Job resource's state with the given name, ID, and optional state properties that are used to uniquely qualify the lookup (nil if not required).
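
A sketch of the lookup (the job ID is hypothetical):

```go
package main

import (
	"github.com/pulumi/pulumi-gcp/sdk/v4/go/gcp/dataflow"
	"github.com/pulumi/pulumi/sdk/v2/go/pulumi"
)

func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		job, err := dataflow.GetJob(ctx, "existingJob",
			pulumi.ID("2021-01-01_00_00_00-9876543210987654321"), nil)
		if err != nil {
			return err
		}
		// Exported outputs resolve once the job has been read back.
		ctx.Export("jobType", job.Type)
		return nil
	})
}
```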

func NewJob

func NewJob(ctx *pulumi.Context,
	name string, args *JobArgs, opts ...pulumi.ResourceOption) (*Job, error)

NewJob registers a new resource with the given unique name, arguments, and options.

func (*Job) ElementType added in v4.4.0

func (*Job) ElementType() reflect.Type

func (*Job) ToJobOutput added in v4.4.0

func (i *Job) ToJobOutput() JobOutput

func (*Job) ToJobOutputWithContext added in v4.4.0

func (i *Job) ToJobOutputWithContext(ctx context.Context) JobOutput

type JobArgs

type JobArgs struct {
	// List of experiments that should be used by the job. An example value is `["enableStackdriverAgentMetrics"]`.
	AdditionalExperiments pulumi.StringArrayInput
	// The configuration for VM IPs.  Options are `"WORKER_IP_PUBLIC"` or `"WORKER_IP_PRIVATE"`.
	IpConfiguration pulumi.StringPtrInput
	// The name for the Cloud KMS key for the job. Key format is: `projects/PROJECT_ID/locations/LOCATION/keyRings/KEY_RING/cryptoKeys/KEY`
	KmsKeyName pulumi.StringPtrInput
	// User labels to be specified for the job. Keys and values should follow the restrictions
	// specified in the [labeling restrictions](https://cloud.google.com/compute/docs/labeling-resources#restrictions) page.
	// **NOTE**: Google-provided Dataflow templates often provide default labels that begin with `goog-dataflow-provided`.
	// Unless explicitly set in config, these labels will be ignored to prevent diffs on re-apply.
	Labels pulumi.MapInput
	// The machine type to use for the job.
	MachineType pulumi.StringPtrInput
	// The number of workers permitted to work on the job.  More workers may improve processing speed at additional cost.
	MaxWorkers pulumi.IntPtrInput
	// A unique name for the resource, required by Dataflow.
	Name pulumi.StringPtrInput
	// The network to which VMs will be assigned. If it is not provided, "default" will be used.
	Network pulumi.StringPtrInput
	// One of "drain" or "cancel".  Specifies behavior of deletion during `pulumi destroy`.  See above note.
	OnDelete pulumi.StringPtrInput
	// Key/Value pairs to be passed to the Dataflow job (as used in the template).
	Parameters pulumi.MapInput
	// The project in which the resource belongs. If it is not provided, the provider project is used.
	Project pulumi.StringPtrInput
	// The region in which the created job should run.
	Region pulumi.StringPtrInput
	// The Service Account email used to create the job.
	ServiceAccountEmail pulumi.StringPtrInput
	// The subnetwork to which VMs will be assigned. Should be of the form "regions/REGION/subnetworks/SUBNETWORK".
	Subnetwork pulumi.StringPtrInput
	// A writeable location on GCS for the Dataflow job to dump its temporary data.
	TempGcsLocation pulumi.StringInput
	// The GCS path to the Dataflow job template.
	TemplateGcsPath pulumi.StringInput
	// Only applicable when updating a pipeline. Map of transform name prefixes of the job to be replaced with the corresponding name prefixes of the new job. This field is not used outside of update.
	TransformNameMapping pulumi.MapInput
	// The zone in which the created job should run. If it is not provided, the provider zone is used.
	Zone pulumi.StringPtrInput
}

The set of arguments for constructing a Job resource.
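
A sketch combining several of the arguments above (all names and paths are hypothetical), keeping workers off public IPs by pairing `IpConfiguration` with an explicit subnetwork:

```go
package main

import (
	"github.com/pulumi/pulumi-gcp/sdk/v4/go/gcp/dataflow"
	"github.com/pulumi/pulumi/sdk/v2/go/pulumi"
)

func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		_, err := dataflow.NewJob(ctx, "privateIpJob", &dataflow.JobArgs{
			TemplateGcsPath: pulumi.String("gs://my-bucket/templates/template_file"),
			TempGcsLocation: pulumi.String("gs://my-bucket/tmp_dir"),
			MachineType:     pulumi.String("n1-standard-2"),
			MaxWorkers:      pulumi.Int(4),
			// Workers get only private IPs and reach other services
			// through the subnetwork's connectivity.
			IpConfiguration: pulumi.String("WORKER_IP_PRIVATE"),
			Subnetwork:      pulumi.String("regions/us-central1/subnetworks/my-subnet"),
			Labels: pulumi.Map{
				"team": pulumi.String("data"),
			},
		})
		return err
	})
}
```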

func (JobArgs) ElementType

func (JobArgs) ElementType() reflect.Type

type JobInput added in v4.4.0

type JobInput interface {
	pulumi.Input

	ToJobOutput() JobOutput
	ToJobOutputWithContext(ctx context.Context) JobOutput
}

type JobOutput added in v4.4.0

type JobOutput struct {
	*pulumi.OutputState
}

func (JobOutput) ElementType added in v4.4.0

func (JobOutput) ElementType() reflect.Type

func (JobOutput) ToJobOutput added in v4.4.0

func (o JobOutput) ToJobOutput() JobOutput

func (JobOutput) ToJobOutputWithContext added in v4.4.0

func (o JobOutput) ToJobOutputWithContext(ctx context.Context) JobOutput

type JobState

type JobState struct {
	// List of experiments that should be used by the job. An example value is `["enableStackdriverAgentMetrics"]`.
	AdditionalExperiments pulumi.StringArrayInput
	// The configuration for VM IPs.  Options are `"WORKER_IP_PUBLIC"` or `"WORKER_IP_PRIVATE"`.
	IpConfiguration pulumi.StringPtrInput
	// The unique ID of this job.
	JobId pulumi.StringPtrInput
	// The name for the Cloud KMS key for the job. Key format is: `projects/PROJECT_ID/locations/LOCATION/keyRings/KEY_RING/cryptoKeys/KEY`
	KmsKeyName pulumi.StringPtrInput
	// User labels to be specified for the job. Keys and values should follow the restrictions
	// specified in the [labeling restrictions](https://cloud.google.com/compute/docs/labeling-resources#restrictions) page.
	// **NOTE**: Google-provided Dataflow templates often provide default labels that begin with `goog-dataflow-provided`.
	// Unless explicitly set in config, these labels will be ignored to prevent diffs on re-apply.
	Labels pulumi.MapInput
	// The machine type to use for the job.
	MachineType pulumi.StringPtrInput
	// The number of workers permitted to work on the job.  More workers may improve processing speed at additional cost.
	MaxWorkers pulumi.IntPtrInput
	// A unique name for the resource, required by Dataflow.
	Name pulumi.StringPtrInput
	// The network to which VMs will be assigned. If it is not provided, "default" will be used.
	Network pulumi.StringPtrInput
	// One of "drain" or "cancel".  Specifies behavior of deletion during `pulumi destroy`.  See above note.
	OnDelete pulumi.StringPtrInput
	// Key/Value pairs to be passed to the Dataflow job (as used in the template).
	Parameters pulumi.MapInput
	// The project in which the resource belongs. If it is not provided, the provider project is used.
	Project pulumi.StringPtrInput
	// The region in which the created job should run.
	Region pulumi.StringPtrInput
	// The Service Account email used to create the job.
	ServiceAccountEmail pulumi.StringPtrInput
	// The current state of the resource, selected from the [JobState enum](https://cloud.google.com/dataflow/docs/reference/rest/v1b3/projects.jobs#Job.JobState)
	State pulumi.StringPtrInput
	// The subnetwork to which VMs will be assigned. Should be of the form "regions/REGION/subnetworks/SUBNETWORK".
	Subnetwork pulumi.StringPtrInput
	// A writeable location on GCS for the Dataflow job to dump its temporary data.
	TempGcsLocation pulumi.StringPtrInput
	// The GCS path to the Dataflow job template.
	TemplateGcsPath pulumi.StringPtrInput
	// Only applicable when updating a pipeline. Map of transform name prefixes of the job to be replaced with the corresponding name prefixes of the new job. This field is not used outside of update.
	TransformNameMapping pulumi.MapInput
	// The type of this job, selected from the [JobType enum](https://cloud.google.com/dataflow/docs/reference/rest/v1b3/projects.jobs#Job.JobType)
	Type pulumi.StringPtrInput
	// The zone in which the created job should run. If it is not provided, the provider zone is used.
	Zone pulumi.StringPtrInput
}

func (JobState) ElementType

func (JobState) ElementType() reflect.Type
