libcontainer

package
v1.0.0-rc2 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Sep 29, 2016 License: Apache-2.0 Imports: 43 Imported by: 0

README

Libcontainer provides a native Go implementation for creating containers with namespaces, cgroups, capabilities, and filesystem access controls. It allows you to manage the lifecycle of the container performing additional operations after the container is created.

Container

A container is a self contained execution environment that shares the kernel of the host system and which is (optionally) isolated from other containers in the system.

Using libcontainer

Because containers are spawned in a two step process you will need a binary that will be executed as the init process for the container. In libcontainer, we use the current binary (/proc/self/exe) to be executed as the init process, and use arg "init", we call the first step process "bootstrap", so you always need a "init" function as the entry of "bootstrap".

func init() {
	if len(os.Args) > 1 && os.Args[1] == "init" {
		runtime.GOMAXPROCS(1)
		runtime.LockOSThread()
		factory, _ := libcontainer.New("")
		if err := factory.StartInitialization(); err != nil {
			logrus.Fatal(err)
		}
		panic("--this line should have never been executed, congratulations--")
	}
}

Then to create a container you first have to initialize an instance of a factory that will handle the creation and initialization for a container.

factory, err := libcontainer.New("/var/lib/container", libcontainer.Cgroupfs, libcontainer.InitArgs(os.Args[0], "init"))
if err != nil {
	logrus.Fatal(err)
	return
}

Once you have an instance of the factory created we can create a configuration struct describing how the container is to be created. A sample would look similar to this:

defaultMountFlags := syscall.MS_NOEXEC | syscall.MS_NOSUID | syscall.MS_NODEV
config := &configs.Config{
	Rootfs: "/your/path/to/rootfs",
	Capabilities: []string{
		"CAP_CHOWN",
		"CAP_DAC_OVERRIDE",
		"CAP_FSETID",
		"CAP_FOWNER",
		"CAP_MKNOD",
		"CAP_NET_RAW",
		"CAP_SETGID",
		"CAP_SETUID",
		"CAP_SETFCAP",
		"CAP_SETPCAP",
		"CAP_NET_BIND_SERVICE",
		"CAP_SYS_CHROOT",
		"CAP_KILL",
		"CAP_AUDIT_WRITE",
	},
	Namespaces: configs.Namespaces([]configs.Namespace{
		{Type: configs.NEWNS},
		{Type: configs.NEWUTS},
		{Type: configs.NEWIPC},
		{Type: configs.NEWPID},
		{Type: configs.NEWUSER},
		{Type: configs.NEWNET},
	}),
	Cgroups: &configs.Cgroup{
		Name:   "test-container",
		Parent: "system",
		Resources: &configs.Resources{
			MemorySwappiness: nil,
			AllowAllDevices:  nil,
			AllowedDevices:   configs.DefaultAllowedDevices,
		},
	},
	MaskPaths: []string{
		"/proc/kcore",
		"/sys/firmware",
	},
	ReadonlyPaths: []string{
		"/proc/sys", "/proc/sysrq-trigger", "/proc/irq", "/proc/bus",
	},
	Devices:  configs.DefaultAutoCreatedDevices,
	Hostname: "testing",
	Mounts: []*configs.Mount{
		{
			Source:      "proc",
			Destination: "/proc",
			Device:      "proc",
			Flags:       defaultMountFlags,
		},
		{
			Source:      "tmpfs",
			Destination: "/dev",
			Device:      "tmpfs",
			Flags:       syscall.MS_NOSUID | syscall.MS_STRICTATIME,
			Data:        "mode=755",
		},
		{
			Source:      "devpts",
			Destination: "/dev/pts",
			Device:      "devpts",
			Flags:       syscall.MS_NOSUID | syscall.MS_NOEXEC,
			Data:        "newinstance,ptmxmode=0666,mode=0620,gid=5",
		},
		{
			Device:      "tmpfs",
			Source:      "shm",
			Destination: "/dev/shm",
			Data:        "mode=1777,size=65536k",
			Flags:       defaultMountFlags,
		},
		{
			Source:      "mqueue",
			Destination: "/dev/mqueue",
			Device:      "mqueue",
			Flags:       defaultMountFlags,
		},
		{
			Source:      "sysfs",
			Destination: "/sys",
			Device:      "sysfs",
			Flags:       defaultMountFlags | syscall.MS_RDONLY,
		},
	},
	UidMappings: []configs.IDMap{
		{
			ContainerID: 0,
			HostID: 1000,
			Size: 65536,
		},
	},
	GidMappings: []configs.IDMap{
		{
			ContainerID: 0,
			HostID: 1000,
			Size: 65536,
		},
	},
	Networks: []*configs.Network{
		{
			Type:    "loopback",
			Address: "127.0.0.1/0",
			Gateway: "localhost",
		},
	},
	Rlimits: []configs.Rlimit{
		{
			Type: syscall.RLIMIT_NOFILE,
			Hard: uint64(1025),
			Soft: uint64(1025),
		},
	},
}

Once you have the configuration populated you can create a container:

container, err := factory.Create("container-id", config)
if err != nil {
	logrus.Fatal(err)
	return
}

To spawn bash as the initial process inside the container and have the processes pid returned in order to wait, signal, or kill the process:

process := &libcontainer.Process{
	Args:   []string{"/bin/bash"},
	Env:    []string{"PATH=/bin"},
	User:   "daemon",
	Stdin:  os.Stdin,
	Stdout: os.Stdout,
	Stderr: os.Stderr,
}

err := container.Run(process)
if err != nil {
	container.Destroy()
	logrus.Fatal(err)
	return
}

// wait for the process to finish.
_, err := process.Wait()
if err != nil {
	logrus.Fatal(err)
}

// destroy the container.
container.Destroy()

Additional ways to interact with a running container are:

// return all the pids for all processes running inside the container.
processes, err := container.Processes()

// get detailed cpu, memory, io, and network statistics for the container and
// it's processes.
stats, err := container.Stats()

// pause all processes inside the container.
container.Pause()

// resume all paused processes.
container.Resume()

// send signal to container's init process.
container.Signal(signal)

// update container resource constraints.
container.Set(config)

Checkpoint & Restore

libcontainer now integrates CRIU for checkpointing and restoring containers. This let's you save the state of a process running inside a container to disk, and then restore that state into a new process, on the same machine or on another machine.

criu version 1.5.2 or higher is required to use checkpoint and restore. If you don't already have criu installed, you can build it from source, following the online instructions. criu is also installed in the docker image generated when building libcontainer with docker.

Copyright and license

Code and documentation copyright 2014 Docker, inc. Code released under the Apache 2.0 license. Docs released under Creative commons.

Documentation

Overview

Package libcontainer provides a native Go implementation for creating containers with namespaces, cgroups, capabilities, and filesystem access controls. It allows you to manage the lifecycle of the container performing additional operations after the container is created.

Index

Constants

View Source
const (
	CRIU_CG_MODE_SOFT    cgMode = 3 + iota // restore cgroup properties if only dir created by criu
	CRIU_CG_MODE_FULL                      // always restore all cgroups and their properties
	CRIU_CG_MODE_STRICT                    // restore all, requiring them to not present in the system
	CRIU_CG_MODE_DEFAULT                   // the same as CRIU_CG_MODE_SOFT
)
View Source
const (
	InitMsg         uint16 = 62000
	CloneFlagsAttr  uint16 = 27281
	ConsolePathAttr uint16 = 27282
	NsPathsAttr     uint16 = 27283
	UidmapAttr      uint16 = 27284
	GidmapAttr      uint16 = 27285
	SetgroupAttr    uint16 = 27286
)

list of known message types we want to send to bootstrap program The number is randomly chosen to not conflict with known netlink types

View Source
const PR_SET_NO_NEW_PRIVS = 0x26

PR_SET_NO_NEW_PRIVS isn't exposed in Golang so we define it ourselves copying the value the kernel

Variables

This section is empty.

Functions

func Cgroupfs

func Cgroupfs(l *LinuxFactory) error

Cgroupfs is an options func to configure a LinuxFactory to return containers that use the native cgroups filesystem implementation to create and manage cgroups.

func CriuPath added in v1.12.3

func CriuPath(criupath string) func(*LinuxFactory) error

CriuPath returns an option func to configure a LinuxFactory with the provided criupath

func InitArgs

func InitArgs(args ...string) func(*LinuxFactory) error

InitArgs returns an options func to configure a LinuxFactory with the provided init binary path and arguments.

func SystemdCgroups

func SystemdCgroups(l *LinuxFactory) error

SystemdCgroups is an options func to configure a LinuxFactory to return containers that use systemd to create and manage cgroups.

func TmpfsRoot

func TmpfsRoot(l *LinuxFactory) error

TmpfsRoot is an option func to mount LinuxFactory.Root to tmpfs.

Types

type BaseContainer added in v0.0.5

type BaseContainer interface {
	// Returns the ID of the container
	ID() string

	// Returns the current status of the container.
	//
	// errors:
	// ContainerNotExists - Container no longer exists,
	// Systemerror - System error.
	Status() (Status, error)

	// State returns the current container's state information.
	//
	// errors:
	// SystemError - System error.
	State() (*State, error)

	// Returns the current config of the container.
	Config() configs.Config

	// Returns the PIDs inside this container. The PIDs are in the namespace of the calling process.
	//
	// errors:
	// ContainerNotExists - Container no longer exists,
	// Systemerror - System error.
	//
	// Some of the returned PIDs may no longer refer to processes in the Container, unless
	// the Container state is PAUSED in which case every PID in the slice is valid.
	Processes() ([]int, error)

	// Returns statistics for the container.
	//
	// errors:
	// ContainerNotExists - Container no longer exists,
	// Systemerror - System error.
	Stats() (*Stats, error)

	// Set resources of container as configured
	//
	// We can use this to change resources when containers are running.
	//
	// errors:
	// SystemError - System error.
	Set(config configs.Config) error

	// Start a process inside the container. Returns error if process fails to
	// start. You can track process lifecycle with passed Process structure.
	//
	// errors:
	// ContainerNotExists - Container no longer exists,
	// ConfigInvalid - config is invalid,
	// ContainerPaused - Container is paused,
	// SystemError - System error.
	Start(process *Process) (err error)

	// Run immediatly starts the process inside the conatiner.  Returns error if process
	// fails to start.  It does not block waiting for the exec fifo  after start returns but
	// opens the fifo after start returns.
	//
	// errors:
	// ContainerNotExists - Container no longer exists,
	// ConfigInvalid - config is invalid,
	// ContainerPaused - Container is paused,
	// SystemError - System error.
	Run(process *Process) (err error)

	// Destroys the container after killing all running processes.
	//
	// Any event registrations are removed before the container is destroyed.
	// No error is returned if the container is already destroyed.
	//
	// errors:
	// SystemError - System error.
	Destroy() error

	// Signal sends the provided signal code to the container's initial process.
	//
	// errors:
	// SystemError - System error.
	Signal(s os.Signal) error

	// Exec signals the container to exec the users process at the end of the init.
	//
	// errors:
	// SystemError - System error.
	Exec() error
}

BaseContainer is a libcontainer container object.

Each container is thread-safe within the same process. Since a container can be destroyed by a separate process, any function may return that the container was not found. BaseContainer includes methods that are platform agnostic.

type BaseState added in v0.0.5

type BaseState struct {
	// ID is the container ID.
	ID string `json:"id"`

	// InitProcessPid is the init process id in the parent namespace.
	InitProcessPid int `json:"init_process_pid"`

	// InitProcessStartTime is the init process start time in clock cycles since boot time.
	InitProcessStartTime string `json:"init_process_start"`

	// Created is the unix timestamp for the creation time of the container in UTC
	Created time.Time `json:"created"`

	// Config is the container's configuration.
	Config configs.Config `json:"config"`
}

BaseState represents the platform agnostic pieces relating to a running container's state

type Boolmsg added in v0.0.9

type Boolmsg struct {
	Type  uint16
	Value bool
}

func (*Boolmsg) Len added in v0.0.9

func (msg *Boolmsg) Len() int

func (*Boolmsg) Serialize added in v0.0.9

func (msg *Boolmsg) Serialize() []byte

type Bytemsg added in v0.0.6

type Bytemsg struct {
	Type  uint16
	Value []byte
}

Bytemsg has the following representation | nlattr len | nlattr type | | value | pad |

func (*Bytemsg) Len added in v0.0.6

func (msg *Bytemsg) Len() int

func (*Bytemsg) Serialize added in v0.0.6

func (msg *Bytemsg) Serialize() []byte

type Console

type Console interface {
	io.ReadWriter
	io.Closer

	// Path returns the filesystem path to the slave side of the pty.
	Path() string

	// Fd returns the fd for the master of the pty.
	Fd() uintptr
}

Console represents a pseudo TTY.

func NewConsole added in v0.0.6

func NewConsole(uid, gid int) (Console, error)

NewConsole returns an initalized console that can be used within a container by copying bytes from the master side to the slave that is attached as the tty for the container's init process.

type Container

type Container interface {
	BaseContainer

	// Checkpoint checkpoints the running container's state to disk using the criu(8) utility.
	//
	// errors:
	// Systemerror - System error.
	Checkpoint(criuOpts *CriuOpts) error

	// Restore restores the checkpointed container to a running state using the criu(8) utility.
	//
	// errors:
	// Systemerror - System error.
	Restore(process *Process, criuOpts *CriuOpts) error

	// If the Container state is RUNNING or CREATED, sets the Container state to PAUSING and pauses
	// the execution of any user processes. Asynchronously, when the container finished being paused the
	// state is changed to PAUSED.
	// If the Container state is PAUSED, do nothing.
	//
	// errors:
	// ContainerNotExists - Container no longer exists,
	// ContainerNotRunning - Container not running or created,
	// Systemerror - System error.
	Pause() error

	// If the Container state is PAUSED, resumes the execution of any user processes in the
	// Container before setting the Container state to RUNNING.
	// If the Container state is RUNNING, do nothing.
	//
	// errors:
	// ContainerNotExists - Container no longer exists,
	// ContainerNotPaused - Container is not paused,
	// Systemerror - System error.
	Resume() error

	// NotifyOOM returns a read-only channel signaling when the container receives an OOM notification.
	//
	// errors:
	// Systemerror - System error.
	NotifyOOM() (<-chan struct{}, error)

	// NotifyMemoryPressure returns a read-only channel signaling when the container reaches a given pressure level
	//
	// errors:
	// Systemerror - System error.
	NotifyMemoryPressure(level PressureLevel) (<-chan struct{}, error)
}

Container is a libcontainer container object.

Each container is thread-safe within the same process. Since a container can be destroyed by a separate process, any function may return that the container was not found.

type CriuOpts

type CriuOpts struct {
	ImagesDirectory         string             // directory for storing image files
	WorkDirectory           string             // directory to cd and write logs/pidfiles/stats to
	LeaveRunning            bool               // leave container in running state after checkpoint
	TcpEstablished          bool               // checkpoint/restore established TCP connections
	ExternalUnixConnections bool               // allow external unix connections
	ShellJob                bool               // allow to dump and restore shell jobs
	FileLocks               bool               // handle file locks, for safety
	PageServer              CriuPageServerInfo // allow to dump to criu page server
	VethPairs               []VethPairName     // pass the veth to criu when restore
	ManageCgroupsMode       cgMode             // dump or restore cgroup mode
	EmptyNs                 uint32             // don't c/r properties for namespace from this mask
}

type CriuPageServerInfo

type CriuPageServerInfo struct {
	Address string // IP address of CRIU page server
	Port    int32  // port number of CRIU page server
}

type Error

type Error interface {
	error

	// Returns a verbose string including the error message
	// and a representation of the stack trace suitable for
	// printing.
	Detail(w io.Writer) error

	// Returns the error code for this error.
	Code() ErrorCode
}

Error is the API error type.

type ErrorCode

type ErrorCode int

ErrorCode is the API error code type.

const (
	// Factory errors
	IdInUse ErrorCode = iota
	InvalidIdFormat

	// Container errors
	ContainerNotExists
	ContainerPaused
	ContainerNotStopped
	ContainerNotRunning
	ContainerNotPaused

	// Process errors
	NoProcessOps

	// Common errors
	ConfigInvalid
	ConsoleExists
	SystemError
)

API error codes.

func (ErrorCode) String

func (c ErrorCode) String() string

type Factory

type Factory interface {
	// Creates a new container with the given id and starts the initial process inside it.
	// id must be a string containing only letters, digits and underscores and must contain
	// between 1 and 1024 characters, inclusive.
	//
	// The id must not already be in use by an existing container. Containers created using
	// a factory with the same path (and file system) must have distinct ids.
	//
	// Returns the new container with a running process.
	//
	// errors:
	// IdInUse - id is already in use by a container
	// InvalidIdFormat - id has incorrect format
	// ConfigInvalid - config is invalid
	// Systemerror - System error
	//
	// On error, any partially created container parts are cleaned up (the operation is atomic).
	Create(id string, config *configs.Config) (Container, error)

	// Load takes an ID for an existing container and returns the container information
	// from the state.  This presents a read only view of the container.
	//
	// errors:
	// Path does not exist
	// Container is stopped
	// System error
	Load(id string) (Container, error)

	// StartInitialization is an internal API to libcontainer used during the reexec of the
	// container.
	//
	// Errors:
	// Pipe connection error
	// System error
	StartInitialization() error

	// Type returns info string about factory type (e.g. lxc, libcontainer...)
	Type() string
}

func New

func New(root string, options ...func(*LinuxFactory) error) (Factory, error)

New returns a linux based container factory based in the root directory and configures the factory with the provided option funcs.

type IO added in v0.0.7

type IO struct {
	Stdin  io.WriteCloser
	Stdout io.ReadCloser
	Stderr io.ReadCloser
}

IO holds the process's STDIO

type Int32msg added in v0.0.6

type Int32msg struct {
	Type  uint16
	Value uint32
}

func (*Int32msg) Len added in v0.0.6

func (msg *Int32msg) Len() int

func (*Int32msg) Serialize added in v0.0.6

func (msg *Int32msg) Serialize() []byte

Serialize serializes the message. Int32msg has the following representation | nlattr len | nlattr type | | uint32 value |

type LinuxFactory

type LinuxFactory struct {
	// Root directory for the factory to store state.
	Root string

	// InitArgs are arguments for calling the init responsibilities for spawning
	// a container.
	InitArgs []string

	// CriuPath is the path to the criu binary used for checkpoint and restore of
	// containers.
	CriuPath string

	// Validator provides validation to container configurations.
	Validator validate.Validator

	// NewCgroupsManager returns an initialized cgroups manager for a single container.
	NewCgroupsManager func(config *configs.Cgroup, paths map[string]string) cgroups.Manager
}

LinuxFactory implements the default factory interface for linux based systems.

func (*LinuxFactory) Create

func (l *LinuxFactory) Create(id string, config *configs.Config) (Container, error)

func (*LinuxFactory) Load

func (l *LinuxFactory) Load(id string) (Container, error)

func (*LinuxFactory) StartInitialization

func (l *LinuxFactory) StartInitialization() (err error)

StartInitialization loads a container by opening the pipe fd from the parent to read the configuration and state This is a low level implementation detail of the reexec and should not be consumed externally

func (*LinuxFactory) Type

func (l *LinuxFactory) Type() string

type NetworkInterface

type NetworkInterface struct {
	// Name is the name of the network interface.
	Name string

	RxBytes   uint64
	RxPackets uint64
	RxErrors  uint64
	RxDropped uint64
	TxBytes   uint64
	TxPackets uint64
	TxErrors  uint64
	TxDropped uint64
}

type PressureLevel added in v0.0.7

type PressureLevel uint
const (
	LowPressure PressureLevel = iota
	MediumPressure
	CriticalPressure
)

type Process

type Process struct {
	// The command to be run followed by any arguments.
	Args []string

	// Env specifies the environment variables for the process.
	Env []string

	// User will set the uid and gid of the executing process running inside the container
	// local to the container's user and group configuration.
	User string

	// AdditionalGroups specifies the gids that should be added to supplementary groups
	// in addition to those that the user belongs to.
	AdditionalGroups []string

	// Cwd will change the processes current working directory inside the container's rootfs.
	Cwd string

	// Stdin is a pointer to a reader which provides the standard input stream.
	Stdin io.Reader

	// Stdout is a pointer to a writer which receives the standard output stream.
	Stdout io.Writer

	// Stderr is a pointer to a writer which receives the standard error stream.
	Stderr io.Writer

	// ExtraFiles specifies additional open files to be inherited by the container
	ExtraFiles []*os.File

	// Capabilities specify the capabilities to keep when executing the process inside the container
	// All capabilities not specified will be dropped from the processes capability mask
	Capabilities []string

	// AppArmorProfile specifies the profile to apply to the process and is
	// changed at the time the process is execed
	AppArmorProfile string

	// Label specifies the label to apply to the process.  It is commonly used by selinux
	Label string

	// NoNewPrivileges controls whether processes can gain additional privileges.
	NoNewPrivileges *bool

	// Rlimits specifies the resource limits, such as max open files, to set in the container
	// If Rlimits are not set, the container will inherit rlimits from the parent process
	Rlimits []configs.Rlimit
	// contains filtered or unexported fields
}

Process specifies the configuration and IO for a process inside a container.

func (*Process) ConsoleFromPath added in v0.0.6

func (p *Process) ConsoleFromPath(path string) error

ConsoleFromPath sets the process's console with the path provided

func (*Process) InitializeIO added in v0.0.7

func (p *Process) InitializeIO(rootuid, rootgid int) (i *IO, err error)

InitializeIO creates pipes for use with the process's STDIO and returns the opposite side for each

func (*Process) NewConsole

func (p *Process) NewConsole(rootuid, rootgid int) (Console, error)

NewConsole creates new console for process and returns it

func (Process) Pid

func (p Process) Pid() (int, error)

Pid returns the process ID

func (Process) Signal

func (p Process) Signal(sig os.Signal) error

Signal sends a signal to the Process.

func (Process) Wait

func (p Process) Wait() (*os.ProcessState, error)

Wait waits for the process to exit. Wait releases any resources associated with the Process

type State

type State struct {
	BaseState

	// Path to all the cgroups setup for a container. Key is cgroup subsystem name
	// with the value as the path.
	CgroupPaths map[string]string `json:"cgroup_paths"`

	// NamespacePaths are filepaths to the container's namespaces. Key is the namespace type
	// with the value as the path.
	NamespacePaths map[configs.NamespaceType]string `json:"namespace_paths"`

	// Container's standard descriptors (std{in,out,err}), needed for checkpoint and restore
	ExternalDescriptors []string `json:"external_descriptors,omitempty"`
}

State represents a running container's state

type Stats

type Stats struct {
	Interfaces  []*NetworkInterface
	CgroupStats *cgroups.Stats
}

type Status

type Status int

Status is the status of a container.

const (
	// Created is the status that denotes the container exists but has not been run yet.
	Created Status = iota
	// Running is the status that denotes the container exists and is running.
	Running
	// Pausing is the status that denotes the container exists, it is in the process of being paused.
	Pausing
	// Paused is the status that denotes the container exists, but all its processes are paused.
	Paused
	// Stopped is the status that denotes the container does not have a created or running process.
	Stopped
)

func (Status) String added in v0.0.7

func (s Status) String() string

type VethPairName added in v0.0.4

type VethPairName struct {
	ContainerInterfaceName string
	HostInterfaceName      string
}

Directories

Path Synopsis
fs
Package criurpc is a generated protocol buffer package.
Package criurpc is a generated protocol buffer package.
integration is used for integration testing of libcontainer
integration is used for integration testing of libcontainer
Package specconv implements conversion of specifications to libcontainer configurations
Package specconv implements conversion of specifications to libcontainer configurations

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL