Documentation ¶
Overview ¶
Package cluster implements a cluster state machine. It relies on a cluster-wide key-value store for coordinating the state of the cluster. It also stores the state of the cluster in this key-value store.
Index ¶
- Constants
- Variables
- func ExternalIp(config *config.ClusterConfig) (string, string, error)
- func Init(cfg config.ClusterConfig) error
- type Cluster
- type ClusterAlerts
- type ClusterData
- type ClusterInfo
- type ClusterInitState
- type ClusterListener
- type ClusterListenerAlertOps
- type ClusterListenerGenericOps
- type ClusterListenerNodeOps
- type ClusterListenerStatusOps
- type ClusterManager
- func (c *ClusterManager) AddEventListener(listener ClusterListener) error
- func (c *ClusterManager) ClearAlert(resource api.ResourceType, alertID int64) error
- func (c *ClusterManager) DeleteNodeConf(nodeID string) error
- func (c *ClusterManager) DisableUpdates() error
- func (c *ClusterManager) EnableUpdates() error
- func (c *ClusterManager) Enumerate() (api.Cluster, error)
- func (c *ClusterManager) EnumerateAlerts(ts, te time.Time, resource api.ResourceType) (*api.Alerts, error)
- func (c *ClusterManager) EnumerateNodeConf() (*osdconfig.NodesConfig, error)
- func (c *ClusterManager) EraseAlert(resource api.ResourceType, alertID int64) error
- func (c *ClusterManager) GetClusterConf() (*osdconfig.ClusterConfig, error)
- func (c *ClusterManager) GetData() (map[string]*api.Node, error)
- func (c *ClusterManager) GetGossipState() *ClusterState
- func (c *ClusterManager) GetNodeConf(nodeID string) (*osdconfig.NodeConfig, error)
- func (c *ClusterManager) GetNodeIdFromIp(idIp string) (string, error)
- func (c *ClusterManager) HandleNotifications(culpritNodeId string, notification api.ClusterNotify) (string, error)
- func (c *ClusterManager) Inspect(nodeID string) (api.Node, error)
- func (c *ClusterManager) NodeRemoveDone(nodeID string, result error)
- func (c *ClusterManager) NodeStatus() (api.Status, error)
- func (c *ClusterManager) ObjectStoreCreate(volumeID string) (*api.ObjectstoreInfo, error)
- func (c *ClusterManager) ObjectStoreDelete(objectstoreID string) error
- func (c *ClusterManager) ObjectStoreInspect(objectstoreID string) (*api.ObjectstoreInfo, error)
- func (c *ClusterManager) ObjectStoreUpdate(objectstoreID string, enable bool) error
- func (c *ClusterManager) PeerStatus(listenerName string) (map[string]api.Status, error)
- func (c *ClusterManager) Remove(nodes []api.Node, forceRemove bool) error
- func (c *ClusterManager) SchedPolicyCreate(name, sched string) error
- func (c *ClusterManager) SchedPolicyDelete(name string) error
- func (c *ClusterManager) SchedPolicyEnumerate() ([]*sched.SchedPolicy, error)
- func (c *ClusterManager) SchedPolicyGet(name string) (*sched.SchedPolicy, error)
- func (c *ClusterManager) SchedPolicyUpdate(name, sched string) error
- func (c *ClusterManager) SetClusterConf(config *osdconfig.ClusterConfig) error
- func (c *ClusterManager) SetNodeConf(config *osdconfig.NodeConfig) error
- func (c *ClusterManager) SetSize(size int) error
- func (c *ClusterManager) Shutdown() error
- func (c *ClusterManager) Start(clusterMaxSize int, nodeInitialized bool, gossipPort string) error
- func (c *ClusterManager) StartWithConfiguration(clusterMaxSize int, nodeInitialized bool, gossipPort string, ...) error
- func (c *ClusterManager) UpdateData(nodeData map[string]interface{}) error
- func (c *ClusterManager) UpdateLabels(nodeLabels map[string]string) error
- type ClusterNotify
- type ClusterRemove
- type ClusterServerConfiguration
- type ClusterState
- type ClusterStatus
- type FinalizeInitCb
- type NodeEntry
- type NullClusterListener
- func (nc *NullClusterListener) Add(node *api.Node) error
- func (nc *NullClusterListener) CanNodeRemove(node *api.Node) (string, error)
- func (nc *NullClusterListener) CleanupInit(self *api.Node, clusterInfo *ClusterInfo) error
- func (nc *NullClusterListener) ClearAlert(resource api.ResourceType, alertID int64) error
- func (nc *NullClusterListener) ClusterInit(self *api.Node) error
- func (nc *NullClusterListener) Enumerate(cluster api.Cluster) error
- func (nc *NullClusterListener) EnumerateAlerts(timeStart, timeEnd time.Time, resource api.ResourceType) (*api.Alerts, error)
- func (nc *NullClusterListener) EraseAlert(resource api.ResourceType, alertID int64) error
- func (nc *NullClusterListener) Halt(self *api.Node, clusterInfo *ClusterInfo) error
- func (nc *NullClusterListener) Init(self *api.Node, state *ClusterInfo) (FinalizeInitCb, error)
- func (nc *NullClusterListener) Join(self *api.Node, state *ClusterInitState, clusterNotify ClusterNotify) error
- func (nc *NullClusterListener) JoinComplete(self *api.Node) error
- func (nc *NullClusterListener) Leave(node *api.Node) error
- func (nc *NullClusterListener) ListenerData() map[string]interface{}
- func (nc *NullClusterListener) ListenerPeerStatus() map[string]api.Status
- func (nc *NullClusterListener) ListenerStatus() api.Status
- func (nc *NullClusterListener) MarkNodeDown(node *api.Node) error
- func (nc *NullClusterListener) QuorumMember(node *api.Node) bool
- func (nc *NullClusterListener) Remove(node *api.Node, forceRemove bool) error
- func (nc *NullClusterListener) String() string
- func (nc *NullClusterListener) Update(node *api.Node) error
- func (nc *NullClusterListener) UpdateCluster(self *api.Node, clusterInfo *ClusterInfo) error
Constants ¶
const ( // APIVersion for cluster APIs APIVersion = "v1" // APIBase url for cluster APIs APIBase = "/var/lib/osd/cluster/" )
const (
// ClusterDBKey is the key at which cluster info is stored in kvdb
ClusterDBKey = "cluster/database"
)
Variables ¶
var ( // ErrNodeRemovePending is returned when Node remove does not succeed and is // kept in pending state ErrNodeRemovePending = errors.New("Node remove is pending") ErrInitNodeNotFound = errors.New("This node is already initialized but " + "could not be found in the cluster map.") ErrNodeDecommissioned = errors.New("Node is decomissioned.") ErrRemoveCausesDataLoss = errors.New("Cannot remove node without data loss") )
var ( // Inst returns an instance of an already instantiated cluster manager. // This function can be overridden for testing purposes Inst = func() (Cluster, error) { return clusterInst() } )
Functions ¶
func ExternalIp ¶
func ExternalIp(config *config.ClusterConfig) (string, string, error)
ExternalIp returns the mgmt and data ip based on the config
Types ¶
type Cluster ¶
type Cluster interface { // Inspect the node given a UUID. Inspect(string) (api.Node, error) // AddEventListener adds an event listener and exposes cluster events. AddEventListener(ClusterListener) error // Enumerate lists all the nodes in the cluster. Enumerate() (api.Cluster, error) // SetSize sets the maximum number of nodes in a cluster. SetSize(size int) error // Shutdown can be called when THIS node is gracefully shutting down. Shutdown() error // Start starts the cluster manager and state machine. // It also causes this node to join the cluster. // nodeInitialized indicates if the caller of this method expects the node // to have been in an already-initialized state. // All managers will default to returning NotSupported. Start(clusterSize int, nodeInitialized bool, gossipPort string) error // StartWithConfiguration is like Start, but has the ability to pass in managers to the cluster object. StartWithConfiguration(clusterMaxSize int, nodeInitialized bool, gossipPort string, config *ClusterServerConfiguration) error ClusterData ClusterRemove ClusterStatus ClusterAlerts osdconfig.ConfigCaller secrets.Secrets sched.SchedulePolicyProvider objectstore.ObjectStore }
Cluster is the API that a cluster provider will implement.
type ClusterAlerts ¶
type ClusterAlerts interface { // EnumerateAlerts enumerates alerts on this cluster for the given resource // within a specific time range. EnumerateAlerts(timeStart, timeEnd time.Time, resource api.ResourceType) (*api.Alerts, error) // ClearAlert clears an alert for the given resource ClearAlert(resource api.ResourceType, alertID int64) error // EraseAlert erases an alert for the given resource EraseAlert(resource api.ResourceType, alertID int64) error }
type ClusterData ¶
type ClusterData interface { // UpdateData updates node data associated with this node UpdateData(nodeData map[string]interface{}) error // UpdateLabels updates node labels associated with this node UpdateLabels(nodeLabels map[string]string) error // GetData gets data associated with all nodes. // Key is the node id GetData() (map[string]*api.Node, error) // GetNodeIdFromIp returns a Node Id given an IP. GetNodeIdFromIp(idIp string) (string, error) // EnableUpdates enables cluster data updates to be sent to listeners EnableUpdates() error // DisableUpdates disables cluster data updates to be sent to listeners DisableUpdates() error // GetGossipState returns the state of nodes according to gossip GetGossipState() *ClusterState }
ClusterData interface provides apis to handle data of the cluster
type ClusterInfo ¶
ClusterInfo is the basic info about the cluster and its nodes
type ClusterInitState ¶
type ClusterInitState struct { ClusterInfo *ClusterInfo InitDb kvdb.Kvdb Version uint64 Collector kvdb.UpdatesCollector }
ClusterInitState is the snapshot state which should be used to initialize
type ClusterListener ¶
type ClusterListener interface { // String returns a string representation of this listener. String() string // ClusterInit is called when a brand new cluster is initialized. ClusterInit(self *api.Node) error // Init is called when this node is joining an existing cluster for the first time. Init(self *api.Node, state *ClusterInfo) (FinalizeInitCb, error) // Join is called when this node is joining an existing cluster. Join(self *api.Node, state *ClusterInitState, clusterNotify ClusterNotify) error // JoinComplete is called when this node has successfully joined a cluster JoinComplete(self *api.Node) error // CleanupInit is called when Init failed. CleanupInit(self *api.Node, clusterInfo *ClusterInfo) error // Halt is called when a node is gracefully shutting down. Halt(self *api.Node, clusterInfo *ClusterInfo) error ClusterListenerNodeOps ClusterListenerStatusOps ClusterListenerGenericOps ClusterListenerAlertOps }
ClusterListener is an interface to be implemented by a storage driver if it is participating in a multi-host environment. It exposes events in the cluster state machine. The basic set of APIs determines the lifecycle of a node and comprises two operations: 1. Setup: ClusterInit -> (Node)Init -> Join -> JoinComplete. 2. Teardown: Halt -> CleanupInit. The other APIs are helpers for the cluster manager.
type ClusterListenerAlertOps ¶
type ClusterListenerAlertOps interface { ClusterAlerts }
ClusterListenerAlertOps is a wrapper over ClusterAlerts interface which the listeners need to implement if they want to handle alerts
type ClusterListenerGenericOps ¶
type ClusterListenerGenericOps interface { // ListenerData returns the data that the listener wants to share // with ClusterManager and would be stored in NodeData field. ListenerData() map[string]interface{} // QuorumMember returns true if the listener wants this node to // participate in quorum decisions. QuorumMember(node *api.Node) bool // UpdateCluster is called when there is an update to the cluster. // XXX: Remove ClusterInfo from this API UpdateCluster(self *api.Node, clusterInfo *ClusterInfo) error // Enumerate updates listener specific data in Enumerate. Enumerate(cluster api.Cluster) error }
ClusterListenerGenericOps defines a set of generic helper APIs for listeners to implement
type ClusterListenerNodeOps ¶
type ClusterListenerNodeOps interface { // Add is called when a new node joins the cluster. Add(node *api.Node) error // Remove is called when a node leaves the cluster Remove(node *api.Node, forceRemove bool) error // CanNodeRemove tests to see if we can remove this node CanNodeRemove(node *api.Node) (string, error) // MarkNodeDown marks the given node's status as down MarkNodeDown(node *api.Node) error // Update is called when a node's status changes significantly // in the cluster. Update(node *api.Node) error // Leave is called when this node leaves the cluster. Leave(node *api.Node) error }
ClusterListenerNodeOps defines APIs that a listener needs to implement to handle various node operations/updates
type ClusterListenerStatusOps ¶
type ClusterListenerStatusOps interface { // ListenerStatus returns the listener's Status ListenerStatus() api.Status // ListenerPeerStatus returns the peer Statuses for a listener ListenerPeerStatus() map[string]api.Status }
ClusterListenerStatusOps defines APIs that a listener needs to implement to indicate its own/peer statuses
type ClusterManager ¶
ClusterManager implements the cluster interface
func (*ClusterManager) AddEventListener ¶
func (c *ClusterManager) AddEventListener(listener ClusterListener) error
AddEventListener adds a new listener
func (*ClusterManager) ClearAlert ¶
func (c *ClusterManager) ClearAlert(resource api.ResourceType, alertID int64) error
func (*ClusterManager) DeleteNodeConf ¶
func (c *ClusterManager) DeleteNodeConf(nodeID string) error
func (*ClusterManager) DisableUpdates ¶
func (c *ClusterManager) DisableUpdates() error
DisableUpdates disables gossip updates
func (*ClusterManager) EnableUpdates ¶
func (c *ClusterManager) EnableUpdates() error
EnableUpdates enables gossip updates
func (*ClusterManager) Enumerate ¶
func (c *ClusterManager) Enumerate() (api.Cluster, error)
Enumerate lists all the nodes in the cluster.
func (*ClusterManager) EnumerateAlerts ¶
func (c *ClusterManager) EnumerateAlerts(ts, te time.Time, resource api.ResourceType) (*api.Alerts, error)
func (*ClusterManager) EnumerateNodeConf ¶
func (c *ClusterManager) EnumerateNodeConf() (*osdconfig.NodesConfig, error)
func (*ClusterManager) EraseAlert ¶
func (c *ClusterManager) EraseAlert(resource api.ResourceType, alertID int64) error
func (*ClusterManager) GetClusterConf ¶
func (c *ClusterManager) GetClusterConf() (*osdconfig.ClusterConfig, error)
osdconfig.ConfigCaller compliance
func (*ClusterManager) GetData ¶
func (c *ClusterManager) GetData() (map[string]*api.Node, error)
GetData returns self node's data
func (*ClusterManager) GetGossipState ¶
func (c *ClusterManager) GetGossipState() *ClusterState
GetGossipState returns current gossip state
func (*ClusterManager) GetNodeConf ¶
func (c *ClusterManager) GetNodeConf(nodeID string) (*osdconfig.NodeConfig, error)
func (*ClusterManager) GetNodeIdFromIp ¶
func (c *ClusterManager) GetNodeIdFromIp(idIp string) (string, error)
GetNodeIdFromIp returns a Node Id given an IP.
func (*ClusterManager) HandleNotifications ¶
func (c *ClusterManager) HandleNotifications(culpritNodeId string, notification api.ClusterNotify) (string, error)
HandleNotifications is a callback function used by the listeners
func (*ClusterManager) Inspect ¶
func (c *ClusterManager) Inspect(nodeID string) (api.Node, error)
Inspect inspects given node and returns the state
func (*ClusterManager) NodeRemoveDone ¶
func (c *ClusterManager) NodeRemoveDone(nodeID string, result error)
NodeRemoveDone is called from the listeners when their job of Node removal is done.
func (*ClusterManager) NodeStatus ¶
func (c *ClusterManager) NodeStatus() (api.Status, error)
NodeStatus returns the status of a node. It compares the status maintained by the cluster manager and the provided listener and returns the appropriate one
func (*ClusterManager) ObjectStoreCreate ¶
func (c *ClusterManager) ObjectStoreCreate(volumeID string) (*api.ObjectstoreInfo, error)
func (*ClusterManager) ObjectStoreDelete ¶
func (c *ClusterManager) ObjectStoreDelete(objectstoreID string) error
func (*ClusterManager) ObjectStoreInspect ¶
func (c *ClusterManager) ObjectStoreInspect(objectstoreID string) (*api.ObjectstoreInfo, error)
func (*ClusterManager) ObjectStoreUpdate ¶
func (c *ClusterManager) ObjectStoreUpdate(objectstoreID string, enable bool) error
func (*ClusterManager) PeerStatus ¶
PeerStatus returns the status of a peer node as seen by us
func (*ClusterManager) Remove ¶
func (c *ClusterManager) Remove(nodes []api.Node, forceRemove bool) error
Remove node(s) from the cluster permanently.
func (*ClusterManager) SchedPolicyCreate ¶
func (c *ClusterManager) SchedPolicyCreate(name, sched string) error
func (*ClusterManager) SchedPolicyDelete ¶
func (c *ClusterManager) SchedPolicyDelete(name string) error
func (*ClusterManager) SchedPolicyEnumerate ¶
func (c *ClusterManager) SchedPolicyEnumerate() ([]*sched.SchedPolicy, error)
func (*ClusterManager) SchedPolicyGet ¶
func (c *ClusterManager) SchedPolicyGet(name string) (*sched.SchedPolicy, error)
func (*ClusterManager) SchedPolicyUpdate ¶
func (c *ClusterManager) SchedPolicyUpdate(name, sched string) error
func (*ClusterManager) SetClusterConf ¶
func (c *ClusterManager) SetClusterConf(config *osdconfig.ClusterConfig) error
func (*ClusterManager) SetNodeConf ¶
func (c *ClusterManager) SetNodeConf(config *osdconfig.NodeConfig) error
func (*ClusterManager) SetSize ¶
func (c *ClusterManager) SetSize(size int) error
SetSize sets the maximum number of nodes in a cluster.
func (*ClusterManager) Shutdown ¶
func (c *ClusterManager) Shutdown() error
Shutdown can be called when THIS node is gracefully shutting down.
func (*ClusterManager) Start ¶
func (c *ClusterManager) Start( clusterMaxSize int, nodeInitialized bool, gossipPort string, ) error
Start initiates the cluster manager and the cluster state machine
func (*ClusterManager) StartWithConfiguration ¶
func (c *ClusterManager) StartWithConfiguration( clusterMaxSize int, nodeInitialized bool, gossipPort string, config *ClusterServerConfiguration, ) error
func (*ClusterManager) UpdateData ¶
func (c *ClusterManager) UpdateData(nodeData map[string]interface{}) error
UpdateData updates self node data
func (*ClusterManager) UpdateLabels ¶
func (c *ClusterManager) UpdateLabels(nodeLabels map[string]string) error
type ClusterNotify ¶
type ClusterNotify func(string, api.ClusterNotify) (string, error)
ClusterNotify is the callback function listeners can use to notify cluster manager
type ClusterRemove ¶
type ClusterRemove interface { // Remove node(s) from the cluster permanently. Remove(nodes []api.Node, forceRemove bool) error // NodeRemoveDone notify cluster manager NodeRemove is done. NodeRemoveDone(nodeID string, result error) }
ClusterRemove interface provides apis for removing nodes from a cluster
type ClusterServerConfiguration ¶
type ClusterServerConfiguration struct { // holds implementation to Secrets interface ConfigSecretManager secrets.Secrets // holds implementation to SchedulePolicy interface ConfigSchedManager sched.SchedulePolicyProvider // holds implementation to ObjectStore interface ConfigObjectStoreManager objectstore.ObjectStore }
ClusterServerConfiguration holds the manager implementations. The caller has to create the managers and pass them in.
type ClusterState ¶
ClusterState is the gossip state of all nodes in the cluster
type ClusterStatus ¶
type ClusterStatus interface { // NodeStatus returns the status of THIS node as seen by the Cluster Provider // for a given listener. If listenerName is empty it returns the status of // THIS node maintained by the Cluster Provider. // At any time the status of the Cluster Provider takes precedence over // the status of listener. Precedence is determined by the severity of the status. NodeStatus() (api.Status, error) // PeerStatus returns the statuses of all peer nodes as seen by the // Cluster Provider for a given listener. If listenerName is empty it returns the // statuses of all peer nodes as maintained by the ClusterProvider (gossip) PeerStatus(listenerName string) (map[string]api.Status, error) }
ClusterStatus interface provides apis for cluster and node status
type FinalizeInitCb ¶
type FinalizeInitCb func() error
FinalizeInitCb is invoked when init is complete and is in the process of updating the cluster database. This callback is invoked under lock and must finish quickly, else it will slow down other node joins.
type NodeEntry ¶
type NodeEntry struct { Id string MgmtIp string DataIp string GenNumber uint64 StartTime time.Time MemTotal uint64 Hostname string Status api.Status NodeLabels map[string]string NonQuorumMember bool }
NodeEntry is used to discover other nodes in the cluster and setup the gossip protocol with them.
type NullClusterListener ¶
type NullClusterListener struct { }
NullClusterListener is a NULL implementation of ClusterListener functions. ClusterListeners should use this as the base and override the functions they are interested in.
func (*NullClusterListener) CanNodeRemove ¶
func (nc *NullClusterListener) CanNodeRemove(node *api.Node) (string, error)
func (*NullClusterListener) CleanupInit ¶
func (nc *NullClusterListener) CleanupInit( self *api.Node, clusterInfo *ClusterInfo, ) error
func (*NullClusterListener) ClearAlert ¶
func (nc *NullClusterListener) ClearAlert( resource api.ResourceType, alertID int64, ) error
func (*NullClusterListener) ClusterInit ¶
func (nc *NullClusterListener) ClusterInit(self *api.Node) error
func (*NullClusterListener) Enumerate ¶
func (nc *NullClusterListener) Enumerate(cluster api.Cluster) error
func (*NullClusterListener) EnumerateAlerts ¶
func (nc *NullClusterListener) EnumerateAlerts( timeStart, timeEnd time.Time, resource api.ResourceType, ) (*api.Alerts, error)
func (*NullClusterListener) EraseAlert ¶
func (nc *NullClusterListener) EraseAlert( resource api.ResourceType, alertID int64, ) error
func (*NullClusterListener) Halt ¶
func (nc *NullClusterListener) Halt( self *api.Node, clusterInfo *ClusterInfo) error
func (*NullClusterListener) Init ¶
func (nc *NullClusterListener) Init(self *api.Node, state *ClusterInfo) (FinalizeInitCb, error)
func (*NullClusterListener) Join ¶
func (nc *NullClusterListener) Join( self *api.Node, state *ClusterInitState, clusterNotify ClusterNotify, ) error
func (*NullClusterListener) JoinComplete ¶
func (nc *NullClusterListener) JoinComplete( self *api.Node, ) error
func (*NullClusterListener) ListenerData ¶
func (nc *NullClusterListener) ListenerData() map[string]interface{}
func (*NullClusterListener) ListenerPeerStatus ¶
func (nc *NullClusterListener) ListenerPeerStatus() map[string]api.Status
func (*NullClusterListener) ListenerStatus ¶
func (nc *NullClusterListener) ListenerStatus() api.Status
func (*NullClusterListener) MarkNodeDown ¶
func (nc *NullClusterListener) MarkNodeDown(node *api.Node) error
func (*NullClusterListener) QuorumMember ¶
func (nc *NullClusterListener) QuorumMember(node *api.Node) bool
func (*NullClusterListener) Remove ¶
func (nc *NullClusterListener) Remove(node *api.Node, forceRemove bool) error
func (*NullClusterListener) String ¶
func (nc *NullClusterListener) String() string
func (*NullClusterListener) UpdateCluster ¶
func (nc *NullClusterListener) UpdateCluster(self *api.Node, clusterInfo *ClusterInfo, ) error