fastsync

package module
v0.2.0 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Aug 6, 2024 License: MIT Imports: 23 Imported by: 0

README

FastSync

Fast synchronization across networks using speedy compression, lots of parallelization and fast hashmaps for keeping track of things internally

GitHub release (latest SemVer) GitHub all releases GitHub Workflow Status

I made this because a customer asked me to transfer 100TB from one system to another. The data were raw backups with billions of files that were hardlinked together. With rsync, the transfer progressed very slowly and did not load the RAID system enough - and it would always end badly with the machine running out of memory.

VERIFY YOUR DATA AFTER USING

This is just something I whipped together, so handle with care.

Features:

  • server and client - sends files from server to client
  • preserves timestamps, owner UID, group GID, attributes
  • handles character devices, hardlinks, softlinks etc.
  • compresses data over the wire using snappy compression
  • very performant - I've seen speeds up to ~90K files processed/sec when resyncing

FastSync consists of:

Server mode

Start up the source side, listening for unauthenticated clients

fastsync [--directory /your/source/directory] [--bind 0.0.0.0:7331] server

Client mode

Connects to the server and starts syncing files to the client

fastsync [--hardlinks true] [--checksum false] [--delete false] [--acl true] [--pfile 4096] [--pdir 512] [--loglevel info] [--blocksize 131072] [--statsinterval 5] [--queueinterval 30] [--directory /your/target/directory] [--bind serverip:7331] client

Options:

  • pfile sets the number of parallel file IO operations. For large RAID systems with lots of drives or flash storage the default of 4096 is probably okay, but expect major load on both systems

  • pdir sets the number of parallel directory listing operations

  • checksum forces fastsync to check all data on all existing files using checksums for every block (otherwise it assumes files with the same size, timestamp and attributes are equal)

  • hardlinks enables keeping the same files hardlinked across the network. This is enabled by default, and should do no harm even if you don't use hardlinks

  • loglevel sets the verbosity, you can use error, info, debug and trace

  • blocksize is the number of bytes to checksum and the size of the data blocks transferred across the network. If you increase this too much, the RPC traffic will get "choppy" and the parallelization will suffer. If you're running on gigabit the default is probably fine, but if it's 10Gbps I'd probably increase this

  • statsinterval is how often to output performance data, set to 0 to disable

  • queueinterval is how often to output internal queue data, set to 0 to disable (mostly for debugging)

Documentation

Index

Constants

View Source
const PROTOCOLVERSION = 1

Variables

View Source
var (
	ErrNotSupportedByPlatform = errors.New("not supported on this platform")
	ErrTypeError              = errors.New("type error")
)
View Source
var ErrPleaseSayHello = errors.New("Client needs to say hello")
View Source
var ErrPleaseSayHelloOnce = errors.New("Client needs to say hello just once")

Functions

func CompressedReadWriteCloser

func CompressedReadWriteCloser(rwc io.ReadWriteCloser) io.ReadWriteCloser

func NewPerformance

func NewPerformance() *performance

func NewStack

func NewStack[T any](inbuffer, outbuffer int) (*stack[T], <-chan T, chan<- T)

Types

type AtomicAdder

type AtomicAdder func(uint64)

type Client

type Client struct {
	BasePath string

	AlwaysChecksum bool

	Options SharedOptions

	Delete bool

	ParallelFile, ParallelDir, QueueSize int
	PreserveHardlinks                    bool
	BlockSize                            int

	Perf *performance
	// contains filtered or unexported fields
}

func NewClient

func NewClient() *Client

func (*Client) Abort

func (c *Client) Abort()

func (*Client) Done

func (c *Client) Done() bool

func (*Client) PostProcessDir

func (c *Client) PostProcessDir(item *dirinfo)

func (*Client) ProcessedItemInDir

func (c *Client) ProcessedItemInDir(path string)

func (*Client) Run

func (c *Client) Run(client *rpc.Client) error

func (*Client) Stats

func (c *Client) Stats() (inodes, directories, filequeue, directoriestack int)

func (*Client) Wait

func (c *Client) Wait()

type FileInfo

type FileInfo struct {
	Name  string
	Mode  fs.FileMode // Go simplified file type, not for chmod
	Size  int64
	IsDir bool

	Permissions uint32
	Xattrs      map[string][]byte // xattrs contain ACL !!

	Owner, Group uint32
	Inode, Nlink uint64
	Dev, Rdev    uint64
	LinkTo       string

	Atim syscall.Timespec
	Mtim syscall.Timespec
	Ctim syscall.Timespec
}

func InfoToFileInfo

func InfoToFileInfo(info os.FileInfo, absolutepath string) (FileInfo, error)

func PathToFileInfo

func PathToFileInfo(absolutepath string) (FileInfo, error)

func (FileInfo) ApplyChanges

func (fi FileInfo) ApplyChanges(fi2 FileInfo) error

func (FileInfo) Chmod

func (fi FileInfo) Chmod(fi2 FileInfo) error

func (*FileInfo) Chown

func (fi *FileInfo) Chown(fi2 FileInfo) error

func (*FileInfo) CodecDecodeSelf

func (x *FileInfo) CodecDecodeSelf(d *codec1978.Decoder)

func (*FileInfo) CodecEncodeSelf

func (x *FileInfo) CodecEncodeSelf(e *codec1978.Encoder)

func (FileInfo) Create

func (fi FileInfo) Create(fi2 FileInfo) error

func (*FileInfo) IsCodecEmpty

func (x *FileInfo) IsCodecEmpty() bool

func (FileInfo) SetTimestamps

func (fi FileInfo) SetTimestamps(fi2 FileInfo) error

type FileListResponse

type FileListResponse struct {
	ParentDirectory string
	Files           []FileInfo
}

func (*FileListResponse) CodecDecodeSelf

func (x *FileListResponse) CodecDecodeSelf(d *codec1978.Decoder)

func (*FileListResponse) CodecEncodeSelf

func (x *FileListResponse) CodecEncodeSelf(e *codec1978.Encoder)

func (*FileListResponse) IsCodecEmpty

func (x *FileListResponse) IsCodecEmpty() bool

type GetChunkArgs

type GetChunkArgs struct {
	Path   string
	Offset uint64
	Size   uint64
}

func (*GetChunkArgs) CodecDecodeSelf

func (x *GetChunkArgs) CodecDecodeSelf(d *codec1978.Decoder)

func (*GetChunkArgs) CodecEncodeSelf

func (x *GetChunkArgs) CodecEncodeSelf(e *codec1978.Encoder)

func (*GetChunkArgs) IsCodecEmpty

func (x *GetChunkArgs) IsCodecEmpty() bool

type PerformanceCounterType

type PerformanceCounterType int

performance related stuff

const (
	SentOverWire PerformanceCounterType = iota
	RecievedOverWire
	SentBytes
	RecievedBytes
	WrittenBytes
	ReadBytes
	BytesProcessed
	FilesProcessed
	DirectoriesProcessed
	EntriesDeleted
	FileQueue
	FolderQueue
)

type PerformanceEntry

type PerformanceEntry struct {
	// contains filtered or unexported fields
}

func (PerformanceEntry) Add

func (PerformanceEntry) Get

type PerformanceWrapperReadWriteCloser

type PerformanceWrapperReadWriteCloser struct {
	// contains filtered or unexported fields
}

func NewPerformanceWrapper

func NewPerformanceWrapper(rwc io.ReadWriteCloser, onRead, onWrite AtomicAdder) *PerformanceWrapperReadWriteCloser

func (*PerformanceWrapperReadWriteCloser) Close

func (*PerformanceWrapperReadWriteCloser) Read

func (*PerformanceWrapperReadWriteCloser) Write

func (pw *PerformanceWrapperReadWriteCloser) Write(b []byte) (int, error)

type Server

type Server struct {
	BasePath string

	Options SharedOptions

	ReadOnly bool

	Perf *performance
	// contains filtered or unexported fields
}

func NewServer

func NewServer() *Server

func (*Server) ChecksumChunk

func (s *Server) ChecksumChunk(args GetChunkArgs, checksum *uint64) error

func (*Server) Close

func (s *Server) Close(path string, reply *interface{}) error

func (*Server) GetChunk

func (s *Server) GetChunk(args GetChunkArgs, data *[]byte) error

func (*Server) Hello

func (s *Server) Hello(options SharedOptions, reply *any) error

func (*Server) List

func (s *Server) List(path string, reply *FileListResponse) error

func (*Server) Open

func (s *Server) Open(path string, reply *interface{}) error

func (*Server) Shutdown

func (s *Server) Shutdown(input any, reply *any) error

func (*Server) Stat

func (s *Server) Stat(path string, reply *FileInfo) error

func (*Server) Wait

func (s *Server) Wait()

type SharedOptions

type SharedOptions struct {
	ProtocolVersion int
	SendXattr       bool
}

Directories

Path Synopsis

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL