Documentation ¶
Overview ¶
Package vcfgo implements a Reader and Writer for variant call format (VCF). It eases reading, filtering, and modifying VCFs even if they are not to spec. Example:
f, _ := os.Open("examples/test.auto_dom.no_parents.vcf") rdr, err := vcfgo.NewReader(f) if err != nil { panic(err) } for { variant := rdr.Read() if variant == nil { break } fmt.Printf("%s\t%d\t%s\t%s\n", variant.Chromosome, variant.Pos, variant.Ref, variant.Alt) fmt.Printf("%s", variant.Info["DP"].(int) > 10) sample := variant.Samples[0] // we can get the PL field as a list (-1 is default in case of missing value) fmt.Println("%s", variant.GetGenotypeField(sample, "PL", -1)) _ = sample.DP } fmt.Fprintln(os.Stderr, rdr.Error())
Example ¶
package main import ( "fmt" "os" "github.com/brentp/vcfgo" ) func main() { f, _ := os.Open("examples/test.auto_dom.no_parents.vcf") rdr, err := vcfgo.NewReader(f, false) if err != nil { panic(err) } for { variant := rdr.Read() if variant == nil { break } fmt.Printf("%s\t%d\t%s\t%s\n", variant.Chromosome, variant.Pos, variant.Ref(), variant.Alt()) dp, _ := variant.Info().Get("DP") fmt.Printf("%v", dp.(int) > 10)
Output:
Index ¶
- Constants
- func ItoS(k string, v interface{}) string
- type Header
- type Info
- type InfoByte
- func (i *InfoByte) Add(key string, value interface{})
- func (i InfoByte) Bytes() []byte
- func (i InfoByte) Contains(key string) bool
- func (i *InfoByte) Delete(key string)
- func (i InfoByte) Get(key string) (interface{}, error)
- func (i InfoByte) Keys() []string
- func (i InfoByte) SGet(key string) []byte
- func (i *InfoByte) Set(key string, value interface{}) error
- func (i InfoByte) String() string
- func (i *InfoByte) UpdateHeader(key string, value interface{})
- type Reader
- func (vr *Reader) AddFormatToHeader(id string, num string, stype string, desc string)
- func (vr *Reader) AddInfoToHeader(id string, num string, stype string, desc string)
- func (vr *Reader) Clear()
- func (vr *Reader) Close() error
- func (vr *Reader) Error() error
- func (vr *Reader) GetHeaderType(field string) string
- func (vr *Reader) Parse(fields [][]byte) *Variant
- func (vr *Reader) Read() *Variant
- type SampleFormat
- type SampleGenotype
- type VCFError
- type Variant
- func (v *Variant) Alt() []string
- func (v *Variant) CIEnd() (uint32, uint32, bool)
- func (v *Variant) CIPos() (uint32, uint32, bool)
- func (v *Variant) Chrom() string
- func (v *Variant) End() uint32
- func (v *Variant) GetGenotypeField(g *SampleGenotype, field string, missing interface{}) (interface{}, error)
- func (v *Variant) Id() string
- func (v *Variant) Info() interfaces.Info
- func (v *Variant) Ref() string
- func (v *Variant) Start() uint32
- func (v *Variant) String() string
- type Writer
Examples ¶
Constants ¶
const MISSING_VAL = 256
MISSING_VAL is used for the quality score, which ranges from 0 to 255 but may also be given as "." in a VCF.
Variables ¶
This section is empty.
Functions ¶
Types ¶
type Header ¶
type Header struct { sync.RWMutex SampleNames []string Infos map[string]*Info SampleFormats map[string]*SampleFormat Filters map[string]string Extras []string FileFormat string // Contigs is a list of maps of length, URL, etc. Contigs []map[string]string // ##SAMPLE Samples map[string]string Pedigrees []string }
Header holds all the type and format information for the variants.
func NewHeader ¶
func NewHeader() *Header
NewHeader returns a Header with the requisite allocations.
func (*Header) ParseSamples ¶
Force parsing of the sample fields.
type Info ¶
type Info struct { Id string Description string Number string // A G R . '' Type string // STRING INTEGER FLOAT FLAG CHARACTER UNKNOWN }
Info holds the Info and Format fields.
type InfoByte ¶
type InfoByte struct { Info []byte // contains filtered or unexported fields }
func NewInfoByte ¶
func (*InfoByte) UpdateHeader ¶
type Reader ¶
Reader holds information about the current line number (for errors) and the VCF header that indicates the structure of records.
func NewReader ¶
NewReader returns a Reader. If lazySamples is true, then the user will have to call Header.ParseSamples() in order to access sample info.
func (*Reader) AddFormatToHeader ¶
AddFormatToHeader adds a FORMAT field to the header.
func (*Reader) AddInfoToHeader ¶
AddInfoToHeader adds an INFO field to the header.
func (*Reader) GetHeaderType ¶
type SampleFormat ¶
type SampleFormat Info
SampleFormat holds the type info for Format fields.
func (*SampleFormat) String ¶
func (i *SampleFormat) String() string
String returns a string representation.
type SampleGenotype ¶
type SampleGenotype struct { Phased bool GT []int DP int GL []float64 GQ int MQ int Fields map[string]string }
SampleGenotype holds the information about a sample. Several fields are pre-parsed, but all fields are kept in Fields as well.
func NewSampleGenotype ¶
func NewSampleGenotype() *SampleGenotype
NewSampleGenotype allocates the internals and returns a *SampleGenotype
func (*SampleGenotype) AltDepths ¶
func (s *SampleGenotype) AltDepths() ([]int, error)
AltDepths returns the depths of the alternates for this sample
func (*SampleGenotype) RefDepth ¶
func (s *SampleGenotype) RefDepth() (int, error)
RefDepth returns the depth of the reference allele for this sample
func (*SampleGenotype) String ¶
func (sg *SampleGenotype) String(fields []string) string
String returns the string representation of the sample field.
type VCFError ¶
VCFError satisfies the error interface and allows multiple errors. This is useful because, for example, on a single line, every sample may have a field that doesn't match the description in the header. We want to keep parsing but also let the caller know about the error.
func (*VCFError) Add ¶
Add adds an error and the line number within the vcf where the error took place.
type Variant ¶
type Variant struct { Chromosome string Pos uint64 Id_ string Reference string Alternate []string Quality float32 Filter string Info_ interfaces.Info Format []string Samples []*SampleGenotype Header *Header LineNumber int64 // contains filtered or unexported fields }
Variant holds the information about a single site. It is analogous to a row in a VCF file.
func (*Variant) CIEnd ¶
CIEnd reports the left and right end of an SV using the CIEND tag. It is in BED format, so the end is +1'ed. E.g., if there is no CIEND tag, the return value is v.End() - 1, v.End().
func (*Variant) CIPos ¶
CIPos reports the left and right end of an SV using the CIPOS tag. It is in BED format, so the end is +1'ed. E.g., if there is no CIPOS tag, the return value is v.Start(), v.Start() + 1.
func (*Variant) GetGenotypeField ¶
func (v *Variant) GetGenotypeField(g *SampleGenotype, field string, missing interface{}) (interface{}, error)
GetGenotypeField uses the information from the header to parse the correct type from a genotype field. It returns an interface that can be asserted to the expected type.
func (*Variant) Info ¶
func (v *Variant) Info() interfaces.Info