seqio

package
v0.28.1 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Sep 28, 2021 License: MIT Imports: 13 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

View Source
// Names of qualifiers, grouped by the syntax used to write their value.
var (
	// QuotedQualifierNames lists the names of quoted qualifiers, i.e. those
	// written as /name="value".
	// NOTE(review): presumably the set consulted by IsQuotedQualifier and
	// extended by RegisterQuotedQualifier — confirm against the implementation.
	QuotedQualifierNames = []string{
		"allele", "altitude", "artificial_location", "bio_material",
		"bound_moiety", "cell_line", "cell_type", "chromosome",
		"clone", "clone_lib", "collected_by", "collection_date",
		"country", "cultivar", "culture_collection", "db_xref",
		"dev_stage", "EC_number", "ecotype", "exception",
		"experiment", "frequency", "function", "gap_type", "gene",
		"gene_synonym", "haplogroup", "haplotype", "host",
		"identified_by", "inference", "isolate", "isolation_source",
		"lab_host", "lat_lon", "linkage_evidence", "locus_tag", "map",
		"mating_type", "metagenome_source", "mobile_element_type",
		"mol_type", "ncRNA_class", "note", "old_locus_tag", "operon",
		"organelle", "organism", "PCR_conditions", "PCR_primers",
		"phenotype", "plasmid", "pop_variant", "product",
		"protein_id", "pseudogene", "recombination_class",
		"regulatory_class", "replace", "rpt_family", "rpt_unit_seq",
		"satellite", "segment", "serotype", "serovar", "sex",
		"specimen_voucher", "standard_name", "strain", "sub_clone",
		"submitter_seqid", "sub_species", "sub_strain", "tissue_lib",
		"tissue_type", "translation", "type_material", "variety",
	}

	// LiteralQualifierNames lists the names of literal qualifiers, i.e. those
	// written as /name=value without quotes.
	// NOTE(review): presumably the set consulted by IsLiteralQualifier and
	// extended by RegisterLiteralQualifier — confirm against the implementation.
	LiteralQualifierNames = []string{
		"anticodon", "citation", "codon_start", "compare",
		"direction", "estimated_length", "mod_base", "number",
		"rpt_type", "rpt_unit_range", "tag_peptide", "transl_except",
		"transl_table",
	}

	// ToggleQualifierNames lists the names of toggle qualifiers, i.e. those
	// written as a bare /name with no value.
	// NOTE(review): presumably the set consulted by IsToggleQualifier and
	// extended by RegisterToggleQualifier — confirm against the implementation.
	ToggleQualifierNames = []string{
		"environmental_sample", "focus", "germline", "macronuclear",
		"partial", "proviral", "pseudo", "rearranged",
		"ribosomal_slippage", "transgenic", "trans_splicing",
	}
)

Names of qualifiers.

View Source
// FastaParser attempts to parse a single FASTA file entry. On a match the
// result value is set to a Fasta whose Desc is the token of the second child
// (the line following the leading '>') and whose Data is the token of the
// third child with every newline removed.
var FastaParser = pars.Seq(
	'>', pars.Line, pars.Until(pars.Any('>', pars.End)),
).Map(func(result *pars.Result) error {
	children := result.Children
	entry := Fasta{
		Desc: string(children[1].Token),
		// Sequence lines are wrapped in the file; strip the line breaks so
		// Data holds one contiguous run of residues.
		Data: bytes.ReplaceAll(children[2].Token, []byte{'\n'}, nil),
	}
	result.SetValue(entry)
	return nil
})

FastaParser attempts to parse a single FASTA file entry.

Functions

func AddPrefix

func AddPrefix(s, prefix string) string

AddPrefix adds the given prefix after each newline.

func FlatFileSplit

func FlatFileSplit(s string) []string

FlatFileSplit splits the string with the flatfile convention.

func GenBankParser

func GenBankParser(state *pars.State, result *pars.Result) error

GenBankParser attempts to parse a single GenBank record.

func INSDCTableParser added in v0.27.1

func INSDCTableParser(prefix string) pars.Parser

INSDCTableParser attempts to match an INSDC feature table.

func IsLiteralQualifier added in v0.27.1

func IsLiteralQualifier(name string) bool

IsLiteralQualifier tests if the given qualifier name is a literal qualifier.

func IsQuotedQualifier added in v0.27.1

func IsQuotedQualifier(name string) bool

IsQuotedQualifier tests if the given qualifier name is a quoted qualifier.

func IsToggleQualifier added in v0.27.1

func IsToggleQualifier(name string) bool

IsToggleQualifier tests if the given qualifier name is a toggle qualifier.

func QualifierParser added in v0.27.1

func QualifierParser(prefix string) pars.Parser

QualifierParser attempts to match a single qualifier name-value pair.

func RegisterLiteralQualifier added in v0.27.1

func RegisterLiteralQualifier(names ...string)

RegisterLiteralQualifier registers the given qualifier names as being a literal qualifier (i.e. /name=value).

func RegisterQuotedQualifier added in v0.27.1

func RegisterQuotedQualifier(names ...string)

RegisterQuotedQualifier registers the given qualifier names as being a quoted qualifier (i.e. /name="value").

func RegisterToggleQualifier added in v0.27.1

func RegisterToggleQualifier(names ...string)

RegisterToggleQualifier registers the given qualifier names as being a toggle qualifier (i.e. /name).

Types

type AutoWriter added in v0.28.0

// AutoWriter provides a WriteSeq method with the same signature as the one
// required by the SeqWriter interface.
// NOTE(review): how the output format is selected is not visible here —
// confirm against the implementation before relying on it.
type AutoWriter struct {
	// contains filtered or unexported fields
}

func (AutoWriter) WriteSeq added in v0.28.0

func (w AutoWriter) WriteSeq(seq gts.Sequence) (int, error)

type Contig added in v0.23.0

// Contig represents a contig field. Its String method renders it as text.
type Contig struct {
	// NOTE(review): presumably the accession of the sequence the contig
	// refers to — confirm.
	Accession string
	// NOTE(review): presumably the span of that sequence covered by the
	// contig — confirm.
	Region    gts.Segment
}

Contig represents a contig field.

func (Contig) String added in v0.23.0

func (contig Contig) String() string

String satisfies the fmt.Stringer interface.

type Date added in v0.11.7

// Date represents a date stamp for record entries. It can be created from a
// time.Time with FromTime, interpreted from a string with AsDate, and
// converted back into a time.Time with ToTime.
type Date struct {
	Year  int
	Month time.Month
	Day   int
}

Date represents a date stamp for record entries.

func AsDate added in v0.24.2

func AsDate(s string) (Date, error)

AsDate interprets the given string as a Date.

func FromTime added in v0.11.7

func FromTime(t time.Time) Date

FromTime creates a Date object from a time.Time object.

func (Date) ToTime added in v0.11.7

func (d Date) ToTime() time.Time

ToTime converts the Date object into a time.Time object.

type Dictionary added in v0.11.6

// Dictionary represents an ordered collection of key-value pairs. Entries are
// accessed through the Get, Set and Del methods.
type Dictionary []Pair

Dictionary represents an ordered collection of key-value pairs.

func (*Dictionary) Del added in v0.11.6

func (d *Dictionary) Del(key string)

Del removes the value associated to the given key.

func (*Dictionary) Get added in v0.11.6

func (d *Dictionary) Get(key string) []string

Get returns the values associated with the given key.

func (*Dictionary) Set added in v0.11.6

func (d *Dictionary) Set(key, value string)

Set the value associated to the given key.

type ExtraField added in v0.24.3

// ExtraField represents an uncommon field of a genome flat-file.
// GenBankExtraField creates one with a default formatter.
type ExtraField struct {
	Name   string
	Value  string
	// Format has the same signature as FieldFormatter.
	// NOTE(review): presumably called by String with Name and Value to
	// render the field — confirm.
	Format func(name, value string) string
}

ExtraField represents an uncommon field of a genome flat-file.

func GenBankExtraField added in v0.24.3

func GenBankExtraField(name, value string) ExtraField

GenBankExtraField creates a new extra field with a default formatter.

func (ExtraField) String added in v0.24.3

func (field ExtraField) String() string

String satisfies the fmt.Stringer interface.

type Fasta

// Fasta represents a FASTA format sequence object.
type Fasta struct {
	// Desc is the description of the entry. FastaParser fills it with the
	// text matched by pars.Line after the leading '>'.
	Desc string
	// Data holds the sequence. FastaParser stores the entry body here with
	// every newline removed.
	Data []byte
}

Fasta represents a FASTA format sequence object.

func (Fasta) Bytes

func (f Fasta) Bytes() []byte

Bytes returns the byte representation of the sequence.

func (Fasta) Features

func (f Fasta) Features() gts.FeatureSlice

Features returns the feature table of the sequence.

func (Fasta) Info

func (f Fasta) Info() interface{}

Info returns the metadata of the sequence.

func (Fasta) WriteTo added in v0.28.0

func (f Fasta) WriteTo(w io.Writer) (int64, error)

WriteTo satisfies the io.WriterTo interface.

type FastaWriter added in v0.28.0

// FastaWriter writes a gts.Sequence to an io.Writer in FASTA format. Its
// WriteSeq method satisfies the SeqWriter interface.
type FastaWriter struct {
	// contains filtered or unexported fields
}

FastaWriter writes a gts.Sequence to an io.Writer in FASTA format.

func (FastaWriter) WriteSeq added in v0.28.0

func (w FastaWriter) WriteSeq(seq gts.Sequence) (int, error)

WriteSeq satisfies the seqio.SeqWriter interface.

type FieldFormatter added in v0.24.3

// FieldFormatter represents a function for formatting a field. It receives
// the field's name and value and returns the formatted string.
type FieldFormatter func(name, value string) string

FieldFormatter represents a function for formatting a field.

type FileType

// FileType represents a file type. A FileType can be obtained from a filename
// extension with Detect or from a file type name with ToFileType.
type FileType int

FileType represents a file type.

// Available file types in GTS.
const (
	// DefaultFile is the zero value of FileType.
	DefaultFile FileType = iota
	FastaFile
	FastqFile
	GenBankFile
	EMBLFile
)

Available file types in GTS.

func Detect

func Detect(filename string) FileType

Detect returns the FileType associated to extension of the given filename.

func ToFileType

func ToFileType(name string) FileType

ToFileType converts the file type name string to a FileType.

type GenBank

// GenBank represents a GenBank sequence record.
type GenBank struct {
	// Fields holds the fields of the record other than the features and
	// sequence.
	Fields GenBankFields
	// Table is the feature table of the record.
	Table  gts.FeatureSlice
	// Origin is the GenBank sequence origin value of the record.
	Origin *Origin
}

GenBank represents a GenBank sequence record.

func (GenBank) Bytes

func (gb GenBank) Bytes() []byte

Bytes returns the byte representation of the sequence.

func (GenBank) Features

func (gb GenBank) Features() gts.FeatureSlice

Features returns the feature table of the sequence.

func (GenBank) Info

func (gb GenBank) Info() interface{}

Info returns the metadata of the sequence.

func (GenBank) Len

func (gb GenBank) Len() int

Len returns the length of the sequence.

func (GenBank) String

func (gb GenBank) String() string

String satisfies the fmt.Stringer interface.

func (GenBank) WithBytes

func (gb GenBank) WithBytes(p []byte) gts.Sequence

WithBytes creates a shallow copy of the given Sequence object and swaps the byte representation with the given byte slice.

func (GenBank) WithFeatures

func (gb GenBank) WithFeatures(ff []gts.Feature) gts.Sequence

WithFeatures creates a shallow copy of the given Sequence object and swaps the feature table with the given features.

func (GenBank) WithInfo

func (gb GenBank) WithInfo(info interface{}) gts.Sequence

WithInfo creates a shallow copy of the given Sequence object and swaps the metadata with the given value.

func (GenBank) WithTopology added in v0.12.0

func (gb GenBank) WithTopology(t gts.Topology) gts.Sequence

WithTopology creates a shallow copy of the given Sequence object and swaps the topology value with the given value.

func (GenBank) WriteTo

func (gb GenBank) WriteTo(w io.Writer) (int64, error)

WriteTo satisfies the io.WriterTo interface.

type GenBankFields

// GenBankFields represents the fields of a GenBank record other than the
// features and sequence.
type GenBankFields struct {
	// NOTE(review): this first group presumably mirrors the LOCUS line of a
	// GenBank record — confirm against the parser.
	LocusName string
	Molecule  gts.Molecule
	Topology  gts.Topology
	Division  string
	Date      Date

	Definition string
	Accession  string
	Version    string
	DBLink     Dictionary // Ordered key-value pairs.
	Keywords   []string
	Source     Organism
	References []Reference
	Comments   []string
	Extra      []ExtraField // Uncommon fields of the flat-file.
	Contig     Contig

	Region gts.Region // Appears in sliced files.
}

GenBankFields represents the fields of a GenBank record other than the features and sequence.

func (GenBankFields) ID added in v0.21.0

func (gbf GenBankFields) ID() string

ID returns the ID of the sequence.

func (GenBankFields) Slice added in v0.23.0

func (gbf GenBankFields) Slice(start, end int) interface{}

Slice returns the metadata sliced with the given region.

func (GenBankFields) String

func (gbf GenBankFields) String() string

String satisfies the fmt.Stringer interface.

type GenBankWriter added in v0.28.0

// GenBankWriter writes a gts.Sequence to an io.Writer in GenBank format. Its
// WriteSeq method satisfies the SeqWriter interface.
type GenBankWriter struct {
	// contains filtered or unexported fields
}

GenBankWriter writes a gts.Sequence to an io.Writer in GenBank format.

func (GenBankWriter) WriteSeq added in v0.28.0

func (w GenBankWriter) WriteSeq(seq gts.Sequence) (int, error)

WriteSeq satisfies the seqio.SeqWriter interface.

type INSDCFormatter added in v0.27.1

// INSDCFormatter formats the features in Table with the given prefix and
// depth. The formatted text is available through String and WriteTo.
type INSDCFormatter struct {
	Table  []gts.Feature
	// NOTE(review): presumably the string prepended to each output line —
	// confirm.
	Prefix string
	// NOTE(review): presumably the column at which feature locations and
	// qualifiers start — confirm.
	Depth  int
}

INSDCFormatter formats a Feature object with the given prefix and depth.

func (INSDCFormatter) String added in v0.27.1

func (fmtr INSDCFormatter) String() string

String satisfies the fmt.Stringer interface.

func (INSDCFormatter) WriteTo added in v0.27.1

func (fmtr INSDCFormatter) WriteTo(w io.Writer) (int64, error)

WriteTo satisfies the io.WriterTo interface.

type Organism added in v0.11.6

// Organism represents an organism of a record.
type Organism struct {
	Species string
	Name    string
	// NOTE(review): presumably the taxonomic lineage, one entry per rank —
	// confirm.
	Taxon   []string
}

Organism represents an organism of a record.

type Origin

// Origin represents a GenBank sequence origin value. NewOrigin formats a byte
// slice into the origin format and Bytes converts it back into a byte slice.
type Origin struct {
	Buffer []byte
	// NOTE(review): presumably records whether Buffer has already been
	// converted from the origin format into plain sequence bytes — confirm.
	Parsed bool
}

Origin represents a GenBank sequence origin value.

func NewOrigin

func NewOrigin(p []byte) *Origin

NewOrigin formats a byte slice into GenBank sequence origin format.

func (*Origin) Bytes added in v0.16.0

func (o *Origin) Bytes() []byte

Bytes converts the GenBank sequence origin into a byte slice.

func (Origin) Len added in v0.16.0

func (o Origin) Len() int

Len returns the actual sequence length.

func (Origin) String added in v0.16.0

func (o Origin) String() string

String satisfies the fmt.Stringer interface.

type Pair added in v0.11.6

// Pair represents a key-value pair of strings. It is the element type of
// Dictionary.
type Pair struct {
	Key   string
	Value string
}

Pair represents a key-value pair of strings.

type QualifierFormatter added in v0.27.1

// QualifierFormatter formats a QualifierIO object with the given prefix. The
// formatted text is available through String and WriteTo. QualifierIO.Format
// creates one for a qualifier.
type QualifierFormatter struct {
	Qualifier QualifierIO
	Prefix    string
}

QualifierFormatter formats a QualifierIO object with the given prefix.

func (QualifierFormatter) String added in v0.27.1

func (qf QualifierFormatter) String() string

String satisfies the fmt.Stringer interface.

func (QualifierFormatter) WriteTo added in v0.27.1

func (qf QualifierFormatter) WriteTo(w io.Writer) (int64, error)

WriteTo satisfies the io.WriterTo interface.

type QualifierIO added in v0.27.1

// QualifierIO represents a single qualifier name-value pair. Use Unpack to
// obtain the name and value strings.
// NOTE(review): presumably index 0 holds the name and index 1 the value —
// confirm.
type QualifierIO [2]string

QualifierIO represents a single qualifier name-value pair.

func (QualifierIO) Format added in v0.27.1

func (q QualifierIO) Format(prefix string) QualifierFormatter

Format creates a QualifierFormatter object for the qualifier with the given prefix.

func (QualifierIO) String added in v0.27.1

func (q QualifierIO) String() string

String satisfies the fmt.Stringer interface.

func (QualifierIO) Unpack added in v0.27.1

func (q QualifierIO) Unpack() (string, string)

Unpack returns the name and value strings of the QualifierIO.

type QualifierType added in v0.27.1

// QualifierType represents the type of qualifier. GetQualifierType returns
// the type for a given qualifier name.
type QualifierType int

QualifierType represents the type of qualifier.

// Available qualifier types.
const (
	// QuotedQualifier is a qualifier written as /name="value". It is the
	// zero value of QualifierType.
	QuotedQualifier QualifierType = iota
	// LiteralQualifier is a qualifier written as /name=value.
	LiteralQualifier
	// ToggleQualifier is a qualifier written as /name.
	ToggleQualifier
	// NOTE(review): presumably what GetQualifierType returns for a name in
	// none of the categories above — confirm.
	UnknownQualifier
)

Available qualifier types.

func GetQualifierType added in v0.27.1

func GetQualifierType(name string) QualifierType

GetQualifierType returns the qualifier type of the given qualifier name.

type Reference added in v0.11.6

// Reference represents a reference of a record.
type Reference struct {
	Number  int
	Info    string
	Authors string
	Group   string
	Title   string
	Journal string
	// NOTE(review): presumably maps a cross-reference database name to the
	// identifier in that database — confirm.
	Xref    map[string]string
	Comment string
}

Reference represents a reference of a record.

type Scanner

// Scanner represents a sequence file scanner. Scan advances the scanner,
// Value returns the most recently scanned sequence, and Err returns the first
// non-EOF error that was encountered. Create one with NewScanner or
// NewAutoScanner.
type Scanner struct {
	// contains filtered or unexported fields
}

Scanner represents a sequence file scanner.

func NewAutoScanner

func NewAutoScanner(r io.Reader) *Scanner

NewAutoScanner creates a new sequence scanner which will automatically detect the sequence format from a list of known parsers on the first scan.

func NewScanner

func NewScanner(p pars.Parser, r io.Reader) *Scanner

NewScanner creates a new sequence scanner.

func (Scanner) Err

func (s Scanner) Err() error

Err returns the first non-EOF error that was encountered by the scanner.

func (*Scanner) Scan

func (s *Scanner) Scan() bool

Scan advances the scanner using the given parser. If the parser is not yet specified, the first scan will match one of the known parsers.

func (Scanner) Value

func (s Scanner) Value() gts.Sequence

Value returns the most recently scanned sequence value.

type SeqWriter added in v0.28.0

// SeqWriter is the interface that wraps the WriteSeq method. FastaWriter and
// GenBankWriter document their WriteSeq methods as satisfying it, and
// NewWriter returns a SeqWriter for a given io.Writer and FileType.
type SeqWriter interface {
	// WriteSeq takes a sequence and returns an int and an error.
	// NOTE(review): the int is presumably the number of bytes written —
	// confirm.
	WriteSeq(seq gts.Sequence) (int, error)
}

func NewWriter added in v0.28.0

func NewWriter(w io.Writer, filetype FileType) SeqWriter

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL