Documentation ¶
Overview ¶
Package gff provides gff parsers and writers.
GFF stands for "general feature format". It is an alternative to GenBank for storing data about genomic sequences. While not often used in synthetic biology research, it is more commonly used in bioinformatics for digesting features of genomic sequences.
This package provides a parser and a writer to convert between the gff file format and the package's Gff struct.
Example (Basic) ¶
This example shows how to open a gff file and search for a gene given its locus tag. We then display the EC number of that particular gene.
package main import ( "fmt" "github.com/bebop/poly/io/gff" ) func main() { sequence, _ := gff.Read("../../data/ecoli-mg1655-short.gff") for _, feature := range sequence.Features { if feature.Attributes["locus_tag"] == "b0003" { fmt.Println(feature.Attributes["EC_number"]) } } }
Output: 2.7.1.39
Index ¶
Examples ¶
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
func Build ¶
Build takes a Gff struct and returns a byte slice representing a gff file to be written out.
Example ¶
package main import ( "bytes" "fmt" "github.com/bebop/poly/io/gff" ) func main() { sequence, _ := gff.Read("../../data/ecoli-mg1655-short.gff") gffBytes, _ := gff.Build(sequence) gffReader := bytes.NewReader(gffBytes) reparsedSequence, _ := gff.Parse(gffReader) fmt.Println(reparsedSequence.Meta.Name) }
Output: U00096.3
func Write ¶
Write takes a Gff struct and a path string and writes out a gff file to that path.
Example ¶
package main import ( "fmt" "os" "path/filepath" "github.com/bebop/poly/io/gff" ) func main() { tmpDataDir, err := os.MkdirTemp("", "data-*") if err != nil { fmt.Println(err.Error()) } defer os.RemoveAll(tmpDataDir) sequence, _ := gff.Read("../../data/ecoli-mg1655-short.gff") tmpGffFilePath := filepath.Join(tmpDataDir, "ecoli-mg1655-short.gff") _ = gff.Write(sequence, tmpGffFilePath) testSequence, _ := gff.Read(tmpGffFilePath) fmt.Println(testSequence.Meta.Name) }
Output: U00096.3
Types ¶
type Feature ¶
type Feature struct { Name string `json:"name"` Source string `json:"source"` Type string `json:"type"` Score string `json:"score"` Strand string `json:"strand"` Phase string `json:"phase"` Attributes map[string]string `json:"attributes"` Location Location `json:"location"` ParentSequence *Gff `json:"-"` }
Feature is a struct that represents a feature in a gff file.
func (Feature) GetSequence ¶
GetSequence takes a feature and returns a sequence string for that feature.
Example ¶
package main import ( "fmt" "github.com/bebop/poly/io/gff" ) func main() { // Sequence for greenflourescent protein (GFP) that we're using as test data for this example. gfpSequence := "ATGGCTAGCAAAGGAGAAGAACTTTTCACTGGAGTTGTCCCAATTCTTGTTGAATTAGATGGTGATGTTAATGGGCACAAATTTTCTGTCAGTGGAGAGGGTGAAGGTGATGCTACATACGGAAAGCTTACCCTTAAATTTATTTGCACTACTGGAAAACTACCTGTTCCATGGCCAACACTTGTCACTACTTTCTCTTATGGTGTTCAATGCTTTTCCCGTTATCCGGATCATATGAAACGGCATGACTTTTTCAAGAGTGCCATGCCCGAAGGTTATGTACAGGAACGCACTATATCTTTCAAAGATGACGGGAACTACAAGACGCGTGCTGAAGTCAAGTTTGAAGGTGATACCCTTGTTAATCGTATCGAGTTAAAAGGTATTGATTTTAAAGAAGATGGAAACATTCTCGGACACAAACTCGAGTACAACTATAACTCACACAATGTATACATCACGGCAGACAAACAAAAGAATGGAATCAAAGCTAACTTCAAAATTCGCCACAACATTGAAGATGGATCCGTTCAACTAGCAGACCATTATCAACAAAATACTCCAATTGGCGATGGCCCTGTCCTTTTACCAGACAACCATTACCTGTCGACACAATCTGCCCTTTCGAAAGATCCCAACGAAAAGCGTGACCACATGGTCCTTCTTGAGTTTGTAACTGCTGCTGGGATTACACATGGCATGGATGAGCTCTACAAATAA" // initialize sequence and feature structs. var sequence gff.Gff var feature gff.Feature // set the initialized sequence struct's sequence. sequence.Sequence = gfpSequence // Set the initialized feature name and sequence location. feature.Location.Start = 0 feature.Location.End = len(sequence.Sequence) // Add the GFP feature to the sequence struct. _ = sequence.AddFeature(&feature) // get the GFP feature sequence string from the sequence struct. featureSequence, _ := feature.GetSequence() // check to see if the feature was inserted properly into the sequence. fmt.Println(gfpSequence == featureSequence) }
Output: true
type Gff ¶
type Gff struct { Meta Meta Features []Feature // will need a GetFeatures interface to standardize Sequence string }
Gff is a struct that represents a gff file.
func Parse ¶
Parse takes a reader over the contents of a gffv3 file and parses it into a Gff struct.
Example ¶
package main import ( "fmt" "os" "github.com/bebop/poly/io/gff" ) func main() { file, _ := os.Open("../../data/ecoli-mg1655-short.gff") sequence, _ := gff.Parse(file) fmt.Println(sequence.Meta.Name) }
Output: U00096.3
func Read ¶
Read takes the path to a gffv3 file and parses it into a Gff struct.
Example ¶
package main import ( "fmt" "github.com/bebop/poly/io/gff" ) func main() { sequence, _ := gff.Read("../../data/ecoli-mg1655-short.gff") fmt.Println(sequence.Meta.Name) }
Output: U00096.3
func (*Gff) AddFeature ¶
AddFeature takes a feature and adds it to the Gff struct.
Example ¶
package main import ( "fmt" "github.com/bebop/poly/io/gff" ) func main() { // Sequence for greenflourescent protein (GFP) that we're using as test data for this example. gfpSequence := "ATGGCTAGCAAAGGAGAAGAACTTTTCACTGGAGTTGTCCCAATTCTTGTTGAATTAGATGGTGATGTTAATGGGCACAAATTTTCTGTCAGTGGAGAGGGTGAAGGTGATGCTACATACGGAAAGCTTACCCTTAAATTTATTTGCACTACTGGAAAACTACCTGTTCCATGGCCAACACTTGTCACTACTTTCTCTTATGGTGTTCAATGCTTTTCCCGTTATCCGGATCATATGAAACGGCATGACTTTTTCAAGAGTGCCATGCCCGAAGGTTATGTACAGGAACGCACTATATCTTTCAAAGATGACGGGAACTACAAGACGCGTGCTGAAGTCAAGTTTGAAGGTGATACCCTTGTTAATCGTATCGAGTTAAAAGGTATTGATTTTAAAGAAGATGGAAACATTCTCGGACACAAACTCGAGTACAACTATAACTCACACAATGTATACATCACGGCAGACAAACAAAAGAATGGAATCAAAGCTAACTTCAAAATTCGCCACAACATTGAAGATGGATCCGTTCAACTAGCAGACCATTATCAACAAAATACTCCAATTGGCGATGGCCCTGTCCTTTTACCAGACAACCATTACCTGTCGACACAATCTGCCCTTTCGAAAGATCCCAACGAAAAGCGTGACCACATGGTCCTTCTTGAGTTTGTAACTGCTGCTGGGATTACACATGGCATGGATGAGCTCTACAAATAA" // initialize sequence and feature structs. var sequence gff.Gff var feature gff.Feature // set the initialized sequence struct's sequence. sequence.Sequence = gfpSequence // Set the initialized feature name and sequence location. feature.Location = gff.Location{} feature.Location.Start = 0 feature.Location.End = len(sequence.Sequence) // Add the GFP feature to the sequence struct. _ = sequence.AddFeature(&feature) // get the GFP feature sequence string from the sequence struct. featureSequence, _ := feature.GetSequence() // check to see if the feature was inserted properly into the sequence. fmt.Println(gfpSequence == featureSequence) }
Output: true
type Location ¶
// Location is a struct that represents a location in a gff file.
type Location struct {
	// Start and End delimit the location on the parent sequence.
	// NOTE(review): the package examples cover a whole sequence with Start 0
	// and End len(sequence), which suggests zero-based, end-exclusive
	// coordinates; confirm against GetSequence.
	Start int `json:"start"`
	End   int `json:"end"`
	// Complement is serialized as "complement".
	// NOTE(review): presumably marks a location on the complementary strand; confirm.
	Complement bool `json:"complement"`
	// Join is serialized as "join".
	// NOTE(review): presumably marks a location assembled from SubLocations; confirm.
	Join bool `json:"join"`
	// FivePrimePartial and ThreePrimePartial are serialized as
	// "five_prime_partial" and "three_prime_partial".
	FivePrimePartial  bool `json:"five_prime_partial"`
	ThreePrimePartial bool `json:"three_prime_partial"`
	// SubLocations nests further locations inside this one.
	SubLocations []Location `json:"sub_locations"`
}
Location is a struct that represents a location in a gff file.
type Meta ¶
// Meta holds meta information about a gff file.
type Meta struct {
	// Name identifies the sequence; the package examples print "U00096.3"
	// for the E. coli test file.
	Name string `json:"name"`
	// Description is serialized as "description".
	// NOTE(review): how the parser fills it is not visible here; confirm.
	Description string `json:"description"`
	// Version is serialized under the JSON key "gff_version".
	Version string `json:"gff_version"`
	// RegionStart and RegionEnd bound the described region.
	// NOTE(review): presumably read from the file's sequence-region
	// directive; confirm against the parser.
	RegionStart int `json:"region_start"`
	RegionEnd   int `json:"region_end"`
	// Size is serialized as "size".
	// NOTE(review): presumably the length of the region or sequence; confirm.
	Size int `json:"size"`
	// SequenceHash and SequenceHashFunction are serialized as
	// "sequence_hash" and "hash_function".
	SequenceHash         string `json:"sequence_hash"`
	SequenceHashFunction string `json:"hash_function"`
	// CheckSum is the blake3 checksum of the parsed file itself. Useful for
	// checking whether incoming genbank/gff files are different.
	CheckSum [32]byte `json:"checkSum"`
}
Meta holds meta information about a gff file.