Documentation ¶
Overview ¶
Package filters defines filters for .sam/.bam sequencing pipelines.
Index ¶
- Constants
- Variables
- func AddOrReplaceReadGroup(readGroup utils.StringMap) sam.Filter
- func AddPGLine(newPG utils.StringMap) sam.Filter
- func AddREFID(header *sam.Header) sam.AlignmentFilter
- func CleanSam(header *sam.Header) sam.AlignmentFilter
- func ClearDuplicateFlag(_ *sam.Header) sam.AlignmentFilter
- func CombineVcfOutputs(vcfPath, vcfOutput string)
- func FilterReadsBySampleName(sampleName *string) sam.Filter
- func HaplotypeCallAln(hdr *sam.Header) sam.AlignmentFilter
- func KeepOptionalFields(tags []string) sam.Filter
- func LoadAndCombineDuplicateMetrics(metricsPath string) map[string]*DuplicatesCtr
- func MarkDuplicates(alsoOpticals bool) (sam.Filter, *sync.Map, *sync.Map)
- func MarkOpticalDuplicates(reads *sam.Sam, pairs *sync.Map, opticalPixelDistance int) map[string]*DuplicatesCtr
- func PrintDuplicatesMetrics(metrics, commandLine string, ctrs map[string]*DuplicatesCtr)
- func PrintDuplicatesMetricsToIntermediateFile(name string, ctrs map[string]*DuplicatesCtr)
- func RemoveDuplicateReads(_ *sam.Header) sam.AlignmentFilter
- func RemoveMappingQualityLessThan(mq int) sam.Filter
- func RemoveNonExactMappingReads(_ *sam.Header) sam.AlignmentFilter
- func RemoveNonExactMappingReadsStrict(header *sam.Header) sam.AlignmentFilter
- func RemoveNonOverlappingReads(bed bed.Bed) sam.Filter
- func RemoveOptionalFields(tags []string) sam.Filter
- func RemoveOptionalReads(header *sam.Header) sam.AlignmentFilter
- func RemoveUnmappedReads(_ *sam.Header) sam.AlignmentFilter
- func RemoveUnmappedReadsStrict(_ *sam.Header) sam.AlignmentFilter
- func RenameChromosomes(header *sam.Header) sam.AlignmentFilter
- func ReplaceReferenceSequenceDictionary(dict []utils.StringMap) sam.Filter
- func ReplaceReferenceSequenceDictionaryFromSamFile(samFile string) sam.Filter
- type BaseRecalibrator
- type BaseRecalibratorTables
- func (recal *BaseRecalibratorTables) ApplyBQSR(quantizeLevels int, sqqList []uint8, maxCycle int) sam.Filter
- func (recal *BaseRecalibratorTables) FinalizeBQSRTables()
- func (recal *BaseRecalibratorTables) PrintBQSRTables(name string)
- func (recal *BaseRecalibratorTables) PrintBQSRTablesToIntermediateFile(name string)
- type DuplicatesCtr
- type HaplotypeCaller
Constants ¶
const FixedHighQDMessage = ""
Variables ¶
var ( // PID is the HaplotypeCaller phasing ID. PID = utils.Intern("PID") // PGT is the HaplotypeCaller phasing genotype. PGT = utils.Intern("PGT") // PS is the HaplotypeCaller phasing set ID. PS = utils.Intern("PS") )
var ( // DP represents depth in VCF files. DP = utils.Intern("DP") // ExcessHet represents Excess Heterozygosity in VCF files. ExcessHet = utils.Intern("ExcessHet") // MLEAC represents the maximum likelihood expectation for the allele count in VCF files. MLEAC = utils.Intern("MLEAC") // MLEAF represents the maximum likelihood expectation for the allele frequency in VCF files. MLEAF = utils.Intern("MLEAF") // LowQual represents low quality variants in VCF files. LowQual = utils.Intern("LowQual") // RAW_MQandDP represents raw squared mapping quality and depth in VCF files. RAW_MQandDP = utils.Intern("RAW_MQandDP") // MQ represents root mean square of mapping quality in VCF files. MQ = utils.Intern("MQ") // BaseQRankSum represents rank sum test of alt vs ref base qualities in VCF files. BaseQRankSum = utils.Intern("BaseQRankSum") // MQRankSum represents rank sum test of alt vs ref read mapping qualities in VCF files. MQRankSum = utils.Intern("MQRankSum") // ReadPosRankSum represents rank sum test of alt vs ref read position bias in VCF files. ReadPosRankSum = utils.Intern("ReadPosRankSum") // AC represents allele count in genotypes for each ALT allele in VCF files. AC = utils.Intern("AC") // AF represents allele frequency for each ALT allele in VCF files. AF = utils.Intern("AF") // AN represents total number of alleles in called genotypes in VCF files. AN = utils.Intern("AN") // AD represents allelic depths in VCF files. AD = utils.Intern("AD") // GQ represents genotype quality in VCF files. GQ = utils.Intern("GQ") // MIN_DP represents minimum depths in GVCF blocks in VCF files. MIN_DP = utils.Intern("MIN_DP") // PL represents likelihoods for genotypes in VCF files. PL = utils.Intern("PL") // SB represents strand bias in VCF files. SB = utils.Intern("SB") // FS represents phred-scaled p-value using Fisher's exact test to detect strand bias in VCF files. FS = utils.Intern("FS") // QD represents variant confidence/quality by depth in VCF files. 
QD = utils.Intern("QD") // SOR represents symmetric odds ratio of 2x2 contingency table to detect strand bias in VCF files. SOR = utils.Intern("SOR") )
var ( X0 = utils.Intern("X0") X1 = utils.Intern("X1") XM = utils.Intern("XM") XO = utils.Intern("XO") XG = utils.Intern("XG") )
Symbols for optional fields used for determining exact matches. See http://samtools.github.io/hts-specs/SAMv1.pdf - Section 1.5.
Functions ¶
func AddOrReplaceReadGroup ¶
AddOrReplaceReadGroup returns a filter for adding or replacing the read group both in the Header and in each Alignment.
func AddPGLine ¶
AddPGLine returns a filter for adding a @PG tag to a Header, and ensuring that it is the first one in the chain.
func AddREFID ¶
func AddREFID(header *sam.Header) sam.AlignmentFilter
AddREFID is a filter for adding the refid (index in the reference sequence dictionary) to alignments as temporary values.
func CleanSam ¶
func CleanSam(header *sam.Header) sam.AlignmentFilter
CleanSam is a filter for soft-clipping an alignment at the end of a reference sequence, and setting MAPQ to 0 if unmapped.
func ClearDuplicateFlag ¶
func ClearDuplicateFlag(_ *sam.Header) sam.AlignmentFilter
ClearDuplicateFlag clears the duplicate flag in every read.
func CombineVcfOutputs ¶
func CombineVcfOutputs(vcfPath, vcfOutput string)
CombineVcfOutputs combines multiple VCF files. This is used in the sfm command.
func FilterReadsBySampleName ¶
FilterReadsBySampleName filters out reads that do not belong to exactly one sample. If *sampleName != "", only reads that match this sample name are passed through. If *sampleName == "", check that the header has only one (or no) sample name, and don't filter reads. In the latter case, after use of this filter in a pipeline, *sampleName will contain the unique sample name detected from the header, if any.
func HaplotypeCallAln ¶
func HaplotypeCallAln(hdr *sam.Header) sam.AlignmentFilter
HaplotypeCallAln filters out the reads that the haplotypecaller cannot process.
func KeepOptionalFields ¶
KeepOptionalFields returns a filter for removing all but a list of given optional fields in an alignment.
func LoadAndCombineDuplicateMetrics ¶
func LoadAndCombineDuplicateMetrics(metricsPath string) map[string]*DuplicatesCtr
LoadAndCombineDuplicateMetrics loads partial duplication metrics from file and combines them.
func MarkDuplicates ¶
MarkDuplicates returns a filter for marking duplicate alignments. Depends on the AddREFID filter being called before to fill in the refid.
Duplicate marking is based on an adapted Phred score. In case of ties, the QNAME is used as a tie-breaker.
If alsoOpticals is true, ensure that LIBID is added for all reads.
func MarkOpticalDuplicates ¶
func MarkOpticalDuplicates(reads *sam.Sam, pairs *sync.Map, opticalPixelDistance int) map[string]*DuplicatesCtr
MarkOpticalDuplicates implements a function for calculating duplication metrics for a set of reads.
func PrintDuplicatesMetrics ¶
func PrintDuplicatesMetrics(metrics, commandLine string, ctrs map[string]*DuplicatesCtr)
PrintDuplicatesMetrics writes the duplication metrics for a set of reads to a file.
func PrintDuplicatesMetricsToIntermediateFile ¶
func PrintDuplicatesMetricsToIntermediateFile(name string, ctrs map[string]*DuplicatesCtr)
PrintDuplicatesMetricsToIntermediateFile writes the duplicate metrics to a gob file.
func RemoveDuplicateReads ¶
func RemoveDuplicateReads(_ *sam.Header) sam.AlignmentFilter
RemoveDuplicateReads is a filter for removing duplicate sam-alignment instances, based on FLAG.
func RemoveMappingQualityLessThan ¶
RemoveMappingQualityLessThan is a filter for removing reads that do not match or exceed the given mapping quality.
func RemoveNonExactMappingReads ¶
func RemoveNonExactMappingReads(_ *sam.Header) sam.AlignmentFilter
RemoveNonExactMappingReads is a filter that removes all reads that are not exact matches with the reference (soft-clipping ok), based on CIGAR string (only M and S allowed).
func RemoveNonExactMappingReadsStrict ¶
func RemoveNonExactMappingReadsStrict(header *sam.Header) sam.AlignmentFilter
RemoveNonExactMappingReadsStrict is a filter that removes all reads that are not exact matches with the reference, based on the optional fields X0=1 (unique mapping), X1=0 (no suboptimal hit), XM=0 (no mismatch), XO=0 (no gap opening), XG=0 (no gap extension).
func RemoveNonOverlappingReads ¶
RemoveNonOverlappingReads returns a filter for removing all reads that do not overlap with a set of regions specified by a bed file.
func RemoveOptionalFields ¶
RemoveOptionalFields returns a filter for removing optional fields in an alignment.
func RemoveOptionalReads ¶
func RemoveOptionalReads(header *sam.Header) sam.AlignmentFilter
RemoveOptionalReads is a filter for removing alignments that represent optional information in elPrep.
func RemoveUnmappedReads ¶
func RemoveUnmappedReads(_ *sam.Header) sam.AlignmentFilter
RemoveUnmappedReads is a filter for removing unmapped sam-alignment instances, based on FLAG.
func RemoveUnmappedReadsStrict ¶
func RemoveUnmappedReadsStrict(_ *sam.Header) sam.AlignmentFilter
RemoveUnmappedReadsStrict is a filter for removing unmapped sam-alignment instances, based on FLAG, or POS=0, or RNAME=*.
func RenameChromosomes ¶
func RenameChromosomes(header *sam.Header) sam.AlignmentFilter
RenameChromosomes is a filter for prepending "chr" to the reference sequence names in a Header, and in RNAME and RNEXT in each Alignment.
func ReplaceReferenceSequenceDictionary ¶
ReplaceReferenceSequenceDictionary returns a filter for replacing the reference sequence dictionary in a Header.
func ReplaceReferenceSequenceDictionaryFromSamFile ¶
ReplaceReferenceSequenceDictionaryFromSamFile returns a filter for replacing the reference sequence dictionary in a Header with one parsed from the given SAM/DICT file.
Types ¶
type BaseRecalibrator ¶
type BaseRecalibrator struct {
// contains filtered or unexported fields
}
BaseRecalibrator implements the first step of base recalibration.
func NewBaseRecalibrator ¶
func NewBaseRecalibrator(knownSites []string, referenceFasta *fasta.MappedFasta) (recal *BaseRecalibrator)
NewBaseRecalibrator returns a struct for the first step of base recalibration.
func (*BaseRecalibrator) Recalibrate ¶
func (recal *BaseRecalibrator) Recalibrate(reads *sam.Sam, maxCycle int) *BaseRecalibratorTables
Recalibrate implements the first step of base recalibration.
type BaseRecalibratorTables ¶
type BaseRecalibratorTables struct {
QualityScores, Cycles, Contexts bqsrTable
}
BaseRecalibratorTables is the result of the base recalibration. All subsequent steps, including ApplyBQSR, are based on these tables.
func LoadAndCombineBQSRTables ¶
func LoadAndCombineBQSRTables(bqsrPath string) *BaseRecalibratorTables
LoadAndCombineBQSRTables loads and merges multiple recalibration tables from file into a single, new recalibration table.
func NewBaseRecalibratorTables ¶
func NewBaseRecalibratorTables() BaseRecalibratorTables
NewBaseRecalibratorTables returns a struct for storing the result of the base recalibration.
func (*BaseRecalibratorTables) ApplyBQSR ¶
func (recal *BaseRecalibratorTables) ApplyBQSR(quantizeLevels int, sqqList []uint8, maxCycle int) sam.Filter
ApplyBQSR applies the base recalibration result to the QUAL strings of the given reads.
func (*BaseRecalibratorTables) FinalizeBQSRTables ¶
func (recal *BaseRecalibratorTables) FinalizeBQSRTables()
FinalizeBQSRTables finalizes the first step of base recalibration.
func (*BaseRecalibratorTables) PrintBQSRTables ¶
func (recal *BaseRecalibratorTables) PrintBQSRTables(name string)
PrintBQSRTables creates a recalibration report file.
func (*BaseRecalibratorTables) PrintBQSRTablesToIntermediateFile ¶
func (recal *BaseRecalibratorTables) PrintBQSRTablesToIntermediateFile(name string)
PrintBQSRTablesToIntermediateFile prints the recalibration tables to a gob file.
type DuplicatesCtr ¶
type DuplicatesCtr struct { UnpairedReadsExamined int ReadPairsExamined int SecondaryOrSupplementaryReads int UnmappedReads int UnpairedReadDuplicates int ReadPairDuplicates int ReadPairOpticalDuplicates int // contains filtered or unexported fields }
DuplicatesCtr implements a struct that stores metrics about reads such as the number of (optical) duplicates, unmapped reads, etc.
type HaplotypeCaller ¶
type HaplotypeCaller struct {
// contains filtered or unexported fields
}
HaplotypeCaller contains the relevant parameters for the haplotypecaller, including parameters that influence how assembly regions are determined.
func NewHaplotypeCaller ¶
func NewHaplotypeCaller( reference *fasta.MappedFasta, referenceConfidence string, assemblyRegionPadding int32, activityProfile, assemblyRegions io.Writer, randomSeedFile string, commandLine string) *HaplotypeCaller
NewHaplotypeCaller creates an object that contains the relevant parameters for the haplotypecaller.
func (*HaplotypeCaller) CallVariants ¶
func (hc *HaplotypeCaller) CallVariants(input *sam.Sam, sampleName, vcfOutput string, bedRegions bed.Bed)
CallVariants is the main entry point into the haplotypecaller.
func (*HaplotypeCaller) Close ¶
func (hc *HaplotypeCaller) Close()
Source Files ¶
- assemble-reads.go
- assigngls.go
- bqsr.go
- call-region.go
- doc.go
- graph.go
- haplotypecaller-tables.go
- haplotypecaller.go
- haploutils.go
- mark-duplicates.go
- mark-optical-duplicates.go
- pairhmm.go
- pileup.go
- print-bqsr.go
- randomized-max-qd.go
- realign.go
- ref-confidence.go
- side-channel.go
- simple-filters.go
- sw.go
- unpedantic.go
- utils.go
- variant-combiner.go