sound

package

v1.8.1 Latest Latest Go to latest Published: Jun 26, 2022 License: BSD-3-Clause Imports: 19 Imported by: 3

Details

Valid go.mod file
Redistributable license
Tagged version
Stable version
Learn more about best practices

Repository

github.com/emer/auditory

Links

Open Source Insights

Documentation ¶

Index ¶

Constants
func MSecToSamples(ms float64, rate int) int
func Play(fn string, rate int, channnels int, bitdepth int) error
func PlayWav(context *oto.Context, fn string, rate int) error
func PrintMemUsage()
func SamplesToMSec(samples int, rate int) float64
type Endian
type Params
type SndEnv
type SoundSampleType
type Wave

Constants ¶

View Source

// Byte order of the stored samples.
// NOTE(review): these constants are untyped (plain iota) even though the
// package also declares type Endian int32 -- confirm whether they are meant
// to carry that type.
const (
	BigEndian    = iota // Samples are big endian byte order
	LittleEndian        // Samples are little endian byte order
)

View Source

// Numeric representation of the stored samples.
// NOTE(review): these constants are untyped (plain iota) even though the
// package also declares type SoundSampleType int32 -- confirm whether they
// are meant to carry that type.
const (
	Unknown   = iota // Not set
	SignedInt        // Samples are signed integers
	// presumably: samples are unsigned integers (by analogy with SignedInt) -- confirm
	UnSignedInt
	// presumably: samples are floating point values -- confirm
	Float
)

Variables ¶

This section is empty.

Functions ¶

func MSecToSamples ¶ added in v0.9.7

func MSecToSamples(ms float64, rate int) int

MSecToSamples converts milliseconds to samples, in terms of sample_rate

func Play ¶ added in v0.9.16

func Play(fn string, rate int, channnels int, bitdepth int) error

func PlayWav ¶ added in v0.9.16

func PlayWav(context *oto.Context, fn string, rate int) error

func PrintMemUsage ¶ added in v1.6.0

func PrintMemUsage()

func SamplesToMSec ¶ added in v0.9.7

func SamplesToMSec(samples int, rate int) float64

SamplesToMSec converts samples to milliseconds, in terms of sample_rate

Types ¶

type Endian ¶

// Endian is an int32-based type. Presumably it names the sample byte order
// (cf. the BigEndian / LittleEndian constants) -- confirm, since those
// constants are declared untyped.
type Endian int32

type Params ¶ added in v0.9.7

// Params defines the sound input parameters for auditory processing.
//
// The first group of fields is user-settable (most carry a `def` default in
// their tag); the second group is marked `inactive` and, per the comment
// below, is calculated rather than set directly.
//
// NOTE(review): the desc tag of StrideSamples refers to "StrideMS" while the
// field is spelled StrideMs.
type Params struct {
	WinMs       float64 `def:"25" desc:"input window -- number of milliseconds worth of sound to filter at a time"`
	StepMs      float64 `` /* 139-byte string literal not displayed */
	SegmentMs   float64 `` /* 265-byte string literal not displayed */
	StrideMs    float64 `def:"100" desc:"how far to move on each trial"`
	BorderSteps int     `def:"6" view:"+" desc:"overlap with previous and next segment"`
	Channel     int     `` /* 138-byte string literal not displayed */

	// these are calculated
	// (presumably from the millisecond values above and the sample rate,
	// cf. MSecToSamples -- confirm against Init)
	WinSamples     int   `inactive:"+" desc:"number of samples to process each step"`
	StepSamples    int   `inactive:"+" desc:"number of samples to step input by"`
	SegmentSamples int   `inactive:"+" desc:"number of samples in a segment"`
	StrideSamples  int   `inactive:"+" desc:"number of samples converted from StrideMS"`
	SegmentSteps   int   `inactive:"+" desc:"includes border steps on both sides"`
	Steps          []int `inactive:"+" desc:"pre-calculated start position for each step"`
}

Params defines the sound input parameters for auditory processing

type SndEnv ¶ added in v0.9.7

// SndEnv bundles one sound (Sound, Signal), its windowing parameters (Params)
// and the intermediate and final results of processing it: DFT power, mel
// filter-bank output, MFCC values, gabor filtering and kwta inhibition.
//
// The fields fall into three groups separated by blank lines: the raw input
// and its parameters, the spectral (DFT / mel / MFCC) tensors, and the gabor /
// kwta stage.
//
// NOTE(review): two desc tags contain typos that show up wherever the tags are
// displayed: "nyquist liit" on LogPower (should be "limit") and a stray
// trailing apostrophe on GaborSpecs. The gabor output fields also use the
// abbreviated prefix "Gbor" while the filter fields use "Gabor".
type SndEnv struct {
	// "Segment" in var name indicates that the data applies to a segment of samples rather than the entire signal
	Nm     string `desc:"name of this environment"`
	Dsc    string `desc:"description of this environment"`
	On     bool   `desc:"false turns off processing of this sound"`
	Sound  Wave   `desc:"specifications of the raw sensory input"`
	Params Params
	Signal etensor.Float64 `view:"no-inline" desc:" the full sound input"`
	SegCnt int             `desc:"the number of segments in this sound file (based on current segment size)"`
	Window etensor.Float64 `inactive:"+" desc:" [Input.WinSamples] the raw sound input, one channel at a time"`

	DFT             dft.Params
	Power           etensor.Float64 `view:"-" desc:" power of the dft, up to the nyquist limit frequency (1/2 input.WinSamples)"`
	LogPower        etensor.Float64 `view:"-" desc:" log power of the dft, up to the nyquist liit frequency (1/2 input.WinSamples)"`
	PowerSegment    etensor.Float64 `view:"no-inline" desc:" full segment's worth of power of the dft, up to the nyquist limit frequency (1/2 input.win_samples)"`
	LogPowerSegment etensor.Float64 `` /* 128-byte string literal not displayed */
	Mel             mel.Params      `view:"no-inline"`
	MelFBank        etensor.Float64 `` /* 150-byte string literal not displayed */
	MelFBankSegment etensor.Float64 `view:"no-inline" desc:" full segment's worth of mel feature-bank output"`
	MelFilters      etensor.Float64 `view:"no-inline" desc:" the actual filters"`
	Energy          etensor.Float64 `view:"no-inline" desc:" sum of log power per segment step"`
	MFCCDCT         etensor.Float64 `` /* 140-byte string literal not displayed */
	MFCCSegment     etensor.Float64 `` /* 160-byte string literal not displayed */
	MFCCDeltas      etensor.Float64 `view:"no-inline" desc:" "`

	GaborSpecs    []agabor.Filter  `view:"no-inline" desc:" a set of gabor filter specifications, one spec per filter'"`
	GaborFilters  agabor.FilterSet `desc:"the actual gabor filters, the first spec determines the size of all filters in the set"`
	GaborTab      etable.Table     `view:"no-inline" desc:"gabor filter table (view only)"`
	GborOutPoolsX int              `view:"+" desc:" the number of neuron pools along the time dimension in the input layer"`
	GborOutPoolsY int              `view:"+" desc:" the number of neuron pools along the frequency dimension in the input layer"`
	GborOutUnitsX int              `` /* 132-byte string literal not displayed */
	GborOutUnitsY int              `view:"+" desc:" the number of neurons in a pool along the frequency dimension in the input layer"`
	GborOutput    etensor.Float32  `view:"no-inline" desc:" raw output of Gabor -- full segment's worth of gabor steps"`
	GborKwta      etensor.Float32  `view:"no-inline" desc:" post-kwta output of full segment's worth of gabor steps"`
	Inhibs        fffb.Inhibs      `view:"no-inline" desc:"inhibition values for A1 KWTA"`
	ExtGi         etensor.Float32  `view:"no-inline" desc:"A1 simple extra Gi from neighbor inhibition tensor"`
	NeighInhib    kwta.NeighInhib  `` /* 155-byte string literal not displayed */
	Kwta          kwta.KWTA        `desc:"kwta parameters, using FFFB form"`
	KwtaPool      bool             `desc:"if Kwta.On == true, call KwtaPool (true) or KwtaLayer (false)"`
	ByTime        bool             `desc:"display the gabor filtering result by time and then by filter, default is to order by filter and then time"`
}

func (*SndEnv) AdjustForSilence ¶ added in v1.7.0

func (se *SndEnv) AdjustForSilence(add, existing float64) (offset int)

AdjustForSilence trims or adds silence. add is the amount of random silence that should precede the start of the sequence. existing is the amount of silence preexisting at the start of the sound. offset is the amount of silence trimmed from or added to the existing silence. The add and existing values are in milliseconds.

func (*SndEnv) ApplyGabor ¶ added in v0.9.7

func (se *SndEnv) ApplyGabor() (tsr *etensor.Float32)

ApplyGabor convolves the gabor filters with the mel output

func (*SndEnv) ApplyKwta ¶ added in v0.9.7

func (se *SndEnv) ApplyKwta()

ApplyKwta runs the kwta algorithm on the raw activations

func (*SndEnv) ApplyNeighInhib ¶ added in v1.7.3

func (se *SndEnv) ApplyNeighInhib()

ApplyNeighInhib - each unit gets inhibition from same feature in nearest orthogonal neighbors

func (*SndEnv) Defaults ¶ added in v0.9.7

func (se *SndEnv) Defaults()

Defaults

func (*SndEnv) Desc ¶ added in v0.9.7

func (se *SndEnv) Desc() string

func (*SndEnv) Init ¶ added in v0.9.7

func (se *SndEnv) Init() (err error)

Init sets various sound processing params based on default params and user overrides

func (*SndEnv) Name ¶ added in v0.9.7

func (se *SndEnv) Name() string

func (*SndEnv) Pad ¶ added in v0.9.7

func (se *SndEnv) Pad(signal []float64, value float64) (padded []float64)

Pad pads the signal so that the length of signal divided by stride has no remainder

func (*SndEnv) ParamDefaults ¶ added in v0.9.7

func (se *SndEnv) ParamDefaults()

ParamDefaults initializes the Input

func (*SndEnv) ProcessSegment ¶ added in v0.9.7

func (se *SndEnv) ProcessSegment(segment, add int)

ProcessSegment processes the entire segment's input by processing a small overlapping set of samples on each pass. The add argument allows for compensation if there are multiple sounds of different duration going to different input layers of the network. For example, with durations of 80 and 120 ms, add half the difference (e.g. 20 ms) so the sounds are centered on the same moment of sound.

func (*SndEnv) ProcessStep ¶ added in v0.9.7

func (se *SndEnv) ProcessStep(segment, step, add int) error

ProcessStep processes a step worth of sound input from the current input_pos, and increments input_pos by input.step_samples. It processes the data by doing a Fourier transform and computing the power spectrum, then applies mel filters to get the frequency bands that mimic the non-linear human perception of sound.

func (*SndEnv) SndToWindow ¶ added in v0.9.7

func (se *SndEnv) SndToWindow(start int) error

SndToWindow gets sound from the signal (i.e. the slice of input values) at given position

func (*SndEnv) Tail ¶ added in v0.9.7

func (se *SndEnv) Tail(signal []float64) int

Tail returns the number of samples that remain beyond the last full stride

func (*SndEnv) ToTensor ¶ added in v1.4.0

func (se *SndEnv) ToTensor() bool

ToTensor

type SoundSampleType ¶

// SoundSampleType is an int32-based type returned by Wave.SampleType.
// Presumably its values are the Unknown / SignedInt / UnSignedInt / Float
// constants -- confirm, since those constants are declared untyped.
type SoundSampleType int32

type Wave ¶

// Wave wraps a single integer audio buffer; its methods report the buffer's
// channel count, sample rate, sample size and sample type, and load/write
// sound files.
type Wave struct {
	// Buf is the underlying integer sample buffer.
	// presumably populated by Load -- confirm
	Buf *audio.IntBuffer `inactive:"+"`
}

func (*Wave) Channels ¶

func (snd *Wave) Channels() int

Channels returns the number of channels in the wav data, or 0 if snd is nil

func (*Wave) GetFloatAtIdx ¶

func (snd *Wave) GetFloatAtIdx(buf *audio.IntBuffer, idx int) float64

GetFloatAtIdx

func (*Wave) Load ¶

func (snd *Wave) Load(fn string) error

Load loads the sound file and decodes it

func (*Wave) SampleRate ¶

func (snd *Wave) SampleRate() int

SampleRate returns the sample rate of the sound, or 0 if snd is nil

func (*Wave) SampleSize ¶

func (snd *Wave) SampleSize() int

SampleSize returns the sample size of the sound, or 0 if snd is nil

func (*Wave) SampleType ¶

func (snd *Wave) SampleType() SoundSampleType

todo: return to this SampleType

func (*Wave) SoundToTensor ¶

func (snd *Wave) SoundToTensor(samples *etensor.Float64) bool

SoundToTensor converts sound data to floating point etensor with normalized -1..1 values (unless sound is stored as a float natively, in which case it is not guaranteed to be normalized) -- for use in signal processing routines -- can optionally select a specific channel (formats sound_data as a single-dimensional matrix of frames size), and -1 gets all available channels (formats sound_data as two-dimensional matrix with outer dimension as channels and inner dimension frames).

func (*Wave) WriteWave ¶

func (snd *Wave) WriteWave(fn string) error

WriteWave encodes the signal data and writes it to file using the sample rate and other values of the buf object

Source Files ¶

View all Source files

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL