voiceactivedetector

package
v0.0.0-...-e581b2e Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Jan 18, 2025 License: MIT Imports: 8 Imported by: 0

README

Voice Active Detector Interceptor

Voice Active Detector is a Pion interceptor that lets you detect voice activity on the audio tracks published to the client. It provides the voice activity status and the audio level of each track.

How to use

  1. Import the package

    import "github.com/inlivedev/sfu/pkg/interceptors/voiceactivedetector"
    
  2. Register the interceptor extension in the media engine when creating a PeerConnection

    m := &webrtc.MediaEngine{}
    voiceactivedetector.RegisterAudioLevelHeaderExtension(m)
    
  3. Create a new VoiceActiveDetectorInterceptor

    var vadInterceptor *voiceactivedetector.Interceptor
    
    i := &interceptor.Registry{}
    
    //"github.com/pion/logging"
    log:= logging.NewDefaultLoggerFactory().NewLogger("vad")
    
    // enable voice detector
    vadInterceptorFactory := voiceactivedetector.NewInterceptor(localCtx, log)
    
    vads := make(map[uint32]*voiceactivedetector.VoiceDetector)
    
    // enable voice detector
    vadInterceptorFactory.OnNew(func(i *voiceactivedetector.Interceptor) {
    	vadInterceptor = i
    	i.OnNewVAD(func(vad *voiceactivedetector.VoiceDetector) {
    		vad.OnVoiceDetected(func(pkts []voiceactivedetector.VoicePacketData) {
    			// add to vad map
    			vads[vad.SSRC()] = vad
    		})
    	})
    })
    
    
    
    i.Add(vadInterceptorFactory)
    
  4. Use the interceptor registry to create PeerConnection

    // Create a new RTCPeerConnection
    peerConnection, err := webrtc.NewAPI(webrtc.WithMediaEngine(m), webrtc.WithSettingEngine(settingEngine), webrtc.WithInterceptorRegistry(i)).NewPeerConnection(peerConnectionConfig)
    
  5. Use the voice activity detector to detect voice activity on a remote track

    peerConnection.OnTrack(func(remoteTrack *webrtc.TrackRemote, receiver *webrtc.RTPReceiver) {
    	vad, ok := vads[uint32(remoteTrack.SSRC())]
    	if ok {
    		vad.OnVoiceDetected(func(pkts []voiceactivedetector.VoicePacketData) {
    			// voice detected on remote track
    			voiceActivity := voiceactivedetector.VoiceActivity{
    						TrackID:     remoteTrack.ID(),
    						StreamID:    remoteTrack.StreamID(),
    						SSRC:        uint32(remoteTrack.SSRC()),
    						ClockRate:  remoteTrack.Codec().ClockRate,
    						AudioLevels: pkts,
    					}
    
    			// do something with voice activity
    			// send to datachannel or to user who subscribe to the event
    
    		})
    	}
    })
    

Documentation

Index

Constants

View Source
const ATTRIBUTE_KEY = "audioLevel"

Variables

This section is empty.

Functions

func RegisterAudioLevelHeaderExtension

func RegisterAudioLevelHeaderExtension(m *webrtc.MediaEngine)

Types

type Config

type Config struct {
	// Interval is the interval at which the VAD will send updates
	Interval   time.Duration
	HeadMargin time.Duration
	TailMargin time.Duration
	Threshold  uint8
}

func DefaultConfig

func DefaultConfig() Config

type Interceptor

type Interceptor struct {
	// contains filtered or unexported fields
}

func (*Interceptor) BindLocalStream

func (v *Interceptor) BindLocalStream(info *interceptor.StreamInfo, writer interceptor.RTPWriter) interceptor.RTPWriter

BindLocalStream lets you modify any outgoing RTP packets. It is called once per LocalStream. The returned method will be called once per RTP packet.

func (*Interceptor) BindRTCPReader

func (v *Interceptor) BindRTCPReader(reader interceptor.RTCPReader) interceptor.RTCPReader

BindRTCPReader lets you modify any incoming RTCP packets. It is called once per sender/receiver, however this might change in the future. The returned method will be called once per packet batch.

func (*Interceptor) BindRTCPWriter

func (v *Interceptor) BindRTCPWriter(writer interceptor.RTCPWriter) interceptor.RTCPWriter

BindRTCPWriter lets you modify any outgoing RTCP packets. It is called once per PeerConnection. The returned method will be called once per packet batch.

func (*Interceptor) BindRemoteStream

func (v *Interceptor) BindRemoteStream(info *interceptor.StreamInfo, reader interceptor.RTPReader) interceptor.RTPReader

BindRemoteStream lets you modify any incoming RTP packets. It is called once per RemoteStream. The returned method will be called once per RTP packet.

func (*Interceptor) Close

func (v *Interceptor) Close() error

func (*Interceptor) MapAudioTrack

func (v *Interceptor) MapAudioTrack(ssrc uint32, t *webrtc.TrackRemote) *VoiceDetector

MapAudioTrack adds an audio track to the interceptor.

func (*Interceptor) OnNewVAD

func (v *Interceptor) OnNewVAD(callback func(vad *VoiceDetector))

func (*Interceptor) SetConfig

func (v *Interceptor) SetConfig(config Config)

func (*Interceptor) UnbindLocalStream

func (v *Interceptor) UnbindLocalStream(info *interceptor.StreamInfo)

UnbindLocalStream is called when the Stream is removed. It can be used to clean up any data related to that track.

func (*Interceptor) UnbindRemoteStream

func (v *Interceptor) UnbindRemoteStream(info *interceptor.StreamInfo)

type InterceptorFactory

type InterceptorFactory struct {
	// contains filtered or unexported fields
}

func (*InterceptorFactory) NewInterceptor

func (g *InterceptorFactory) NewInterceptor(_ string) (interceptor.Interceptor, error)

NewInterceptor constructs a new voice activity detector Interceptor.

func (*InterceptorFactory) OnNew

func (g *InterceptorFactory) OnNew(callback func(i *Interceptor))

type VoiceActivity

type VoiceActivity struct {
	TrackID     string            `json:"trackID"`
	StreamID    string            `json:"streamID"`
	SSRC        uint32            `json:"ssrc"`
	ClockRate   uint32            `json:"clockRate"`
	AudioLevels []VoicePacketData `json:"audioLevels"`
}

type VoiceDetector

type VoiceDetector struct {
	VoicePackets []VoicePacketData
	// contains filtered or unexported fields
}

func (*VoiceDetector) OnVoiceDetected

func (v *VoiceDetector) OnVoiceDetected(callback func([]VoicePacketData))

func (*VoiceDetector) SSRC

func (v *VoiceDetector) SSRC() uint32

func (*VoiceDetector) Stop

func (v *VoiceDetector) Stop()

func (*VoiceDetector) UpdateTrack

func (v *VoiceDetector) UpdateTrack(trackID, streamID string)

type VoicePacketData

type VoicePacketData struct {
	SequenceNo uint16 `json:"sequenceNo"`
	Timestamp  uint32 `json:"timestamp"`
	AudioLevel uint8  `json:"audioLevel"`
	IsVoice    bool   `json:"isVoice"`
}

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL