AVAudioConverter with AVAudioConverterInputBlock stutters audio after processing
So I believe I figured it out. The converted buffer's frame capacity must be the source buffer's frame capacity divided by the ratio of the input and output sample rates. The complete answer looks like this:
func sendAudio(audioFile: URL, completionHandler: @escaping (Bool, Bool, Data?) -> Void) {
    createSession() { sessionUrl, observeURL, session in
        let file = try! AVAudioFile(forReading: audioFile)
        let formatOfAudio = file.processingFormat
        self.engine = AVAudioEngine()
        guard let input = self.engine.inputNode else {
            print("no input")
            return
        }
        // The audio input format in this case is: <AVAudioFormat 0x61800009d010: 2 ch, 44100 Hz, Float32, non-inter>
        let formatIn = formatOfAudio
        let formatOut = AVAudioFormat(commonFormat: .pcmFormatInt16, sampleRate: 16000, channels: 1, interleaved: true)
        let mixer = AVAudioMixerNode()
        self.engine.attach(mixer)
        mixer.volume = 0.0
        self.engine.attach(self.audioPlayerNode)
        self.engine.connect(self.audioPlayerNode, to: mixer, format: formatIn)
        self.engine.connect(input, to: mixer, format: input.outputFormat(forBus: 0))
        self.engine.connect(mixer, to: self.engine.mainMixerNode, format: formatIn)
        let audioConverter = AVAudioConverter(from: formatIn, to: formatOut)
        // Here is where I adjusted for the sample rate. It's hard-coded here, but you would want to
        // divide the input sample rate by your chosen output sample rate.
        let sampleRateConversionRatio: Float = 44100.0 / 16000.0
        mixer.installTap(onBus: 0, bufferSize: 32000, format: formatIn, block: { buffer, time in
            // And this is where you set the appropriate capacity!
            let capacity = UInt32(Float(buffer.frameCapacity) / sampleRateConversionRatio)
            let convertedBuffer = AVAudioPCMBuffer(pcmFormat: formatOut, frameCapacity: capacity)
            let inputBlock: AVAudioConverterInputBlock = { inNumPackets, outStatus in
                outStatus.pointee = AVAudioConverterInputStatus.haveData
                return buffer
            }
            var error: NSError? = nil
            let status = audioConverter.convert(to: convertedBuffer, error: &error, withInputFrom: inputBlock)
            let myData = convertedBuffer.toData()
            completionHandler(true, false, myData)
        })
        self.audioPlayerNode.scheduleFile(file, at: nil) {
            self.delayWithSeconds(3.0) {
                self.engine.stop()
                mixer.removeTap(onBus: 0)
                completionHandler(true, true, nil)
            }
        }
        do {
            try self.engine.start()
        } catch {
            print(error)
        }
        self.audioPlayerNode.play()
    }
}
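If you don't want to hard-code the 44100/16000 ratio, it can be derived from the two formats. A minimal sketch, assuming the formatIn and formatOut values defined above:

    // Derive the conversion ratio from the formats instead of hard-coding it.
    let sampleRateConversionRatio = Float(formatIn.sampleRate / formatOut.sampleRate)
    // Then, inside the tap block:
    let capacity = UInt32(Float(buffer.frameCapacity) / sampleRateConversionRatio)
    let convertedBuffer = AVAudioPCMBuffer(pcmFormat: formatOut, frameCapacity: capacity)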
For anybody finding this, the actual underlying cause is the incorrect use of AVAudioConverterInputBlock. The destination buffer's capacity doesn't matter as long as it is large enough; however, the block will be called repeatedly until the destination buffer is filled. If your source buffer contains ABC, it will fill up the destination with ABCABCABC... Then, if you pipe that to realtime playback, the chunks get cut off randomly to fit the playback timing, resulting in this weird crackle.
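For illustration, this is the problematic pattern from the answer above: the block unconditionally reports .haveData, so the converter keeps getting the same source buffer handed back on every call until the destination is full:

    // The converter invokes this block repeatedly until convertedBuffer is full.
    // Because it always reports .haveData and returns the same tap buffer,
    // the source audio gets duplicated (ABCABCABC...) in the output.
    let inputBlock: AVAudioConverterInputBlock = { inNumPackets, outStatus in
        outStatus.pointee = AVAudioConverterInputStatus.haveData
        return buffer
    }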
The actual solution is to properly set AVAudioConverterInputStatus to .noDataNow once the buffer has been submitted to the converter. Note that returning .endOfStream will lock up the converter object forever.
var gotData = false
self.converter.convert(to: convertedBuffer, error: nil, withInputFrom: { (_, outStatus) -> AVAudioBuffer? in
    if gotData {
        outStatus.pointee = .noDataNow
        return nil
    }
    gotData = true
    outStatus.pointee = .haveData
    return inputBuffer
})
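Putting both answers together, the tap callback ends up looking roughly like this. This is a sketch, not a drop-in replacement: converter, formatIn, formatOut, completionHandler, and the toData() helper are assumed from the code above.

    mixer.installTap(onBus: 0, bufferSize: 32000, format: formatIn) { buffer, time in
        // Size the destination buffer for the sample-rate change (e.g. 44.1 kHz -> 16 kHz).
        let ratio = Float(formatIn.sampleRate / formatOut.sampleRate)
        let capacity = UInt32(Float(buffer.frameCapacity) / ratio)
        guard let convertedBuffer = AVAudioPCMBuffer(pcmFormat: formatOut, frameCapacity: capacity) else { return }

        // Hand the source buffer to the converter exactly once, then report .noDataNow
        // (not .endOfStream, which would lock the converter up for good).
        var gotData = false
        var error: NSError? = nil
        let status = converter.convert(to: convertedBuffer, error: &error) { _, outStatus in
            if gotData {
                outStatus.pointee = .noDataNow
                return nil
            }
            gotData = true
            outStatus.pointee = .haveData
            return buffer
        }
        if status == .error {
            print(error ?? "conversion failed")
            return
        }
        completionHandler(true, false, convertedBuffer.toData())
    }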