MFTransform encoder->ProcessInput returns E_FAIL - winapi

When I run encoder->ProcessInput(stream_id, sample.Get(), 0) I get an E_FAIL ("Unspecified error") result, which isn't very helpful.
I am trying to either (1) figure out what the real error is and/or (2) get past this unspecified error.
Ultimately, my goal is achieving this: http://alax.info/blog/1716
Here's the gist of what I am doing:
(Error occurs in this block)
// Encodes the given D3D11 texture with the MFT returned by get_encoder().
// This excerpt covers encoder setup, the streaming-start messages and the
// METransformNeedInput branch of the event loop; the rest of the function is
// not part of the excerpt.
void encode_frame(ComPtr<ID3D11Texture2D> texture) {
// NOTE(review): `error` receives most HRESULTs below but is never inspected
// in this excerpt, so failures of those calls go unnoticed.
_com_error error = NULL;
IMFTransform *encoder = nullptr;
encoder = get_encoder();
if (!encoder) {
cout << "Did not get a valid encoder to utilize\n";
return;
}
cout << "Making it Direct3D aware...\n";
setup_D3_aware_mft(encoder);
cout << "Setting up input/output media types...\n";
setup_media_types(encoder);
error = encoder->ProcessMessage(MFT_MESSAGE_COMMAND_FLUSH, NULL); // flush all stored data
error = encoder->ProcessMessage(MFT_MESSAGE_NOTIFY_BEGIN_STREAMING, NULL);
error = encoder->ProcessMessage(MFT_MESSAGE_NOTIFY_START_OF_STREAM, NULL); // first sample is about to be processed, req for async
cout << "Encoding image...\n";
// The encoder is driven through its event generator interface.
IMFMediaEventGenerator *event_generator = nullptr;
error = encoder->QueryInterface(&event_generator);
while (true) {
// NOTE(review): `event` and `event_generator` are raw COM pointers and no
// Release() call is visible in this excerpt.
IMFMediaEvent *event = nullptr;
MediaEventType type;
error = event_generator->GetEvent(0, &event);
// NOTE(review): `event` is dereferenced without checking the GetEvent result.
error = event->GetType(&type);
uint32_t stream_id = get_stream_id(encoder); // Likely just going to be 0
uint32_t frame = 1;
uint64_t sample_duration = 0;
ComPtr<IMFSample> sample = nullptr;
IMFMediaBuffer *mbuffer = nullptr;
DWORD length = 0;
uint32_t img_size = 0;
// img_size is only consumed by the commented-out fake-buffer test below.
MFCalculateImageSize(desktop_info.input_sub_type, desktop_info.width, desktop_info.height, &img_size);
switch (type) {
case METransformNeedInput:
// Wrap the D3D11 texture in a DXGI surface buffer and attach it to a new sample.
ThrowIfFailed(MFCreateDXGISurfaceBuffer(__uuidof(ID3D11Texture2D), texture.Get(), 0, false, &mbuffer),
mbuffer, "Failed to generate a media buffer");
ThrowIfFailed(MFCreateSample(&sample), sample.Get(), "Couldn't create sample buffer");
ThrowIfFailed(sample->AddBuffer(mbuffer), sample.Get(), "Couldn't add buffer");
// Test (delete this) - fake buffer
/*byte *buffer_data;
MFCreateMemoryBuffer(img_size, &mbuffer);
mbuffer->Lock(&buffer_data, NULL, NULL);
mbuffer->GetCurrentLength(&length);
memset(buffer_data, 0, img_size);
mbuffer->Unlock();
mbuffer->SetCurrentLength(img_size);
sample->AddBuffer(mbuffer);*/
// Per-frame duration derived from the configured frame rate (fps / 1).
MFFrameRateToAverageTimePerFrame(desktop_info.fps, 1, &sample_duration);
sample->SetSampleDuration(sample_duration);
// NOTE(review): only the duration is set; no SetSampleTime() call is visible
// before ProcessInput. Encoders generally expect a presentation time on each
// input sample - confirm whether this contributes to the failure.
// ERROR
ThrowIfFailed(encoder->ProcessInput(stream_id, sample.Get(), 0), sample.Get(), "ProcessInput failed.");
I setup my media types like this:
// Configures the encoder's output and input media types from the global
// desktop_info settings. The output type is set before the input type, as in
// the original ordering.
//
// encoder: the transform to configure; must be non-null.
//
// Every HRESULT is routed through ThrowIfFailed (previously most results were
// stored in an unused _com_error and silently dropped), and both media type
// references are released once the encoder has accepted them.
//
// List of all MF attributes:
// https://learn.microsoft.com/en-us/windows/desktop/medfound/alphabetical-list-of-media-foundation-attributes
void setup_media_types(IMFTransform *encoder) {
    IMFMediaType *output_type = nullptr;
    IMFMediaType *input_type = nullptr;
    ThrowIfFailed(MFCreateMediaType(&output_type), output_type, "Failed to create output type");
    ThrowIfFailed(MFCreateMediaType(&input_type), input_type, "Failed to create input type");

    int stream_id = get_stream_id(encoder);

    // --- Output (compressed) type ---
    ThrowIfFailed(output_type->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Video), output_type, "Couldn't set output major type");
    ThrowIfFailed(output_type->SetGUID(MF_MT_SUBTYPE, desktop_info.output_sub_type), output_type, "Couldn't set output subtype");
    ThrowIfFailed(output_type->SetUINT32(MF_MT_AVG_BITRATE, desktop_info.bitrate), output_type, "Couldn't set output bitrate");
    ThrowIfFailed(MFSetAttributeSize(output_type, MF_MT_FRAME_SIZE, desktop_info.width, desktop_info.height), output_type, "Couldn't set output frame size");
    ThrowIfFailed(MFSetAttributeRatio(output_type, MF_MT_FRAME_RATE, desktop_info.fps, 1), output_type, "Couldn't set output frame rate");
    // Progressive: motion will be smoother, fewer artifacts.
    ThrowIfFailed(output_type->SetUINT32(MF_MT_INTERLACE_MODE, MFVideoInterlace_Progressive), output_type, "Couldn't set output interlace mode");
    ThrowIfFailed(output_type->SetUINT32(MF_MT_MPEG2_PROFILE, eAVEncH264VProfile_High), output_type, "Couldn't set H.264 profile");
    ThrowIfFailed(output_type->SetUINT32(MF_MT_MPEG2_LEVEL, eAVEncH264VLevel3_1), output_type, "Couldn't set H.264 level");
    // NOTE(review): CODECAPI_AVEncCommonRateControlMode is a codec property that
    // is normally set through ICodecAPI::SetValue; storing it as a media type
    // attribute may have no effect on the encoder - confirm.
    ThrowIfFailed(output_type->SetUINT32(CODECAPI_AVEncCommonRateControlMode, eAVEncCommonRateControlMode_CBR), output_type, "Couldn't set rate control mode");
    ThrowIfFailed(encoder->SetOutputType(stream_id, output_type, 0), output_type, "Couldn't set output type");

    // --- Input (uncompressed) type ---
    ThrowIfFailed(input_type->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Video), input_type, "Couldn't set input major type");
    ThrowIfFailed(input_type->SetGUID(MF_MT_SUBTYPE, desktop_info.input_sub_type), input_type, "Couldn't set input subtype");
    ThrowIfFailed(input_type->SetUINT32(MF_MT_INTERLACE_MODE, MFVideoInterlace_Progressive), input_type, "Couldn't set input interlace mode");
    ThrowIfFailed(MFSetAttributeSize(input_type, MF_MT_FRAME_SIZE, desktop_info.width, desktop_info.height), input_type, "Couldn't set input frame size");
    ThrowIfFailed(MFSetAttributeRatio(input_type, MF_MT_FRAME_RATE, desktop_info.fps, 1), input_type, "Couldn't set input frame rate");
    ThrowIfFailed(MFSetAttributeRatio(input_type, MF_MT_PIXEL_ASPECT_RATIO, 1, 1), input_type, "Couldn't set input pixel aspect ratio");
    ThrowIfFailed(encoder->SetInputType(stream_id, input_type, 0), input_type, "Couldn't set input type");

    // Drop the local references so the media types are not leaked.
    // NOTE(review): assumes the transform keeps its own reference to (or copy of)
    // the types it accepted and that ThrowIfFailed does not take ownership of
    // its second argument on success - confirm.
    output_type->Release();
    input_type->Release();
}
My desktop_info struct is:
// Capture and encoder settings read by the media type setup and frame
// encoding code through the global `desktop_info` instance.
struct desktop_info {
    int fps{30};       // frames per second
    int width{2560};   // frame width in pixels
    int height{1440};  // frame height in pixels
    uint32_t bitrate{10 * 1000000};  // 10Mb
    GUID input_sub_type = MFVideoFormat_ARGB32;  // subtype of the frames fed to the encoder
    GUID output_sub_type = MFVideoFormat_H264;   // subtype the encoder produces
} desktop_info;
Output of my program prior to reaching ProcessInput:
Hello World!
Number of devices: 3
Device #0
Adapter: Intel(R) HD Graphics 630
Got some information about the device:
\\.\DISPLAY2
Attached to desktop : 1
Got some information about the device:
\\.\DISPLAY1
Attached to desktop : 1
Did not find another adapter. Index higher than the # of outputs.
Successfully duplicated output from IDXGIOutput1
Accumulated frames: 0
Created a 2D texture...
Number of encoders/processors available: 1
Encoder name: Intel« Quick Sync Video H.264 Encoder MFT
Making it Direct3D aware...
Setting up input/output media types...
If you're curious what my Locals were right before ProcessInput: http://prntscr.com/mx1i9t

This may be an "unpopular" answer since it doesn't provide a solution for MFT specifically but after 8 months of working heavily on this stuff, I would highly recommend not using MFT and implementing encoders directly.
My solution was implementing an HW encoder like NVENC/QSV and you could fall back on a software encoder like x264 if the client doesn't have HW acceleration available.
The reason for this is that MFT is far more opaque and not well documented/supported by Microsoft. I think you'll also find that you want more control over the settings and parameter tuning of the encoders, since each encoder implementation is subtly different.

We have seen this error coming from the Intel graphics driver. (The H.264 encoder MFT uses the Intel GPU to encode the video into H.264 format.)
In our case, I think the bug was triggered by configuring the encoder to a very high bit rate and then configuring to a low bit rate. In your sample code, it does not look like you are changing the bit rate, so I am not sure if it is the same bug.
Intel just released a new driver about two weeks ago, that is supposed to have the fix for the bug that we were seeing. So, you may want to give that new driver a try -- hopefully it will fix the problem that you are having.
The new driver is version 25.20.100.6519. You can get it from the Intel web site: https://downloadcenter.intel.com/download/28566/Intel-Graphics-Driver-for-Windows-10
If the new driver does not fix the problem, you could try running your program on a different PC that uses a NVidia or AMD graphics card, to see if the problem only happens on PCs that have Intel graphics.

Related

VideoToolbox hardware encoded I frame not clear on Intel Mac

When I capture video from the camera on an Intel Mac and use VideoToolbox to hardware-encode the raw pixel buffers into H.264 slices, I find that the I-frames produced by VideoToolbox are not sharp, so the picture looks blurry every several seconds. Below are the properties I set:
// Encoder configuration: 1,000,000 bit/s average bitrate, 20 fps expected frame
// rate, and a key frame at least every `interval_second` seconds.
// The Objective-C literal markers (@{...}, @[...], @(...), @YES, @"...") are
// restored here and the duplicated `interval_second` declaration is removed.
self.bitrate = 1000000;
self.frameRate = 20;
int interval_second = 2;
NSDictionary *compressionProperties = @{
    (id)kVTCompressionPropertyKey_ProfileLevel: (id)kVTProfileLevel_H264_High_AutoLevel,
    (id)kVTCompressionPropertyKey_RealTime: @YES,
    (id)kVTCompressionPropertyKey_AllowFrameReordering: @NO,
    (id)kVTCompressionPropertyKey_H264EntropyMode: (id)kVTH264EntropyMode_CABAC,
    (id)kVTCompressionPropertyKey_PixelTransferProperties: @{
        (id)kVTPixelTransferPropertyKey_ScalingMode: (id)kVTScalingMode_Trim,
    },
    (id)kVTCompressionPropertyKey_AverageBitRate: @(self.bitrate),
    (id)kVTCompressionPropertyKey_ExpectedFrameRate: @(self.frameRate),
    // Key frame spacing expressed both in frames and in seconds.
    (id)kVTCompressionPropertyKey_MaxKeyFrameInterval: @(self.frameRate * interval_second),
    (id)kVTCompressionPropertyKey_MaxKeyFrameIntervalDuration: @(interval_second),
    // Hard limit: at most bitrate/8 bytes in any 1.0 second window.
    (id)kVTCompressionPropertyKey_DataRateLimits: @[@(self.bitrate / 8), @1.0],
};
result = VTSessionSetProperties(self.compressionSession, (CFDictionaryRef)compressionProperties);
if (result != noErr) {
    NSLog(@"VTSessionSetProperties failed: %d", (int)result);
    return;
} else {
    NSLog(@"VTSessionSetProperties succeeded");
}
These are very strange compression settings. Do you really need short GOP and very strict data rate limits?
I very much suspect you just copied some code off the internet without having any idea what it does. If that's the case, just set interval_second = 300 and remove kVTCompressionPropertyKey_DataRateLimits completely.

H264 Decoding with Apple Video Toolkit

I am trying to get an H264 streaming app working on various platforms using a combination of Apple Video Toolbox and OpenH264. There is one use case that doesn't work, and I can't find a solution for it: the source uses Video Toolbox on a 2011 iMac running macOS High Sierra and the receiver is a MacBook Pro running Big Sur.
On the receiver the decoded image is about 3/4 green. If I scale the image down to about 1/8 of the original before encoding, it works fine. If I capture the frames on the MacBook and then run exactly the same decoding software in a test program on the iMac, it decodes fine. Doing the same on the MacBook (same image, same test program) gives 3/4 green again. I have a similar problem when receiving from an OpenH264 encoder on a slower Windows machine. I suspect that this has something to do with temporal processing, but I really don't understand H264 well enough to work it out. One thing that I did notice is that the decode call returns no error code but a NULL pixel buffer about 70% of the time.
The "guts" of the decoding part looks like this (modified from a demo on GitHub)
// VideoToolbox decompression output callback.
// sourceFrameRefCon is treated as a CVPixelBufferRef* into which the decoded
// frame is stored with an extra retain (the receiver is responsible for
// releasing it). The remaining parameters are unused.
void didDecompress(void *decompressionOutputRefCon, void *sourceFrameRefCon, OSStatus status, VTDecodeInfoFlags infoFlags, CVImageBufferRef pixelBuffer, CMTime presentationTimeStamp, CMTime presentationDuration )
{
    CVPixelBufferRef *outputPixelBuffer = (CVPixelBufferRef *)sourceFrameRefCon;
    // A frame submitted without a per-frame refcon has nowhere to store its
    // result; skip it instead of writing through a NULL pointer.
    if (outputPixelBuffer == NULL)
        return;
    *outputPixelBuffer = CVPixelBufferRetain(pixelBuffer);
}
// Lazily creates the format description (from the stored SPS/PPS) and the
// decompression session. Does nothing when a session already exists.
// Failures are logged; on failure decodeSession is left as it was.
void initVideoDecodeToolBox ()
{
    if (decodeSession)
        return;

    const uint8_t* parameterSetPointers[2] = { mSPS, mPPS };
    const size_t parameterSetSizes[2] = { mSPSSize, mPPSSize };
    OSStatus status = CMVideoFormatDescriptionCreateFromH264ParameterSets(kCFAllocatorDefault,
                                                                          2, // param count
                                                                          parameterSetPointers,
                                                                          parameterSetSizes,
                                                                          4, // size in bytes of the length prefix in front of each NAL unit
                                                                          &formatDescription);
    if (status != noErr)
    {
        NSLog(@"IOS8VT: reset decoder session failed status=%d", (int)status);
        return;
    }

    uint32_t v = kCVPixelFormatType_32BGRA;
    CFNumberRef pixelFormat = CFNumberCreate(NULL, kCFNumberSInt32Type, &v);
    if (pixelFormat == NULL)
    {
        NSLog(@"IOS8VT: could not create pixel format number");
        return;
    }

    // NOTE(review): kVTDecompressionPropertyKey_RealTime is passed here as a
    // destination image buffer attribute, as in the original code; it looks
    // like a session property rather than a pixel buffer attribute - confirm.
    const void *keys[] = { kCVPixelBufferPixelFormatTypeKey, kVTDecompressionPropertyKey_RealTime };
    const void *values[] = { pixelFormat, kCFBooleanTrue };
    // The CFType callbacks make the dictionary retain its keys and values, so
    // the number can be released right away instead of being leaked.
    CFDictionaryRef attrs = CFDictionaryCreate(NULL, keys, values, 2,
                                               &kCFTypeDictionaryKeyCallBacks,
                                               &kCFTypeDictionaryValueCallBacks);
    CFRelease(pixelFormat);
    if (attrs == NULL)
    {
        NSLog(@"IOS8VT: could not create decoder attributes");
        return;
    }

    VTDecompressionOutputCallbackRecord callBackRecord;
    callBackRecord.decompressionOutputCallback = didDecompress;
    callBackRecord.decompressionOutputRefCon = NULL;
    status = VTDecompressionSessionCreate(kCFAllocatorDefault, formatDescription, NULL, attrs, &callBackRecord, &decodeSession);
    CFRelease(attrs);
    if (status != noErr)
    {
        // Previously this status was ignored.
        NSLog(@"IOS8VT: create decoder session failed status=%d", (int)status);
    }
}
// Decodes one NAL unit and returns the resulting pixel buffer, or NULL when
// there is no session/format description, a Core Media object could not be
// created, or the decoder produced no image.
//
// NALBuffer / NALSize: the NAL payload without its length prefix. A 4-byte
// big-endian length is prepended here before the data is handed to the decoder.
CVPixelBufferRef decode ( const char *NALBuffer, size_t NALSize )
{
    CVPixelBufferRef decodedFrame = NULL;
    if (!decodeSession || !formatDescription)
        return decodedFrame;

    // Rebuild "length + payload" framing in a temporary block.
    const size_t framedSize = NALSize + 4;
    MemoryBlock framed ( framedSize );
    char *const framedBytes = (char*)framed.getData();
    memcpy ( framedBytes + 4, NALBuffer, NALSize );
    *((uint32*)framedBytes) = CFSwapInt32HostToBig ((uint32)NALSize);

    // kCFAllocatorNull: the block buffer references `framed` without owning it.
    CMBlockBufferRef block = NULL;
    OSStatus status = CMBlockBufferCreateWithMemoryBlock(kCFAllocatorDefault, framedBytes, framedSize, kCFAllocatorNull, NULL, 0, framedSize, 0, &block);
    if (status != kCMBlockBufferNoErr)
        return decodedFrame;

    CMSampleBufferRef sample = NULL;
    const size_t sampleSizes[] = { framedSize };
    status = CMSampleBufferCreateReady(kCFAllocatorDefault, block, formatDescription, 1, 0, NULL, 1, sampleSizes, &sample);
    if (status == kCMBlockBufferNoErr && sample)
    {
        // With no flags set the decode is synchronous, so didDecompress has
        // stored its result in decodedFrame by the time the call returns.
        VTDecodeFrameFlags decodeFlags = 0;
        VTDecodeInfoFlags infoOut = 0;
        const OSStatus decodeStatus = VTDecompressionSessionDecodeFrame ( decodeSession, sample, decodeFlags, &decodedFrame, &infoOut );
        if (decodeStatus != noErr)
        {
            DBG ( "decode failed status=" + String ( decodeStatus) );
        }
        CFRelease(sample);
    }
    CFRelease(block);
    return decodedFrame;
}
Note: the NAL blocks don't have a 00 00 00 01 separator because they are streamed in blocks with explicit length field.
Decoding works fine on all platforms, and the encoded stream decodes fine with OpenH264.
Well, I finally found the answer, so I'm going to leave it here for posterity. It turns out that the Video Toolbox decode function expects all the NAL blocks that belong to the same frame to be copied into a single SampleBuffer. The older Mac provides the app with single keyframes that are split into separate NAL blocks, which the app then sends individually across the network. Unfortunately, this means that only the first NAL block is processed (in my case less than a quarter of the picture) and the rest are discarded. What you need to do is work out which NALs are part of the same frame and bundle them together. Unfortunately, this requires you to partially parse the PPS and the frames themselves, which is not trivial. Many thanks to the post here at the Apple site which put me on the right track.

VoiceProcessingIO Audio Unit adds an unexpected input stream to Built-in output device (macOS)

I work on VoIP app on macOS and use VoiceProcessingIO Audio Unit for audio processing like Echo cancellation and automatic gain control.
The problem is that when I initialise the audio unit, the list of Core Audio devices changes - not just by adding a new aggregate device which the VP audio unit uses for its own needs, but also because the built-in output device (i.e. "Built-In MacBook Pro Speakers") now also appears as an input device, i.e. it has an unexpected input stream in addition to its output streams.
This is a list of INPUT devices (aka "microphones") I get from Core Audio before initialising my VP AU:
DEVICE: INPUT 45 BlackHole_UID
DEVICE: INPUT 93 BuiltInMicrophoneDevice
This is the same list when my VP AU is initialised:
DEVICE: INPUT 45 BlackHole_UID
DEVICE: INPUT 93 BuiltInMicrophoneDevice
DEVICE: INPUT 86 BuiltInSpeakerDevice /// WHY?
DEVICE: INPUT 98 VPAUAggregateAudioDevice-0x101046040
This is very frustrating because I need to display a list of devices in the app, and even though I can simply filter aggregate devices out of the device list (they are not usable with the VP AU anyway), I cannot exclude the built-in MacBook speaker device.
Maybe one of you has already been through this and has a clue about what's going on and whether it can be fixed - for example, some kAudioObjectPropertyXX I need to watch for to exclude the device from the inputs list. Of course this might be a bug/feature on Apple's side and I simply have to hack my way around it.
VP AU works well, and the problem reproduces regardless of the devices used (I tried built-in and external/USB/Bluetooth devices alike). The problem reproduces on every macOS version I could test, from 10.13 through 11.0. It also reproduces on different Macs and with different sets of connected audio devices. I find it curious that there is next to no information available about this problem, which makes me think that I did something wrong.
One more strange thing: while the VP AU is running, the HALLab app shows something different: the built-in input has two more input streams (OK, I could live with that if it were just that!). But it doesn't show that the built-in output has input streams added, as my app does.
Here is extract from cpp code on how I setup VP Audio Unit:
#define MAX_FRAMES_PER_CALLBACK 1024
// Creates an instance of the Apple audio component with the given type and
// subtype.
// Returns the new instance, or nullptr when no matching component exists or
// instantiation fails (previously an uninitialised handle was returned on
// failure, and a missing component was passed on unchecked).
AudioComponentInstance AvHwVoIP::getComponentInstance(OSType type, OSType subType) {
    AudioComponentDescription desc = {0};
    desc.componentFlags = 0;
    desc.componentFlagsMask = 0;
    desc.componentManufacturer = kAudioUnitManufacturer_Apple;
    desc.componentSubType = subType;
    desc.componentType = type;

    AudioComponentInstance unit = nullptr;
    AudioComponent ioComponent = AudioComponentFindNext(NULL, &desc);
    if (ioComponent == NULL) {
        printf("Error: no matching audio component found\n");
        return unit;
    }

    OSStatus status = AudioComponentInstanceNew(ioComponent, &unit);
    if (status != noErr) {
        printf("Error: %d\n", (int)status);
        unit = nullptr;  // never hand a possibly half-initialised handle to the caller
    }
    return unit;
}
// Applies the same EnableIO value to both sides of the audio unit:
// input scope on element 1 and output scope on element 0.
// enableIO: value written to kAudioOutputUnitProperty_EnableIO.
// auDev:    audio unit to configure.
// (The unused local `no` from the original has been removed.)
void AvHwVoIP::enableIO(uint32_t enableIO, AudioUnit auDev) {
    setAudioUnitProperty(auDev,
                         kAudioOutputUnitProperty_EnableIO,
                         kAudioUnitScope_Input,
                         1,
                         &enableIO,
                         sizeof(enableIO));
    setAudioUnitProperty(auDev,
                         kAudioOutputUnitProperty_EnableIO,
                         kAudioUnitScope_Output,
                         0,
                         &enableIO,
                         sizeof(enableIO));
}
// Makes devId the current device of the audio unit for the given element.
// Element 0 is addressed through the output scope, any other element through
// the input scope.
// This should be done only after IO has been enabled on the AUHAL.
void AvHwVoIP::setDeviceAsCurrent(AudioUnit auDev, AudioUnitElement element, AudioObjectID devId) {
    const bool isOutputElement = (element == 0);
    if (isOutputElement) {
        setAudioUnitProperty(auDev, kAudioOutputUnitProperty_CurrentDevice,
                             kAudioUnitScope_Output, element, &devId, sizeof(AudioDeviceID));
    } else {
        setAudioUnitProperty(auDev, kAudioOutputUnitProperty_CurrentDevice,
                             kAudioUnitScope_Input, element, &devId, sizeof(AudioDeviceID));
    }
}
// Thin wrapper around AudioUnitSetProperty that logs a fixed message to
// std::cout when the call does not return noErr. The status itself is not
// reported to the caller.
void AvHwVoIP::setAudioUnitProperty(AudioUnit auDev,
                                    AudioUnitPropertyID inID,
                                    AudioUnitScope inScope,
                                    AudioUnitElement inElement,
                                    const void* __nullable inData,
                                    uint32_t inDataSize) {
    const OSStatus result = AudioUnitSetProperty(auDev, inID, inScope, inElement, inData, inDataSize);
    if (result == noErr) {
        return;
    }
    std::cout << "****** ::setAudioUnitProperty failed" << std::endl;
}
// Creates, configures, initialises and starts the VoiceProcessingIO audio unit.
// The configuration calls run in a fixed order before AudioUnitInitialize;
// errors from initialise/start are only printed.
void AvHwVoIP::start() {
m_auVoiceProcesing = getComponentInstance(kAudioUnitType_Output, kAudioUnitSubType_VoiceProcessingIO);
// Enable IO on both the input and the output side before selecting devices.
enableIO(1, m_auVoiceProcesing);
m_format_description = SetAudioUnitStreamFormatFloat(m_auVoiceProcesing);
SetAudioUnitCallbacks(m_auVoiceProcesing);
setDeviceAsCurrent(m_auVoiceProcesing, 0, m_renderDeviceID);//output device AudioDeviceID here
setDeviceAsCurrent(m_auVoiceProcesing, 1, m_capDeviceID);//input device AudioDeviceID here
setInputLevelListener();
setVPEnabled(true);
setAGCEnabled(true);
// Print the unit's current MaximumFramesPerSlice, then overwrite it with
// MAX_FRAMES_PER_CALLBACK.
// NOTE(review): s1 is assigned by both calls but never read, so a failed
// get/set goes unnoticed and the printed value may be the initial 0.
UInt32 maximumFramesPerSlice = 0;
UInt32 size = sizeof(maximumFramesPerSlice);
OSStatus s1 = AudioUnitGetProperty(m_auVoiceProcesing, kAudioUnitProperty_MaximumFramesPerSlice, kAudioUnitScope_Global, 0, &maximumFramesPerSlice, &size);
printf("max frames per callback: %d\n", maximumFramesPerSlice);
maximumFramesPerSlice = MAX_FRAMES_PER_CALLBACK;
s1 = AudioUnitSetProperty(m_auVoiceProcesing, kAudioUnitProperty_MaximumFramesPerSlice, kAudioUnitScope_Global, 0, &maximumFramesPerSlice, size);
OSStatus status = AudioUnitInitialize(m_auVoiceProcesing);
if (noErr != status) {
printf("*** error AU initialize: %d", status);
}
// NOTE(review): the unit is started even when AudioUnitInitialize failed.
status = AudioOutputUnitStart(m_auVoiceProcesing);
if (noErr != status) {
printf("*** AU start error: %d", status);
}
}
And Here is how I get my list of devices:
//does this device have input/output streams?
// Reports whether the device exposes at least one stream in the requested
// direction (input == true: input scope, otherwise output scope).
// A failing size query is logged; the size then stays 0 and the function
// returns false.
bool hasStreamsForCategory(AudioObjectID devId, bool input)
{
    const AudioObjectPropertyScope direction = input ? kAudioObjectPropertyScopeInput
                                                     : kAudioObjectPropertyScopeOutput;
    AudioObjectPropertyAddress streamsAddress{kAudioDevicePropertyStreams, direction, kAudioObjectPropertyElementWildcard};

    uint32_t byteCount = 0;
    const OSStatus result = AudioObjectGetPropertyDataSize(devId, &streamsAddress, 0, NULL, &byteCount);
    if (result != noErr)
    {
        printf("%s: Error in AudioObjectGetPropertyDataSize: %d \n", __FUNCTION__, result);
    }

    // At least one whole AudioStreamID fits into the reported size.
    return byteCount >= sizeof(AudioStreamID);
}
// Enumerates all Core Audio devices and returns the IDs of those that have at
// least one stream in the requested direction (isInput == true: input streams,
// otherwise output streams). Each accepted device is also printed.
// Returns an empty set when the device list cannot be obtained.
std::set<AudioDeviceID> scanCoreAudioDeviceUIDs(bool isInput)
{
    std::set<AudioDeviceID> deviceIDs{};

    // find out how many audio devices there are
    AudioObjectPropertyAddress propertyAddress = {kAudioHardwarePropertyDevices, kAudioObjectPropertyScopeGlobal, kAudioObjectPropertyElementMaster};
    uint32_t dataSize{0};
    OSStatus err = AudioObjectGetPropertyDataSize(kAudioObjectSystemObject, &propertyAddress, 0, NULL, &dataSize);
    if ( err != noErr )
    {
        // Report the error code (the original printed dataSize here by mistake).
        printf("%s: AudioObjectGetPropertyDataSize: %d\n", __FUNCTION__, (int)err);
        return deviceIDs;//empty
    }

    // calculate the number of devices available
    uint32_t devicesAvailable = dataSize / sizeof(AudioObjectID);
    if ( devicesAvailable < 1 )
    {
        printf("%s: Core audio available devices were not found\n", __FUNCTION__);
        return deviceIDs;//empty
    }

    // NOTE(review): variable-length array, kept from the original; this is a
    // compiler extension in C++, not standard.
    AudioObjectID devices[devicesAvailable];//devices to get
    err = AudioObjectGetPropertyData(kAudioObjectSystemObject, &propertyAddress, 0, NULL, &dataSize, devices);
    if ( err != noErr )
    {
        printf("%s: Core audio available devices were not found\n", __FUNCTION__);
        return deviceIDs;//empty
    }

    // dataSize is in/out: use the size actually written in case the device
    // list shrank between the two calls, so no unwritten entries are read.
    devicesAvailable = dataSize / sizeof(AudioObjectID);

    for (uint32_t i = 0; i < devicesAvailable; ++i)
    {
        if (!hasStreamsForCategory(devices[i], isInput)) {
            continue;
        }
        printf("DEVICE: \t %s \t %d \t %s\n", isInput ? "INPUT" : "OUTPUT", devices[i], deviceUIDFromAudioDeviceID(devices[i]).c_str());
        deviceIDs.insert(devices[i]);
    }//end for
    return deviceIDs;
}
Well, I'm answering my own question four months later, now that Apple Feedback Assistant has responded to my request:
"There are two things you were noticing, both of which are expected and considered as implementation details of AUVP:
The speaker device has input stream - this is the reference tap stream for echo cancellation.
There is additional input stream under the built-in mic device - this is the raw mic streams enabled by AUVP.
For #1, We'd advise you to treat built-in speaker and (on certain Macs) headphone with special caution when determining whether it’s input/output device based on its input/output streams.
For #2, We'd advise you to ignore the extra streams on the device."
So they suggest doing exactly what I already did: determine the built-in output device before starting the AU and remember it, and ignore any extra streams that appear on the built-in devices while the VP AU is running.

Format of microphone audio passed to call back in mac OS X core audio example

I need access to audio data from the microphone on a MacBook. I have an example program for recording microphone data, based on the one in "Learning Core Audio". When I run this program and break in the callback routine, I see the inBuffer pointer and the mAudioData pointer. However, I am having a heck of a time making sense of the data. I've tried casting the void* mAudioData pointer to SInt16, to SInt32 and to float, and tried a number of endian conversions, all with nonsense-looking results. What I need to know definitively is the number format of the data in the buffer. The example does work - it writes microphone data to a file which I can play - so I know that real audio is being recorded.
// Describe the desired recording format: 2-channel, 16-bit linear PCM with
// one frame per packet.
AudioStreamBasicDescription recordFormat;
memset(&recordFormat,0,sizeof(recordFormat));
//recordFormat.mFormatID = kAudioFormatMPEG4AAC;
recordFormat.mFormatID = kAudioFormatLinearPCM;
recordFormat.mChannelsPerFrame = 2;
recordFormat.mBitsPerChannel = 16;
// Interleaved 16-bit samples: bytes per frame = channels * sizeof(SInt16).
recordFormat.mBytesPerPacket = recordFormat.mBytesPerFrame = recordFormat.mChannelsPerFrame * sizeof(SInt16);
recordFormat.mFramesPerPacket = 1;
// NOTE(review): mFormatFlags is never assigned after the memset, so it is 0
// here. For linear PCM the flags select integer vs. float, signedness,
// endianness and packing; leaving them unset is a likely reason the callback
// data is hard to interpret - confirm against the format read back below.
MyGetDefaultInputDeviceSampleRate(&recordFormat.mSampleRate);
// Ask Core Audio to fill in the remaining fields of the description.
UInt32 propSize = sizeof(recordFormat);
CheckError(AudioFormatGetProperty(kAudioFormatProperty_FormatInfo,
0,
NULL,
&propSize,
&recordFormat),
"AudioFormatProperty failed");
//set up queue
AudioQueueRef queue = {0};
CheckError(AudioQueueNewInput(&recordFormat,
MyAQInputCallback,
&recorder,
NULL,
kCFRunLoopCommonModes,
0,
&queue),
"AudioQueueNewInput failed");
// Read the format back from the queue into recordFormat; inspecting it after
// this call shows the layout of the data delivered to the callback.
UInt32 size = sizeof(recordFormat);
CheckError(AudioQueueGetProperty(queue,
kAudioConverterCurrentOutputStreamDescription,
&recordFormat,
&size), "Couldn't get queue's format");

m4a audio files not playing on iOS 9

I have an audio-related app that uses a multichannel mixer to play several m4a files at the same time.
I'm using the AudioToolbox framework to stream audio, but on iOS 9 the framework throws an exception in the mixer rendering callback where I am streaming the audio files.
Interestingly, apps compiled with the iOS 9 SDK continue to stream the same file perfectly on iOS 7/8 devices, but not on iOS 9.
Now I can't figure out whether Apple broke something in iOS 9 or we have encoded the files incorrectly on our end; they play just fine on iOS 7/8 but not on 9.
Exception:
malloc: *** error for object 0x7fac74056e08: incorrect checksum for freed object - object was probably modified after being freed.
*** set a breakpoint in malloc_error_break to debug
It works for all other formats without any exception or memory error, but it does not work for the m4a format, which is very surprising.
Here is a code to load files which works for wav,aif etc formats but not for m4a:
// Loads up to min(numFiles, maxBufs) source files fully into memory as mono
// non-interleaved Float32 at kGraphSampleRate, one mSoundBuffer entry per file.
// Loading stops at the first file that cannot be opened or queried.
// Every early-exit path now disposes the open ExtAudioFileRef (it used to
// leak), and a failed buffer allocation is handled instead of being passed to
// ExtAudioFileRead.
- (void)loadFiles{
    AVAudioFormat *clientFormat = [[AVAudioFormat alloc] initWithCommonFormat:AVAudioPCMFormatFloat32
                                                                   sampleRate:kGraphSampleRate
                                                                     channels:1
                                                                  interleaved:NO];
    for (int i = 0; i < numFiles && i < maxBufs; i++) {
        ExtAudioFileRef xafref = 0;
        // open one of the source files
        OSStatus result = ExtAudioFileOpenURL(sourceURL[i], &xafref);
        if (result || !xafref) { break; }

        // get the file data format, this represents the file's actual data format
        AudioStreamBasicDescription fileFormat;
        UInt32 propSize = sizeof(fileFormat);
        result = ExtAudioFileGetProperty(xafref, kExtAudioFileProperty_FileDataFormat, &propSize, &fileFormat);
        if (result) { ExtAudioFileDispose(xafref); break; }

        // set the client format - this is the format we want back from ExtAudioFile and corresponds to the format
        // we will be providing to the input callback of the mixer, therefore the data type must be the same
        double rateRatio = kGraphSampleRate / fileFormat.mSampleRate;
        propSize = sizeof(AudioStreamBasicDescription);
        result = ExtAudioFileSetProperty(xafref, kExtAudioFileProperty_ClientDataFormat, propSize, clientFormat.streamDescription);
        if (result) { ExtAudioFileDispose(xafref); break; }

        // get the file's length in sample frames
        UInt64 numFrames = 0;
        propSize = sizeof(numFrames);
        result = ExtAudioFileGetProperty(xafref, kExtAudioFileProperty_FileLengthFrames, &propSize, &numFrames);
        if (result) { ExtAudioFileDispose(xafref); break; }

        // NOTE(review): the metronome bus gets a deliberately larger buffer than
        // the file length (kept from the original; the constants are unexplained).
        if(i==metronomeBusIndex)
            numFrames = (numFrames+6484)*4;
        //numFrames = (numFrames * rateRatio); // account for any sample rate conversion
        numFrames *= rateRatio;

        // set up our buffer
        mSoundBuffer[i].numFrames = (UInt32)numFrames;
        mSoundBuffer[i].asbd = *(clientFormat.streamDescription);
        UInt32 samples = (UInt32)numFrames * mSoundBuffer[i].asbd.mChannelsPerFrame;
        mSoundBuffer[i].data = (Float32 *)calloc(samples, sizeof(Float32));
        mSoundBuffer[i].sampleNum = 0;
        if (mSoundBuffer[i].data == NULL) {
            // Allocation failed: do not hand a NULL buffer to ExtAudioFileRead.
            ExtAudioFileDispose(xafref);
            break;
        }

        // set up a AudioBufferList to read data into
        AudioBufferList bufList;
        bufList.mNumberBuffers = 1;
        bufList.mBuffers[0].mNumberChannels = 1;
        bufList.mBuffers[0].mData = mSoundBuffer[i].data;
        bufList.mBuffers[0].mDataByteSize = samples * sizeof(Float32);

        // perform a synchronous sequential read of the audio data out of the file into our allocated data buffer
        // NOTE(review): a single read is issued, as in the original; numPackets
        // holds the number of frames actually read afterwards and is not checked.
        UInt32 numPackets = (UInt32)numFrames;
        result = ExtAudioFileRead(xafref, &numPackets, &bufList);
        if (result) {
            free(mSoundBuffer[i].data);
            mSoundBuffer[i].data = 0;
        }

        // close the file and dispose the ExtAudioFileRef
        ExtAudioFileDispose(xafref);
    }
    // [clientFormat release];
}
If anyone could point me in the right direction, how do i go about debugging the issue?
Do we need to re-encode our files in some specific way?
I tried it on iOS 9.1.beta3 yesterday and things seem to be back to normal.
Try it out. Let us know if it works out for you too.

Resources