Using PyAV to encode mono audio to file, params match docs, but still causes Errno 22 - ffmpeg

While trying to use PyAV to encode live mono audio from a microphone to a compressed audio stream (using mp2 or flac as encoder), the program kept raising an exception ValueError: [Errno 22] Invalid argument.
To remove the live microphone source as a cause of the problem, and to make the problematic code easier for others to run/test, I have removed the mic source and now just generate a pure tone as a sequence of input buffers.
All attempts to figure out the missing or mismatched or incorrect argument have just resulted in seeing documentation and examples that are the same as my code.
I would like to know from someone who has used PyAV successfully for mono audio what the correct method and parameters are for encoding mono frames into the mono stream.
The package used is av 10.0.0 installed with
pip3 install av --no-binary av
so it uses my package-manager provided ffmpeg library, which is version 4.2.7.
The problematic python code is:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
Recreating an error 22 when encoding sound with PyAV.
Created on Sun Feb 19 08:10:29 2023
#author: andrewm
import typing
import sys
import math
import fractions
import av
from av import AudioFrame
""" Ensure some PyAudio constants are still defined without changing
the PyAudio recording callback function and without depending
on PyAudio simply for reproducing the PyAV bug [Errno 22] thrown in
File "av/filter/context.pyx", line 89, in av.filter.context.FilterContext.push
class PA_Stub():
    """Minimal stand-in for the ``pyaudio`` module.

    Provides only the two stream-callback flag names that the recording
    callback in this script reads, so the reproduction runs without PyAudio
    being installed.
    """

    paContinue = True
    paComplete = False
pyaudio = PA_Stub()
"""Generate pure tone at given frequency with amplitude 0...1.0 at
sampling frequency fs and beginning at phase offset 'phase'.
Returns the new phase after the sinusoid has cycled over the
sampling window length.
def generate_tone(
    freq: int, phase: float, amp: float, fs, samp_fmt, buffer: bytearray
) -> float:
    """Fill ``buffer`` with a pure sine tone and return the phase to resume from.

    Args:
        freq: Tone frequency, in the same unit as ``fs`` (cycles per second).
        phase: Phase in radians of the first generated sample.
        amp: Amplitude scale; 1.0 maps to the signed 16-bit peak (32767).
        fs: Sampling frequency in samples per second.
        samp_fmt: Sample format name; only ``"s16"`` is supported.
        buffer: Destination, overwritten in place with signed 16-bit samples
            in the machine's native byte order. If its length is odd, the
            final byte is left untouched.

    Returns:
        The phase after the last generated sample (not wrapped to
        ``[0, 2*pi)``). Pass it back in as ``phase`` for the next buffer so
        the tone is continuous across buffers.

    Raises:
        AssertionError: If ``samp_fmt`` is not ``"s16"``.
        OverflowError: If a scaled sample does not fit in 16 bits
            (for example ``amp`` greater than 1.0).
    """
    assert samp_fmt == "s16", "Only s16 supported atm"
    samp_size_bytes = 2
    n_samples = len(buffer) // samp_size_bytes
    full_scale = 2**15 - 1
    phase_inc = 2 * math.pi * freq / fs
    theta = phase
    # Single pass: compute each sample and store it straight into the buffer.
    for byte_i in range(0, n_samples * samp_size_bytes, samp_size_bytes):
        sample = int(full_scale * (amp * math.sin(theta)))
        theta += phase_inc
        buffer[byte_i:byte_i + samp_size_bytes] = sample.to_bytes(
            samp_size_bytes, byteorder=sys.byteorder, signed=True
        )
    return theta
# --- Capture format -------------------------------------------------------
channels = 1
fs = 44100  # Record at 44100 samples per second
sample_size_bytes = 2

# --- Chunking: each chunk is a whole number of FFT windows ------------------
fft_size_samps = 256
chunk_samps = 10 * fft_size_samps  # Record in chunks that are multiples of fft windows.
# print(f"fft_size_samps={fft_size_samps}\nchunk_samps={chunk_samps}")
chunkperiod = chunk_samps / fs
buffer = bytearray(chunk_samps * sample_size_bytes)

# --- Recording length: store data in chunks for 3 seconds -------------------
seconds = 3.0
sample_limit = int(seconds * fs)
total_chunks = math.ceil(seconds / chunkperiod)

# --- Output ---------------------------------------------------------------
out_filename = "testoutput.wav"
ffmpeg_codec_name = 'mp2'  # flac, mp3, or libvorbis make same error.

# --- Mutable state updated while recording ----------------------------------
sample_len = 0
frames = list()  # Initialize array to store frames
phase = 0.0
### uncomment if you want to see the synthetic data being used as a mic input.
# with open("test.raw","wb") as raw_out:
# for ci in range(total_chunks):
# phase = generate_tone(2600, phase, 0.8, fs, "s16", buffer)
# raw_out.write(buffer)
# print("finished gen test")
# sys.exit(0)
# #----
# Using mp2 or mkv as the container format gets the same error.
# NOTE(review): the `av.open(out_filename + ...` part of the next line was
# reconstructed from a garbled excerpt — confirm the intended output path.
with av.open(out_filename + '.mp2', "w", format="mp2") as output_con:
    output_con.metadata["title"] = "My title"
    output_con.metadata["key"] = "value"
    channel_layout = "mono"
    sample_fmt = "s16p"
    ostream = output_con.add_stream(ffmpeg_codec_name, fs, layout=channel_layout)
    assert ostream is not None, "No stream!"
    cctx = ostream.codec_context
    cctx.sample_rate = fs
    cctx.time_base = fractions.Fraction(numerator=1, denominator=fs)
    cctx.format = sample_fmt
    cctx.channels = channels
    cctx.layout = channel_layout
    print(cctx, f"layout#{cctx.channel_layout}")

    # Define PyAudio-style callback for recording plus PyAV transcoding.
    def rec_callback(in_data, frame_count, time_info, status):
        """Encode one chunk of raw mono s16 samples and mux the packets.

        Args:
            in_data: Raw sample bytes for this chunk.
            frame_count: Unused; kept for the PyAudio callback signature.
            time_info: Unused; kept for the PyAudio callback signature.
            status: Unused; kept for the PyAudio callback signature.

        Returns:
            ``(in_data, flag)`` where ``flag`` is ``pyaudio.paContinue``
            until ``sample_limit`` samples have been consumed, then
            ``pyaudio.paComplete``.
        """
        global sample_len
        global ostream
        nsamples = int(len(in_data) / (channels*sample_size_bytes))
        frame = AudioFrame(format=sample_fmt, layout=channel_layout, samples=nsamples)
        # Copy the samples into the frame; a new AudioFrame only allocates
        # storage. With a single channel, planar and packed s16 have the same
        # byte layout, so the chunk maps directly onto plane 0.
        # NOTE(review): assumes the plane is exactly len(in_data) bytes
        # (default alignment) — confirm against the PyAV version in use.
        frame.planes[0].update(in_data)
        frame.sample_rate = fs
        frame.time_base = fractions.Fraction(numerator=1, denominator=fs)
        frame.pts = sample_len
        print(frame, len(in_data))
        for out_packet in ostream.encode(frame):
            output_con.mux(out_packet)
        # Do NOT call ostream.encode(None) here. Passing None flushes the
        # encoder and sends end-of-stream into PyAV's internal resampler
        # graph; the next frame pushed after that is rejected with
        # "[Errno 22] Invalid argument". The flush happens once, after the
        # last chunk, below.
        sample_len += nsamples
        retflag = pyaudio.paContinue if sample_len<sample_limit else pyaudio.paComplete
        return (in_data, retflag)

    ### some e.g. PyAudio code which starts the recording process normally.
    # istream =
    # format=sample_format,
    # channels=channels,
    # rate=fs,
    # frames_per_buffer=chunk_samps,
    # input=True,
    # stream_callback=rec_callback
    # )
    # print(istream)
    # Normally at this point you just sleep the main thread while
    # PyAudio calls back with mic data, but here it is all generated.
    for ci in range(total_chunks):
        phase = generate_tone(2600, phase, 0.8, fs, "s16", buffer)
        ret_data, ret_flag = rec_callback(buffer, ci, {}, 1)
        print('.', end='')

    # Flush the encoder exactly once, after all input has been submitted,
    # and mux whatever packets it was still holding.
    for out_packet in ostream.encode(None):
        output_con.mux(out_packet)
    print(" closing.")
# Stop and close the istream
# istream.stop_stream()
# istream.close()
If you uncomment the RAW output part you will find the generated data can be imported as PCM s16 Mono 44100Hz into Audacity and plays the expected tone, so the generated audio data does not seem to be the problem.
The normal program console output up until the exception is:
<av.AudioCodecContext audio/mp2 at 0x7f8e38202cf0> layout#4
<av.AudioFrame 0, pts=0, 2560 samples at 44100Hz, mono, s16p at 0x7f8e38202eb0> 5120
.<av.AudioFrame 0, pts=2560, 2560 samples at 44100Hz, mono, s16p at 0x7f8e382025f0> 5120
The stack trace is:
Traceback (most recent call last):
File "Dev/multichan_recording/", line 147, in <module>
ret_data, ret_flag = rec_callback(buffer, ci, {}, 1)
File "Dev/multichan_recording/", line 121, in rec_callback
for out_packet in ostream.encode(frame):
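File "av/stream.pyx", line 153, in av.stream.Stream.encode
File "av/codec/context.pyx", line 484, in av.codec.context.CodecContext.encode
File "av/audio/codeccontext.pyx", line 42, in av.audio.codeccontext.AudioCodecContext._prepare_frames_for_encode
File "av/audio/resampler.pyx", line 101, in av.audio.resampler.AudioResampler.resample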
File "av/filter/graph.pyx", line 211, in av.filter.graph.Graph.push
File "av/filter/context.pyx", line 89, in av.filter.context.FilterContext.push
File "av/error.pyx", line 336, in av.error.err_check
ValueError: [Errno 22] Invalid argument
edit: It's interesting that the error happens on the 2nd AudioFrame, as apparently the first one was encoded okay, because they are given the same attribute values aside from the Presentation Time Stamp (pts), but leaving this out and letting PyAV/ffmpeg generate the PTS by itself does not fix the error, so an incorrect PTS does not seem the cause.
After a brief glance in av/filter/context.pyx the exception must come from a bad return value from res = lib.av_buffersrc_write_frame(self.ptr, frame.ptr)
Trying to dig into av_buffersrc_write_frame from the ffmpeg source it is not clear what could be causing this error. The only obvious one is a mismatch between channel layouts, but my code is setting the layout the same in the Stream and the Frame. That problem had been found by an old question pyav - cannot save stream as mono and their answer (that one parameter required is undocumented) is the only reason the code now has the layout='mono' argument when making the stream.
The program output shows layout #4 is being used, and from the FFmpeg channel-layout definitions (libavutil/channel_layout.h) you can see this is the value for symbol AV_CH_FRONT_CENTER, which is the only channel in the MONO layout.
The mismatch is surely some other object property or an undocumented parameter requirement.
How do you encode mono audio to a compressed stream with PyAV?


Google assistant - Rspi 3 - "sounddevice.PortAudioError: Error querying device -1"

Installed google assistant sdk on raspi3, the speaker is a home mini bluetooth, it is paired and connected to raspi, played from youtube and it works! even google says it's connected!
However, when running command in terminal as (env) "googlesamples-assistant-pushtotalk --project-id (not going to paste ID) --device-model-id" I get the following:
/home/pi/env/lib/python3.5/site-packages/google/auth/crypt/ CryptographyDeprecationWarning: Python 3.5 support will be dropped in the next release of cryptography. Please upgrade your Python.
import cryptography.exceptions
INFO:root:Connecting to
Traceback (most recent call last):
File "/home/pi/env/bin/googlesamples-assistant-pushtotalk", line 8, in
File "/home/pi/env/lib/python3.5/site-packages/click/", line 722, in call
return self.main(*args, **kwargs)
File "/home/pi/env/lib/python3.5/site-packages/click/", line 697, in main
rv = self.invoke(ctx)
File "/home/pi/env/lib/python3.5/site-packages/click/", line 895, in invoke
return ctx.invoke(self.callback, **ctx.params)
File "/home/pi/env/lib/python3.5/site-packages/click/", line 535, in invoke
return callback(*args, **kwargs)
File "/home/pi/env/lib/python3.5/site-packages/googlesamples/assistant/grpc/", line 351, in main
File "/home/pi/env/lib/python3.5/site-packages/googlesamples/assistant/grpc/", line 190, in init
blocksize=int(block_size/2), # blocksize is in number of frames.
File "/home/pi/env/lib/python3.5/site-packages/", line 1345, in init
File "/home/pi/env/lib/python3.5/site-packages/", line 762, in init
File "/home/pi/env/lib/python3.5/site-packages/", line 2571, in _get_stream_parameters
info = query_devices(device)
File "/home/pi/env/lib/python3.5/site-packages/", line 569, in query_devices
raise PortAudioError('Error querying device {0}'.format(device))
sounddevice.PortAudioError: Error querying device -1
When using arecord -l or aplay -l in terminal, get the same message for both: "aplay: device_list:270: no soundcards found..."
Also, running test in terminal using "speaker-test -t wav", the test runs but no sound is working"
" speaker-test 1.1.3
Playback device is default
Stream parameters are 48000Hz, S16_LE, 1 channels
WAV file(s)
Rate set to 48000Hz (requested 48000Hz)
Buffer size range from 9600 to 4194304
Period size range from 480 to 4096
Using max buffer size 4194304
Periods = 4
was set period_size = 4096
was set buffer_size = 4194304
0 - Front Left
Time per period = 0.339021
0 - Front Left
Time per period = 0.315553
0 - Front Left
Time per period = 0.315577
*Keeps generating but with no sound."
Finally, going through sudo nano /home/pi/.asoundrc file, when connected to speaker is:
pcm.!default {
type plug
slave.pcm {
type bluealsa
device "x❌x❌x:x"
profile "a2dp"
ctl.!default {
type bluealsa
AND when going to "sudo nano /etc/asound.conf" it seems that it generates another code, when also connected to same speaker:
pcm.!default {
type asym
capture.pcm "mic"
playback.pcm "speaker"
pcm.mic {
type plug
slave.pcm {
type bluealsa device "x❌x❌x:x"
profile "sco"
pcm.speaker {
type plug
slave.pcm {
type bluealsa device "x❌x❌x:x"
profile "sco"
I tried copy/paste code of /etc/asound.conf into /home/pi/.asoundrc and run speaker-test -t wav, but i get:
speaker-test 1.1.3
Playback device is default
Stream parameters are 48000Hz, S16_LE, 1 channels
WAV file(s)
ALSA lib bluealsa-pcm.c:680:(_snd_pcm_bluealsa_open) Couldn't get BlueALSA transport: No such device
Playback open error: -19,No such device"
So, whats the deal??

How can you decode output sequences from TFGPT2Model?

I'm trying to get generated text from the TFGPT2Model in the Transformers library. I can see the output tensor, but I'm not able to decode it. Is the tokenizer not compatible with the TF model for decoding?
Code is:
import tensorflow as tf
from transformers import GPT2Config, GPT2Tokenizer, TFGPT2Model

model_name = "gpt2-medium"
config = GPT2Config.from_pretrained(model_name)
tokenizer = GPT2Tokenizer.from_pretrained(model_name)
model = TFGPT2Model.from_pretrained(model_name, config=config)
input_ids = tf.constant(tokenizer.encode("Hello, my dog is cute",
                                         add_special_tokens=True))[None, :]  # Batch size 1
outputs = model(input_ids)
# This is the call the question is about: outputs[0] is the float32 tensor of
# shape (1, 6, 1024) printed in the output that follows, not a sequence of
# token ids, so decode() raises the TypeError shown in the traceback.
result = tokenizer.decode(outputs[0])
The resulting output is:
$ python
2020-04-16 23:43:11.753181: I tensorflow/stream_executor/platform/default/] Successfully opened dynamic library
2020-04-16 23:43:11.777487: I tensorflow/stream_executor/platform/default/] Successfully opened dynamic library
2020-04-16 23:43:27.617982: W tensorflow/python/util/] Sets are not currently considered sequences, but this may change in the future, so consider avoiding using them.
2020-04-16 23:43:27.693316: I tensorflow/stream_executor/platform/default/] Successfully opened dynamic library
2020-04-16 23:43:27.824075: I tensorflow/stream_executor/cuda/] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA n
ode, so returning NUMA node zero
2020-04-16 23:43:38.149860: I tensorflow/core/common_runtime/gpu/] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:1 with 10565 MB memory) -> physical GPU (device: 1, name: Tesla K80, pci bus id: 0000:25:00.0, compute capability: 3.7)
2020-04-16 23:43:38.150217: I tensorflow/stream_executor/cuda/] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2020-04-16 23:43:38.150913: I tensorflow/core/common_runtime/gpu/] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:2 with 10565 MB memory) -> physical GPU (device: 2, name: Tesla K80, pci bus id: 0000:26:00.0, compute capability: 3.7)
2020-04-16 23:43:44.438587: I tensorflow/stream_executor/platform/default/] Successfully opened dynamic library
[[[ 0.671073 0.60760975 -0.10744217 ... -0.51132596 -0.3369941
[ 0.6403012 0.00396247 0.7443729 ... 0.2058892 -0.43869907
0.2180479 ]
[ 0.5131284 -0.35192695 0.12285632 ... -0.30060387 -1.0279727
[ 0.3083361 -0.05588413 1.0543617 ... -0.11589152 -1.0487361
[ 0.70787597 -0.40516227 0.4160383 ... 0.44217822 -0.34975922
[-0.03940453 -0.1243843 0.40204537 ... 0.04586177 -0.48230025
0.5768887 ]]], shape=(1, 6, 1024), dtype=float32)
Traceback (most recent call last):
File "", line 19, in <module>
result = tokenizer.decode(outputs[0])
File "/home/.../transformers/src/transformers/", line 1605, in decode
filtered_tokens = self.convert_ids_to_tokens(token_ids, skip_special_tokens=skip_special_tokens)
File "/home/.../transformers/src/transformers/", line 1575, in convert_ids_to_tokens
index = int(index)
File "/home/.../venv/lib/python3.7/site-packages/tensorflow_core/python/framework/", line 853, in __int__
return int(self._numpy())
TypeError: only size-1 arrays can be converted to Python scalars
(I removed all the TF messages and modified paths of my environment)
Apparently, you are using the wrong GPT2-Model. I tried your example by using the GPT2LMHeadModel which is the same Transformer just with a language modeling head on top. It also returns prediction_scores. In addition to that, you need to use model.generate(input_ids) in order to get an output for decoding. By default, a greedy search is performed.
import tensorflow as tf
from transformers import GPT2Config, GPT2Tokenizer, TFGPT2LMHeadModel

model_name = "gpt2-medium"
config = GPT2Config.from_pretrained(model_name)
tokenizer = GPT2Tokenizer.from_pretrained(model_name)
# The LM-head variant is used so that generate() can produce token ids.
model = TFGPT2LMHeadModel.from_pretrained(model_name, config=config)
input_ids = tf.constant(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True))[None, :]  # Batch size 1
# generate() returns sequences of token ids (greedy search by default, per
# the answer text), which is what tokenizer.decode() expects.
outputs = model.generate(input_ids=input_ids)
result = tokenizer.decode(outputs[0])

Pyautogui and pyscreeze crash with windll.user32.ReleaseDC failed

I'm trying to compare certain pixel values in my pyautogui script, but it crashes with following error message after either multiple successful runs, or sometimes just straight on the first call:
Traceback (most recent call last):
File "F:\Koodit\Python\HeroWars NNet\Assets\", line 219, in <module>
battle = observeBattle()
File "F:\Koodit\Python\HeroWars NNet\Assets\", line 180, in observeBattle
statii = getHeroBattlePixels()
File "F:\Koodit\Python\HeroWars NNet\Assets\", line 32, in getHeroBattlePixels
colormatch = pyautogui.pixelMatchesColor(location[0], location[1], alive, tolerance=5)
File "E:\Program Files\Python\lib\site-packages\pyscreeze\", line 557, in pixelMatchesColor
pix = pixel(x, y)
File "E:\Program Files\Python\lib\site-packages\pyscreeze\", line 582, in pixel
return (r, g, b)
File "E:\Program Files\Python\lib\", line 120, in __exit__
File "E:\Program Files\Python\lib\site-packages\pyscreeze\", line 111, in __win32_openDC
raise WindowsError("windll.user32.ReleaseDC failed : return 0")
OSError: windll.user32.ReleaseDC failed : return 0
My code (this is called multiple times, sometimes it crashes on first run, sometimes it runs nicely for around 100 calls before failing, also, my screen is 4K, so the resolutions get big):
def getSomePixelStatuses():
    """Report which of the probed screen locations match the reference colour.

    Returns:
        A list with one entry per location in ``someLocations``, in order,
        each being the result of ``pyautogui.pixelMatchesColor`` for that
        pixel against ``someValue`` with a tolerance of 5.

    Raises:
        OSError: Propagated from pyautogui/pyscreeze when reading a pixel
            fails (the ``windll.user32.ReleaseDC failed`` error discussed
            in this post).
    """
    someLocations = [
        [1200, 990],
        [1300, 990],
        [1400, 990],
        [1500, 990],
        [1602, 990],
        [1768, 990],
        [1868, 990],
        [1968, 990],
        [2068, 990],
        [2169, 990],
    ]
    someValue = (92, 13, 12)
    # NOTE(review): the excerpt computed each match but never stored it, so
    # the function always returned []; presumably the original appended each
    # result to `status` — confirm against the original post.
    status = [
        pyautogui.pixelMatchesColor(x, y, someValue, tolerance=5)
        for x, y in someLocations
    ]
    return status
I have no idea how to mitigate this problem. It would seem that pyautogui uses pyscreeze to read pixel values on screen, and most probable candidate for the place where error occurs is the pyscreeze pixel function:
# Return the colour of the screen pixel at (x, y): an (r, g, b) tuple on
# Windows, otherwise the first three channels of a screenshot pixel as RGB.
# NOTE(review): indentation, and presumably an `else:` before the final
# return, were lost in this excerpt — confirm against pyscreeze 0.1.25.
def pixel(x, y):
if sys.platform == 'win32':
# On Windows, calling GetDC() and GetPixel() is twice as fast as using our screenshot() function.
# Per the traceback earlier in this post, the "ReleaseDC failed" error is
# raised from __win32_openDC when this `with` block exits.
with __win32_openDC(0) as hdc: # handle will be released automatically
color = windll.gdi32.GetPixel(hdc, x, y)
if color < 0:
raise WindowsError("windll.gdi32.GetPixel failed : return {}".format(color))
# color is in the format 0xbbggrr
bbggrr = "{:0>6x}".format(color) # bbggrr => 'bbggrr' (hex)
# Split the six hex digits into three 2-digit channel values.
b, g, r = (int(bbggrr[i:i+2], 16) for i in range(0, 6, 2))
return (r, g, b)
# Need to select only the first three values of the color in
# case the returned pixel has an alpha channel
return RGB(*(screenshot().getpixel((x, y))[:3]))
I installed these libraries just yesterday, and I'm running python 3.8 on windows 10, and pyscreeze is version 0.1.25 so in theory everything should be up to date, but somehow something ends up crashing. Is there a way to mitigate this, either modifying my code, or even the library itself, or is my environment not suitable for this operation?
Well I know it's not particularly helpful; but for me, this error was fixed simply by running my code on 3.7 instead of 3.8. There shouldn't be any changes you have to make to your code, however (unless you were using walrus!)
On Windows, this can be done with the -3.7 command line flag, as long as 3.7 is installed
PyScreeze and PyAutoGUI maintainer here. This is an issue that has been fixed in PyScreeze 0.1.28, so you just need to update it by running pip install -U pyscreeze.
For more context, here's the GitHub issue where it was reported:
It's a bug. You were on the right track, as the problem is indeed in this line of the pixel() function:
with __win32_openDC(0) as hdc
That function uses ctypes.windll, which doesn't seem to do well with the negative values sometimes returned from windll.user32.GetDC(); this subsequently raises an exception when windll.user32.ReleaseDC() is called.
The folks at pillow helped track this down and propose a fix.
issue filed at pyautogui
issue filed at pillow which led to the solution
pending PR at pyscreeze to address
I can use pixel function on Python 3.8 like this:
a = pixel(100,100)
> except:
> a = pixel(100,100)
I don't have any clue why this works, but it works.
I had this error too and i fixed it. Just use try and except.
While true:
x,y = pyautogui.position()
print("Cannot get pixel for the moment")
Given that you might be taking pixels multiple times, or you can do so, try and except works wonders to solve any pyscreeze for pyautogui issue. Honestly i dont know whats up with pyscreeze, but this works for me. Cheers

libav gives audio duration as negative

I am trying to make a simple av player, and in some cases I am getting values correctly as below:
checking /media/timecapsule/Music/02 Baawre.mp3
[mp3 # 0x7f0698005660] Skipping 0 bytes of junk at 2102699.
dur is 4396400640
duration is 311
However, in other places, I am getting negative durations:
checking /media/timecapsule/Music/01 Just Chill.mp3
[mp3 # 0x7f0694005f20] Skipping 0 bytes of junk at 1318922.
dur is -9223372036854775808
duration is -653583619391
I am not sure what's causing the duration to end up negative only in some audio files. Any ideas to where I might be wrong are welcome!
Source code here
I would suggest two things:
1. Make sure that failed files are not corrupt, i.e. you can use ffmpeg command line tool to dump details.
2. Break this in 2 if conditions to avoid order of operation and ensure open succeeded.
if(!(avformat_open_input(&container, name, NULL, NULL) < 0 && avformat_find_stream_info(container, NULL) < 0)) {
Also, you can use av_dump_format to ensure that the headers are correct. See ex -

(Python 2.7.6) IOError: [Errno 2] No such file or directory: OSX Yosemite

Sorry first time doing this -
Essentially I have obtained a code that can convert .wav audio files to an .stl file. I am trying to run "" in Terminal, but end up with this IOError no file or directory error.
Can anyone please help?
Last login: Wed Mar 18 22:27:04 on ttys001
/var/folders/1_/q5syr5b51zn6y9yvxd7v8mhw0000gn/T/Cleanup\ At\ Startup/ ; exit;
Jags-MacBook-Pro:~ jag$ /var/folders/1_/q5syr5b51zn6y9yvxd7v8mhw0000gn/T/Cleanup\ At\ Startup/ ; exit;
Traceback (most recent call last):
File "/private/var/folders/1_/q5syr5b51zn6y9yvxd7v8mhw0000gn/T/Cleanup At Startup/", line 24, in <module>
w = wave.open(fileName, 'r')
File "/System/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/", line 502, in open
return Wave_read(f)
File "/System/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/", line 159, in __init__
f = __builtin__.open(f, 'rb')
IOError: [Errno 2] No such file or directory: 'yourbody.wav'
[Process completed]
This is my code:
import wave
import math
import struct
bitDepth = 8  # target bitDepth
frate = 44100  # target frame rate
fileName = "yourbody.wav"  # file to be imported (change this)

# Read file and get data.
# A relative fileName is resolved against the directory the script is run
# from, not the script's own location; if the file is not there, this call
# raises IOError [Errno 2].
w = wave.open(fileName, 'r')
try:
    numframes = w.getnframes()
    frame = w.readframes(numframes)
finally:
    # try/finally rather than `with`: Wave_read is not a context manager on
    # Python 2.7.
    w.close()

# Turn the raw bytes into an indexable sequence of ints (0..255). bytearray
# gives this on both Python 2 and Python 3.
frameInt = bytearray(frame)

# Separate left and right channels and merge bytes.
# NOTE(review): the stride of 4 assumes 16-bit stereo input (4 bytes per
# frame) — confirm for the files being converted.
frameOneChannel = [0] * numframes  # one channel of the wave
for i in range(numframes):
    # Little-endian 16-bit value of the first channel in this frame.
    sample = frameInt[4*i+1] * 2**8 + frameInt[4*i]
    if sample > 2**15:
        sample -= 2**16  # reinterpret as a negative signed value
    elif sample == 2**15:
        sample = 0
    frameOneChannel[i] = sample

# Convert to string: every element is followed by a comma.
audioStr = ''.join(str(value) + "," for value in frameOneChannel)

fileName = fileName[:-3]  # remove the "wav" extension, keeping the dot
# NOTE(review): the excerpt ended right after opening the output file;
# writing audioStr and closing the file is the presumed intent — confirm.
with open(fileName + "txt", "w") as text_file:
    text_file.write(audioStr)
IOError: [Errno 2] No such file or directory: 'yourbody.wav'
The file 'yourbody.wav' is not found in the directory where you executed your program.
