Deepstream back-to-back detectors with DashCamNet and VehicleMakeNet not classifying correctly - nvidia-deepstream

Question also posted on their forum here: https://forums.developer.nvidia.com/t/deepstream-back-to-back-detectors-with-dashcamnetand-vehiclemakenet-not-classifying-correctly/220606
Hello,
Hardware Platform (Jetson / GPU) dGPU RTX 3080 on Ubuntu 20.04.1
DeepStream Version 6.1
TensorRT Version 8.4.1
NVIDIA GPU Driver Version (valid for GPU only) 515.48.07
I'm trying to use the back-to-back-detectors reference C application with the DashCamNet and VehicleMakeNet models. The DashCamNet detector works, but the VehicleMakeNet classifier only outputs whatever the first entry in the labels file is (Acura in this case).
The changes I've made are on lines 89 to 103: I've replaced all of that with an if statement that prints out any classifier metadata if it exists, along with instantiating the variables, changing the config names and changing the sink to "fake-renderer".
if (obj_meta->classifier_meta_list) {
  class_meta = (NvDsClassifierMeta *) (obj_meta->classifier_meta_list->data);
  if (class_meta->label_info_list) {
    label_info = (NvDsLabelInfo *) (class_meta->label_info_list->data);
    g_print ("Result: %s\n", label_info->result_label);
  }
}
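For debugging, a fuller probe sketch (my own, using the standard NvDs metadata fields; not part of the reference app) that walks every classifier meta and every label attached to an object, instead of only the head of each list, could look like this:
/* Sketch: iterate all classifier metas and labels attached to an object,
 * rather than reading only the first entry of each list. */
NvDsMetaList *l_class = NULL, *l_label = NULL;
for (l_class = obj_meta->classifier_meta_list; l_class != NULL; l_class = l_class->next) {
  NvDsClassifierMeta *cmeta = (NvDsClassifierMeta *) l_class->data;
  for (l_label = cmeta->label_info_list; l_label != NULL; l_label = l_label->next) {
    NvDsLabelInfo *linfo = (NvDsLabelInfo *) l_label->data;
    g_print ("gie %u: label %u = %s (prob %f)\n",
        cmeta->unique_component_id, linfo->label_id,
        linfo->result_label, linfo->result_prob);
  }
}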
DashCamNet Configuration:
[property]
gpu-id=0
net-scale-factor=0.00392156862745098
offsets=0.0;0.0;0.0
tlt-model-key=tlt_encode
tlt-encoded-model=models/tao_pretrained_models/dashcamnet/resnet18_dashcamnet_pruned.etlt
labelfile-path=models/tao_pretrained_models/dashcamnet/labels.txt
int8-calib-file=models/tao_pretrained_models/dashcamnet/dashcamnet_int8.txt
model-engine-file=models/tao_pretrained_models/dashcamnet/resnet18_dashcamnet_pruned.etlt_b1_gpu0_int8.engine
infer-dims=3;544;960
uff-input-blob-name=input_1
batch-size=1
process-mode=1
model-color-format=0
## 0=FP32, 1=INT8, 2=FP16 mode
network-mode=1
num-detected-classes=4
interval=0
gie-unique-id=1
output-blob-names=output_cov/Sigmoid;output_bbox/BiasAdd
model-color-format=0
maintain-aspect-ratio=0
output-tensor-meta=0
[class-attrs-all]
pre-cluster-threshold=0.2
group-threshold=1
## Set eps=0.7 and minBoxes for cluster-mode=1(DBSCAN)
eps=0.2
#minBoxes=3
VehicleMakeNet Configuration:
[property]
batch-size=4
classifier-threshold=0.95
gie-unique-id=4
gpu-id=0
input-dims=3;224;224;0
int8-calib-file=models/VehicleMake/vehiclemakenet_int8.txt
labelfile-path=models/VehicleMake/labels_vehiclemakenet.txt
model-color-format=0
model-engine-file=models/VehicleMake/resnet18_vehiclemakenet_pruned.etlt_b4_gpu0_int8.engine
net-scale-factor=1
## 0=FP32, 1=INT8, 2=FP16 mode
network-mode=1
network-type=1
num-detected-classes=4
offsets=124;117;104
operate-on-gie-id=1
output-blob-names=predictions/Softmax
process-mode=2
tlt-encoded-model=models/VehicleMake/resnet18_vehiclemakenet_pruned.etlt
tlt-model-key=tlt_encode
uff-input-blob-name=input_1
Full code
/*
* Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <gst/gst.h>
#include <glib.h>
#include <stdio.h>
#include "gstnvdsmeta.h"
#include <cuda_runtime_api.h>
#define MAX_DISPLAY_LEN 64
#define PGIE_CLASS_ID_VEHICLE 0
#define PGIE_CLASS_ID_PERSON 2
#define SGIE_CLASS_ID_LP 1
#define SGIE_CLASS_ID_FACE 0
/* Change this to 0 to make the 2nd detector act as a primary(full-frame) detector.
* When set to 1, it will act as secondary(operates on primary detected objects). */
#define SECOND_DETECTOR_IS_SECONDARY 1
/* The muxer output resolution must be set if the input streams will be of
* different resolution. The muxer will scale all the input frames to this
* resolution. */
#define MUXER_OUTPUT_WIDTH 1280
#define MUXER_OUTPUT_HEIGHT 720
/* Muxer batch formation timeout, for e.g. 40 millisec. Should ideally be set
* based on the fastest source's framerate. */
#define MUXER_BATCH_TIMEOUT_USEC 40000
gint frame_number = 0;
gchar pgie_classes_str[4][32] = { "Vehicle", "TwoWheeler", "Person",
"Roadsign"
};
#define PRIMARY_DETECTOR_UID 1
#define SECONDARY_DETECTOR_UID 2
/* nvvidconv_sink_pad_buffer_probe will extract metadata received on nvvideoconvert sink pad
* and update params for drawing rectangle, object information etc. */
static GstPadProbeReturn
nvvidconv_sink_pad_buffer_probe (GstPad * pad, GstPadProbeInfo * info,
gpointer u_data)
{
GstBuffer *buf = (GstBuffer *) info->data;
NvDsObjectMeta *obj_meta = NULL;
guint vehicle_count = 0;
guint person_count = 0;
guint face_count = 0;
guint lp_count = 0;
NvDsMetaList * l_frame = NULL;
NvDsMetaList * l_obj = NULL;
NvDsDisplayMeta *display_meta = NULL;
NvDsClassifierMeta *class_meta = NULL;
NvDsLabelInfo *label_info = NULL;
NvDsBatchMeta *batch_meta = gst_buffer_get_nvds_batch_meta (buf);
for (l_frame = batch_meta->frame_meta_list; l_frame != NULL;
l_frame = l_frame->next) {
NvDsFrameMeta *frame_meta = (NvDsFrameMeta *) (l_frame->data);
int offset = 0;
for (l_obj = frame_meta->obj_meta_list; l_obj != NULL;
l_obj = l_obj->next) {
obj_meta = (NvDsObjectMeta *) (l_obj->data);
/* Check that the object has been detected by the primary detector
* and that the class id is that of vehicles/persons. */
if (obj_meta->unique_component_id == PRIMARY_DETECTOR_UID) {
if (obj_meta->class_id == PGIE_CLASS_ID_VEHICLE)
vehicle_count++;
if (obj_meta->class_id == PGIE_CLASS_ID_PERSON)
person_count++;
}
if (obj_meta->classifier_meta_list) {
class_meta = (NvDsClassifierMeta * )(obj_meta->classifier_meta_list->data);
if (class_meta->label_info_list) {
label_info = (NvDsLabelInfo * )(class_meta->label_info_list->data);
g_print("Result: %s\n", label_info->result_label);
}
}
}
display_meta = nvds_acquire_display_meta_from_pool(batch_meta);
NvOSD_TextParams *txt_params = &display_meta->text_params[0];
display_meta->num_labels = 1;
txt_params->display_text = g_malloc0 (MAX_DISPLAY_LEN);
offset = snprintf(txt_params->display_text, MAX_DISPLAY_LEN, "Person = %d ", person_count);
offset += snprintf(txt_params->display_text + offset , MAX_DISPLAY_LEN, "Vehicle = %d ", vehicle_count);
offset += snprintf(txt_params->display_text + offset , MAX_DISPLAY_LEN, "Face = %d ", face_count);
offset += snprintf(txt_params->display_text + offset , MAX_DISPLAY_LEN, "License Plate = %d ", lp_count);
/* Now set the offsets where the string should appear */
txt_params->x_offset = 10;
txt_params->y_offset = 12;
/* Font , font-color and font-size */
txt_params->font_params.font_name = "Serif";
txt_params->font_params.font_size = 10;
txt_params->font_params.font_color.red = 1.0;
txt_params->font_params.font_color.green = 1.0;
txt_params->font_params.font_color.blue = 1.0;
txt_params->font_params.font_color.alpha = 1.0;
/* Text background color */
txt_params->set_bg_clr = 1;
txt_params->text_bg_clr.red = 0.0;
txt_params->text_bg_clr.green = 0.0;
txt_params->text_bg_clr.blue = 0.0;
txt_params->text_bg_clr.alpha = 1.0;
nvds_add_display_meta_to_frame(frame_meta, display_meta);
}
g_print ("Frame Number = %d Vehicle Count = %d Person Count = %d"
" Face Count = %d License Plate Count = %d\n",
frame_number, vehicle_count, person_count,
face_count, lp_count);
frame_number++;
return GST_PAD_PROBE_OK;
}
static gboolean
bus_call (GstBus * bus, GstMessage * msg, gpointer data)
{
GMainLoop *loop = (GMainLoop *) data;
switch (GST_MESSAGE_TYPE (msg)) {
case GST_MESSAGE_EOS:
g_print ("End of stream\n");
g_main_loop_quit (loop);
break;
case GST_MESSAGE_ERROR:{
gchar *debug;
GError *error;
gst_message_parse_error (msg, &error, &debug);
g_printerr ("ERROR from element %s: %s\n",
GST_OBJECT_NAME (msg->src), error->message);
if (debug)
g_printerr ("Error details: %s\n", debug);
g_free (debug);
g_error_free (error);
g_main_loop_quit (loop);
break;
}
default:
break;
}
return TRUE;
}
int
main (int argc, char *argv[])
{
GMainLoop *loop = NULL;
GstElement *pipeline = NULL, *source = NULL, *h264parser = NULL,
*decoder = NULL, *streammux = NULL, *sink = NULL, *primary_detector = NULL,
*secondary_detector = NULL, *nvvidconv = NULL, *nvosd = NULL;
GstElement *transform = NULL;
GstBus *bus = NULL;
guint bus_watch_id;
GstPad *nvvidconv_sink_pad = NULL;
int current_device = -1;
cudaGetDevice(&current_device);
struct cudaDeviceProp prop;
cudaGetDeviceProperties(&prop, current_device);
/* Check input arguments */
if (argc != 2) {
g_printerr ("Usage: %s <H264 filename>\n", argv[0]);
return -1;
}
/* Standard GStreamer initialization */
gst_init (&argc, &argv);
loop = g_main_loop_new (NULL, FALSE);
/* Create gstreamer elements */
/* Create Pipeline element that will form a connection of other elements */
pipeline = gst_pipeline_new ("pipeline");
/* Source element for reading from the file */
source = gst_element_factory_make ("filesrc", "file-source");
/* Since the data format in the input file is elementary h264 stream,
* we need a h264parser */
h264parser = gst_element_factory_make ("h264parse", "h264-parser");
/* Use nvdec_h264 for hardware accelerated decode on GPU */
decoder = gst_element_factory_make ("nvv4l2decoder", "nvv4l2-decoder");
/* Create nvstreammux instance to form batches from one or more sources. */
streammux = gst_element_factory_make ("nvstreammux", "stream-muxer");
if (!pipeline || !streammux) {
g_printerr ("One element could not be created. Exiting.\n");
return -1;
}
/* Create two nvinfer instances for the two back-to-back detectors */
primary_detector = gst_element_factory_make ("nvinfer", "primary-nvinference-engine1");
secondary_detector = gst_element_factory_make ("nvinfer", "primary-nvinference-engine2");
/* Use convertor to convert from NV12 to RGBA as required by nvosd */
nvvidconv = gst_element_factory_make ("nvvideoconvert", "nvvideo-converter");
/* Create OSD to draw on the converted RGBA buffer */
nvosd = gst_element_factory_make ("nvdsosd", "nv-onscreendisplay");
/* Finally render the osd output */
if(prop.integrated) {
transform = gst_element_factory_make ("nvegltransform", "nvegl-transform");
}
sink = gst_element_factory_make("fakesink", "fake-renderer");
if (!source || !h264parser || !decoder || !primary_detector || !secondary_detector
|| !nvvidconv || !nvosd || !sink) {
g_printerr ("One element could not be created. Exiting.\n");
return -1;
}
if(prop.integrated) {
if(!transform) {
g_printerr ("One tegra element could not be created. Exiting.\n");
return -1;
}
}
/* we set the input filename to the source element */
g_object_set (G_OBJECT (source), "location", argv[1], NULL);
g_object_set (G_OBJECT (streammux), "width", MUXER_OUTPUT_WIDTH, "height",
MUXER_OUTPUT_HEIGHT, "batch-size", 1,
"batched-push-timeout", MUXER_BATCH_TIMEOUT_USEC, NULL);
/* Set the config files for the two detectors. We demonstrate this by using
* the same detector model twice but making them act as vehicle-only and
* person-only detectors by adjusting the bbox confidence thresholds in the
* two seperate config files. */
g_object_set (G_OBJECT (primary_detector), "config-file-path", "dashcamnet_config.txt",
"unique-id", PRIMARY_DETECTOR_UID, NULL);
g_object_set (G_OBJECT (secondary_detector), "config-file-path", "vehicletypenet_sgie_config.txt",
"unique-id", SECONDARY_DETECTOR_UID, "process-mode", 2, NULL);
/* we add a message handler */
bus = gst_pipeline_get_bus (GST_PIPELINE (pipeline));
bus_watch_id = gst_bus_add_watch (bus, bus_call, loop);
gst_object_unref (bus);
/* Set up the pipeline */
/* we add all elements into the pipeline */
if(prop.integrated) {
gst_bin_add_many (GST_BIN (pipeline),
source, h264parser, decoder, streammux, primary_detector, secondary_detector,
nvvidconv, nvosd, transform, sink, NULL);
} else {
gst_bin_add_many (GST_BIN (pipeline),
source, h264parser, decoder, streammux, primary_detector, secondary_detector,
nvvidconv, nvosd, sink, NULL);
}
GstPad *sinkpad, *srcpad;
gchar pad_name_sink[16] = "sink_0";
gchar pad_name_src[16] = "src";
sinkpad = gst_element_get_request_pad (streammux, pad_name_sink);
if (!sinkpad) {
g_printerr ("Streammux request sink pad failed. Exiting.\n");
return -1;
}
srcpad = gst_element_get_static_pad (decoder, pad_name_src);
if (!srcpad) {
g_printerr ("Decoder request src pad failed. Exiting.\n");
return -1;
}
if (gst_pad_link (srcpad, sinkpad) != GST_PAD_LINK_OK) {
g_printerr ("Failed to link decoder to stream muxer. Exiting.\n");
return -1;
}
gst_object_unref (sinkpad);
gst_object_unref (srcpad);
/* we link the elements together */
/* file-source -> h264-parser -> nvh264-decoder ->
* nvinfer -> nvvidconv -> nvosd -> video-renderer */
if (!gst_element_link_many (source, h264parser, decoder, NULL)) {
g_printerr ("Elements could not be linked: 1. Exiting.\n");
return -1;
}
if(prop.integrated) {
if (!gst_element_link_many (streammux, primary_detector, secondary_detector,
nvvidconv, nvosd, transform, sink, NULL)) {
g_printerr ("Elements could not be linked: 2. Exiting.\n");
return -1;
}
} else {
if (!gst_element_link_many (streammux, primary_detector, secondary_detector,
nvvidconv, nvosd, sink, NULL)) {
g_printerr ("Elements could not be linked: 2. Exiting.\n");
return -1;
}
}
/* Lets add probe to get informed of the meta data generated, we add probe to
* the sink pad of the nvvideoconvert element, since by that time, the buffer would have
* had got all the metadata. */
nvvidconv_sink_pad = gst_element_get_static_pad (nvvidconv, "sink");
if (!nvvidconv_sink_pad)
g_print ("Unable to get sink pad\n");
else
gst_pad_add_probe (nvvidconv_sink_pad, GST_PAD_PROBE_TYPE_BUFFER,
nvvidconv_sink_pad_buffer_probe, NULL, NULL);
/* Set the pipeline to "playing" state */
g_print ("Now playing: %s\n", argv[1]);
gst_element_set_state (pipeline, GST_STATE_PLAYING);
/* Wait till pipeline encounters an error or EOS */
g_print ("Running...\n");
g_main_loop_run (loop);
/* Out of the main loop, clean up nicely */
g_print ("Returned, stopping playback\n");
gst_element_set_state (pipeline, GST_STATE_NULL);
g_print ("Deleting pipeline\n");
gst_object_unref (GST_OBJECT (pipeline));
g_source_remove (bus_watch_id);
g_main_loop_unref (loop);
return 0;
}

I think the issue was that the labels.txt file provided for the model needs to be comma-separated, and it doesn't work if the entries are separated by new lines.
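To illustrate what I mean (only "Acura" is taken from my actual file; the other makes are placeholders), the newline-separated layout that did not work for me looked like:
Acura
Audi
BMW
while putting the same entries on a single comma-separated line is what I believe fixed it:
Acura,Audi,BMW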

Related

libx264 Input picture width (640) is greater than stride (0)

I'm trying to encode a series of Cairo surfaces by using libav. Here I initialize AV stuff:
AVStream* video_stream;
AVCodec* vcodec;
gint ret;
/* Setup video container */
avformat_alloc_output_context2(&img->video_format_context, NULL, NULL, filename);
if (img->video_format_context == NULL)
{
img_message(img, TRUE, _("Failed to find a suitable container for %s\n"),filename);
return FALSE;
}
ret = avio_open(&img->video_format_context->pb, filename, AVIO_FLAG_WRITE);
if (ret < 0)
{
img_message(img, TRUE, _("Couldn't write output file %s\n"),filename);
return FALSE;
}
/* Setup video codec */
vcodec = avcodec_find_encoder(codec_id);
if (!vcodec)
{
img_message(img, TRUE, _("Couldn't find any encoder for %s\n"),filename);
return FALSE;
}
/* Create video stream */
video_stream = avformat_new_stream(img->video_format_context, vcodec);
video_stream->id = 0;
if (! video_stream)
{
img_message(img, TRUE, _("Couldn't not allocate video stream\n"));
return FALSE;
}
/* Allocate video encoding context */
img->codec_context = avcodec_alloc_context3(vcodec);
if (! img->codec_context)
{
img_message(img, TRUE, _("Couldn't allocate video enconding context\n"));
return FALSE;
}
/* Setup video enconding context parameters */
img->codec_context->codec_id = codec_id;
img->codec_context->codec_type = AVMEDIA_TYPE_VIDEO;
img->codec_context->width = img->video_size[0];
img->codec_context->height = img->video_size[1];
img->codec_context->sample_aspect_ratio = (struct AVRational) {1, 1};
img->codec_context->pix_fmt = vcodec->pix_fmts[0];
img->codec_context->framerate = av_d2q(frame_rate, INT_MAX);
if (codec_id == AV_CODEC_ID_VP8 || codec_id == AV_CODEC_ID_VP9 || codec_id == AV_CODEC_ID_THEORA || codec_id == AV_CODEC_ID_FLV1 ||
AV_CODEC_ID_MPEG1VIDEO || codec_id == AV_CODEC_ID_MPEG2VIDEO)
img->codec_context->bit_rate = round(bitrate_crf * 1000000);
img->codec_context->time_base = av_inv_q(img->codec_context->framerate);
video_stream->time_base = img->codec_context->time_base;
if (img->video_format_context->oformat->flags & AVFMT_GLOBALHEADER)
img->codec_context->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
/* Some codecs require the CRF value */
if (codec_id == AV_CODEC_ID_H264 || codec_id == AV_CODEC_ID_H265)
{
gchar *crf = g_strdup_printf("%i", bitrate_crf);
av_opt_set(img->codec_context->priv_data, "crf", crf, AV_OPT_SEARCH_CHILDREN);
g_free(crf);
}
/* Set exporting stage to be multithreaded */
AVDictionary* opts = NULL;
av_dict_set(&opts, "threads", "auto", 0);
/* Open video encoder */
ret = avcodec_open2(img->codec_context, vcodec, &opts);
if (ret < 0)
{
img_message(img, TRUE, _("Failed to open the video encoder\n"));
return FALSE;
}
/* Copy video encoder parameters to output stream */
ret = avcodec_parameters_from_context(video_stream->codecpar, img->codec_context);
if (ret < 0)
{
img_message(img, TRUE, _("Failed to copy video encoder parameters to output stream\n"));
return FALSE;
}
/* AVFRAME stuff */
img->video_frame = av_frame_alloc();
img->video_frame->format = AV_PIX_FMT_RGBA;
img->video_frame->width = img->video_size[0];
img->video_frame->height = img->video_size[1];
av_frame_make_writable(img->video_frame);
ret = av_frame_get_buffer(img->video_frame, 1);
if (ret < 0)
img_message(img,TRUE, _("Could not allocate the video frame data\n"));
img->video_packet = av_packet_alloc();
And here is the code I call repeatedly (the function is invoked somewhere else); avcodec_send_frame() throws the error in the subject:
gint width, height, stride, row, col, offset;
uint8_t *pix;
/* Image info and pixel data */
width = cairo_image_surface_get_width( surface );
height = cairo_image_surface_get_height( surface );
stride = cairo_image_surface_get_stride( surface );
pix = cairo_image_surface_get_data( surface );
for( row = 0; row < height; row++ )
{
for( col = 0; col < width; col++ )
{
offset = 3 * col + row * img->video_frame->linesize[0];
img->video_frame->data[0][offset + 0] = pix[0];
img->video_frame->data[0][offset + 1] = pix[1];
img->video_frame->data[0][offset + 2] = pix[2];
}
}
img_export_encode_av_frame(img->video_frame, img->video_format_context, img->codec_context, img->video_packet);
return TRUE;
}
void img_export_encode_av_frame(AVFrame *frame, AVFormatContext *fmt, AVCodecContext *ctx, AVPacket *pkt)
{
gint ret;
/* send the frame to the encoder */
ret = avcodec_send_frame(ctx, frame);
if (ret < 0)
g_print("Error sending a frame for encoding\n");
while (ret >= 0)
{
ret = avcodec_receive_packet(ctx, pkt);
if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)
return;
else if (ret < 0)
g_print("Error during encoding\n");
av_interleaved_write_frame(fmt, pkt);
av_packet_unref(pkt);
}
}
I googled around but with no luck; it seems I'm the only one trying to encode a Cairo surface. Grepping for the error message in the ffmpeg sources didn't help either. How do I set the stride? I read that ffmpeg does it for me once I allocate the buffer for the frame, but in my case it seems it doesn't. Where am I wrong?
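For reference, a minimal stride-aware copy sketch (hypothetical helper, assuming a CAIRO_FORMAT_ARGB32 surface and an RGBA AVFrame whose buffer and linesize were already set up by av_frame_get_buffer(); the ARGB/RGBA channel-order difference is ignored here):
#include <string.h>   /* memcpy */
/* Sketch: copy a Cairo surface into an AVFrame row by row, honouring both
 * the Cairo stride and the AVFrame linesize. */
static void copy_surface_to_frame(cairo_surface_t *surface, AVFrame *frame)
{
    uint8_t *src = cairo_image_surface_get_data(surface);
    int src_stride = cairo_image_surface_get_stride(surface);
    int width = cairo_image_surface_get_width(surface);
    int height = cairo_image_surface_get_height(surface);
    int row;
    for (row = 0; row < height; row++) {
        memcpy(frame->data[0] + row * frame->linesize[0],
               src + row * src_stride,
               (size_t) width * 4);
    }
}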

How to use Filesink in Deepstream test application to save video

Hello everyone, my question is related to the NVIDIA DeepStream 5.0 SDK.
I am trying to run the sample app deepstream-nvdsanalytics-test, which is in "/source/apps/sample_apps" in the NVIDIA DeepStream container. I want to save the output video to a file using filesink. I was given the suggestion to look into the create_encode_file_bin function in "/source/apps/apps-common/src/deepstream_sink_bin.c".
I tried changing the code in deepstream_nvdsanalytics_test.cpp, taking create_encode_file_bin as a reference, but got some errors. I am posting my pipeline, the edited code and the error; please have a look.
Pipeline used:
pgie -> nvtracker -> nvdsanalytics -> tiler -> nvvidconv -> nvosd -> nvvideoconvert -> capsfilter (video/x-raw) -> encoder -> codecparse -> mux -> filesink
Error:
(deepstream-nvdsanalytics-test:203): GStreamer-WARNING **: 16:08:13.115: Name ‘nvvideo-converter’ is not unique in bin ‘nvdsanalytics-test-pipeline’, not adding
(deepstream-nvdsanalytics-test:203): GStreamer-CRITICAL **: 16:08:13.116: gst_element_link_pads_full: assertion ‘GST_IS_ELEMENT (dest)’ failed
Elements could not be linked. Exiting.
Code:
#include <gst/gst.h>
#include <glib.h>
#include <stdio.h>
#include <math.h>
#include <string.h>
#include <sys/time.h>
#include <iostream>
#include <vector>
#include <unordered_map>
#include "gstnvdsmeta.h"
#include "nvds_analytics_meta.h"
#include "deepstream_config.h"
#ifndef PLATFORM_TEGRA
#include "gst-nvmessage.h"
#endif
[....]
int
main (int argc, char *argv[])
{
GMainLoop *loop = NULL;
GstElement *pipeline = NULL, *streammux = NULL, *sink = NULL, *pgie = NULL,
*nvtracker = NULL, *nvdsanalytics = NULL,
*nvvidconv = NULL, *nvosd = NULL, *nvvidconv1 = NULL, *transform1 = NULL, *cap_filter = NULL, *encoder = NULL, *codecparse = NULL, *mux = NULL, *tiler = NULL;
GstCaps *caps = NULL;
#ifdef PLATFORM_TEGRA
GstElement *transform = NULL;
#endif
GstBus *bus = NULL;
guint bus_watch_id;
GstPad *nvdsanalytics_src_pad = NULL;
guint i, num_sources;
guint tiler_rows, tiler_columns;
guint pgie_batch_size;
gulong bitrate = 2000000;
guint profile = 0;
/* Check input arguments */
if (argc < 2) {
g_printerr ("Usage: %s <uri1> [uri2] ... [uriN] \n", argv[0]);
return -1;
}
num_sources = argc - 1;
/* Standard GStreamer initialization */
gst_init (&argc, &argv);
loop = g_main_loop_new (NULL, FALSE);
/* Create gstreamer elements */
/* Create Pipeline element that will form a connection of other elements */
pipeline = gst_pipeline_new ("nvdsanalytics-test-pipeline");
/* Create nvstreammux instance to form batches from one or more sources. */
streammux = gst_element_factory_make ("nvstreammux", "stream-muxer");
if (!pipeline || !streammux) {
g_printerr ("One element could not be created. Exiting.\n");
return -1;
}
gst_bin_add (GST_BIN (pipeline), streammux);
for (i = 0; i < num_sources; i++) {
GstPad *sinkpad, *srcpad;
gchar pad_name[16] = { };
GstElement *source_bin = create_source_bin (i, argv[i + 1]);
if (!source_bin) {
g_printerr ("Failed to create source bin. Exiting.\n");
return -1;
}
gst_bin_add (GST_BIN (pipeline), source_bin);
g_snprintf (pad_name, 15, "sink_%u", i);
sinkpad = gst_element_get_request_pad (streammux, pad_name);
if (!sinkpad) {
g_printerr ("Streammux request sink pad failed. Exiting.\n");
return -1;
}
srcpad = gst_element_get_static_pad (source_bin, "src");
if (!srcpad) {
g_printerr ("Failed to get src pad of source bin. Exiting.\n");
return -1;
}
if (gst_pad_link (srcpad, sinkpad) != GST_PAD_LINK_OK) {
g_printerr ("Failed to link source bin to stream muxer. Exiting.\n");
return -1;
}
gst_object_unref (srcpad);
gst_object_unref (sinkpad);
}
/* Use nvinfer to infer on batched frame. */
pgie = gst_element_factory_make ("nvinfer", "primary-nvinference-engine");
/* Use nvtracker to track detections on batched frame. */
nvtracker = gst_element_factory_make ("nvtracker", "nvtracker");
/* Use nvdsanalytics to perform analytics on object */
nvdsanalytics = gst_element_factory_make ("nvdsanalytics", "nvdsanalytics");
/* Use nvtiler to composite the batched frames into a 2D tiled array based
* on the source of the frames. */
tiler = gst_element_factory_make ("nvmultistreamtiler", "nvtiler");
/* Use convertor to convert from NV12 to RGBA as required by nvosd */
nvvidconv = gst_element_factory_make ("nvvideoconvert", "nvvideo-converter");
if (!nvvidconv) {
g_printerr ("nvvdiconv element could not be created. Exiting.\n");
}
/* Create OSD to draw on the converted RGBA buffer */
nvosd = gst_element_factory_make ("nvdsosd", "nv-onscreendisplay");
if (!nvosd) {
g_printerr ("nvosd element could not be created. Exiting.\n");
}
/* converter to convert RGBA to NV12 */
nvvidconv1 = gst_element_factory_make ("nvvideoconvert", "nvvideo-converter1");
if (!nvvidconv1) {
g_printerr ("nvvidconv1 element could not be created. Exiting.\n");
}
/*create cap_filter */
cap_filter = gst_element_factory_make (NVDS_ELEM_CAPS_FILTER, "cap_filter");
if (!cap_filter) {
g_printerr ("cap_filter element could not be created. Exiting.\n");
}
/* create cap for filter */
caps = gst_caps_from_string ("video/x-raw, format=I420");
g_object_set (G_OBJECT (cap_filter), "caps", caps, NULL);
/* creatge encoder*/
encoder = gst_element_factory_make (NVDS_ELEM_ENC_H264_HW, "encoder");
if (!encoder) {
g_printerr ("encoder element could not be created. Exiting.\n");
}
/* create transform1 */
transform1 = gst_element_factory_make (NVDS_ELEM_VIDEO_CONV, "transform1");
g_object_set (G_OBJECT (transform1), "gpu-id", 0, NULL);
if (!transform1) {
g_printerr ("transform1 element could not be created. Exiting.\n");
}
#ifdef IS_TEGRA
g_object_set (G_OBJECT (encoder), "bufapi-version", 1, NULL);
#endif
g_object_set (G_OBJECT (encoder), "profile", profile, NULL);
g_object_set (G_OBJECT (encoder), "bitrate", bitrate, NULL);
/* create codecparse */
codecparse = gst_element_factory_make ("h264parse", "h264-parser");
if (!codecparse) {
g_printerr ("codecparse element could not be created. Exiting.\n");
}
/* create mux */
mux = gst_element_factory_make (NVDS_ELEM_MUX_MP4, "mux");
if (!mux) {
g_printerr ("mux element could not be created. Exiting.\n");
}
/* create sink */
sink = gst_element_factory_make (NVDS_ELEM_SINK_FILE, "filesink");
if (!sink) {
g_printerr ("sink element could not be created. Exiting.\n");
}
g_object_set (G_OBJECT (sink), "location", "capture.mp4", "sync", 0, "async" , FALSE, NULL);
// /* Finally render the osd output */
#ifdef PLATFORM_TEGRA
transform = gst_element_factory_make ("nvegltransform", "nvegl-transform");
#endif
// sink = gst_element_factory_make (NVDS_ELEM_SINK_FILE, "filesink");
// g_object_set (G_OBJECT (sink), "location", "capture.mp4", "sync", 0, "async" , FALSE, NULL);
if (!pgie || !nvtracker || !nvdsanalytics || !nvvidconv ||
!nvosd || !nvvidconv1 || !cap_filter || !encoder || !codecparse || !mux || !sink) {
g_printerr ("One element could not be created. Exiting.\n");
return -1;
}
#ifdef PLATFORM_TEGRA
if(!transform) {
g_printerr ("One tegra element could not be created. Exiting.\n");
return -1;
}
#endif
g_object_set (G_OBJECT (streammux), "width", MUXER_OUTPUT_WIDTH, "height",
MUXER_OUTPUT_HEIGHT, "batch-size", num_sources,
"batched-push-timeout", MUXER_BATCH_TIMEOUT_USEC, NULL);
/* Configure the nvinfer element using the nvinfer config file. */
g_object_set (G_OBJECT (pgie),
"config-file-path", "nvdsanalytics_pgie_config.txt", NULL);
/* Configure the nvtracker element for using the particular tracker algorithm. */
g_object_set (G_OBJECT (nvtracker),
"ll-lib-file", "/opt/nvidia/deepstream/deepstream-5.0/lib/libnvds_nvdcf.so",
"ll-config-file", "tracker_config.yml", "tracker-width", 640, "tracker-height", 480,
NULL);
/* Configure the nvdsanalytics element for using the particular analytics config file*/
g_object_set (G_OBJECT (nvdsanalytics),
"config-file", "config_nvdsanalytics.txt",
NULL);
/* Override the batch-size set in the config file with the number of sources. */
g_object_get (G_OBJECT (pgie), "batch-size", &pgie_batch_size, NULL);
if (pgie_batch_size != num_sources) {
g_printerr
("WARNING: Overriding infer-config batch-size (%d) with number of sources (%d)\n",
pgie_batch_size, num_sources);
g_object_set (G_OBJECT (pgie), "batch-size", num_sources, NULL);
}
tiler_rows = (guint) sqrt (num_sources);
tiler_columns = (guint) ceil (1.0 * num_sources / tiler_rows);
/* we set the tiler properties here */
g_object_set (G_OBJECT (tiler), "rows", tiler_rows, "columns", tiler_columns,
"width", TILED_OUTPUT_WIDTH, "height", TILED_OUTPUT_HEIGHT, NULL);
/* we add a message handler */
bus = gst_pipeline_get_bus (GST_PIPELINE (pipeline));
bus_watch_id = gst_bus_add_watch (bus, bus_call, loop);
gst_object_unref (bus);
/* Set up the pipeline */
/* we add all elements into the pipeline */
#ifdef PLATFORM_TEGRA
gst_bin_add_many (GST_BIN (pipeline), pgie, nvtracker, nvdsanalytics ,
nvvidconv, nvosd, nvvidconv1, cap_filter, encoder, codecparse, mux, sink,
NULL);
/* we link the elements together
* nvstreammux -> nvinfer -> nvtracker -> nvdsanalytics -> nvtiler ->
* nvvideoconvert -> nvosd -> transform -> sink
*/
if (!gst_element_link_many (streammux, pgie, nvtracker, nvdsanalytics,
nvvidconv, nvosd, nvvidconv1, cap_filter, encoder, codecparse, mux, sink, NULL)) {
g_printerr ("Elements could not be linked. Exiting.\n");
return -1;
}
#else
gst_bin_add_many (GST_BIN (pipeline), pgie, nvtracker, nvdsanalytics,
nvvidconv, nvosd, nvvidconv1, cap_filter, encoder, codecparse, mux, sink, NULL);
/* we link the elements together
* nvstreammux -> nvinfer -> nvtracker -> nvdsanalytics -> nvtiler ->
* nvvideoconvert -> nvosd -> sink
*/
if (!gst_element_link_many (streammux, pgie, nvtracker, nvdsanalytics,
nvvidconv, nvosd, nvvidconv1, cap_filter, encoder, codecparse, mux, sink, NULL)) {
g_printerr ("Elements could not be linked. Exiting.\n");
return -1;
}
#endif
/* Lets add probe to get informed of the meta data generated, we add probe to
* the sink pad of the nvdsanalytics element, since by that time, the buffer
* would have had got all the metadata.
*/
nvdsanalytics_src_pad = gst_element_get_static_pad (nvdsanalytics, "src");
if (!nvdsanalytics_src_pad)
g_print ("Unable to get src pad\n");
else
gst_pad_add_probe (nvdsanalytics_src_pad, GST_PAD_PROBE_TYPE_BUFFER,
nvdsanalytics_src_pad_buffer_probe, NULL, NULL);
/* Set the pipeline to "playing" state */
g_print ("Now playing:");
for (i = 0; i < num_sources; i++) {
g_print (" %s,", argv[i + 1]);
}
g_print ("\n");
gst_element_set_state (pipeline, GST_STATE_PLAYING);
/* Wait till pipeline encounters an error or EOS */
g_print ("Running...\n");
g_main_loop_run (loop);
/* Out of the main loop, clean up nicely */
g_print ("Returned, stopping playback\n");
gst_element_set_state (pipeline, GST_STATE_NULL);
g_print ("Deleting pipeline\n");
gst_object_unref (GST_OBJECT (pipeline));
g_source_remove (bus_watch_id);
g_main_loop_unref (loop);
return 0;
}
Please let me know if any other information is required from my side. Thank you in advance.
Modify the pipeline as per the git diff below (source); note that the diff is for the deepstream-test2 app, but it will work for deepstream-nvdsanalytics-test as well:
diff --git a/deepstream-test2/deepstream_test2_app.c b/deepstream-test2/deepstream_test2_app.c
index 2b1ff34..c31441e 100644
--- a/deepstream-test2/deepstream_test2_app.c
+++ b/deepstream-test2/deepstream_test2_app.c
@@ -318,6 +318,16 @@ main (int argc, char *argv[])
GstBus *bus = NULL;
guint bus_watch_id = 0;
GstPad *osd_sink_pad = NULL;
+
+ /* Added to save output to file */
+ GstElement *nvvidconv1 = NULL,
+ *filter1 = NULL, *filter2 = NULL,
+ *filter3 = NULL,
+ *videoconvert = NULL,
+ *filter4 = NULL,
+ *x264enc = NULL,
+ *qtmux = NULL;
+ GstCaps *caps1 = NULL, *caps2 = NULL, *caps3 = NULL, *caps4 = NULL;
/* Check input arguments */
if (argc != 2) {
@@ -373,17 +383,35 @@ main (int argc, char *argv[])
/* Create OSD to draw on the converted RGBA buffer */
nvosd = gst_element_factory_make ("nvdsosd", "nv-onscreendisplay");
+ /* Added to save output to file */
+ nvvidconv1 = gst_element_factory_make ("nvvideoconvert", "nvvideo-converter1");
+ videoconvert = gst_element_factory_make ("videoconvert", "converter");
+ x264enc = gst_element_factory_make ("x264enc", "h264 encoder");
+ qtmux = gst_element_factory_make ("qtmux", "muxer");
+
/* Finally render the osd output */
#ifdef PLATFORM_TEGRA
transform = gst_element_factory_make ("nvegltransform", "nvegl-transform");
#endif
- sink = gst_element_factory_make ("nveglglessink", "nvvideo-renderer");
+ sink = gst_element_factory_make ("filesink", "nvvideo-renderer");
+
+ /* caps filter for nvvidconv to convert NV12 to RGBA as nvosd expects input
+ * in RGBA format */
+ filter1 = gst_element_factory_make ("capsfilter", "filter1");
+ filter2 = gst_element_factory_make ("capsfilter", "filter2");
+ filter3 = gst_element_factory_make ("capsfilter", "filter3");
+ filter4 = gst_element_factory_make ("capsfilter", "filter4");
if (!source || !h264parser || !decoder || !pgie ||
!nvtracker || !sgie1 || !sgie2 || !sgie3 || !nvvidconv || !nvosd || !sink) {
g_printerr ("One element could not be created. Exiting.\n");
return -1;
}
+ /* Added to test saving output to file */
+ if (!nvvidconv1 || !x264enc || !qtmux || !filter3 || !filter4) {
+ g_printerr ("One element could not be created. Exiting.\n");
+ return -1;
+ }
#ifdef PLATFORM_TEGRA
if(!transform) {
@@ -395,6 +423,9 @@ main (int argc, char *argv[])
/* Set the input filename to the source element */
g_object_set (G_OBJECT (source), "location", argv[1], NULL);
+ /* Added to save output to file */
+ g_object_set (G_OBJECT (sink), "location", "out.mp4", NULL);
+
g_object_set (G_OBJECT (streammux), "batch-size", 1, NULL);
g_object_set (G_OBJECT (streammux), "width", MUXER_OUTPUT_WIDTH, "height",
@@ -429,9 +460,24 @@ main (int argc, char *argv[])
#else
gst_bin_add_many (GST_BIN (pipeline),
source, h264parser, decoder, streammux, pgie, nvtracker, sgie1, sgie2, sgie3,
- nvvidconv, nvosd, sink, NULL);
+ filter1, nvvidconv, filter2, nvosd, nvvidconv1, filter3, videoconvert, filter4,
+ x264enc, qtmux, sink, NULL);
#endif
+ /* Added to save output to file */
+ caps1 = gst_caps_from_string ("video/x-raw(memory:NVMM), format=NV12");
+ g_object_set (G_OBJECT (filter1), "caps", caps1, NULL);
+ gst_caps_unref (caps1);
+ caps2 = gst_caps_from_string ("video/x-raw(memory:NVMM), format=RGBA");
+ g_object_set (G_OBJECT (filter2), "caps", caps2, NULL);
+ gst_caps_unref (caps2);
+ caps3 = gst_caps_from_string ("video/x-raw, format=RGBA");
+ g_object_set (G_OBJECT (filter3), "caps", caps3, NULL);
+ gst_caps_unref (caps3);
+ caps4 = gst_caps_from_string ("video/x-raw, format=NV12");
+ g_object_set (G_OBJECT (filter4), "caps", caps4, NULL);
+ gst_caps_unref (caps4);
+
GstPad *sinkpad, *srcpad;
gchar pad_name_sink[16] = "sink_0";
gchar pad_name_src[16] = "src";
@@ -470,7 +516,8 @@ main (int argc, char *argv[])
}
#else
if (!gst_element_link_many (streammux, pgie, nvtracker, sgie1,
- sgie2, sgie3, nvvidconv, nvosd, sink, NULL)) {
+ sgie2, sgie3, filter1, nvvidconv, filter2, nvosd, nvvidconv1, filter3,
+ videoconvert, filter4, x264enc, qtmux, sink, NULL)) {
g_printerr ("Elements could not be linked. Exiting.\n");
return -1;
}
For deepstream-nvdsanalytics-test, the pipeline would be:
gst_bin_add_many (GST_BIN (pipeline), pgie, nvtracker, nvdsanalytics, tiler,
filter1, nvvidconv, filter2, nvosd, nvvidconv1, filter3, videoconvert, filter4,
x264enc, qtmux, sink, NULL);
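One note that is not part of the diff above: qtmux only writes the MP4 headers when it receives EOS, so stopping the app with Ctrl-C can leave an unplayable file. A sketch of a clean shutdown that sends EOS and waits for it before setting the pipeline to NULL:
/* Sketch: finalize the MP4 before tearing down the pipeline. */
gst_element_send_event (pipeline, gst_event_new_eos ());
GstBus *shutdown_bus = gst_element_get_bus (pipeline);
GstMessage *msg = gst_bus_timed_pop_filtered (shutdown_bus, GST_CLOCK_TIME_NONE,
    (GstMessageType) (GST_MESSAGE_EOS | GST_MESSAGE_ERROR));
if (msg)
  gst_message_unref (msg);
gst_object_unref (shutdown_bus);
gst_element_set_state (pipeline, GST_STATE_NULL);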

libavformat/libavcodec providing invalid container header

I'm using libavcodec to encode a stream to h264 and libavformat to store it in an mp4. The resulting container has an invalid header; the file can be played in VLC but not in any other player.
I've found that using the mp4 container and the "mpeg4" codec produces a valid mp4 file, but using libx265 (HEVC) or the libx264 codec produces invalid mp4s.
I can use ffmpeg -i invalid.mp4 -vcodec copy valid.mp4 and I get a file of almost the exact same size, but in a valid container.
Examples of these files are here: Broken file and
Repaired file [use the download links in the upper right to examine].
I used a hex editor to see the differences in the headers of the two files and the invalid one is 1 byte smaller than the valid one.
The code I'm using to open the container and codec and to write the header is here:
AVOutputFormat *container_format;
AVFormatContext *container_format_context;
AVStream *video_stream;
int ret;
/* allocate the output media context */
avformat_alloc_output_context2(&container_format_context, NULL, NULL, out_file);
if (!container_format_context) {
log(INFO, "Unable to determine container format from filename, exiting\n");
exit(1);
}
else {
log(INFO, "Using container %s\n", container_format_context->oformat->name);
}
if (!container_format_context) {
log(ERROR, "Could not build container format context. Encoding failed.");
exit(1);
}
container_format = container_format_context->oformat;
/* Pull codec based on name */
AVCodec* codec = avcodec_find_encoder_by_name(codec_name);
if (codec == NULL) {
log(ERROR, "Failed to locate codec \"%s\".",
codec_name);
exit(1);
}
/* create stream */
video_stream = NULL;
video_stream = avformat_new_stream(container_format_context, codec);
if (!video_stream) {
log(ERROR, "Could not allocate encoder stream. Cannot continue.\n");
exit(1);
}
video_stream->id = container_format_context->nb_streams - 1;
video_stream->time_base = video_stream->codec->time_base = (AVRational) { 1, 25};
av_dump_format(container_format_context, 0, out_file, 1);
/* Retrieve encoding context */
AVCodecContext* avcodec_context = video_stream->codec;
if (avcodec_context == NULL) {
log(ERROR, "Failed to allocate context for "
"codec \"%s\".", codec_name);
exit(1);
}
/* Init context with encoding parameters */
avcodec_context->bit_rate = bitrate;
avcodec_context->width = width;
avcodec_context->height = height;
avcodec_context->gop_size = 10;
avcodec_context->max_b_frames = 1;
avcodec_context->qmax = 31;
avcodec_context->qmin = 2;
avcodec_context->pix_fmt = AV_PIX_FMT_YUV420P;
av_dump_format(container_format_context, 0, out_file, 1);
/* Open codec for use */
if (avcodec_open2(avcodec_context, codec, NULL) < 0) {
log(ERROR, "Failed to open codec \"%s\".", codec_name);
exit(1);
}
/* Allocate corresponding frame */
AVFrame* frame = av_frame_alloc();
if (frame == NULL) {
exit(1);
}
/* Copy necessary data for frame from avcodec_context */
frame->format = avcodec_context->pix_fmt;
frame->width = avcodec_context->width;
frame->height = avcodec_context->height;
/* Allocate actual backing data for frame */
if (av_image_alloc(frame->data, frame->linesize, frame->width,
frame->height, frame->format, 32) < 0) {
exit(1);
}
/* open the output file, if the container needs it */
if (!(container_format->flags & AVFMT_NOFILE)) {
ret = avio_open(&container_format_context->pb, out_file, AVIO_FLAG_WRITE);
if (ret < 0) {
log(ERROR, "Error occurred while opening output file: %s\n",
av_err2str(ret));
exit(1);
}
}
/* write the stream header, if needed */
ret = avformat_write_header(container_format_context, NULL);
if (ret < 0) {
log(ERROR, "Error occurred while writing output file header: %s\n",
av_err2str(ret));
}
The code to encode a frame is here:
/* Init video packet */
AVPacket packet;
av_init_packet(&packet);
/* Request that encoder allocate data for packet */
packet.data = NULL;
packet.size = 0;
/* Write frame to video */
int got_data;
if (avcodec_encode_video2(avcontext, &packet, frame, &got_data) < 0) {
log(WARNING, "Error encoding frame #%" PRId64,
video_struct->next_pts);
return -1;
}
/* Write corresponding data to file */
if (got_data) {
if (packet.pts != AV_NOPTS_VALUE) {
packet.pts = av_rescale_q(packet.pts, video_struct->output_stream->codec->time_base, video_struct->output_stream->time_base);
}
if (packet.dts != AV_NOPTS_VALUE) {
packet.dts = av_rescale_q(packet.dts, video_struct->output_stream->codec->time_base, video_struct->output_stream->time_base);
}
write_packet(video_struct, &packet, packet.size);
av_packet_unref(&packet);
}
And the code to write the packet to the video stream:
static int write_packet(video_struct* video, void* data, int size) {
int ret;
/* use AVStream is not null, otherwise write to output fd */
AVPacket *pkt = (AVPacket*) data;
pkt->stream_index = video->output_stream->index;
ret = av_interleaved_write_frame(video->container_format_context, pkt);
if (ret != 0) {
return -1;
}
/* Data was written successfully */
return ret;
}
Solved this issue. The problem was that I wasn't setting the global-header flag on the codec context when the container required it. While assigning properties like height, width, bit rate and so forth to the avcodec_context, I added
if (container_format_context->oformat->flags & AVFMT_GLOBALHEADER) {
avcodec_context->flags |= CODEC_FLAG_GLOBAL_HEADER;
}
which seems to have fixed the issue.
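For anyone on a newer FFmpeg: the CODEC_FLAG_GLOBAL_HEADER name has since been replaced by AV_CODEC_FLAG_GLOBAL_HEADER, so the equivalent check becomes:
if (container_format_context->oformat->flags & AVFMT_GLOBALHEADER) {
    avcodec_context->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
}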

Convert from NV12 to RGB/YUV420P using libswscale

I'm developing an application which needs to transform NV12 frames from the h264_cuvid decoder to RGB in order to modify those frames. I checked this question, but I don't know the 'Stride' value.
My code is the following:
uint8_t *inData[2] = { videoFrame->data[0], videoFrame->data[0] + videoFrame->width * videoFrame->height };
int inLinesize[2] = { videoFrame->width, videoFrame->width };
sws_scale(convert_yuv_to_rgb, inData, inLinesize, 0, videoFrame->height, aux_frame->data, aux_frame->linesize);
But it does not work. The problem seems to be with the colours, because I can see the luminance plane correctly.
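For what it's worth, a decoded AVFrame already carries its plane strides in videoFrame->linesize, so a sketch that passes the decoder's own pointers and strides straight to sws_scale (assuming convert_yuv_to_rgb was created with AV_PIX_FMT_NV12 as the source format) would be:
/* Sketch: NV12 has the luma plane in data[0] and the interleaved UV plane
 * in data[1]; use the frame's own linesize instead of hand-built strides. */
sws_scale(convert_yuv_to_rgb,
          (const uint8_t * const *) videoFrame->data, videoFrame->linesize,
          0, videoFrame->height,
          aux_frame->data, aux_frame->linesize);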
I ended up using a video filter based on this example.
char args[512];
int ret;
AVFilter *buffersrc = avfilter_get_by_name("buffer");
AVFilter *buffersink = avfilter_get_by_name("buffersink");
AVFilterInOut *outputs = avfilter_inout_alloc();
AVFilterInOut *inputs = avfilter_inout_alloc();
AVFilterGraph *filter_graph = avfilter_graph_alloc();
AVBufferSinkParams *buffersink_params;
enum AVPixelFormat pix_fmts[] = { AV_PIX_FMT_RGB32, AV_PIX_FMT_NONE };
/* buffer video source: the decoded frames from the decoder will be inserted here. */
snprintf(args, sizeof(args),
"video_size=%dx%d:pix_fmt=%d:time_base=%d/%d:pixel_aspect=%d/%d",
inStream.width, inStream.height, inStream.pix_fmt,
inStream.time_base.num, inStream.time_base.den,
inStream.sample_aspect_ratio.num, inStream.sample_aspect_ratio.den);
ret = avfilter_graph_create_filter(&buffersrc_ctx_to_rgb_, buffersrc, "in", args, NULL, filter_graph);
if (ret < 0) {
throw SVSException(QString("Could not create filter graph, error: %1").arg(svsAvErrorToFormattedString(ret)));
}
/* buffer video sink: to terminate the filter chain. */
buffersink_params = av_buffersink_params_alloc();
buffersink_params->pixel_fmts = pix_fmts;
ret = avfilter_graph_create_filter(&buffersink_ctx_to_rgb_, buffersink, "out", NULL, buffersink_params, filter_graph);
if (ret < 0) {
throw SVSException(QString("Cannot create buffer sink, error: %1").arg(svsAvErrorToFormattedString(ret)));
}
/* Endpoints for the filter graph. */
outputs -> name = av_strdup("in");
outputs -> filter_ctx = buffersrc_ctx_to_rgb_;
outputs -> pad_idx = 0;
outputs -> next = NULL;
/* Endpoints for the filter graph. */
inputs -> name = av_strdup("out");
inputs -> filter_ctx = buffersink_ctx_to_rgb_;
inputs -> pad_idx = 0;
inputs -> next = NULL;
QString filter_description = "format=pix_fmts=rgb32";
if ((ret = avfilter_graph_parse_ptr(filter_graph, filter_description.toStdString().c_str(), &inputs, &outputs, NULL)) < 0) {
svsCritical("", QString("Could not add the filter to graph, error: %1").arg(svsAvErrorToFormattedString(ret)))
}
if ((ret = avfilter_graph_config(filter_graph, NULL)) < 0) {
svsCritical("", QString("Could not configure the graph, error: %1").arg(svsAvErrorToFormattedString(ret)))
}
return;
I created another one to convert from RGB to YUV420P before encoding in a similar way.

OpenCL stackoverflow. How to solve it?

I'm having a problem when I try to run the reduction program from the sources of the OpenCL in Action book.
I'm using Visual Studio 2008. This is the error:
Unhandled exception in 0x013526a7 in Reduction.exe: 0xC00000FD: Stack
overflow.
And in the asm file the cursor points to
test dword ptr [eax],eax ; probe page.
I tried to debug it, but when I put a breakpoint in the main function, debugging starts and then the program does not keep running.
I don't know what the real problem is.
These are the source files:
reduction.cpp
#define _CRT_SECURE_NO_WARNINGS
#define PROGRAM_FILE "reduction_complete.cl"
#define ARRAY_SIZE 1048576
#define KERNEL_1 "reduction_vector"
#define KERNEL_2 "reduction_complete"
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#ifdef MAC
#include <OpenCL/cl.h>
#else
#include <CL/cl.h>
#endif
/* Find a GPU or CPU associated with the first available platform */
cl_device_id create_device() {
cl_platform_id platform;
cl_device_id dev;
int err;
/* Identify a platform */
err = clGetPlatformIDs(1, &platform, NULL);
if(err < 0) {
perror("Couldn't identify a platform");
exit(1);
}
/* Access a device */
err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &dev, NULL);
if(err == CL_DEVICE_NOT_FOUND) {
err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_CPU, 1, &dev, NULL);
}
if(err < 0) {
perror("Couldn't access any devices");
exit(1);
}
return dev;
}
/* Create program from a file and compile it */
cl_program build_program(cl_context ctx, cl_device_id dev, const char* filename) {
cl_program program;
FILE *program_handle;
char *program_buffer, *program_log;
size_t program_size, log_size;
int err;
/* Read program file and place content into buffer */
program_handle = fopen(filename, "r");
if(program_handle == NULL) {
perror("Couldn't find the program file");
exit(1);
}
fseek(program_handle, 0, SEEK_END);
program_size = ftell(program_handle);
rewind(program_handle);
program_buffer = (char*)malloc(program_size + 1);
program_buffer[program_size] = '\0';
fread(program_buffer, sizeof(char), program_size, program_handle);
fclose(program_handle);
/* Create program from file */
program = clCreateProgramWithSource(ctx, 1,
(const char**)&program_buffer, &program_size, &err);
if(err < 0) {
perror("Couldn't create the program");
exit(1);
}
free(program_buffer);
/* Build program */
err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
if(err < 0) {
/* Find size of log and print to std output */
clGetProgramBuildInfo(program, dev, CL_PROGRAM_BUILD_LOG,
0, NULL, &log_size);
program_log = (char*) malloc(log_size + 1);
program_log[log_size] = '\0';
clGetProgramBuildInfo(program, dev, CL_PROGRAM_BUILD_LOG,
log_size + 1, program_log, NULL);
printf("%s\n", program_log);
free(program_log);
exit(1);
}
return program;
}
int main() {
/* OpenCL structures */
cl_device_id device;
cl_context context;
cl_program program;
cl_kernel vector_kernel, complete_kernel;
cl_command_queue queue;
cl_event start_event, end_event;
cl_int i, err;
size_t local_size, global_size;
/* Data and buffers */
float data[ARRAY_SIZE];
float sum, actual_sum;
cl_mem data_buffer, sum_buffer;
cl_ulong time_start, time_end, total_time;
/* Initialize data */
for(i=0; i<ARRAY_SIZE; i++) {
data[i] = 1.0f*i;
}
/* Create device and determine local size */
device = create_device();
err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE,
sizeof(local_size), &local_size, NULL);
if(err < 0) {
perror("Couldn't obtain device information");
exit(1);
}
/* Create a context */
context = clCreateContext(NULL, 1, &device, NULL, NULL, &err);
if(err < 0) {
perror("Couldn't create a context");
exit(1);
}
/* Build program */
program = build_program(context, device, PROGRAM_FILE);
/* Create data buffer */
data_buffer = clCreateBuffer(context, CL_MEM_READ_WRITE |
CL_MEM_USE_HOST_PTR, ARRAY_SIZE * sizeof(float), data, &err);
sum_buffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY,
sizeof(float), NULL, &err);
if(err < 0) {
perror("Couldn't create a buffer");
exit(1);
};
/* Create a command queue */
queue = clCreateCommandQueue(context, device,
CL_QUEUE_PROFILING_ENABLE, &err);
if(err < 0) {
perror("Couldn't create a command queue");
exit(1);
};
/* Create kernels */
vector_kernel = clCreateKernel(program, KERNEL_1, &err);
complete_kernel = clCreateKernel(program, KERNEL_2, &err);
if(err < 0) {
perror("Couldn't create a kernel");
exit(1);
};
/* Set arguments for vector kernel */
err = clSetKernelArg(vector_kernel, 0, sizeof(cl_mem), &data_buffer);
err |= clSetKernelArg(vector_kernel, 1, local_size * 4 * sizeof(float), NULL);
/* Set arguments for complete kernel */
err = clSetKernelArg(complete_kernel, 0, sizeof(cl_mem), &data_buffer);
err |= clSetKernelArg(complete_kernel, 1, local_size * 4 * sizeof(float), NULL);
err |= clSetKernelArg(complete_kernel, 2, sizeof(cl_mem), &sum_buffer);
if(err < 0) {
perror("Couldn't create a kernel argument");
exit(1);
}
/* Enqueue kernels */
global_size = ARRAY_SIZE/4;
err = clEnqueueNDRangeKernel(queue, vector_kernel, 1, NULL, &global_size,
&local_size, 0, NULL, &start_event);
if(err < 0) {
perror("Couldn't enqueue the kernel");
exit(1);
}
printf("Global size = %zu\n", global_size);
/* Perform successive stages of the reduction */
while(global_size/local_size > local_size) {
global_size = global_size/local_size;
err = clEnqueueNDRangeKernel(queue, vector_kernel, 1, NULL, &global_size,
&local_size, 0, NULL, NULL);
printf("Global size = %zu\n", global_size);
if(err < 0) {
perror("Couldn't enqueue the kernel");
exit(1);
}
}
global_size = global_size/local_size;
err = clEnqueueNDRangeKernel(queue, complete_kernel, 1, NULL, &global_size,
NULL, 0, NULL, &end_event);
printf("Global size = %zu\n", global_size);
/* Finish processing the queue and get profiling information */
clFinish(queue);
clGetEventProfilingInfo(start_event, CL_PROFILING_COMMAND_START,
sizeof(time_start), &time_start, NULL);
clGetEventProfilingInfo(end_event, CL_PROFILING_COMMAND_END,
sizeof(time_end), &time_end, NULL);
total_time = time_end - time_start;
/* Read the result */
err = clEnqueueReadBuffer(queue, sum_buffer, CL_TRUE, 0,
sizeof(float), &sum, 0, NULL, NULL);
if(err < 0) {
perror("Couldn't read the buffer");
exit(1);
}
/* Check result */
actual_sum = 1.0f * (ARRAY_SIZE/2)*(ARRAY_SIZE-1);
if(fabs(sum - actual_sum) > 0.01*fabs(sum))
printf("Check failed.\n");
else
printf("Check passed.\n");
printf("Total time = %lu\n", total_time);
/* Deallocate resources */
clReleaseEvent(start_event);
clReleaseEvent(end_event);
clReleaseMemObject(sum_buffer);
clReleaseMemObject(data_buffer);
clReleaseKernel(vector_kernel);
clReleaseKernel(complete_kernel);
clReleaseCommandQueue(queue);
clReleaseProgram(program);
clReleaseContext(context);
return 0;
}
reduction_complete.cl
__kernel void reduction_vector(__global float4* data,
__local float4* partial_sums) {
int lid = get_local_id(0);
int group_size = get_local_size(0);
partial_sums[lid] = data[get_global_id(0)];
barrier(CLK_LOCAL_MEM_FENCE);
for(int i = group_size/2; i>0; i >>= 1) {
if(lid < i) {
partial_sums[lid] += partial_sums[lid + i];
}
barrier(CLK_LOCAL_MEM_FENCE);
}
if(lid == 0) {
data[get_group_id(0)] = partial_sums[0];
}
}
__kernel void reduction_complete(__global float4* data,
__local float4* partial_sums, __global float* sum) {
int lid = get_local_id(0);
int group_size = get_local_size(0);
partial_sums[lid] = data[get_local_id(0)];
barrier(CLK_LOCAL_MEM_FENCE);
for(int i = group_size/2; i>0; i >>= 1) {
if(lid < i) {
partial_sums[lid] += partial_sums[lid + i];
}
barrier(CLK_LOCAL_MEM_FENCE);
}
if(lid == 0) {
*sum = partial_sums[0].s0 + partial_sums[0].s1 +
partial_sums[0].s2 + partial_sums[0].s3;
}
}
I don't know what causes the stack overflow...
I don't see any recursion, so my guess is the float data[ARRAY_SIZE]; with #define ARRAY_SIZE 1048576 that puts 4 MB on the stack, which is pretty large. Try changing it to a dynamic allocation.
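A minimal sketch of that change (heap allocation instead of a 4 MB stack array; error handling kept short):
/* Sketch: allocate the input data on the heap instead of the stack. */
float *data = (float *) malloc(ARRAY_SIZE * sizeof(float));
if (data == NULL) {
    perror("Couldn't allocate the data array");
    exit(1);
}
/* ... initialize and use data exactly as before (clCreateBuffer with
 * CL_MEM_USE_HOST_PTR, etc.), then free it at the end ... */
free(data);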
