What is the purpose of parameter "step_between_clips" in torchvision.datasets.UCF101() - torchvision

I couldn't understand the purpose of "step_between_clips"
torchvision.datasets.UCF101(root: str, annotation_path: str, frames_per_clip: int, step_between_clips: int = 1, frame_rate: Optional[int] = None, fold: int = 1, train: bool = True, transform: Optional[Callable] = None, _precomputed_metadata: Optional[Dict[str, Any]] = None, num_workers: int = 1, _video_width: int = 0, _video_height: int = 0, _video_min_dimension: int = 0, _audio_samples: int = 0, output_format: str = 'THWC')
Can someone explain a bit?
Thanks.

Related

Efficiently synchronously queue many small OpenCL kernels

TLDR: How can I run many small kernels, one at a time, without significant overhead?
I'm working on a project that acts as a virtual green screen. It takes in an image feed, looks for pixels similar to a color key, and replaces those pixels with a replacement color. I plan to output the resulting image feed as a virtual webcam in Windows. The full source code is on Github. Currently, I'm using OpenCL bindings in Java (JOCL) to accelerate the process. The main application is written in JavaFX with Kotlin, which I'm comfortable with, but the OpenCL kernels are written in C, which I'm new to.
This is the main "API" I created for the program. I tried making the API interface relatively open so I can add direct Cuda support in the future.
/**
 * Thin OpenCL wrapper built on JOCL: discovers a platform/device, builds the
 * program from the bundled .cl sources, and hands out buffers and a queue.
 *
 * @param platformIndex index into the platform list shown by [getPlatformsMap]
 * @param deviceIndex   index into the device list shown by [getDevicesMap]
 * @param localWorkSize optional work-group size; null lets the driver choose
 */
class OpenClApi constructor(
    platformIndex: Int = 0,
    deviceIndex: Int = 0,
    val localWorkSize: Long? = null
) : AbstractApi {
    companion object : AbstractApi.AbstractApiConsts {
        override val listName = "OpenCl"

        /** Buffer-allocation modes mapped to the OpenCL memory-flag bits they use. */
        enum class ClMemOperation(val flags: Long) {
            // CL_MEM_USE_HOST_PTR instead of CL_MEM_COPY_HOST_PTR speeds up most operations for realtime video
            READ(CL_MEM_READ_ONLY or CL_MEM_USE_HOST_PTR),
            WRITE(CL_MEM_WRITE_ONLY)
        }

        /** Returns every OpenCL platform visible to the driver (two-call size-query pattern). */
        private fun getPlatforms(): Array<cl_platform_id?> {
            val numPlatformsArray = IntArray(1)
            clGetPlatformIDs(0, null, numPlatformsArray)
            val platforms = arrayOfNulls<cl_platform_id>(numPlatformsArray[0])
            clGetPlatformIDs(platforms.size, platforms, null)
            return platforms
        }

        /** Platform at [platformId], or an exception with a readable message. */
        private fun getPlatform(platformId: Int) = getPlatforms().getOrNull(platformId)
            ?: throw ArrayIndexOutOfBoundsException("Couldn't find the specified platform")

        /** Maps platform index to its human-readable name, for UI listings. */
        fun getPlatformsMap(): Map<Int, String> {
            val platforms = getPlatforms()
            val result = mutableMapOf<Int, String>()
            for (platformId in platforms.indices) {
                val platformFromList = platforms[platformId]
                // Query the name length first, then fetch the bytes.
                val size = LongArray(1)
                clGetPlatformInfo(platformFromList, CL_PLATFORM_NAME, 0, null, size)
                val buffer = ByteArray(size[0].toInt())
                clGetPlatformInfo(platformFromList, CL_PLATFORM_NAME, buffer.size.toLong(), Pointer.to(buffer), null)
                // Drop the trailing NUL terminator OpenCL includes in the size.
                result[platformId] = String(buffer, 0, buffer.size - 1)
            }
            return result
        }

        /** Returns every device of the given platform. */
        private fun getDevices(platformId: Int): Array<cl_device_id?> {
            val platform = getPlatform(platformId)
            val numDevicesArray = IntArray(1)
            clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 0, null, numDevicesArray)
            val numDevices = numDevicesArray[0]
            val devices = arrayOfNulls<cl_device_id>(numDevices)
            clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, numDevices, devices, null)
            return devices
        }

        /** Device [deviceId] of platform [platformId], or an exception with a readable message. */
        private fun getDevice(platformId: Int, deviceId: Int) = getDevices(platformId).getOrNull(deviceId)
            ?: throw ArrayIndexOutOfBoundsException("Couldn't find the specified platform or device")

        /** Maps device index to its human-readable name, for UI listings. */
        fun getDevicesMap(platformId: Int): Map<Int, String> {
            val devices = getDevices(platformId)
            val result = mutableMapOf<Int, String>()
            for (deviceId in devices.indices) {
                val deviceFromList = devices[deviceId]
                val size = LongArray(1)
                clGetDeviceInfo(deviceFromList, CL_DEVICE_NAME, 0, null, size)
                val buffer = ByteArray(size[0].toInt())
                clGetDeviceInfo(deviceFromList, CL_DEVICE_NAME, buffer.size.toLong(), Pointer.to(buffer), null)
                result[deviceId] = String(buffer, 0, buffer.size - 1)
            }
            return result
        }
    }

    private val platform: cl_platform_id = getPlatform(platformIndex)
    private val device: cl_device_id = getDevice(platformIndex, deviceIndex)
    private val contextProperties: cl_context_properties = cl_context_properties()
    private val context: cl_context
    val commandQueue: cl_command_queue
    val program: cl_program

    init {
        setExceptionsEnabled(true)
        // BUG FIX: the platform must be registered on contextProperties BEFORE
        // clCreateContext is called. Previously the context was created in a
        // property initializer, which runs before this init block, so the
        // CL_CONTEXT_PLATFORM property was added too late to have any effect.
        contextProperties.addProperty(CL_CONTEXT_PLATFORM.toLong(), platform)
        context = clCreateContext(contextProperties, 1, arrayOf(device), null, null, null)
        val properties = cl_queue_properties()
        commandQueue = clCreateCommandQueueWithProperties(context, device, properties, null)
        // Compile all bundled kernel sources into a single program object.
        val sources = arrayOf(
            "Util",
            "InitialComparison",
            "NoiseReduction",
            "FlowKey",
            "Splash",
            "SplashPrep"
        ).map {
            this::class.java.getResource("$it.cl")!!.readText()
        }.toTypedArray()
        program = clCreateProgramWithSource(context, sources.size, sources, null, null)
        clBuildProgram(program, 0, null, null, null, null)
    }

    /** All filters backed by this API instance, keyed by display name. */
    override fun getFilters(): Map<String, AbstractFilter> = mapOf(
        OpenClInitialComparisonFilter.listName to OpenClInitialComparisonFilter(api = this),
        OpenClNoiseReductionFilter.listName to OpenClNoiseReductionFilter(api = this),
        OpenClFlowKeyFilter.listName to OpenClFlowKeyFilter(api = this),
        OpenClSplashFilter.listName to OpenClSplashFilter(api = this),
    )

    /** Releases OpenCL objects in reverse order of creation. */
    override fun close() {
        clReleaseProgram(program)
        clReleaseCommandQueue(commandQueue)
        clReleaseContext(context)
    }

    /**
     * Allocates a device buffer of [size] bytes. READ buffers alias the host
     * memory behind [ptr] (CL_MEM_USE_HOST_PTR); pass null for WRITE buffers.
     */
    fun allocMem(ptr: Pointer?, op: ClMemOperation, size: Int): cl_mem = clCreateBuffer(
        context,
        op.flags,
        size.toLong(),
        ptr,
        null
    )
}
This is an example "Filter" that processes a frame using an API instance.
/**
 * Replaces every pixel within [percentTolerance] of [colorKey] with
 * [replacementKey] by running the "initialComparisonKernel" on the GPU.
 *
 * BUG FIX: the original used `#Suppress(...)`, which is not valid Kotlin —
 * annotations use `@`; as pasted, the class would not compile.
 */
class OpenClInitialComparisonFilter @Suppress("LongParameterList") constructor(
    private val api: OpenClApi,
    var colorKey: ByteArray = byteArrayOf(0, 255.toByte(), 0),
    var replacementKey: ByteArray = byteArrayOf(0, 255.toByte(), 0),
    var percentTolerance: Float = 0.025f,
    var colorSpace: ColorSpace = ColorSpace.ALL,
    var width: Int = DEFAULT_WIDTH_PIXELS,
    var height: Int = DEFAULT_HEIGHT_PIXELS
) : AbstractFilter {
    companion object : AbstractFilterConsts {
        override val listName = "Initial Comparison"

        // Must match the kernel function name in InitialComparison.cl.
        private const val KERNEL_NAME = "initialComparisonKernel"
    }

    /** Current (mutable) filter settings, keyed by UI property. */
    override fun getProperties(): Map<AbstractFilterProperty, Any> = mapOf(
        AbstractFilterProperty.TOLERANCE to percentTolerance,
        AbstractFilterProperty.COLOR_KEY to colorKey,
        AbstractFilterProperty.REPLACEMENT_KEY to replacementKey,
        AbstractFilterProperty.COLOR_SPACE to colorSpace
    )

    /**
     * Updates one setting by its display name.
     * @throws ArrayIndexOutOfBoundsException for an unknown property name
     */
    override fun setProperty(listName: String, newValue: Any) = when (listName) {
        AbstractFilterProperty.TOLERANCE.listName -> percentTolerance = newValue as Float
        AbstractFilterProperty.COLOR_KEY.listName -> colorKey = newValue as ByteArray
        AbstractFilterProperty.REPLACEMENT_KEY.listName -> replacementKey = newValue as ByteArray
        AbstractFilterProperty.COLOR_SPACE.listName -> colorSpace = newValue as ColorSpace
        else -> throw ArrayIndexOutOfBoundsException("Couldn't find property $listName")
    }

    /**
     * Runs the kernel over one frame and returns the processed bytes.
     *
     * [inputBuffer] is treated as interleaved 3-byte pixels; one work-item is
     * enqueued per byte (the kernel only acts on every third item). The final
     * read is blocking (CL_TRUE), so the call is synchronous.
     */
    @Suppress("LongMethod")
    override fun apply(inputBuffer: ByteArray): ByteArray {
        val outputBuffer = ByteArray(size = inputBuffer.size)
        val floatOptionsBuffer = floatArrayOf(percentTolerance)
        val intOptionsBuffer = intArrayOf(colorSpace.i, width, height)

        // Host pointers for every buffer the kernel touches.
        val inputPtr = Pointer.to(inputBuffer)
        val outputPtr = Pointer.to(outputBuffer)
        val colorKeyPtr = Pointer.to(colorKey)
        val replacementKeyPtr = Pointer.to(replacementKey)
        val floatOptionsPtr = Pointer.to(floatOptionsBuffer)
        val intOptionsPtr = Pointer.to(intOptionsBuffer)

        // Device buffers; READ buffers alias host memory (CL_MEM_USE_HOST_PTR).
        val inputMem = api.allocMem(inputPtr, ClMemOperation.READ, Sizeof.cl_char * inputBuffer.size)
        val outputMem = api.allocMem(null, ClMemOperation.WRITE, Sizeof.cl_char * outputBuffer.size)
        val colorKeyMem = api.allocMem(colorKeyPtr, ClMemOperation.READ, Sizeof.cl_char * colorKey.size)
        val replacementKeyMem = api.allocMem(
            replacementKeyPtr,
            ClMemOperation.READ,
            Sizeof.cl_char * replacementKey.size
        )
        val floatOptionsMem = api.allocMem(
            floatOptionsPtr,
            ClMemOperation.READ,
            Sizeof.cl_float * floatOptionsBuffer.size
        )
        val intOptionsMem = api.allocMem(intOptionsPtr, ClMemOperation.READ, Sizeof.cl_int * intOptionsBuffer.size)

        // Argument order must match the kernel's parameter declaration order.
        val kernel = clCreateKernel(api.program, KERNEL_NAME, null)
        var argIndex = 0
        clSetKernelArg(kernel, argIndex++, Sizeof.cl_mem.toLong(), Pointer.to(inputMem))
        clSetKernelArg(kernel, argIndex++, Sizeof.cl_mem.toLong(), Pointer.to(outputMem))
        clSetKernelArg(kernel, argIndex++, Sizeof.cl_mem.toLong(), Pointer.to(colorKeyMem))
        clSetKernelArg(kernel, argIndex++, Sizeof.cl_mem.toLong(), Pointer.to(replacementKeyMem))
        clSetKernelArg(kernel, argIndex++, Sizeof.cl_mem.toLong(), Pointer.to(floatOptionsMem))
        clSetKernelArg(kernel, argIndex, Sizeof.cl_mem.toLong(), Pointer.to(intOptionsMem))

        // Round the global size up to a multiple of the local size, if one is set.
        val globalWorkSizeBuffer = api.localWorkSize?.let {
            longArrayOf(ceil(inputBuffer.size / it.toFloat()).toLong() * it)
        } ?: longArrayOf(inputBuffer.size.toLong())
        val localWorkSizeBuffer = api.localWorkSize?.let { longArrayOf(api.localWorkSize) }

        clEnqueueNDRangeKernel(
            api.commandQueue,
            kernel,
            1,
            null,
            globalWorkSizeBuffer,
            localWorkSizeBuffer,
            0,
            null,
            null
        )
        // Blocking read: waits for the kernel, then copies the result to the host.
        clEnqueueReadBuffer(
            api.commandQueue,
            outputMem,
            CL_TRUE,
            0,
            (inputBuffer.size * Sizeof.cl_char).toLong(),
            outputPtr,
            0,
            null,
            null
        )

        // Release the per-frame OpenCL objects so device memory is not leaked.
        clReleaseMemObject(inputMem)
        clReleaseMemObject(outputMem)
        clReleaseMemObject(colorKeyMem)
        clReleaseMemObject(replacementKeyMem)
        clReleaseMemObject(floatOptionsMem)
        clReleaseMemObject(intOptionsMem)
        clReleaseKernel(kernel)
        return outputBuffer
    }
}
Here is an example of the InitialComparison kernel, which looks for and replaces similar pixels.
/* Which colour channel a comparison uses; ALL averages all three channels. */
enum ColorSpace {
BLUE = 0,
GREEN = 1,
RED = 2,
ALL = 3
};
/* Indices into the floatOptions buffer passed to every kernel. */
enum FloatOptions {
PERCENT_TOLERANCE = 0,
GRADIENT_TOLERANCE = 1
};
/* Indices into the intOptions buffer passed to every kernel. */
enum IntOptions {
COLOR_SPACE = 0,
WIDTH = 1,
HEIGHT = 2,
BLOCK_SIZE = 3
};
/*
 * Normalised colour distance between the 3-byte pixels at a[i] and b[j].
 * colorSpace < 3 compares a single channel (result in [0, 1] over 255);
 * otherwise the three channel differences are averaged (divided by 765).
 */
float calcColorDiff(
    const char *a,
    const int i,
    const char *b,
    const int j,
    const int colorSpace
) {
    float channelDiff[3];
    for (int k = 0; k < 3; k++) {
        channelDiff[k] = abs(a[i + k] - b[j + k]);
    }
    if (colorSpace < 3) {
        return channelDiff[colorSpace] / 255.0;
    }
    /* ALL: accumulate each channel's share of the maximum distance 3*255. */
    float percentDiff = 0.0;
    for (int k = 0; k < 3; k++) {
        percentDiff += channelDiff[k] / 765.0;
    }
    return percentDiff;
}
/* Copies one 3-byte pixel from ink[j..j+2] into canvas[i..i+2]. */
void writePixel(
    char *canvas,
    const int i,
    const char *ink,
    const int j
) {
    int channel = 0;
    while (channel < 3) {
        canvas[i + channel] = ink[j + channel];
        channel++;
    }
}
/*
 * Replaces pixels close to colorKey with replacementKey; everything else is
 * copied through unchanged. One work-item is launched per BYTE of the frame;
 * only items landing on a pixel boundary (gid % 3 == 0) do work, so two
 * thirds of the work-items exit immediately.
 *
 * NOTE(review): calcColorDiff subtracts raw `char` values — byte values
 * above 127 become negative if char is signed on the device; confirm frames
 * are treated consistently across devices.
 */
__kernel void initialComparisonKernel(
    __global const char *input,          /* interleaved 3-byte source pixels */
    __global char *output,               /* destination frame, same layout   */
    __global const char *colorKey,       /* 3-byte key colour to match       */
    __global const char *replacementKey, /* 3-byte colour written on a match */
    __global const float *floatOptions,  /* indexed by enum FloatOptions     */
    __global const int *intOptions       /* indexed by enum IntOptions       */
) {
    float percentTolerance = floatOptions[PERCENT_TOLERANCE];
    int colorSpace = intOptions[COLOR_SPACE];
    int gid = get_global_id(0);
    if (gid % 3 == 0) {
        /* Normalised distance between this pixel and the key colour. */
        float percentDiff = calcColorDiff(input, gid, colorKey, 0, colorSpace);
        if (percentDiff < percentTolerance) {
            writePixel(output, gid, replacementKey, 0);
        } else {
            writePixel(output, gid, input, gid);
        }
    }
}
It works pretty well! Much faster than running it on a CPU, even using Java's multithreading ExecutorService. On top of the comparison, I also run two additional filters: NoiseReduction, which removes green screen pixels that aren't mostly surrounded by other green screen pixels, and FlowKey, which fills in the gaps between green screen pixels.
/*
 * Returns 1 when the 3-byte pixel at input[i] is exactly equal to colorKey,
 * 0 otherwise.
 */
int checkPixelEquality(
    const char *input,
    const int i,
    const char *colorKey
) {
    for (int channel = 0; channel < 3; channel++) {
        if (input[i + channel] != colorKey[channel]) {
            return 0;
        }
    }
    return 1;
}
/*
 * Removes isolated key-coloured pixels: a pixel that matches colorKey keeps
 * the key colour only if at least 3 of its 4 neighbours are also key-coloured
 * (image borders count as matches); otherwise its original value is restored
 * from `template`. Non-key pixels are copied from `template` unchanged.
 * One work-item per byte; only gid % 3 == 0 items act on pixels.
 *
 * NOTE(review): `template` is a legal identifier in OpenCL C but would clash
 * with the keyword if this source were ever compiled as C++.
 */
__kernel void noiseReductionKernel(
    __global const char *input,
    __global char *output,
    __global const char *template,
    __global const char *colorKey,
    __global const int *intOptions
) {
    int width = intOptions[WIDTH];
    int height = intOptions[HEIGHT];
    int gid = get_global_id(0);
    if (gid % 3 == 0) {
        int anchorEquality = checkPixelEquality(input, gid, colorKey);
        if (anchorEquality == 1) {
            /* Count key-coloured 4-neighbours; off-image sides count as 1. */
            int surroundingPixels = 0;
            if ((gid / 3) % width == 0) {            /* left edge  */
                surroundingPixels += 1;
            } else {
                surroundingPixels += checkPixelEquality(input, gid - 3, colorKey);
            }
            if ((gid / 3) % width == width - 1) {    /* right edge */
                surroundingPixels += 1;
            } else {
                surroundingPixels += checkPixelEquality(input, gid + 3, colorKey);
            }
            if ((gid / 3) / width == 0) {            /* top edge   */
                surroundingPixels += 1;
            } else {
                surroundingPixels += checkPixelEquality(input, gid - (width * 3), colorKey);
            }
            if ((gid / 3) / width == height - 1) {   /* bottom edge */
                surroundingPixels += 1;
            } else {
                surroundingPixels += checkPixelEquality(input, gid + (width * 3), colorKey);
            }
            if (surroundingPixels < 3) {
                /* Isolated key pixel: treat as noise, restore the original. */
                writePixel(output, gid, template, gid);
            } else {
                writePixel(output, gid, colorKey, 0);
            }
        } else {
            writePixel(output, gid, template, gid);
        }
    }
}
/*
 * Grows the keyed region by one pixel per pass: a non-key pixel becomes
 * key-coloured when any 4-neighbour is already key-coloured AND the gradient
 * between this pixel and that neighbour's original (`template`) value exceeds
 * gradientTolerance; otherwise its original value is restored. Already-keyed
 * pixels stay keyed. One work-item per byte; only gid % 3 == 0 items act on
 * whole pixels.
 *
 * NOTE(review): the outermost `else` (gid % 3 != 0) also calls writePixel,
 * writing the key colour at a NON-pixel-aligned offset, which overlaps the
 * writes of neighbouring work-items — confirm this is intentional and not an
 * indexing oversight.
 */
__kernel void flowKeyKernel(
    __global const char *input,
    __global char *output,
    __global const char *template,
    __global const char *colorKey,
    __global const float *floatOptions,
    __global const int *intOptions
) {
    float gradientTolerance = floatOptions[GRADIENT_TOLERANCE];
    int colorSpace = intOptions[COLOR_SPACE];
    int width = intOptions[WIDTH];
    int height = intOptions[HEIGHT];
    int gid = get_global_id(0);
    if (gid % 3 == 0) {
        if (checkPixelEquality(input, gid, colorKey) == 0) {
            /* west neighbour (skipped on the left edge) */
            if (
                (gid / 3) % width != 0 &&
                checkPixelEquality(input, gid - 3, colorKey) == 1 &&
                calcColorDiff(input, gid, template, gid - 3, colorSpace) > gradientTolerance
            ) {
                writePixel(output, gid, colorKey, 0);
                return;
            }
            /* east neighbour (skipped on the right edge) */
            if (
                (gid / 3) % width != width - 1 &&
                checkPixelEquality(input, gid + 3, colorKey) == 1 &&
                calcColorDiff(input, gid, template, gid + 3, colorSpace) > gradientTolerance
            ) {
                writePixel(output, gid, colorKey, 0);
                return;
            }
            /* north neighbour (skipped on the top edge) */
            if (
                (gid / 3) / width != 0 &&
                checkPixelEquality(input, gid - (width * 3), colorKey) == 1 &&
                calcColorDiff(input, gid, template, gid - (width * 3), colorSpace) > gradientTolerance
            ) {
                writePixel(output, gid, colorKey, 0);
                return;
            }
            /* south neighbour (skipped on the bottom edge) */
            if (
                (gid / 3) / width != height - 1 &&
                checkPixelEquality(input, gid + (width * 3), colorKey) == 1 &&
                calcColorDiff(input, gid, template, gid + (width * 3), colorSpace) > gradientTolerance
            ) {
                writePixel(output, gid, colorKey, 0);
                return;
            }
            /* no qualifying keyed neighbour: restore the original pixel */
            writePixel(output, gid, template, gid);
        } else {
            writePixel(output, gid, colorKey, 0);
        }
    } else {
        writePixel(output, gid, colorKey, 0);
    }
}
The issue is that these kernels have insignificant runtime as compared to queueing the kernels through clEnqueueNDRangeKernel. This means that the overhead for running all of the kernels is too large, resulting in frame latency. Each of the filters must be run one at a time until the image has been fully processed.
My current understanding of OpenCL is that within a queued kernel, each workgroup will get queued without any specific order and without any guarantee of total concurrency. Because these filters must be applied one at a time across the entire image, the only option I can think of is to queue many small kernels.
I've tried aggregating all the kernels into one large kernel (code below). There are two issues:
The workgroups run without regard for total concurrency, meaning that one line of pixels could finish all the filters, while another line of pixels hasn't run at all.
When I implemented a lock for the aggregate kernel, it would freeze because not all workgroups were running at the same time.
/*
 * Aggregated pipeline kernel: initial comparison, then configurable rounds of
 * noise reduction, flow-key, and final noise reduction, ping-ponging each
 * pixel between tmpStale (input of a pass) and tmpActive (output of a pass).
 *
 * NOTE(review): the apply* helpers and the TOLERANCE / FLOW_KEY_TOLERANCE /
 * *_ITERATIONS option indices are not defined in this file — they must come
 * from another .cl source compiled into the same program.
 *
 * NOTE(review): each pass reads NEIGHBOURING pixels from tmpStale, but OpenCL
 * provides no global synchronisation between work-groups inside one kernel,
 * so a neighbour may already have advanced to a later iteration. This is the
 * data race the surrounding question describes.
 */
__kernel void openClKernel(
    __global const char *input,
    __global char *output,
    __global const char *colorKey,
    __global const char *replacementKey,
    __global const float *floatOptions,
    __global const int *intOptions,
    __global char *tmpActive,
    __global char *tmpStale
) {
    float tolerance = floatOptions[TOLERANCE];
    float flowKeyTolerance = floatOptions[FLOW_KEY_TOLERANCE];
    int colorSpace = intOptions[COLOR_SPACE];
    int width = intOptions[WIDTH];
    int height = intOptions[HEIGHT];
    int initialNoiseReductionIterations = intOptions[INITIAL_NOISE_REDUCTION_ITERATIONS];
    int flowKeyIterations = intOptions[FLOW_KEY_ITERATIONS];
    int finalNoiseReductionIterations = intOptions[FINAL_NOISE_REDUCTION_ITERATIONS];
    int gid = get_global_id(0);
    if (gid % 3 == 0) {
        /* pass 1: key out pixels close to colorKey */
        applyInitialComparison(input, tmpActive, colorKey, replacementKey, tolerance, colorSpace, gid);
        writePixel(tmpStale, gid, tmpActive, gid);
        /* pass 2: remove isolated key pixels */
        for (int i = 0; i < initialNoiseReductionIterations; i++) {
            applyNoiseReduction(tmpStale, tmpActive, input, replacementKey, width, height, gid);
            writePixel(tmpStale, gid, tmpActive, gid);
        }
        /* pass 3: grow the keyed region across soft edges */
        for (int i = 0; i < flowKeyIterations; i++) {
            applyFlowKey(tmpStale, tmpActive, input, replacementKey, flowKeyTolerance, colorSpace, width, height, gid);
            writePixel(tmpStale, gid, tmpActive, gid);
        }
        /* pass 4: clean up again after flow-key */
        for (int i = 0; i < finalNoiseReductionIterations; i++) {
            applyNoiseReduction(tmpStale, tmpActive, input, replacementKey, width, height, gid);
            writePixel(tmpStale, gid, tmpActive, gid);
        }
        writePixel(output, gid, tmpStale, gid);
    }
}
That being said, the aggregate kernel ran easily 1,000 times faster than the split kernels (I implemented a little frame-latency counter). This signals to me that the overhead of queueing a kernel is way too large as compared to the task at hand.
What can I do to optimize this program? Is there a way to efficiently queue many small kernels? Is there a way to restructure the kernels to run concurrently? Please also let me know how I can improve the quality of my question if needed.
Thanks!
For every kernel launch there is a fixed amount of overhead, let's say 1 Millisecond. The overhead originates partly in the loading of the instructions, but mainly in the synchronization of all threads at the end of each kernel. So if you launch lots of small kernels that take 1ms execution time each, half of the total time will be lost as overhead. If you aggregate many small kernels into one that runs 9ms, then overhead is only 10%.
So aggregate as many small kernels into one as data movement allows without running into race conditions. Also make sure the range of each kernel is as large as possible.
Alternatively, you could use multiple queues in parallel. A kernel with small range does not saturate the GPU, so part of the GPU is idle at any time. If you have multiple concurrent queues, the kernels in these queues can run concurrently and together saturate the hardware. However with this you still have the overhead losses.

How to generate random integers that are random "enough"?

I'm trying to solve the 280th problem in Project Euler, and for this I have written the following simulation;
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <sys/time.h>
/* Directions
1
2 3
4
*/
/*
 * Shared simulation state for the ant-walk experiment (Project Euler #280).
 * grid: value 2 marks a cell containing food; food starts in column 4.
 */
int grid[5][5] = {
{0, 0, 0, 0, 2},
{0, 0, 0, 0, 2},
{0, 0, 0, 0, 2},
{0, 0, 0, 0, 2},
{0, 0, 0, 0, 2}
};
int InitPos[2] = {2, 2};  /* the ant starts at the centre cell */
int MaxExp = 5000000;     /* number of independent experiments to run */
bool Success = false;     /* true once all food has reached column 0 */
int StepCount = 0;        /* steps taken in the current experiment */
int ExpNumber = 1;        /* 1-based index of the current experiment */
int AntsBag = 0;          /* 2 while the ant carries food, else 0 */
/* forward declarations */
void Init();
void CarryFood(int * pos);
void LeftFood(int * pos);
bool checkMovability(int * pos, int direction);
bool moveToDirection(int pos[2], int direction);
bool checkSuccess();
void ShowResult();
/*
 * Runs MaxExp independent random-walk experiments and prints one line per
 * experiment: "<step count> <experiment number>".
 */
int main(int argc, char const *argv[])
{
    /* NOTE(review): `milli` is computed but never used — the comments below
       suggest it was an alternative srand() seed during experimentation. */
    timeval curTime;
    gettimeofday(&curTime, NULL);
    int milli = curTime.tv_usec / 1000;
    time_t t;
    srand((unsigned)time(&t));
    //timeTData*.txt corresponds to using "time(&t)" above
    //milliData.txt corresponds to using "milli" variable above
    //timeTUnsigData*.txt corresponds to using "(unsigned)time(&t)" above
    printf("%% Experiment Number : %d \n", MaxExp);
    while(ExpNumber <= MaxExp)
    {
        Init();
        /* ant position; NOTE(review): judging from CarryFood/LeftFood below,
           pos[0] indexes the row of `grid` and pos[1] the column — column 4
           holds food, column 0 is the delivery side. */
        int pos[2];
        pos[0] = InitPos[0];
        pos[1] = InitPos[1];
        do{
            /* random direction in 1..4 (see checkMovability for the codes) */
            int direction = (rand() % 4) + 1;
            if (moveToDirection(pos, direction))
            {
                StepCount++;
            }
            /* pick up food when standing on a full cell of the food column */
            if (pos[1] == 4&&grid[pos[0]][4]==2&&AntsBag==0)
            {
                CarryFood(pos);
            }
            /* drop food when standing on an empty cell of the delivery column */
            if (pos[1] == 0&&grid[pos[0]][0]==0&&AntsBag==2)
            {
                LeftFood(pos);
            }
            checkSuccess();
        }
        while(!Success);
        ShowResult();
        ExpNumber++;
    }
    return 0;
}
/*
 * Resets all per-experiment state: the ant carries nothing, no steps have
 * been taken, and food (value 2) fills the rightmost column again.
 */
void Init()
{
    Success = false;
    StepCount = 0;
    AntsBag = 0;
    for (int row = 0; row < 5; ++row)
    {
        for (int col = 0; col < 5; ++col)
        {
            grid[row][col] = (col == 4) ? 2 : 0;
        }
    }
}
/*
 * Prints "<steps> <experiment number>" for the experiment that just finished.
 * The commented-out block dumps the grid, for debugging.
 */
void ShowResult()
{
    /*
    for (int i = 0; i < 5; ++i)
    {
    printf("\n");
    for (int j = 0; j < 5; ++j)
    {
    printf("%d ", grid[i][j]);
    }
    }
    */
    printf("%d %d\n", StepCount, ExpNumber);
}
/* Picks up the food in the ant's current row of the food column (4). */
void CarryFood(int * pos)
{
    AntsBag = 2;          /* the ant now carries one piece of food */
    grid[pos[0]][4] = 0;  /* remove it from the grid */
}
/* Drops the carried food into the ant's current row of the delivery column (0). */
void LeftFood(int * pos)
{
    AntsBag = 0;          /* hands are free again */
    grid[pos[0]][0] = 2;  /* place the food on the grid */
}
/*
 * Returns true when a move in `direction` keeps the ant inside the 5x5 grid.
 * Direction codes (see the diagram at the top of the file):
 *   1 = decrease pos[1], 2 = decrease pos[0], 3 = increase pos[0],
 *   4 = increase pos[1].
 * An unknown code prints a warning and counts as not movable.
 */
bool checkMovability(int * pos, int direction)
{
    switch (direction)
    {
        case 1: return pos[1] != 0;
        case 2: return pos[0] != 0;
        case 3: return pos[0] != 4;
        case 4: return pos[1] != 4;
        default:
            printf("Wrong direction input is given!!\n");
            return false;
    }
}
/*
 * Moves the ant one cell in `direction` (same codes as checkMovability) and
 * returns true; returns false without moving when the move would leave the
 * grid or the direction code is unknown.
 */
bool moveToDirection(int * pos, int direction)
{
    if (!checkMovability(pos, direction))
    {
        return false;
    }
    switch (direction)
    {
        case 1: pos[1] -= 1; return true;
        case 2: pos[0] -= 1; return true;
        case 3: pos[0] += 1; return true;
        case 4: pos[1] += 1; return true;
        default:
            /* unreachable: checkMovability already rejected unknown codes */
            printf("I'm stunned!\n");
            return false;
    }
}
/*
 * Success means every cell of the delivery column (0) holds food (value 2).
 * Sets the global Success flag as a side effect and returns the result.
 */
bool checkSuccess()
{
    int row = 0;
    while (row < 5)
    {
        if (grid[row][0] != 2)
        {
            return false;
        }
        ++row;
    }
    //printf("Success!\n");
    Success = true;
    return true;
}
And the redirected the output to a *.txt file and find the expected value of the number of steps with the following octave code;
% Computes the sample mean of the step counts produced by the C simulation.
clear
load data.txt
% column 1 = steps per experiment, column 2 = experiment index
n = data(:,1);
output_precision(15);
% estimate of the expected number of steps
mean(n)
%% The actual data
%% milliData1 -> 430.038224000000
%% milliData2 -> 430.031745000000
%% timeTData1 -> 430.029882400000
%% timeTData2 -> 430.019626400000
%% timeUnsigData1 -> 430.028159000000
%% timeUnsigData2 -> 430.009509000000
However, even I run the exact same code twice I get different results, as you can see from the above results.(Note that, I have tried this with different srand(..) inputs for the reason I'm going to explain).
I thought that the reason for this is how I generate a random integer between 1-4 for the random directions of the ant, because as far as I understand, the probability distribution of this experiment should be the same as long as I repeat the experiment a large number of times (in this particular case 5000000 times).
So my first question is: is the problem really with the method I use to generate random integers? If so, how can we overcome it — that is, how can we generate integers random enough that when we repeat the same experiment a large number of times, the variation between the resulting expected values is smaller than in the results I have got?
You can use something like
std::mt19937 gen{std::random_device{}()};
std::uniform_int_distribution<int> dist{1, 4};
int randNumber = dist(gen);
This generates a more uniform distribution of values.

Navigating through a Maze using path-planning (Dijkstra)

I'm working on an robot that would be able to navigate through a maze, avoid obstacles and identify some of the objects in it. I have a monochromatic bitmap of the maze, that is supposed to be used in the robot navigation.
Up till now I have processed the bitmap image, and converted it into an adjacency list. I will now use the dijkstra's algorithm to plan the path.
However the problem is that I have to extract the entrance point/node and exit node from the bmp image itself for dijkstra's algorithm to plan the path.
The robots starting position will be slightly different (inch or two before the entrance point) from the entrance point of maze, and I am supposed to move to the entrance point using any "arbitrary method" and then apply dijkstra algorithm to plan path from maze's entrance to exit.
On the way I have to also stop at the "X's" marked in the bmp file I have attached below. These X's are basically boxes in which I have to pot balls. I will plan the path from entrance point to exit point , and not from the entrance to 1st box, then to second, and then to the exit point; because I think the boxes will always be placed at the shortest path.
Since the starting position is different from the entrance point, how will I match my robot's physical location with the coordinates in the program and move it accordingly. Even if the entrance position would have been same as starting position there may have been an error. How should I deal with it? Should I navigate only on the bases of the coordinates provided by dijkstra or use ultrasonics as well to prevent collisions? And if we yes, can you give me an idea of how should I use the both (ultrasonics, and coordinates)?
Here's the sample Bitmap image of the maze.
I know you need this for robotics but here is an example how to translate pixels to array in java to give some ideas?
import java.awt.BorderLayout;
import java.awt.Color;
import java.awt.Dimension;
import java.awt.Graphics;
import java.awt.event.ActionEvent;
import java.awt.event.ActionListener;
import javax.swing.JFrame;
import javax.swing.JPanel;
import javax.swing.SwingUtilities;
import javax.swing.Timer;
import javax.swing.WindowConstants;
/**
 * Demo window: hosts a single RobotPanel in which a robot tile walks the map.
 */
public class RobotDemo extends JFrame {
    private static final long serialVersionUID = 1L;

    /** Builds the non-resizable, screen-centred frame around a RobotPanel. */
    public RobotDemo() {
        super("Robot Demo");
        setDefaultCloseOperation(WindowConstants.EXIT_ON_CLOSE);
        getContentPane().add(new RobotPanel(), BorderLayout.CENTER);
        pack();                        // size the frame to the panel's preferred size
        setResizable(false);
        setLocationRelativeTo(null);   // centre on screen (must follow pack())
    }

    /** Creates and shows the frame on the Swing event-dispatch thread. */
    public static void main(String[] args) {
        SwingUtilities.invokeLater(new Runnable() {
            public void run() {
                JFrame frame = new RobotDemo();
                frame.setVisible(true);
            }
        });
    }
}
/**
 * Shared constants for the demo: tile geometry, board size, animation speed,
 * plus the robot's state/instruction enums. Implemented as a "constant
 * interface" by the panel, robot and map so the names are in scope everywhere.
 */
interface Constants {
    public static final int TILE_WIDTH = 32;    // tile width in pixels
    public static final int TILE_HEIGHT = 32;   // tile height in pixels
    public static final int NUM_TILE_COLS = 20; // board width in tiles
    public static final int NUM_TILE_ROWS = 10; // board height in tiles
    public static final int PIXEL_STEPS = 3;    // pixels the robot moves per tick
    public static final int REFRESH_RATE = 200; // timer period in milliseconds
    public static final Dimension PANEL_SIZE = new Dimension(TILE_WIDTH * NUM_TILE_COLS, TILE_HEIGHT * NUM_TILE_ROWS);

    /** What the robot is currently doing. */
    public static enum RobotState {
        awaiting_instruction,
        moving_north,
        moving_south,
        moving_east,
        moving_west
    };

    /** A single movement command handed out by the map. */
    public static enum RobotInstruction {
        NORTH,
        SOUTH,
        EAST,
        WEST
    }

    /** Implementors render themselves onto the given graphics context. */
    public void draw(Graphics g);
}
/**
 * The drawing surface: owns the map and the robot, advances the robot on a
 * Swing timer, and overlays a tile grid on top of everything.
 */
class RobotPanel extends JPanel implements Constants, ActionListener {
    private static final long serialVersionUID = 1L;
    private Timer timer = new Timer(REFRESH_RATE, this);
    private Map map = new Map();
    private Robot robot = new Robot(map);

    public RobotPanel() {
        timer.start();  // begin the animation loop immediately
    }

    // Pin the panel to the exact board size in every layout situation.
    public Dimension getPreferredSize() { return PANEL_SIZE; }
    public Dimension getMinimumSize() { return PANEL_SIZE; }
    public Dimension getMaximumSize() { return PANEL_SIZE; }

    /** Paint order matters: map first, robot on top, grid lines last. */
    protected void paintComponent(Graphics g) {
        super.paintComponent(g);
        map.draw(g);
        robot.draw(g);
        draw(g);
    }

    /** Timer callback: advance the robot one step, then repaint. */
    public void actionPerformed(ActionEvent e) {
        robot.update();
        repaint();
    }

    /** Draws the tile grid outline over the whole board. */
    public void draw(Graphics g) {
        for(int r = 0; r < NUM_TILE_ROWS; r++) {
            for(int c = 0; c < NUM_TILE_COLS; c++) {
                g.drawRect(c * TILE_WIDTH, r * TILE_HEIGHT, TILE_WIDTH, TILE_HEIGHT);
            }
        }
    }
}
/**
 * A tile-sized robot that walks the map a few pixels per tick and asks the
 * map for a new instruction whenever it is about to cross into another tile.
 *
 * Two coordinate systems are in play:
 *   row/col   — pixel position of the sprite's top-left corner;
 *   mapY/mapX — the tile the robot is logically standing on.
 */
class Robot implements Constants {
    private RobotState state = RobotState.moving_east;
    private int row = TILE_HEIGHT;  // pixel y; starts one tile down
    private int col = TILE_WIDTH;   // pixel x; starts one tile right
    private int mapX = 1;           // current tile column
    private int mapY = 1;           // current tile row
    private Map map;
    int nextRowCheck = 1;           // tile row to snap to at the next boundary
    int nextColCheck = 2;           // tile column to snap to at the next boundary

    public Robot(Map m) {
        map = m;
    }

    /** Tile row (NOTE: grid coordinates, not pixels — returns mapY). */
    public int getRow() {
        return mapY;
    }

    /** Tile column (grid coordinates, not pixels — returns mapX). */
    public int getCol() {
        return mapX;
    }

    /**
     * True when the NEXT pixel step would put the sprite in a different tile
     * than the one it is logically standing on.
     */
    private boolean needsNewInstruction(){
        int newRow = row;
        int newCol = col;
        if(state == RobotState.moving_north) newRow -= PIXEL_STEPS;
        if(state == RobotState.moving_south) newRow += PIXEL_STEPS;
        if(state == RobotState.moving_east) newCol += PIXEL_STEPS;
        if(state == RobotState.moving_west) newCol -= PIXEL_STEPS;
        if((newRow / TILE_HEIGHT) != mapY) return true;
        if((newCol / TILE_WIDTH) != mapX) return true;
        return false;
    }

    /** Draws the robot as a solid green tile at its current pixel position. */
    public void draw(Graphics g) {
        Color c = g.getColor();
        g.setColor(Color.GREEN);
        g.fillRect(col, row, TILE_WIDTH, TILE_HEIGHT);
        g.setColor(c);
    }

    /**
     * One timer tick: at a tile boundary, snap the sprite to the grid, fetch
     * the next instruction from the map, and update heading; then advance one
     * pixel step. Statement order matters: the snap to mapX/mapY must happen
     * BEFORE the map is queried, because Map reads them via getRow()/getCol().
     */
    public void update() {
        System.out.println("UPDATE [" + row + "][" + col + "] = [" + (row / TILE_HEIGHT) + "][" + (col / TILE_WIDTH) + "]");
        if(needsNewInstruction()) {
            System.out.println("NEEDS NEW INSTRUCTION [" + row + "][" + col + "] = [" + (row / TILE_HEIGHT) + "][" + (col / TILE_WIDTH) + "]");
            mapX = nextColCheck;
            mapY = nextRowCheck;
            System.out.println("UPDATED MAP REFERENCE [" + mapY + "][" + mapX + "]");
            row = mapY * TILE_HEIGHT;
            col = mapX * TILE_WIDTH;
            System.out.println("UPDATED PIXEL REFERENCE [" + row + "][" + col + "]");
            RobotInstruction instruction = map.getNextInstruction(this);
            // NOTE(review): a null instruction (boxed in) would leave the old
            // state untouched and the robot walking into a wall — confirm the
            // maps used always have an open neighbour.
            if(instruction == RobotInstruction.NORTH) {
                state = RobotState.moving_north;
                nextRowCheck = mapY - 1;
            }
            if(instruction == RobotInstruction.SOUTH) {
                state = RobotState.moving_south;
                nextRowCheck = mapY + 1;
            }
            if(instruction == RobotInstruction.EAST) {
                state = RobotState.moving_east;
                nextColCheck = mapX + 1;
            }
            if(instruction == RobotInstruction.WEST) {
                state = RobotState.moving_west;
                nextColCheck = mapX - 1;
            }
        }
        move();
    }

    /** Advances the sprite PIXEL_STEPS pixels in the current direction. */
    public void move() {
        if(state == RobotState.moving_north) row -= PIXEL_STEPS;
        if(state == RobotState.moving_south) row += PIXEL_STEPS;
        if(state == RobotState.moving_east) col += PIXEL_STEPS;
        if(state == RobotState.moving_west) col -= PIXEL_STEPS;
    }
}
/**
 * The tile map (1 = wall, 0 = open) and the policy that picks the robot's
 * next direction.
 *
 * NOTE(review): the literal below has 15 rows but NUM_TILE_ROWS is 10, so
 * draw() only renders the first 10 rows — confirm the extra rows are
 * intentional.
 */
class Map implements Constants {
    int[][] map = new int[][] {
        {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
        {1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1},
        {1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1},
        {1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1},
        {1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1},
        {1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1},
        {1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1},
        {1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1},
        {1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1},
        {1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1},
        {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1},
        {1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1},
        {1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1},
        {1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1},
        {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}
    };

    public Map() {
    }

    /**
     * Picks the next direction from the robot's current tile, with fixed
     * preference EAST, SOUTH, NORTH, WEST; returns null when boxed in.
     * NOTE(review): no bounds checks — the solid border of 1s is what keeps
     * the indexing safe; the fixed preference order can make the robot
     * oscillate in open areas.
     */
    public RobotInstruction getNextInstruction(Robot robot) {
        int row = robot.getRow();
        int col = robot.getCol();
        System.out.println("GET NEXT INSTRUCTION FOR [" + row + "][" + col + "]");
        if(map[row][col + 1] == 0) return RobotInstruction.EAST;
        if(map[row + 1][col] == 0) return RobotInstruction.SOUTH;
        if(map[row - 1][col] == 0) return RobotInstruction.NORTH;
        if(map[row][col - 1] == 0) return RobotInstruction.WEST;
        return null;
    }

    /** Paints open tiles cyan and walls red, restoring the previous colour. */
    public void draw(Graphics g) {
        Color color = g.getColor();
        for(int r = 0; r < NUM_TILE_ROWS; r++) {
            for(int c = 0; c < NUM_TILE_COLS; c++) {
                g.setColor(map[r][c] == 0 ? Color.CYAN : Color.RED);
                g.fillRect(c * TILE_WIDTH, r * TILE_HEIGHT, TILE_WIDTH, TILE_HEIGHT);
            }
        }
        g.setColor(color);
    }
}
Here is an example how to populate your navigational array with directions. the code above doesn't use the code below so you would have to do that yourself ...
public class Maze {
private static final char E = 'E'; // Ending position
private static final char X = 'X'; // Wall
private static final char O = ' '; // Space
private static final char L = 'L'; // Left
private static final char R = 'R'; // Right
private static final char U = 'U'; // Up
private static final char D = 'D'; // Down
private static final char FALSE = '0'; // Not accessible
private static final char TRUE = '1'; // Is accessible
private static final Node END_NODE = new Node(4, 4);
private static final int[] ROW_DIRECTIONS = {-1, 1, 0, 0};
private static final int[] COL_DIRECTIONS = { 0, 0, -1, 1};
private static final char[][] OPPOSITES = new char[][] {{O, D, O},{R, O, L},{O, U, O}};
/**
 * Builds a sample maze, marks the destination, flood-fills directional codes
 * toward it, then compiles the maze into a 0/1 adjacency matrix — printing
 * the grid after each stage.
 */
public static void main(String[] args) {
    char[][] maze = new char[][] {
        {X, X, X, X, X, X},
        {X, O, O, X, O, X},
        {X, O, X, X, O, X},
        {X, O, O, O, X, X},
        {X, X, O, X, O, X},
        {X, O, O, O, O, X},
        {X, O, X, X, O, X},
        {X, X, X, X, X, X}};
    // PLOT THE DESTINATION CELL AND ADD IT TO LIST
    List<Node> nodes = new ArrayList<Node>();
    nodes.add(END_NODE);
    maze[END_NODE.row][END_NODE.col] = E;
    // PRINT THE MAZE BEFORE ANY CALCULATIONS
    printMaze(maze);
    // SOLVE THE MAZE
    fillMaze(maze, nodes);
    printMaze(maze);
    // CONVERT MAZE TO AN ADJACENCY MATRIX
    compileMaze(maze);
    printMaze(maze);
}
/**
* The parallel arrays define all four directions radiating from
* the dequeued node's location.
*
* Each node will have up to four neighboring cells; some of these
* cells are accessible, some are not.
*
* If a neighboring cell is accessible, we encode it with a directional
* code that calculates the direction we must take should we want to
* navigate to the dequeued node's location from this neighboring cell.
*
* Once encoded into our maze, this neighboring cell is itself queued
* up as a node so that recursively, we can encode the entire maze.
*/
public static final void fillMaze(char[][] maze, List<Node> nodes) {
// dequeue our first node
Node destination = nodes.get(0);
nodes.remove(destination);
// examine all four neighboring cells for this dequeued node
for(int index = 0; index < ROW_DIRECTIONS.length; index++) {
int rowIndex = destination.row + ROW_DIRECTIONS[index];
int colIndex = destination.col + COL_DIRECTIONS[index];
// if this neighboring cell is accessible, encode it and add it
// to the queue
if(maze[rowIndex][colIndex] == O) {
maze[rowIndex][colIndex] = getOppositeDirection(ROW_DIRECTIONS[index], COL_DIRECTIONS[index]);
nodes.add(new Node(rowIndex, colIndex));
}
}
// if our queue is not empty, call this method again recursively
// so we can fill entire maze with directional codes
if(nodes.size() > 0) {
fillMaze(maze, nodes);
}
}
/**
* Converts the maze to an adjacency matrix.
*/
private static void compileMaze(char[][] maze) {
for(int r = 0; r < maze.length; r++) {
for(int c = 0; c < maze[0].length; c++) {
if(maze[r][c] == X || maze[r][c] == O) {
maze[r][c] = FALSE;
}
else {
maze[r][c] = TRUE;
}
}
}
}
/**
* prints the specified two dimensional array
*/
private static final void printMaze(char[][] maze) {
System.out.println("====================================");
for(int r = 0; r < maze.length; r++) {
for(int c = 0; c < maze[0].length; c++) {
System.out.print(maze[r][c] + " ");
}
System.out.print("\n");
}
System.out.println("====================================");
}
/**
* Simply returns the opposite direction from those specified
* by our parallel direction arrays in method fillMaze.
*
* coordinate 1, 1 is the center of the char[][] array and
* applying the specified row and col offsets, we return the
* correct code (opposite direction)
*/
private static final char getOppositeDirection(int row, int col) {
return OPPOSITES[1 + row][1 + col];
}
}
/**
 * A lightweight (row, col) grid coordinate used as a work-queue entry while
 * flood-filling the maze with directional codes.
 *
 * Note: equals/hashCode are NOT overridden, so collection operations such as
 * List.remove(Object) match Node instances by identity only.
 */
class Node {
// Row index into the maze grid.
int row;
// Column index into the maze grid.
int col;
public Node(int rowIndex, int colIndex) {
row = rowIndex;
col = colIndex;
}
}

Converting Relation to Matrix

I have 2 sets and 1 relation. I want to show them in a matrix.
char a[] = "12345";
char b[] = "ABCDE";
char r[] = "1C2B3E4D5A";
int rel[LA][LA] = {
/* A B C D E*/
/* 1*/{0, 0, 0, 0, 0} ,
/* 2*/{0, 0, 0, 0, 0} ,
/* 3*/{0, 0, 0, 0, 0} ,
/* 4*/{0, 0, 0, 0, 0} ,
/* 5*/{0, 0, 0, 0, 0} };
in the char array each char is an element
char a[] = "12345"; is A{1,2,3,4,5}
The relation char r[] = "1C2B3E4D5A"; is R = {(1,C),(2,B), ... }
My question is: how can I fill the matrix so that, whenever a pair of elements from A and B is related in R, the corresponding matrix entry is set to 1?
Output must like that :
int rel[LA][LA] = {
/* A B C D E*/
/* 1*/{0, 0, 1, 0, 0} ,
/* 2*/{0, 1, 0, 0, 0} ,
/* 3*/{0, 0, 0, 0, 1} ,
/* 4*/{0, 0, 0, 1, 0} ,
/* 5*/{1, 0, 0, 0, 0} };
Firstly I tried :
for(i=0;i<LR-1;i=i+2){ // Look at element from A
for(j=0;j<LA;j++){ // Look at A
if(r[i]==a[j]){
for(k=1;k<LR;k=k+2){ // Look at element from B
for(m=0;m<LA;m++){ // Look at B
if(r[k]==b[m]){
rel[j][m]=1; // if both exist that point gets 1
}
}
}
}
}
}
It does not work.
Here is a solution to your problem:
/**
 * Builds the relation matrix: set A indexes the rows, set B the columns, and
 * every (a, b) pair encoded in r flips the matching entry to 1. The matrix
 * is then printed row by row, digits unseparated.
 */
public static void main(String[] args) {
    // Set A, set B, and the relation R flattened into (a, b) character pairs.
    char[] a = "12345".toCharArray();
    char[] b = "ABCDE".toCharArray();
    char[] r = "1C2B3E4D5A".toCharArray();
    int LA = a.length;
    int LB = b.length;
    int LR = r.length;
    int[][] rel = new int[LA][LB];
    // Walk the relation two characters at a time and mark each related pair.
    for (int pos = 0; pos < LR; pos += 2) {
        int rowIdx = index(a, r[pos]);
        int colIdx = index(b, r[pos + 1]);
        rel[rowIdx][colIdx] = 1;
    }
    // Print out the matrix.
    for (int[] row : rel) {
        for (int cell : row) {
            System.out.print(cell);
        }
        System.out.println("");
    }
}
/**
 * Returns the index of the first occurrence of {@code v} in {@code arr},
 * or -1 if the value is not present.
 */
public static int index(char[] arr, char v) {
    int position = 0;
    for (char candidate : arr) {
        if (candidate == v) {
            return position;
        }
        position++;
    }
    return -1;
}

Tampering detection by using sobel edge in processing

I have to use Sobel edge detection to detect how an image has been tampered with. I have been able to implement the edge filter, but have not been able to figure out how to use it to detect tampering. I want to show the tampering by highlighting the region that has been tampered with in another color.
Can someone help please?
// Source image and the Sobel edge-magnitude output buffer.
PImage img, edgeImg;
// 3x3 Sobel kernels. convolution() indexes them as mat[i][j] with i as the
// x-offset and j as the y-offset, so sobelx weights vary along x and sobely
// along y.
int[][] sobelx = { { -1, -2, -1 }, { 0, 0, 0 }, { 1, 2, 1 } };
int[][] sobely = { { -1, 0, 1 }, { -2, 0, 2 }, { -1, 0, 1 } };
void setup() {
// Load the image to analyse and size the sketch window to match it.
img = loadImage("face1.jpg");
size(img.width, img.height);
// Destination buffer for the edge-magnitude result, same dimensions as img.
edgeImg = createImage(img.width, img.height, RGB);
}
// Draws the source image, computes a Sobel edge-magnitude image over it, then
// draws and thresholds the result.
void draw() {
image(img, 0, 0);
int matrixsize = 3;
loadPixels();
img.loadPixels();
// NOTE(review): edgeImg.pixels is written below, but edgeImg.loadPixels() is
// never called first — Processing normally requires loadPixels() before
// touching a PImage's pixels[]; confirm this works on your renderer.
int loc = 0;
// Skip the 1-pixel border so the 3x3 kernel stays inside the image.
for (int x = 1; x < img.width - 1; x++) {
for (int y = 1; y < img.height - 1; y++) {
loc = x + y * img.width;
// Sobel responses along the two axes at (x, y).
int sx = convolution(x, y, sobelx, matrixsize, img);
int sy = convolution(x, y, sobely, matrixsize, img);
// Approximate gradient magnitude as |sx| + |sy|, clamped to 8 bits.
int sum = abs(sy) + abs(sx);
sum = constrain(sum, 0, 255);
// NOTE(review): a raw int is stored rather than color(sum); presumably the
// low byte lands in the blue channel with a zero alpha byte — this likely
// should be edgeImg.pixels[loc] = color(sum); — verify the rendered output.
edgeImg.pixels[loc] = sum;
}
}
edgeImg.updatePixels();
image(edgeImg, 0, 0);
// Binarise the on-screen result; filter() acts on the display buffer.
filter(THRESHOLD, 0.8);
}
/**
 * Applies the matrixsize x matrixsize kernel mat centred at (x, y) of img and
 * returns the accumulated brightness response. The returned total is NOT
 * clamped here; the caller (draw) constrains it to 0..255.
 */
int convolution(int x, int y, int [][] mat, int matrixsize, PImage img) {
float rtotal = 0.0;
float gtotal = 0.0;
float btotal = 0.0;
int total = 0;
int offset = matrixsize/2;
for(int i=0; i<matrixsize; i++) {
for(int j=0; j<matrixsize; j++) {
// Kernel tap position: i is the x-offset, j is the y-offset.
int xloc = x + i - offset;
int yloc = y + j - offset;
int loc = xloc + img.width*yloc;
// Clamp the flat index so out-of-range taps read a valid (if wrong) pixel
// instead of throwing.
loc = constrain(loc,0,img.pixels.length - 1);
rtotal = rtotal + red(img.pixels[loc])*mat[i][j];
gtotal = gtotal + green(img.pixels[loc])*mat[i][j];
btotal = btotal + blue(img.pixels[loc])*mat[i][j];
// Only this brightness accumulator is actually returned.
total = total + int(brightness(img.pixels[loc])*mat[i][j]);
}
}
// NOTE(review): rtotal/gtotal/btotal are accumulated and constrained but
// never used — dead code unless a per-channel result is planned.
rtotal = constrain(rtotal, 0, 255);
gtotal = constrain(gtotal, 0, 255);
btotal = constrain(btotal, 0, 255);
return total;
}
I don't know how the algorithm can be used for your particular purpose, but I would guess you would need to apply the same filter to the original image and compare the results.
// Compare the edge image against a freshly loaded copy of the original,
// pixel by pixel, flagging locations where the two differ.
PImage original = loadImage("face1.jpg");
PImage edgeImg; // previously created
original.loadPixels();
edgeImg.loadPixels();
for (int i=0; i<original.pixels.length; i++) {
color origPx = original.pixels[i];
color edgePx = edgeImg.pixels[i];
// compare red values, since the edgeImg is B&W
// (>> 16 & 0xFF extracts the red byte of the packed ARGB int)
if ( (origPx >> 16 & 0xFF) != (edgePx >> 16 & 0xFF) ) {
// don't match? do something!
}
}

Resources