Vulkan pipeline layout compatibility - macOS

Two pipeline layouts are defined to be “compatible for push constants” if they were created with identical push constant ranges. Two pipeline layouts are defined to be “compatible for set N” if they were created with identically defined descriptor set layouts for sets zero through N, and if they were created with identical push constant ranges.
vkCmdBindDescriptorSets causes the sets numbered [firstSet.. firstSet+descriptorSetCount-1] to use the bindings stored in pDescriptorSets[0..descriptorSetCount-1] for subsequent rendering commands (either compute or graphics, according to the pipelineBindPoint). Any bindings that were previously applied via these sets are no longer valid.
Suppose there are two pipeline layouts: the first contains one set (#0), and the second contains two sets (#0 and #1). The first layout's set 0 and the second layout's set 0 use identically defined descriptor set layouts, and both pipeline layouts have the same push constant ranges. Therefore, per the definitions above, both pipeline layouts are compatible for set #0, right?
// create set 0
...
VkDescriptorSetLayout setLayout0;
...
vkCreateDescriptorSetLayout(logicalDevice,
&layoutInfo,
nullptr,
&setLayout0);
...
VkDescriptorSet set0;
VkDescriptorSetAllocateInfo allocInfo0 = {};
allocInfo0.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO;
allocInfo0.descriptorSetCount = 1;
allocInfo0.pSetLayouts = &setLayout0;
...
vkAllocateDescriptorSets(logicalDevice,
&allocInfo0,
&set0);
...
// create first pipeline layout
VkPipelineLayoutCreateInfo firstPipelineLayoutInfo = {};
firstPipelineLayoutInfo.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO;
firstPipelineLayoutInfo.setLayoutCount = 1;
firstPipelineLayoutInfo.pSetLayouts = &setLayout0; // <--- use set0
VkPipelineLayout firstPipelineLayout;
vkCreatePipelineLayout(logicalDevice,
&firstPipelineLayoutInfo,
nullptr,
&firstPipelineLayout);
// create set 1
...
VkDescriptorSetLayout setLayout1;
...
vkCreateDescriptorSetLayout(logicalDevice,
&layoutInfo,
nullptr,
&setLayout1);
...
VkDescriptorSet set1;
VkDescriptorSetAllocateInfo allocInfo1 = {};
allocInfo1.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO;
allocInfo1.descriptorSetCount = 1;
allocInfo1.pSetLayouts = &setLayout1;
...
vkAllocateDescriptorSets(logicalDevice,
&allocInfo1,
&set1);
...
// create second pipeline layout
std::array<VkDescriptorSetLayout, 2> setLayout0And1 = {setLayout0, setLayout1}; // <--- use setLayout0 and setLayout1
VkPipelineLayoutCreateInfo secondPipelineLayoutInfo = {};
secondPipelineLayoutInfo.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO;
secondPipelineLayoutInfo.setLayoutCount = 2;
secondPipelineLayoutInfo.pSetLayouts = setLayout0And1.data();
VkPipelineLayout secondPipelineLayout;
vkCreatePipelineLayout(logicalDevice,
&secondPipelineLayoutInfo,
nullptr,
&secondPipelineLayout);
// create pipeline
VkGraphicsPipelineCreateInfo pipelineInfo = {};
...
pipelineInfo.layout = secondPipelineLayout;
VkPipeline pipeline;
vkCreateGraphicsPipelines(logicalDevice,
VK_NULL_HANDLE,
1,
&pipelineInfo,
nullptr,
&pipeline);
After that, vkCmdBindDescriptorSets is called to bind the sets before drawing.
...
vkCmdBindDescriptorSets(commandBuffer,
VK_PIPELINE_BIND_POINT_GRAPHICS,
firstPipelineLayout,
0,
1,
&set0,
0,
nullptr);
// When vkCmdBindDescriptorSets is called with firstSet > 0, the sets previously bound at indices [0 .. firstSet-1] should remain valid, since the two layouts are compatible for set #0.
vkCmdBindDescriptorSets(commandBuffer,
VK_PIPELINE_BIND_POINT_GRAPHICS,
secondPipelineLayout,
1,
1,
&set1,
0,
nullptr);
vkCmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
...
vkCmdDrawIndexed(commandBuffer,
indicesCount,
1, 0, 0, 0);
...
After that, the validation layer reports:
validation layer: VkPipeline 0x42 uses set #0 but that set is not bound.
Why is there an error? Where did I go wrong?

Related

How to set up my CANbus message filtering acceptance code and mask?

I have had no success enabling the filtering configuration of the CANbus driver. The doc is right there (Ctrl+F "acceptance filter"): https://docs.espressif.com/projects/esp-idf/en/v3.3/api-reference/peripherals/can.html#configuration
I would like to set up the filter so that I only catch extended frames whose IDs fall within the 0x18000000 to 0x18FFFFFF range.
But my code does not catch the frames I expect (e.g. 0x18307001).
I probably misunderstand the documentation.
My code is as follows:
can_general_config_t general_config = {
.mode =
//CAN_MODE_NO_ACK,
//CAN_MODE_LISTEN_ONLY,
CAN_MODE_NORMAL,
.tx_io = (gpio_num_t)TXD,
.rx_io = (gpio_num_t)RXD,
.clkout_io = (gpio_num_t)CAN_IO_UNUSED,
.bus_off_io = (gpio_num_t)CAN_IO_UNUSED,
.tx_queue_len = 100,
.rx_queue_len = 65,
.alerts_enabled = CAN_ALERT_NONE,
.clkout_divider = 0
};
log("CAN Driver: general config done");
can_timing_config_t timing_config = CAN_TIMING_CONFIG_500KBITS();
log("CAN Driver: timing config done # 500KBPS");
can_filter_config_t filter_config =
{
.acceptance_code = 0x18FFFFFF,
.acceptance_mask = 0x00FFFFFF, //0x18FFFFFF,
.single_filter = true
};
log("CAN Driver: filter config done");
esp_err_t error = can_driver_install(&general_config, &timing_config, &filter_config);
The documentation (https://docs.espressif.com/projects/esp-idf/en/v3.3/api-reference/peripherals/can.html#configuration) does not specify how the mask has to be built.
From other implementations, I would expect "don't care" bits to be set to zero.
So, this should work:
acceptance_code = 0x18FFFFFF
acceptance_mask = 0x1F000000
I finally found the solution: the code and mask have to be shifted left by 3 bits:
const uint32_t acceptanceCode = 0x18000000U<<3;
const uint32_t acceptanceMask = 0x00FFFFFFU<<3;
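For reference, a minimal sketch of the resulting working configuration (my own illustration of the fix above, assuming ESP-IDF v3.3's can_filter_config_t, where mask bits set to 1 appear to mean "don't care"; the 29-bit extended ID is left-aligned in the 32-bit filter registers, hence the shift by 3):
// Sketch: accept extended IDs 0x18000000..0x18FFFFFF in single filter mode.
// The 29-bit ID occupies the upper bits of the 32-bit acceptance registers,
// so both code and mask are shifted left by 3 bits.
const can_filter_config_t working_filter_config = {
    .acceptance_code = 0x18000000U << 3, // the fixed 0x18 prefix must match
    .acceptance_mask = 0x00FFFFFFU << 3, // low 24 ID bits are "don't care"
    .single_filter = true
};
esp_err_t err = can_driver_install(&general_config, &timing_config, &working_filter_config);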

Join tiles in Corona SDK into one word for a Breakout game grid?

I have a game project to re-implement Breakout. I want to display two words, one per line, formed out of the block of bricks: the top line is the first name, aligned left, and the bottom line is the last name, aligned right. They are input from textboxes and rendered as shown.
Each second that passes, the screen adds a configurable number of bricks to the grid (for example, five bricks per second) until the two words appear complete. I can already display a single letter of the alphabet, built from a matrix of 0s and 1s, but I don't know how to join the letters into one word. How can I join them?
This is what I've gotten so far:
Bricks.lua
local Bricks = display.newGroup() -- static object
local Events = require("Events")
local Levels = require("Levels")
local sound = require("Sound")
local physics = require("physics")
local Sprites = require("Sprites")
local Func = require("Func")
local brickSpriteData =
{
{
name = "brick",
frames = {Sprites.brick}
},
{
name = "brick2",
frames = {Sprites.brick2}
},
{
name = "brick3",
frames = {Sprites.brick3}
},
}
-- animation table
local brickAnimations = {}
Sprites:CreateAnimationTable
{
spriteData = brickSpriteData,
animationTable = brickAnimations
}
-- get size from temp object for later use
local tempBrick = display.newImage('red_apple_20.png',300,500)
--local tempBrick = display.newImage('cheryGreen2.png',300,500)
local brickSize =
{
width = tempBrick.width,
height = tempBrick.height
}
--tempBrick:removeSelf( )
----------------
-- Rubble -- needs to be moved to its own file
----------------
local rubbleSpriteData =
{
{
name = "rubble1",
frames = {Sprites.rubble1}
},
{
name = "rubble2",
frames = {Sprites.rubble2}
},
{
name = "rubble3",
frames = {Sprites.rubble3}
},
{
name = "rubble4",
frames = {Sprites.rubble4}
},
{
name = "rubble5",
frames = {Sprites.rubble5}
},
}
local rubbleAnimations = {}
Sprites:CreateAnimationTable
{
spriteData = rubbleSpriteData,
animationTable = rubbleAnimations
}
local totalBricksBroken = 0 -- used to track when level is complete
local totalBricksAtStart = 0
-- contains all brick objects
local bricks = {}
local function CreateBrick(data)
-- random brick sprite
local obj = display.newImage('red_apple_20.png')
local objGreen = display.newImage('cheryGreen2.png')
obj.name = "brick"
obj.x = data.x --or display.contentCenterX
obj.y = data.y --or 1000
obj.brickType = data.brickType or 1
obj.index = data.index
function obj:Break()
totalBricksBroken = totalBricksBroken + 1
bricks[self.index] = nil
obj:removeSelf( )
sound.play(sound.breakBrick)
end
function obj:Update()
if(self == nil) then
return
end
if(self.y > display.contentHeight - 20) then
obj:Break()
end
end
if(obj.brickType ==1) then
physics.addBody( obj, "static", {friction=0.5, bounce=0.5 } )
elseif(obj.brickType == 2) then
physics.addBody( objGreen,"static",{friction=0.2, bounce=0.5, density = 1 } )
end
return obj
end
local currentLevel = testLevel
-- create level from bricks defined in an object
-- this allows for levels to be designed
local function CreateBricksFromTable(level)
totalBricksAtStart = 0
local activeBricksCount = 0
for yi=1, #level.bricks do
for xi=1, #level.bricks[yi] do
-- create brick?
if(level.bricks[yi][xi] > 0) then
local xPos
local yPos
if(level.align == "center") then
--1100-((99*16)*0.5)
xPos = display.contentCenterX- ((level.columns * brickSize.width) * 0.5/3) + ((xi-1) * level.xSpace)--display.contentCenterX
--xPos = 300 +(xi * level.xSpace)
yPos = 100 + (yi * level.ySpace)--100
else
xPos = level.xStart + (xi * level.xSpace)
yPos = level.yStart + (yi * level.ySpace)
end
local brickData =
{
x = xPos,
y = yPos,
brickType = level.bricks[yi][xi],
index = activeBricksCount+1
}
bricks[activeBricksCount+1] = CreateBrick(brickData)
activeBricksCount = activeBricksCount + 1
end
end
end
totalBricks = activeBricksCount
totalBricksAtStart = activeBricksCount
end
-- create bricks for level --> set from above functions, change function to change brick build type
local CreateAllBricks = CreateBricksFromTable
-- called by a timer so I can pass arguments to CreateAllBricks
local function CreateAllBricksTimerCall()
CreateAllBricks(Levels.currentLevel)
end
-- remove all brick objects from memory
local function ClearBricks()
for i=1, #bricks do
bricks[i] = nil
end
end
-- stuff run on enterFrame event
function Bricks:Update()
-- update individual bricks
if(totalBricksAtStart > 0) then
for i=1, totalBricksAtStart do
-- brick exists?
if(bricks[i]) then
bricks[i]:Update()
end
end
end
-- is level over?
if(totalBricksBroken == totalBricks) then
Events.allBricksBroken:Dispatch()
end
end
----------------
-- Events
----------------
function Bricks:allBricksBroken(event)
-- cleanup bricks
ClearBricks()
local t = timer.performWithDelay( 1000, CreateAllBricksTimerCall)
--CreateAllBricks()
totalBricksBroken = 0
-- play happy sound for player to enjoy
sound.play(sound.win)
print("You Win!")
end
Events.allBricksBroken:AddObject(Bricks)
CreateAllBricks(Levels.currentLevel)
return Bricks
Levels.lua
local Events = require("Events")
local Levels = {}
local function MakeLevel(data)
local level = {}
level.xStart = data.xStart or 100
level.yStart = data.yStart or 100
level.xSpace = data.xSpace or 23
level.ySpace = data.ySpace or 23
level.align = data.align or "center"
level.columns = data.columns or #data.bricks[1]
level.bricks = data.bricks --> required
return level
end
Levels.test4 = MakeLevel
{
bricks =
{
{0,2,0,0,2,0,0,2,0},
{0,0,2,0,2,0,2,0,0},
{0,0,0,0,2,0,0,0,0},
{1,1,2,1,1,1,2,1,1},
{0,0,0,0,1,0,0,0,0},
{0,0,0,0,1,0,0,0,0},
{0,0,0,0,1,0,0,0,0},
}
}
Levels.test5 = MakeLevel
{
bricks =
{
{0,0,0,1,0,0,0,0},
{0,0,1,0,1,0,0,0},
{0,0,1,0,1,0,0,0},
{0,1,0,0,0,1,0,0},
{0,1,1,1,1,1,0,0},
{1,0,0,0,0,0,1,0},
{1,0,0,0,0,0,1,0},
{1,0,0,0,0,0,1,0},
{1,0,0,0,0,0,1,0}
}
}
-- Levels.test6 = MakeLevel2
-- {
-- bricks =
-- {
----A "a" = {{0,0,0,1,0,0,0,0},
-- {0,0,1,0,1,0,0,0},
-- {0,0,1,0,1,0,0,0},
-- {0,1,0,0,0,1,0,0},
-- {0,1,1,1,1,1,0,0},
-- {1,0,0,0,0,0,1,0},
-- {1,0,0,0,0,0,1,0},
-- {1,0,0,0,0,0,1,0},
-- {1,0,0,0,0,0,1,0}},
----B
-- "b" = {{1,1,1,1,0,0,0},
-- {1,0,0,0,1,0,0},
-- {1,0,0,0,1,0,0},
-- {1,0,0,0,1,0,0},
-- {1,1,1,1,0,0,0},
-- {1,0,0,0,1,0,0},
-- {1,0,0,0,0,1,0},
-- {1,0,0,0,0,1,0},
-- {1,1,1,1,1,0,0}},
--...........
--.......
--...
-- --Z
-- "z"= {{1,1,1,1,1,1,1,0},
-- {0,0,0,0,0,1,0,0},
-- {0,0,0,0,1,0,0,0},
-- {0,0,0,0,1,0,0,0},
-- {0,0,0,1,0,0,0,0},
-- {0,0,1,0,0,0,0,0},
-- {0,0,1,0,0,0,0,0},
-- {0,1,0,0,0,0,0,0},
-- {1,1,1,1,1,1,1,0}}
-- }
-- }
-- stores all levels in ordered table so that one can be selected randomly by index
Levels.levels =
{
--Levels.test4,
Levels.test5
-- Levels.test6,
}
function Levels:GetRandomLevel()
return self.levels[math.random(#Levels.levels)]
end
Levels.notPlayedYet = {}
Levels.currentLevel = Levels:GetRandomLevel()
-- Events
function Levels:allBricksBroken(event)
self.currentLevel = Levels:GetRandomLevel()
end
Events.allBricksBroken:AddObject(Levels)
return Levels
The work I've done thus far (same as above) as an external download: http://www.mediafire.com/download/1t89ftkbznkn184/Breakout2.rar
In the interest of actually answering the question:
I'm not 100% sure what you mean by "How can I join these letters", but from poking through the code I have a guess, so please clarify whether it is accurate, or if I am wrong about what you wanted.
Scenario 1
You haven't successfully achieved the image illustrated in the screenshot - you've been able to draw one letter, but not multiple ones.
In this case, you'll need a better understanding of what your code is doing. The CreateBricksFromTable function takes in a Level object, which is created by the MakeLevel function from a table with a bricks property - a table of tables representing rows and columns, indicating what type of brick should be at each position. In your commented-out level, you have created a table where the bricks field contains a field for each letter, but the MakeLevel function still expects a bricks field that directly contains the grid of blocks. You will have to - as it seems you attempted - create a MakeWordLevel function (or the like) that takes this letter list, plus a word for each line, and constructs a larger grid by copying the appropriate letters into it.
StackOverflow is not your programming tutor, and an SO question is not the right forum for having people write code for you or getting into step-by-step details of how to do this, but I'll leave you a basic outline. Your function would look something like this:
local function MakeWordLevel(data, line1, line2)
local level = {}
...
return level
end
It would then have to:
Populate all of the same properties that MakeLevel does
Calculate how wide (level.columns) the level should be with all the letters
Create a table in the same format as the bricks property, but big enough to hold all of the letters
Go through the input strings (line1 and line2), find the correct letter data from what is now the test6 array, and copy that data into the large table
Assign that table as level.bricks
This question already is a bit outside of what StackOverflow is intended for in that it asks about how to implement a feature rather than achieve a small, specific programming task, so any further followup should take place in a chatroom - perhaps the Hello World room would be helpful.
Scenario 2:
This was my original guess but, after considering and reading past edits, I doubt this is answering the right question.
You may want a solid "background" of, say, red blocks, surrounding your letters and making the field into a solid "wall", with the name in a different color. And you may want these bricks to slowly show up a few at a time.
In that case, the main thing you need to do is keep track of what spaces are "taken" by the name bricks. There are many ways to do this, but I would start with a matrix to keep track of that - as big as the final playing field - full of 0's. Then, as you add the bricks for the name, set a 1 at the x,y location in that matrix according to that block's coordinate.
When you want to fill in the background, each time you go to add a block at a coordinate, check that "taken" matrix before trying to add a block - if it's taken (1), then just skip it and move onto the next coordinate.
This works if you're filling in the background blocks sequentially (say, left to right, top to bottom), or if you want to add them randomly. With random, you'd also want to keep updating the "taken" matrix so you don't try to add a block twice.
The random fill-in, however, presents its own problem - it will keep taking longer to fill in as it goes, because it'll find more and more "taken" blocks and have to pick a new one. There are solutions to this, of course, but I won't go too far down that road when I don't know if that's even what you want.
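To make that concrete, here is a minimal standalone sketch of the "taken" matrix bookkeeping (in C++ for brevity rather than Corona's Lua; all names are illustrative, not from the game code):
#include <vector>

int main() {
    // "Taken" matrix, same dimensions as the final playing field; 0 = free.
    const int rows = 9, cols = 40;
    std::vector<std::vector<int>> taken(rows, std::vector<int>(cols, 0));

    // As each name brick is placed, mark its cell.
    taken[2][5] = 1; // e.g. a name brick at row 2, column 5

    // Background fill (sequential here): skip taken cells, and mark each
    // filled cell so a random fill would not pick the same spot twice.
    for (int y = 0; y < rows; ++y) {
        for (int x = 0; x < cols; ++x) {
            if (taken[y][x]) continue; // occupied by a name brick
            // ...place a background brick at (x, y) here...
            taken[y][x] = 1;
        }
    }
    return 0;
}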
I don't really understand (or read, for that matter) your code, but from what I can see, joining the letters into complete words is easy. You have two possibilities.
You can "render" them directly into your level/display data by simply copying them to the appropriate places, like this:
-- The level data.
local level = {}
-- Create the level data.
for row = 1, 25, 1 do
local rowData = {}
for column = 1, 80, 1 do
rowData[column] = "."
end
level[row] = rowData
end
-- Now let us setup the letters.
local letters = {
A = {
{".",".",".","#",".",".",".","."},
{".",".","#",".","#",".",".","."},
{".",".","#",".","#",".",".","."},
{".","#",".",".",".","#",".","."},
{".","#","#","#","#","#",".","."},
{"#",".",".",".",".",".","#","."},
{"#",".",".",".",".",".","#","."},
{"#",".",".",".",".",".","#","."},
{"#",".",".",".",".",".","#","."}
},
B = {
{"#","#","#","#",".",".","."},
{"#",".",".",".","#",".","."},
{"#",".",".",".","#",".","."},
{"#",".",".",".","#",".","."},
{"#","#","#","#",".",".","."},
{"#",".",".",".","#",".","."},
{"#",".",".",".",".","#","."},
{"#",".",".",".",".","#","."},
{"#","#","#","#","#",".","."}
}
}
-- The string to print.
local text = "ABBA"
-- Let us insert the data into the level data.
for index = 1, #text, 1 do
local char = string.sub(text, index, index)
local charData = letters[char]
local offset = index * 7
for row = 1, 9, 1 do
local rowData = charData[row]
for column = 1, 7, 1 do
level[row][offset + column] = rowData[column]
end
end
end
-- Print everything
for row = 1, 25, 1 do
local rowData = level[row]
for column = 1, 80, 1 do
io.write(rowData[column])
end
print()
end
You save your letters in a lookup table and then copy them, piece by piece, into the level data. Here I replaced the numbers with dots and number signs to make the output prettier on the command line.
Alternatively, you can "render" the words into a prepared buffer and then insert that into the level data using the same logic.

How can I execute a TensorFlow graph from a protobuf in C++?

I took some simple code from a tutorial and exported the graph to a .pb file, as below:
mnist_softmax_train.py
x = tf.placeholder("float", shape=[None, 784], name='input_x')
y_ = tf.placeholder("float", shape=[None, 10], name='input_y')
W = tf.Variable(tf.zeros([784, 10]), name='W')
b = tf.Variable(tf.zeros([10]), name='b')
tf.initialize_all_variables().run()
y = tf.nn.softmax(tf.matmul(x,W)+b, name='softmax')
cross_entropy = -tf.reduce_sum(y_*tf.log(y))
train_step = tf.train.GradientDescentOptimizer(0.01).minimize(cross_entropy, name='train_step')
train_step.run(feed_dict={x:input_x, y_:input_y})
In C++, I load the same graph, and feed in fake data for testing:
Tensor input_x(DT_FLOAT, TensorShape({10,784}));
Tensor input_y(DT_FLOAT, TensorShape({10,10}));
Tensor W(DT_FLOAT, TensorShape({784,10}));
Tensor b(DT_FLOAT, TensorShape({10,10}));
Tensor input_test_x(DT_FLOAT, TensorShape({1,784}));
for(int i=0;i<10;i++){
for(int j=0;j<10;j++)
input_x.matrix<float>()(i,i+j) = 1.0;
input_y.matrix<float>()(i,i) = 1.0;
input_test_x.matrix<float>()(0,i) = 1.0;
}
std::vector<std::pair<string, tensorflow::Tensor>> inputs = {
{ "input_x", input_x },
{ "input_y", input_y },
{ "W", W },
{ "b", b },
{ "input_test_x", input_test_x },
};
std::vector<tensorflow::Tensor> outputs;
status = session->Run(inputs, {}, {"train_step"}, &outputs);
std::cout << outputs[0].DebugString() << "\n";
However, this fails with the error:
Invalid argument: Input 0 of node train_step/update_W/ApplyGradientDescent was passed float from _recv_W_0:0 incompatible with expected float_ref.
The graph runs correctly in Python. How can I run it correctly in C++?
The issue here is that you are running the "train_step" target, which performs much more work than just inference. In particular, it attempts to update the variables W and b with the result of the gradient descent step. The error message
Invalid argument: Input 0 of node train_step/update_W/ApplyGradientDescent was passed float from _recv_W_0:0 incompatible with expected float_ref.
...means that one of the nodes you attempted to run ("train_step/update_W/ApplyGradientDescent") expected a mutable input (with type float_ref) but it got an immutable input (with type float) because the value was fed in.
There are (at least) two possible solutions:
If you only want to see predictions for a given input and given weights, fetch "softmax:0" instead of "train_step" in the call to Session::Run() (see the sketch after this list).
If you want to perform training in C++, do not feed W and b, but instead assign values to those variables, then continue to execute "train_step". You may find it easier to create a tf.train.Saver when you build the graph in Python, and then invoke the operations that it produces to save and restore values from a checkpoint.
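For the first option, a minimal sketch (my illustration, reusing the session and tensors from the question; feeding W and b matches the "given weights" case, since inference does not need the mutable variables):
// Sketch: fetch the "softmax" output instead of running the "train_step"
// target, so no variable updates are executed.
std::vector<std::pair<string, tensorflow::Tensor>> inference_inputs = {
{ "input_x", input_test_x },
{ "W", W },
{ "b", b },
};
std::vector<tensorflow::Tensor> inference_outputs;
status = session->Run(inference_inputs, {"softmax:0"}, {}, &inference_outputs);
if (status.ok()) {
std::cout << inference_outputs[0].DebugString() << "\n";
}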

How does SparkContext.textFile work under the covers?

I am trying to understand the textFile method deeply, but I think my lack of Hadoop knowledge is holding me back here. Let me lay out my understanding, and maybe you can correct anything that is incorrect.
When sc.textFile(path) is called, defaultMinPartitions is used, which is really just math.min(taskScheduler.defaultParallelism, 2). Let's assume we are using the SparkDeploySchedulerBackend, where this is conf.getInt("spark.default.parallelism", math.max(totalCoreCount.get(), 2)).
So, now let's say the default is 2. Going back to textFile, this is passed in to HadoopRDD. The true size is determined in getPartitions() using inputFormat.getSplits(jobConf, minPartitions). But, from what I can find, minPartitions is merely a hint and is in fact mostly ignored, so you will probably just get the total number of blocks.
OK, this fits with expectations. However, what if the default is not used and you provide a partition size that is larger than the block size? If my research is right and the getSplits call simply ignores this parameter, then wouldn't the provided minimum end up being ignored and you would still just get the block size?
Cross-posted to the Spark mailing list.
Short version:
Split size is determined by mapred.min.split.size or mapreduce.input.fileinputformat.split.minsize; if it is bigger than HDFS's block size, multiple blocks inside the same file are combined into a single split.
Detailed version:
I think you are right in your understanding of the procedure before inputFormat.getSplits.
Inside inputFormat.getSplits - more specifically, inside FileInputFormat's getSplits - it is mapred.min.split.size or mapreduce.input.fileinputformat.split.minsize that ultimately determines the split size. (I'm not sure which one takes effect in Spark; I tend to believe the former.)
Let's see the code: FileInputFormat from Hadoop 2.4.0
long goalSize = totalSize / (numSplits == 0 ? 1 : numSplits);
long minSize = Math.max(job.getLong(org.apache.hadoop.mapreduce.lib.input.
    FileInputFormat.SPLIT_MINSIZE, 1), minSplitSize);

// generate splits
ArrayList<FileSplit> splits = new ArrayList<FileSplit>(numSplits);
NetworkTopology clusterMap = new NetworkTopology();
for (FileStatus file: files) {
  Path path = file.getPath();
  long length = file.getLen();
  if (length != 0) {
    FileSystem fs = path.getFileSystem(job);
    BlockLocation[] blkLocations;
    if (file instanceof LocatedFileStatus) {
      blkLocations = ((LocatedFileStatus) file).getBlockLocations();
    } else {
      blkLocations = fs.getFileBlockLocations(file, 0, length);
    }
    if (isSplitable(fs, path)) {
      long blockSize = file.getBlockSize();
      long splitSize = computeSplitSize(goalSize, minSize, blockSize);

      long bytesRemaining = length;
      while (((double) bytesRemaining)/splitSize > SPLIT_SLOP) {
        String[] splitHosts = getSplitHosts(blkLocations,
            length-bytesRemaining, splitSize, clusterMap);
        splits.add(makeSplit(path, length-bytesRemaining, splitSize,
            splitHosts));
        bytesRemaining -= splitSize;
      }

      if (bytesRemaining != 0) {
        String[] splitHosts = getSplitHosts(blkLocations, length
            - bytesRemaining, bytesRemaining, clusterMap);
        splits.add(makeSplit(path, length - bytesRemaining, bytesRemaining,
            splitHosts));
      }
    } else {
      String[] splitHosts = getSplitHosts(blkLocations,0,length,clusterMap);
      splits.add(makeSplit(path, 0, length, splitHosts));
    }
  } else {
    //Create empty hosts array for zero length files
    splits.add(makeSplit(path, 0, length, new String[0]));
  }
}
Inside the for loop, makeSplit() is used to generate each split, and splitSize is the effective split size. The computeSplitSize function that produces splitSize:
protected long computeSplitSize(long goalSize, long minSize,
    long blockSize) {
  return Math.max(minSize, Math.min(goalSize, blockSize));
}
Therefore, if minSplitSize > blockSize, the output splits are actually combinations of several blocks within the same HDFS file; on the other hand, if minSplitSize < blockSize, each split corresponds to one HDFS block.
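To make the clamping concrete, here is a small standalone sketch of the same formula (in C++; the sizes are illustrative, not taken from any particular cluster):
#include <algorithm>
#include <cstdint>
#include <iostream>

// The same clamp as FileInputFormat.computeSplitSize:
// max(minSize, min(goalSize, blockSize))
int64_t computeSplitSize(int64_t goalSize, int64_t minSize, int64_t blockSize) {
  return std::max(minSize, std::min(goalSize, blockSize));
}

int main() {
  const int64_t blockSize = 128LL << 20; // a 128 MB HDFS block
  // minSize above the block size: blocks are combined into larger splits.
  std::cout << computeSplitSize(512LL << 20, 256LL << 20, blockSize) << "\n"; // 256 MB
  // minSize at its 1-byte default: splits fall back to one per block.
  std::cout << computeSplitSize(512LL << 20, 1, blockSize) << "\n"; // 128 MB
  return 0;
}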
I will add more points with examples to Yijie Shen's answer.
Before we go into details, let's establish the following. Assume that we are working on a standalone local Spark system with 4 cores.
If the master in the application is configured as new SparkConf().setMaster("local[*]"), then:
defaultParallelism: 4 (taskScheduler.defaultParallelism, i.e. the number of cores) - the default level of parallelism to use when not given by the user (e.g. for parallelize and makeRDD).
defaultMinPartitions: 2 - the default minimum number of partitions for Hadoop RDDs when not given by the user. Notice the use of math.min, so defaultMinPartitions can never be higher than 2:
def defaultMinPartitions: Int = math.min(defaultParallelism, 2)
The actual partition (split) size is defined by the following formula in the method FileInputFormat.computeSplitSize:
package org.apache.hadoop.mapred;

public abstract class FileInputFormat<K, V> implements InputFormat<K, V> {
  protected long computeSplitSize(long goalSize, long minSize, long blockSize) {
    return Math.max(minSize, Math.min(goalSize, blockSize));
  }
}
where:
minSize is the Hadoop parameter mapreduce.input.fileinputformat.split.minsize (default: 1 byte)
blockSize is the value of dfs.block.size in cluster mode (the default in Hadoop 2.0 is 128 MB) and fs.local.block.size in local mode (default: 32 MB, i.e. blockSize = 33554432 bytes, since 33554432 / 1024 = 32768 KB and 32768 / 1024 = 32 MB)
goalSize = totalInputSize / numPartitions, where:
totalInputSize is the total size in bytes of all the files in the input path
numPartitions is the custom parameter provided to the method sc.textFile(inputPath, numPartitions); if not provided, it defaults to defaultMinPartitions, i.e. 2 if the master is set as local[*]
Example 1: our file size is 91 bytes and we call sc.textFile(inputPath, 12):
minSize = 1 (mapreduce.input.fileinputformat.split.minsize = 1 byte)
goalSize = totalInputSize / numPartitions = 91 (file size) / 12 (partitions provided as the 2nd parameter in sc.textFile) = 7
splitSize = Math.max(minSize, Math.min(goalSize, blockSize)) = Math.max(1, Math.min(7, 33554432)) = 7 // 33554432 is the block size in local mode
splits = 91 (file size in bytes) / 7 (splitSize) => 13
FileInputFormat: Total # of splits generated by getSplits: 13
Note that while calculating splitSize, if the file size is > 32 MB, the split size is capped at the default fs.local.block.size = 32 MB, i.e. blockSize = 33554432 bytes.

Aparapi add sample

I'm studying Aparapi (https://code.google.com/p/aparapi/) and I'm seeing strange behaviour in one of the included samples.
The sample is the first one, "add". Building and executing it works fine. I also added the following code to test whether the GPU is really used:
if(!kernel.getExecutionMode().equals(Kernel.EXECUTION_MODE.GPU)){
System.out.println("Kernel did not execute on the GPU!");
}
and it works fine.
But if I try to change the size of the array from 512 to a number greater than 999 (for example 1000), I get the following output:
!!!!!!! clEnqueueNDRangeKernel() failed invalid work group size
after clEnqueueNDRangeKernel, globalSize[0] = 1000, localSize[0] = 128
Apr 18, 2013 1:31:01 PM com.amd.aparapi.KernelRunner executeOpenCL
WARNING: ### CL exec seems to have failed. Trying to revert to Java ###
JTP
Kernel did not execute on the GPU!
Here's my code:
final int size = 1000;
final float[] a = new float[size];
final float[] b = new float[size];
for (int i = 0; i < size; i++) {
a[i] = (float)(Math.random()*100);
b[i] = (float)(Math.random()*100);
}
final float[] sum = new float[size];
Kernel kernel = new Kernel(){
@Override public void run() {
int gid = getGlobalId();
sum[gid] = a[gid] + b[gid];
}
};
Range range = Range.create(size);
kernel.execute(range);
System.out.println(kernel.getExecutionMode());
if (!kernel.getExecutionMode().equals(Kernel.EXECUTION_MODE.GPU)){
System.out.println("Kernel did not execute on the GPU!");
}
kernel.dispose();
I tried specifying the group size using
Range range = Range.create(size, 128);
as suggested in a Google group, but nothing changed.
I'm currently running on Mac OS X 10.8 with Java 1.6.0_43. Aparapi version is the latest (2012-01-23).
Am I missing something? Any ideas?
Thanks in advance
Aparapi inherits a 'Grid Style' of implementation from OpenCL. When you specify a range of execution (say 1024), OpenCL will break this 'range' into groups of equal size. Possibly 4 groups of 256, or 8 groups of 128.
The group size must be a factor of range (so assert(range%groupSize==0)).
By default Aparapi internally selects the group size.
But you are choosing to fully specify the range and group size using
Range r = Range.create(n, 128)
You are responsible for ensuring that n%128==0.
From the error, it looks like you chose Range.create(1000, 128).
Sadly 1000 % 128 != 0 so this range will fail.
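A quick standalone check of that constraint (plain arithmetic, not Aparapi API):
#include <iostream>

int main() {
    // OpenCL requires the global range to be a multiple of the group size.
    std::cout << (512 % 128) << "\n";  // 0: the sample's original size of 512 was fine
    std::cout << (1000 % 128) << "\n"; // 104: a range of 1000 with group size 128 fails
    return 0;
}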
If you specify
Range r = Range.create(n)
Aparapi will choose a valid group size by finding the highest common factor of n.
Try dropping the 128 as the second arg.
Gary
