Get FileInfo with windows api - windows
I'm a bit new to Rust. I'm trying to fetch the FileDescription using the windows crate, but I'm failing to convert the descriptionBuffer returned by VerQueryValueA into a UTF-8 string. I can't figure out what I'm doing wrong.
fn get_proc_data(pid: u32) -> Option<String> {
let mut path = None;
unsafe {
let h_snap = CreateToolhelp32Snapshot(TH32CS_SNAPMODULE | TH32CS_SNAPMODULE32, pid);
if h_snap != INVALID_HANDLE_VALUE {
let mut mod_entry: MODULEENTRY32 = MODULEENTRY32 {
..Default::default()
};
mod_entry.dwSize = size_of_val(&mod_entry) as u32;
if Module32First(h_snap, &mut mod_entry).as_bool() {
let char_vec = mod_entry.szExePath.iter().map(|f| f.0).collect::<Vec<u8>>();
path = match from_utf8(&char_vec) {
Ok(s) => Some(String::from(s.to_string().trim_end_matches(char::from(0)))),
Err(_) => None,
};
}
}
CloseHandle(h_snap);
if path.is_some() {
let mut infoBuffer: [u8; 2048] = [0; 2048];
let pat = path.as_ref().unwrap();
let lpvoid: *mut c_void = infoBuffer.as_mut_ptr() as *mut c_void;
let c_str = CString::new(pat.as_str()).unwrap();
let pstr = PSTR(c_str.as_ptr() as *const u8);
let verInfoLen = GetFileVersionInfoSizeA(pstr, &mut 0);
let ok = GetFileVersionInfoA(pstr, 0, verInfoLen, lpvoid);
let mut descriptionBuffer: [u8; 256] = [0; 256];
let descriptionPtr: *mut *mut c_void =
descriptionBuffer.as_mut_ptr() as *mut *mut c_void;
let mut descriptionLen = 0;
if ok.as_bool()
&& VerQueryValueA(
lpvoid,
"\\StringFileInfo\\040904E4\\FileDescription",
descriptionPtr,
&mut descriptionLen,
)
.as_bool()
{
info!("{:?}", descriptionBuffer);
let res = from_utf8_lossy(&descriptionBuffer);
info!("{:?} {:?}", path, res);
}
}
}
return path;
}
Example outputs
info!("{:?}", descriptionBuffer);
INFO - [180, 162, 146, 165, 206, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
info!("{:?} {:?}", path, res);
INFO - Some("C:\Users\acoop\AppData\Local\Amazon Music\Amazon Music.exe") "�����\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}"
Querying for file information properties is a multi-step process:
Determine the version info size by calling GetFileVersionInfoSizeW
Allocate a sufficiently sized buffer; a Vec provides everything needed (contiguous memory, runtime dynamic size, low stack memory overhead)
Read the entire version info into the allocated buffer by calling GetFileVersionInfoW
Query for the information of interest using VerQueryValueW; on success the function returns a pointer/size pair into the buffer allocated above
It is the final step that ultimately goes wrong in the code provided. The API expects the address of a pointer variable where it stores the result. The code however passes the address of the first element of descriptionBuffer, and that's where the API writes to (presumably, the code was compiled for a 64-bit target, meaning that the first 8 bytes are the pointer value).
In essence the code did succeed in producing a pointer/size pair, but failed to interpret them according to the API contract. An improved version that queries for a binary's file description might look like this:
/// Returns the `FileDescription` string stored in the version resource of the
/// binary at `path`.
///
/// Only the US-English/Unicode string table (`040904B0`) is queried. An empty
/// string is returned when the entry exists but carries no value.
///
/// # Errors
///
/// Returns an error if the file has no readable version resource, if the
/// `FileDescription` entry is missing from the queried string table, or if the
/// stored value is not well-formed UTF-16.
fn get_file_description(path: impl AsRef<Path>) -> Result<String, Box<dyn Error>> {
    let file = path.as_ref().as_os_str();

    // Determine version info size
    // SAFETY: the optional handle out-parameter may be null.
    let size = unsafe { GetFileVersionInfoSizeW(file, null_mut()) };
    if size == 0 {
        return Err(core::Error::from_win32().into());
    }

    // Allocate a buffer of exactly the reported size
    let mut buffer = vec![0u8; size as usize];

    // Read version info
    // SAFETY: `buffer` is writable for `size` bytes, the size passed to the API.
    unsafe {
        GetFileVersionInfoW(
            file,
            0,
            size,
            buffer.as_mut_ptr() as *mut std::ffi::c_void,
        )
    }
    .ok()?;

    // Declare pointer/size pair for output; on success the API points `ptr`
    // into `buffer` rather than copying anything out.
    let mut ptr: *mut std::ffi::c_void = null_mut();
    let mut len = 0;

    // Query for file description
    // SAFETY: `buffer` holds the version info read above, and both
    // out-parameters are valid for writes.
    let success = unsafe {
        VerQueryValueW(
            buffer.as_ptr() as *const std::ffi::c_void,
            "\\StringFileInfo\\040904B0\\FileDescription",
            &mut ptr,
            &mut len,
        )
    }
    // The API call doesn't set the last error code so we cannot use `.ok()?` here
    .as_bool();
    if !success {
        return Err("Failed to query file description".into());
    }

    // A zero length means no value is available; never build a slice from a
    // null pointer.
    if ptr.is_null() || len == 0 {
        return Ok(String::new());
    }

    // `len` here is in elements (as opposed to bytes)
    // SAFETY: `ptr` is non-null and refers to `len` UTF-16 code units inside
    // `buffer`, which is still alive.
    let descr = unsafe { slice::from_raw_parts(ptr as *const u16, len as usize) };

    // The reported length normally includes the terminating NUL; cut the value
    // there so the returned `String` does not end in `\0`.
    let end = descr
        .iter()
        .position(|&unit| unit == 0)
        .unwrap_or(descr.len());

    // Optionally use `from_utf16_lossy` if you don't need to handle invalid UTF-16
    Ok(String::from_utf16(&descr[..end])?)
}
This is better, but certainly not perfect by any stretch of the term. Most of the improvements revolve around character encoding subtleties, the gnarliest of pain points when it comes to Rust on Windows. There is no dedicated string type in Rust that can store strings in Windows' native character encoding, UTF-16.
Noteworthy changes:
The function accepts an argument convertible to a Path reference. The underlying storage is of type OsStr, sporting a relaxed version of UTF-8 capable of representing any sequence of UTF-16 code units, well-formed or otherwise. This is crucial since Windows makes no character encoding guarantees with respect to filesystem objects. With the exception of a few reserved values, virtually any sequence of 16-bit values is allowed. Your program needs to be prepared for this.
All narrow-character set versions of the API calls have been replaced with the wide-character versions (see Conventions for Function Prototypes for background information). This is the only safe option when dealing with data you do not control (such as version info resources of arbitrary binaries). Of particular note here is that the windows crate provides an implicit conversion from &OsStr into PCWSTR, so passing a Path into wide-character APIs isn't entirely inconvenient (but they do incur a conversion and allocation).
Likewise, the windows crate provides conversions from &str to PCWSTR, gated under the "alloc" feature. This allows using a string literal in the call to VerQueryValueW and get everything converted as needed (again, with a conversion and allocation cost).
To turn this into a fully working example that produces a command-line application accepting the path name of the binary image as its first argument, simply add the following to Cargo.toml:
[package]
name = "fileinfo"
version = "0.0.0"
edition = "2021"
[dependencies.windows]
version = "0.33.0"
features = [
"alloc",
"Win32_Foundation",
"Win32_Storage_FileSystem",
]
and add the following to the src/main.rs file:
use std::{env, error::Error, path::Path, ptr::null_mut, slice};
use windows::{
core,
Win32::Storage::FileSystem::{GetFileVersionInfoSizeW, GetFileVersionInfoW, VerQueryValueW},
};
/// Prints the file description of the binary whose path is given as the first
/// command line argument.
///
/// Fails if no argument was supplied or if the description cannot be read.
fn main() -> Result<(), Box<dyn Error>> {
    // Use `args_os` so that paths which are not valid Unicode are accepted.
    let mut args = env::args_os();
    let input = match args.nth(1) {
        Some(arg) => arg,
        None => return Err("Expected 1 command line argument".into()),
    };

    let description = get_file_description(Path::new(&input))?;
    println!("File description: \"{}\"", description);

    Ok(())
}
Related
How can I create a random array between a range of two other arrays?
I need to generate an array of random 20 bytes between a given range of arrays. Since arrays are comparable in Rust, this works: let low = [0u8; 20]; let high = [2u8; 20]; assert_eq!(true, low < high); assert_eq!(false, low > high); assert_eq!(true, low == [0u8; 20]); For these bounds: let low: [u8; 20] = [98, 0, 1, 0, 2, 6, 99, 3, 0, 5, 23, 3, 5, 6, 11, 8, 0, 2, 0, 17]; let high: [u8; 20] = [99, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1]; These would be a valid result: [98, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [99, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] These are not: [98, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [99, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2] I want to do something like: use rand::prelude::*; fn main() { let low = [0u8; 20]; let high = [2u8; 20]; let value = rand::thread_rng().gen_range(low, high); println!("{:?}", value); } but I get following error: error[E0277]: the trait bound `[u8; 20]: rand::distributions::uniform::SampleUniform` is not satisfied --> src\main.rs:6:36 | 6 | let value = rand::thread_rng().gen_range(low, high); | ^^^^^^^^^ the trait `rand::distributions::uniform::SampleUniform` is not implemented for `[u8; 20]` I tried implementing SampleUniform and UniformSampler without much success. Is there a simple way to implement this?
If you want to treat the byte arrays as big integers, use the num-bigint crate with the rand feature enabled: use bigint::{ToBigInt, RandBigInt}; let low = -10000.to_bigint().unwrap(); let high = 10000.to_bigint().unwrap(); let b = rng.gen_bigint_range(&low, &high); You could also use unsigned integers instead of signed. There are methods to convert to and from big endian byte arrays: from_bytes_be to_bytes_be See also: How do I generate a random num::BigUint?
How to find boundary point using bfs algorithm
I think of the 2D array as a coordinate and try to find a coordinate value with a value of 1. So far, it's a very easy BFS problem, but what I want to do is look at the following picture. While I'm looking for 1 or after I've found it all, I would like to know the coordinate values surrounding the boundary in the order of the arrow or the other direction. What options do I need to add to get those information? Below is the BFS code that I use now. I can get coordinate values from the BFS function as shown in the second picture. class Node { public int x; public int y; public Node(int x, int y) { this.x = x; this.y = y; } }; private int[] dx = new int[8] { -1, 0, 1, 0, 1, -1, -1, 1 }; private int[] dy = new int[8] { 0, -1, 0, 1, 1, -1, 1, -1 }; private Queue<Node> q = new Queue<Node>(); bool[,] visit = new bool[15, 15]; int[,] coordinates = new int[15, 15] { { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, { 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0 }, { 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0 }, { 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0 }, { 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0 }, { 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0 }, { 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0 }, { 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0 }, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }}; void BFS(int[,] pixel, int x, int y) { q.Enqueue(new Node(x, y)); visit[x, y] = true; while (q.Count != 0) { Node cur = q.Dequeue(); for (int i = 0; i < 8; i++) { int r = cur.x + dx[i]; int c = cur.y + dy[i]; if (r >= 0 && c >= 0 && r < 15 && c < 15) { if (!visit[r, c] && pixel[r, c] == 1) { q.Enqueue(new Node(r, c)); visit[r, c] = true; } } } } } void main() { for (int y = 0; y < 15; y++) 
{ for (int x = 0; x < 15; x++) { if (!visit[x, y] && coordinates[x, y] == 1) { BFS(coordinates, x, y); } } } }
We do not need BFS to find the boundary '1' values. We can simply loop over the 2D grid and, for each '1', check whether all 4 of its adjacent values (i.e. up, down, left, right) are '1'. If at least one of them is not '1', then it is a boundary point. Thanks!
"find a coordinate value with a value of 1": Start by pre-processing the matrix - Search all 1 values (this can also be done recursively) - If a 1 value does not have a 0 neighbor, it means it is not on the edge - change it to 0. After the pre-processing you are left with only the edge 1 values, and all others are 0. "I would like to know the coordinate values surrounding the boundary in the order of the arrow or the other direction": To find out if the edge forms a closed loop, and to get the nodes in the right order, apply BFS to the pre-processed matrix. Seek a path from a node of your choice back to the same node (a loop).
Low cost Image to NSData conversion for feeding SCNTechnique's sampler2D inputs
Sometimes the only way to pass precious data from CPU to GPU is by hiding it in textures. I tried to trick SCNTechnique and simply pass [NSData dataWithBytes:length:] or a CGDataProviderRef containing my neatly prepared raw pixel data bytes, but SceneKit is smart enough to detect my sinister attempts. But I did not give up, and found a loophole: [_sceneView.technique setValue: UIImagePNGRepresentation(encodeInSinglePixelUIImage(pos.x, pos.y)) forKey:@"blob_pos_"]; Encoding and decoding single pixel PNGs at 60fps on a mobile device is something you can afford; on an iPhone X it just costs 2ms and keeps your palm a little bit warmer. However, I do not need any heat-generating features until November, so I was wondering if there's a cool alternative to this method.
The most efficient way I found is constructing floating point RGB TIFFs. It's still not super fast, consuming 0.7ms on the iPhone X, but a lot faster than the PNG method. Having a float texture also have the benefits of direct float transfer, that is, no encoding to multiple uint8 RGBA values on the CPU and reconstructing floats on the GPU. Here's how: NSData * tiffencode(float x, float y) { const uint8_t tags = 9; const uint8_t headerlen = 8+2+tags*12+4; const uint8_t width = 1; const uint8_t height = 1; const uint8_t datalen = width*height*3*4; static uint8_t tiff[headerlen+datalen] = { 'I', 'I', 0x2a, 0, //little endian/'I'ntel 8, 0, 0, 0, //index of metadata tags, 0, 0x00, 1, 4, 0, 1, 0, 0, 0, width, 0, 0, 0, //width 0x01, 1, 4, 0, 1, 0, 0, 0, height, 0, 0, 0, //height 0x02, 1, 3, 0, 1, 0, 0, 0, 32, 0, 0, 0, //bits per sample(s) 0x06, 1, 3, 0, 1, 0, 0, 0, 2, 0, 0, 0, //photometric interpretation: RGB 0x11, 1, 4, 0, 1, 0, 0, 0, headerlen, 0, 0, 0,//strip offset 0x15, 1, 3, 0, 1, 0, 0, 0, 3, 0, 0, 0, //samples per pixel: 3 0x16, 1, 4, 0, 1, 0, 0, 0, height, 0, 0, 0, //rows per strip: height 0x17, 1, 4, 0, 1, 0, 0, 0, datalen, 0, 0, 0, //strip byte length 0x53, 1, 3, 0, 1, 0, 0, 0, 3, 0, 0, 0, //sampleformat: float 0, 0, 0, 0, //end of metadata //RGBRGB.. pixeldata here }; float *rawData = tiff+headerlen; rawData[0] = x; rawData[1] = y; NSData *data = [NSData dataWithBytes:&tiff length:sizeof(tiff)]; return data; } Useful TIFF links I used: http://www.fileformat.info/format/tiff/corion.htm http://paulbourke.net/dataformats/tiff/ https://www.fileformat.info/format/tiff/egff.htm https://www.awaresystems.be/imaging/tiff/tifftags/sampleformat.html
Matrix columns permutation with cublas
I have an input matrix A of size 10x20, I want to permute its columns as follows: p=[1 4 2 3 5 11 7 13 6 12 8 14 17 9 15 18 10 16 19 20] ;%rearrange the columns of A A=A(:,p); To do so, I constructed a permutation matrix I corresponding to the permutation vector p and permuted A can be obtained by performing the following multiplication: A=A*I I tested the permutation in Matlab and everything is ok. Now, I want to test it in cuda using cublas. The input matrix A is entered in column major. The permuation matrix I in column major as well. The following code is to simply test the permutation: #include "cuda_runtime.h" #include "device_launch_parameters.h" #include <stdio.h> #include <stdlib.h> #include <math.h> #include <cublas_v2.h> #define cudacall(call) \ do \ { \ cudaError_t err = (call); \ if(cudaSuccess != err) \ { \ fprintf(stderr,"CUDA Error:\nFile = %s\nLine = %d\nReason = %s\n", __FILE__, __LINE__, cudaGetErrorString(err)); \ cudaDeviceReset(); \ exit(EXIT_FAILURE); \ } \ } \ while (0) #define cublascall(call) \ do \ { \ cublasStatus_t status = (call); \ if(CUBLAS_STATUS_SUCCESS != status) \ { \ fprintf(stderr,"CUBLAS Error:\nFile = %s\nLine = %d\nCode = %d\n", __FILE__, __LINE__, status); \ cudaDeviceReset(); \ exit(EXIT_FAILURE); \ } \ \ } \ while(0) __global__ void sgemm_kernel(float *A_d, float *I_d) { int m=10,n=20,k=20; int lda=k, ldb=k; cublasHandle_t hdl; cublasStatus_t status = cublasCreate_v2(&hdl); const float alpha=1.0F, beta=0.0f; status=cublasSgemm(hdl,CUBLAS_OP_N,CUBLAS_OP_N,k,n,k,&alpha,A_d,lda,I_d,ldb,&beta,A_d,lda); } int main(int argc, char* argv[]) {float A[10*20]={-0.0614, -0.0199, 0.0024, -0.0414, 0.1736, -0.0595, -0.2794, 0.1946, -0.0647, -0.0025, -0.0036, 0.0628, -0.0827, 0.3679, -0.1913, 0.0500, -0.0245, 0.3855, -0.1298, -0.0334, -0.0241, -0.0564, 0.0098, -0.2862, -0.0474, 0.0333, -0.3049, 0.2851, -0.1242, 0.0162, 0.0241, 0.0270, -0.0670, 0.3129, -0.2428, 0.0947, -0.1878, 0.0889, -0.0208, 0.0075, -0.1559, 0.1437, -0.1916, 0.2297, 
-0.0833, -0.1805, 0.2522, -0.1738, 0.1027, -0.1273, 0.0716, 0.1882, -0.0963, 0.1081, 0.0958, -0.0713, 0.1931, 0.0874, -0.4186, 0.0345, -0.1912, 0.0501, -0.1396, -0.0989, -0.0338, 0.1773, 0.1088, 0.0389, -0.0117, 0.0014, 0.1648, -0.1705, -0.0575, -0.0133, -0.0570, 0.2124, -0.0193, 0.1535, 0.0857, -0.1308, 0.1971, 0.0882, -0.2577, 0.1662, -0.2498, -0.0365, -0.1805, 0.0921, 0.0912, 0.0178, -0.0379, 0.0080, 0.0572, -0.0067, 0.0591, -0.0136, 0.0471, -0.0163, 0.0082, -0.0338, -0.2436, 0.1116, 0.0732, -0.0319, 0.0550, 0.2821, 0.0240, 0.0109, -0.0034, 0.1212, -0.0061, 0.2497, -0.0542, -0.0939, 0.0651, 0.0063, -0.1367, 0.0580, 0.7389, -0.1143, -0.3786, 0.1288, 0.0001, 0.2604, -0.1094, -0.3624, -0.0184, 0.0538, 0.0329, 0.0040, 0.0603, 0.1422, 0.1037, -0.1846, 0.4046, -0.3738, -0.3487, 0.3846, -0.0849, 0.0135, -0.1850, 0.3571, -0.0543, -0.0025, -0.2880, 0.0600, 0.2605, -0.0474, 0.0010, -0.0333, -0.1974, 0.4788, -0.2441, 0.3847, -0.1235, -0.3503, -0.1785, -0.1095, 0.3158, 0.0062, -0.0509, -0.0502, 0.2154, 0.2237, -0.0671, 0.0377, 0.0519, 0.1530, -0.1675, 0.1856, -0.0380, -0.0026, 0.4700, 0.0097, -0.2394, 0.0717, -0.2101, 0.2841, -0.1799, -0.0924, -0.2678, 0.4485, 0.0044, 0.0030, -0.0439, 0.4337, 0.1819, -0.0180, -0.5443, 0.0864, 0.0390, -0.0235, -0.0706, 0.0138, 0.0633, -0.0147, 0.0444, -0.0334, 0.0557, 0.0507} float I[20*20]={1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1}; float *A_d, *I_d; cudacall(cudaMalloc(&A_d,10*20*sizeof( float ))); cudacall(cudaMalloc(&I_d, 20*20*sizeof(float ))); cudacall(cudaMemcpy(A_d, A, 10*20*sizeof(float), cudaMemcpyHostToDevice)); cudacall(cudaMemcpy(I_d, I, 20*20*sizeof(float), cudaMemcpyHostToDevice)); sgemm_kernel<<<1,1>>>(A_d, I_d); cudacall(cudaDeviceSynchronize()); cudacall(cudaMemcpy(A, A_d, 10*20*sizeof(float), cudaMemcpyDeviceToHost)); cudacall(cudaFree(A_d)); cudacall(cudaFree(I_d)); return 0; } I couldn't get a correct result.
CUBLAS doesn't support in-place operations (in fact, no parallel BLAS I am aware of supports them). You cannot pass A_d both as an input to the multiplication and as the output matrix of the operation. You must use a different memory allocation to hold the result. So C <- 1*(A * B) + 0*C is legal, whereas A <- 1*(A * B) + 0*A is not.
cublasSgemm is a host function, so it should be called from a function without the __global__ qualifier.
Is it possible to get a binary array from a BinData::Record instance?
I'm using Ruby gem Bindata, using the following code: require 'bindata' class Rectangle < BinData::Record endian :little uint16 :len string :name, :read_length => :len uint32 :width uint32 :height end rectangle = rectangle.new rectangle.len = 12 It is possible to get from rectangle instance an array like [0, 1, 1, 0, 0, ...] with the binary representation of all the fields inside the object?
BinData::Base#to_binary_s returns "the string representation of this data object": rectangle.to_binary_s #=> "\f\x00\x00\x00\x00\x00\x00\x00\x00\x00" This can be converted to a bit string via String#unpack: rectangle.to_binary_s.unpack('b*') #=> ["00110000000000000000000000000000000000000000000000000000000000000000000000000000"] Or to a bit array via: rectangle.to_binary_s.unpack('b*')[0].chars.map(&:to_i) #=> [0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]