Pthread program always run slower than normal

Pthread program always run slower than normal - parallel-processing

I build in Ubuntu 12.04 with command
g++ -pthread hello.cpp
But I run parallel mode always slows than normal . Here's my code
#include <iostream>
#include <pthread.h>
#include <math.h>
using namespace std;
#define NUM_THREADS 4
#define MAX_NUMBER 10000000
void *doSomething(void *param)
{
int id = (int) param;
int sum = 0;
for (int i = 0; i < MAX_NUMBER; i++)
{
sum += sin(i) + cos(i) + tan(i); // sum
}
return NULL;
}
void runNormal()
{
// run in normal mode with NUM_THREADS times.
for (int i = 0; i < NUM_THREADS; i++)
{
doSomething((void *) i);
}
}
void runParallel()
{
pthread_t threads[NUM_THREADS];
pthread_attr_t attr;
pthread_attr_init(&attr);
pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
int rc, i;
for (i = 0; i < NUM_THREADS; i++)
{
rc = pthread_create(&threads[i], &attr, doSomething, (void *) i);
if (rc)
{
cout << "ERROR : can't create thread #" << i;
}
}
pthread_attr_destroy(&attr);
void *status;
for (i = 0; i < NUM_THREADS; i++)
{
pthread_join(threads[i], &status);
}
}
int main()
{
int type;
cout << "Choose type of run (1 - normal, 2 - parallel) : ";
cin >> type;
clock_t init, final;
init = clock();
if (type == 1)
{
runNormal();
}
else if (type == 2)
{
runParallel();
}
else
{
cout << "Your choice is wrong.";
}
final = clock();
double duration = (double) (final - init) / CLOCKS_PER_SEC;
cout << "Duration : " << duration << " seconds." << endl;
pthread_exit(NULL);
return 0;
}
I run with 4 threads because my lap has 4 cores . I saw in System Monitor , I realize my lap used 4 cores concurrently in parrallel mode and only 1 core in normal mode but duration time of normal mode is shorter .

please see answer https://stackoverflow.com/a/2962914/1689451 for clarification of how clock works in multithreaded applications.
try it like this:
struct timespec start, finish;
double elapsed;
clock_gettime(CLOCK_MONOTONIC, &start);
if (type == 1)
{
runNormal();
}
else if (type == 2)
{
runParallel();
}
else
{
cout << "Your choice is wrong.";
}
clock_gettime(CLOCK_MONOTONIC, &finish);
elapsed = (finish.tv_sec - start.tv_sec);
elapsed += (finish.tv_nsec - start.tv_nsec) / 1000000000.0;
cout << " Duration : " << elapsed << " seconds." << endl;
and for completeness, i built it like this (filname par.cpp):
make CXXFLAGS="-pthread -O3 -lrt" LDLIBS=-lrt -B par && ./par

Related

CUDA which is faster? Memory coalescing vs caching?

I have encountered this exercise which asks for which code is faster between the following two.
First code.
int sum = 0;
for(int i = 0; i < n; i++) {
sum += array[i*n + thread_id];
}
Second code.
int sum = 0;
for(int i = 0; i < n; i++) {
sum += array[n*thread_id + i];
}
I would try the code myself I will not have a Nvidia GPU in the following days.
I think that the first code takes advantage of memory coalescing see here, while the second one would take advantage of caching.

Many thanks to #RobertCrovella for clarifying the issues regarding memory coalescing. This is my attempt to benchmark the two codes as asked for. It can be clearly noticed from the output (run on a NVS5400M GPU laptop) that the first code is twice more efficient as compared to the second one. This is because of the memory coalescing taking place in the first one (kernel1).
#include <cuda.h>
#include <ctime>
#include <iostream>
#include <stdio.h>
using namespace std;
#define BLOCK_SIZE 1024
#define GRID_SIZE 1024
// Error Handling
#define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); }
inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true)
{
if (code != cudaSuccess)
{
fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
if (abort) exit(code);
}
}
//kernel1<<<8,8>>>(d_array,d_sum1,n);
__global__ void kernel1(int *array, long *sum, int n) {
long result=0;
int thread_id=threadIdx.x+blockIdx.x*blockDim.x;
for(int i=0;i<n;i++) {
result += array[i*n + thread_id];
}
//__syncthreads();
sum[thread_id]=result;
}
__global__ void kernel2(int *array, long *sum, int n) {
long result=0;
int thread_id=threadIdx.x+blockIdx.x*blockDim.x;
for(int i=0;i<n;i++) {
result += array[n*thread_id+i];
}
__syncthreads();
sum[thread_id]=result;
}
int main() {
srand((unsigned)time(0));
long *h_sum1,*d_sum1;
long *h_sum2,*d_sum2;
int n=10;
int size1=n*BLOCK_SIZE*GRID_SIZE+n;
int *h_array;
h_array=new int[size1];
h_sum1=new long[size1];
h_sum2=new long[size1];
//random number range
int min =1, max =10;
for(int i=0;i<size1;i++) {
h_array[i]= min + (rand() % static_cast<int>(max - min + 1));
h_sum1[i]=0;
h_sum2[i]=0;
}
int *d_array;
gpuErrchk(cudaMalloc((void**)&d_array,size1*sizeof(int)));
gpuErrchk(cudaMalloc((void**)&d_sum1,size1*sizeof(long)));
cudaEvent_t start, stop;
cudaEventCreate(&start);
cudaEventCreate(&stop);
gpuErrchk(cudaMemcpy(d_array,h_array,size1*sizeof(int),cudaMemcpyHostToDevice));
gpuErrchk(cudaMemcpy(d_sum1,h_sum1,size1*sizeof(long),cudaMemcpyHostToDevice));
cudaEventRecord(start);
kernel1<<<GRID_SIZE,BLOCK_SIZE>>>(d_array,d_sum1,n);
cudaEventRecord(stop);
gpuErrchk(cudaMemcpy(h_sum1,d_sum1,size1*sizeof(long),cudaMemcpyDeviceToHost));
float milliSeconds1=0;
cudaEventElapsedTime(&milliSeconds1,start,stop);
gpuErrchk(cudaMalloc((void**)&d_sum2,size1*sizeof(long)));
gpuErrchk(cudaMemcpy(d_sum2,h_sum2,size1*sizeof(long),cudaMemcpyHostToDevice));
cudaEventRecord(start);
kernel2<<<GRID_SIZE,BLOCK_SIZE>>>(d_array,d_sum2,10);
cudaEventRecord(stop);
gpuErrchk(cudaMemcpy(h_sum2,d_sum2,size1*sizeof(long),cudaMemcpyDeviceToHost));
float milliSeconds2=0;
cudaEventElapsedTime(&milliSeconds2,start,stop);
long result_device1=0,result_host1=0;
long result_device2=0,result_host2=0;
for(int i=0;i<size1;i++) {
result_device1 += h_sum1[i];
result_device2 += h_sum2[i];
}
for(int thread_id=0;thread_id<GRID_SIZE*BLOCK_SIZE;thread_id++)
for(int i=0;i<10;i++) {
result_host1 += h_array[i*10+thread_id];
result_host2 += h_array[10*thread_id+i];
}
cout << "Device result1 = " << result_device1 << endl;
cout << "Host result1 = " << result_host1 << endl;
cout << "Time1 (ms) = " << milliSeconds1 << endl;
cout << "Device result2 = " << result_device2 << endl;
cout << "Host result2 = " << result_host2 << endl;
cout << "Time2 (ms) = " << milliSeconds2 << endl;
gpuErrchk(cudaFree(d_array));
gpuErrchk(cudaFree(d_sum1));
gpuErrchk(cudaFree(d_sum2));
return 0;
}
The Cuda Event timer output is as under:
Device result1 = 57659226
Host result1 = 57659226
Time1 (ms) = 5.21952
Device result2 = 57674257
Host result2 = 57674257
Time2 (ms) = 11.8356

How to iterate a string using while loop in C++?

number = 100010001111111
for (int i=0; number.length(); i++) {
while number[i] == 1 {
k++;
}
}
I would like to implement a while-loop as a replacement for the for-loop as shown above.
How could I convert this to a while-loop?

Here's a solution for the problem you mentioned in your comment (Problem - 96A)
#include <iostream>
using namespace std;
int main()
{
cout << "Please enter your players situation" << endl;
std::string str;
cin >> str;
std::string::size_type i = 0;
int NumbersofAppearances = 0;
int ConsectiveNumberSequence = 7; //You can change that to whatever sequence you like
bool IsDangerous=false;
while (i < str.size())
{
if(str[i]=='1' )
{
++NumbersofAppearances;
//We need to check if we reached the consecutive number or not and save it on a different bool variable
if(NumbersofAppearances>=ConsectiveNumberSequence)
IsDangerous=true;
}
else
{
NumbersofAppearances=0;
}
++i;
}
//print out the end result
if (IsDangerous)
cout <<"YES , this is dangerous"<< endl;
else
cout <<"No, this is not dangerous"<< endl;
return 0;
}
And here's a link to Coding ground

Finding incorrect implementation of JudyArray

I'm trying to give a better error report (possible bug) for this case (about judySArray give incorrect result, but I don't know which key that give incorrect result).
The code here from this folder, note on this blog. Dependencies: judySArray.h and cedar.h
// judy.cpp
#include "deps/judySArray.h"
#include <string>
#include <iostream>
#include <cstdlib>
#include <cstring>
using namespace std;
typedef judySArray<double> MSD;
const int MAX_DATA = 12000000;
const char i2ch[] = {'0','1','2','3','4','5','6','7','8','9','a','B','c','D','e','F'};
int get_first_digit(double d) {
while(d > 10) d /= 10;
return d;
}
string to_rhex(int v) {
char hex[32];
int start = 0;
while(v>0) {
hex[start] = i2ch[v%16];
v /= 16;
++start;
}
hex[start] = 0;
return hex;
}
void add_or_inc(MSD &m, const string& key,double set, double inc, int& ctr) {
const char* cstr = key.c_str();
double it = m.find(cstr);
if(!it) {
m.insert(cstr,set);
return;
}
m.insert(cstr,it+inc);
++ctr;
}
int main() {
MSD m(64);
int dup1 = 0, dup2 = 0, dup3 = 0;
for(int z=MAX_DATA;z>0;--z) {
int val2 = MAX_DATA-z;
int val3 = MAX_DATA*2-z;
string key1 = to_string(z);
string key2 = to_string(val2);
string key3 = to_rhex(val3);
add_or_inc(m,key1,z,val2,dup1);
add_or_inc(m,key2,val2,val3,dup2);
add_or_inc(m,key3,val3,z,dup3);
}
cout << dup1 << ' ' << dup2 << ' ' << dup3 << endl;
int total = 0, verify = 0, count = 0;
for(auto &it = m.begin();m.success(); m.next()) {
total += get_first_digit(it.value);
verify += strlen((const char *) it.key);
count += 1;
}
cout << total << ' ' << verify << ' ' << count << endl;
}
other implementation (map, unordered_map, hat-trie and cedar) give correct result:
6009354 6009348 611297
36186112 159701682 23370001
but judy didn't:
6009354 6009348 611297
36186112 159701681 23370000
The problem is, which key that have incorrect result?
I've tried to build a code that insert those keys on another data structure (that is cedar), but that incorrect keys still not detected:
// judy.cpp
#include "deps/judySArray.h"
#include <string>
#include <iostream>
#include <cstdlib>
#include <cstring>
#include <vector>
using namespace std;
typedef judySArray<double> MSD;
const int MAX_DATA = 12000000;
const char i2ch[] = {'0','1','2','3','4','5','6','7','8','9','a','B','c','D','e','F'};
int get_first_digit(double d) {
while(d > 10) d /= 10;
return d;
}
string to_rhex(int v) {
char hex[32];
int start = 0;
while(v>0) {
hex[start] = i2ch[v%16];
v /= 16;
++start;
}
hex[start] = 0;
return hex;
}
void add_or_inc(MSD &m, const string& key,double set, double inc, int& ctr) {
const char* cstr = key.c_str();
double it = m.find(cstr);
if(!it) {
m.insert(cstr,set);
return;
}
m.insert(cstr,it+inc);
++ctr;
}
#include "deps/cedar.h"
class MSD2 {
public:
vector<double> data;
typedef cedar::da<int> CI;
CI da;
bool exists(const string& key,double &old) {
int idx = -1;
bool found = da.exactMatchExists(key.c_str(),key.size(),&idx);
if(found) old = data[idx];
return found;
}
void insert(const string& key,double val) {
da.update(key.c_str(),key.size(),data.size());
data.push_back(val);
}
void update(const string& key,double val) {
int idx = -1;
bool found = da.exactMatchExists(key.c_str(),key.size(),&idx);
if(found) {
data[idx] = val;
return;
}
insert(key,val);
}
};
void add_or_inc(MSD2 &m, const string& key,double set, double inc, int& ctr) {
double old;
if(!m.exists(key,old)) {
m.insert(key,set);
return;
}
m.update(key,old+inc);
++ctr;
}
int main() {
MSD m(64);
MSD2 m2;
int dup1 = 0, dup2 = 0, dup3 = 0;
int vup1 = 0, vup2 = 0, vup3 = 0;
for(int z=MAX_DATA;z>0;--z) {
int val2 = MAX_DATA-z;
int val3 = MAX_DATA*2-z;
string key1 = to_string(z);
string key2 = to_string(val2);
string key3 = to_rhex(val3);
add_or_inc(m,key1,z,val2,dup1);
add_or_inc(m,key2,val2,val3,dup2);
add_or_inc(m,key3,val3,z,dup3);
add_or_inc(m2,key1,z,val2,vup1);
add_or_inc(m2,key2,val2,val3,vup2);
add_or_inc(m2,key3,val3,z,vup3);
}
cout << dup1 << ' ' << dup2 << ' ' << dup3 << endl;
cout << vup1 << ' ' << vup2 << ' ' << vup3 << endl;
int total = 0, verify = 0, count = 0;
int xotal = 0, xerify = 0, xount = 0;
union { int i; int x; } b;
size_t from = 0, p = 0;
char key[256] = {0};
for (b.i = m2.da.begin(from, p); b.i != MSD2::CI::CEDAR_NO_PATH; b.i = m2.da.next(from, p)) {
double it2 = m2.data[b.x]; // <-- find cedar's
xotal += get_first_digit(it2);
m2.da.suffix(key,p,from);
xerify += strlen(key);
xount += 1;
double it = m.find(key); // <-- find judy's
if(it != it2) { // if value doesn't match, print:
cout << "mismatch value for " << key << " : " << it2 << " vs " << it << endl;
}
}
for(auto &it = m.begin();m.success(); m.next()) {
total += get_first_digit(it.value);
verify += strlen((const char *) it.key);
count += 1;
}
cout << total << ' ' << verify << ' ' << count << endl;
cout << xotal << ' ' << xerify << ' ' << xount << endl;
}
compile with: clang++ -std=c++11 judy-findbug.cpp (or g++ -std=c++11)
the output would be:
6009354 6009348 611297
6009354 6009348 611297
36186112 159701681 23370000 <-- judy's
36186112 159701682 23370001 <-- cedar's
cedar has one more value than judy's (that is correct), but it didn't detected by the code above..
How to find that incorrect key(s)?

The bug on the code is someone (me) uncomment the assert(value != 0).
The bug was Karl's Judy implementation should not store null values (0 value).
Solution: use Doug Baskins' Judy implementation.

cublas cublasDgetrfBatched() batched LU factorization doesn't work with matrices bigger than 32x32

I wrote a cuda function for Matlab to perform a LU factorization of a batch of matrices using cublasDgetrfBatched(). The toolkit documentation of this function is here.
It works fine for matrices up to size 32x32. But it fails with status code CUBLAS_STATUS_INVALID_VALUE for bigger matrices. Below is my source code (gpuBatchedLU.cu):
#include "mex.h"
#include "gpu/mxGPUArray.h"
/* Includes, cuda */
#include <cuda_runtime.h>
#include <cublas_v2.h>
#include <string>
#include <sstream>
static std::string cublasGetErrorString(cublasStatus_t error) {
switch (error) {
case CUBLAS_STATUS_SUCCESS:
return "CUBLAS_STATUS_SUCCESS";
case CUBLAS_STATUS_NOT_INITIALIZED:
return "CUBLAS_STATUS_NOT_INITIALIZED";
case CUBLAS_STATUS_ALLOC_FAILED:
return "CUBLAS_STATUS_ALLOC_FAILED";
case CUBLAS_STATUS_INVALID_VALUE:
return "CUBLAS_STATUS_INVALID_VALUE";
case CUBLAS_STATUS_ARCH_MISMATCH:
return "CUBLAS_STATUS_ARCH_MISMATCH";
case CUBLAS_STATUS_MAPPING_ERROR:
return "CUBLAS_STATUS_MAPPING_ERROR";
case CUBLAS_STATUS_EXECUTION_FAILED:
return "CUBLAS_STATUS_EXECUTION_FAILED";
case CUBLAS_STATUS_INTERNAL_ERROR:
return "CUBLAS_STATUS_INTERNAL_ERROR";
}
return "<unknown>";
}
inline bool cublasAssert(cublasStatus_t code, const char* file, int line) {
if (code != CUBLAS_STATUS_SUCCESS) {
std::stringstream ss;
ss << "cublasAssert: " << cublasGetErrorString(code) << " in "
<< std::string(file) << ", line " << line << ".";
mexErrMsgTxt(ss.str().c_str());
}
return code == CUBLAS_STATUS_SUCCESS;
}
inline bool cudaAssert(cudaError_t code, const char* file, int line) {
if (code != cudaSuccess) {
std::stringstream ss;
ss << "cudaAssert: " << cudaGetErrorString(code) << " in "
<< std::string(file) << ", line " << line << ".";
mexErrMsgTxt(ss.str().c_str());
}
return code == cudaSuccess;
}
inline bool mexGPUAssert(int code, const char* file, int line) {
if (code != MX_GPU_SUCCESS) {
std::stringstream ss;
ss << "mexGPUAssert: could not initialize the Mathworks GPU API in "
<< std::string(file) << ", line " << line << ".";
mexErrMsgTxt(ss.str().c_str());
}
return code == MX_GPU_SUCCESS;
}
#define cublasErrchk(ans) { cublasAssert((ans), __FILE__, __LINE__); }
#define cudaErrchk(ans) { cudaAssert((ans), __FILE__, __LINE__); }
#define mxGPUErrchk(ans) { mexGPUAssert((ans), __FILE__, __LINE__); }
void mexFunction(int nlhs, mxArray *plhs[], /* Output variables */int nrhs,
const mxArray *prhs[]) /* Input variables */{
if (nrhs != 1) { /* end if not one function arguments */
mexErrMsgTxt("This function requires one input argument.");
return;
}
if (nlhs > 3) { /* take three outputs */
mexErrMsgTxt("This function takes a maximum of three output variables.");
return;
}
mxGPUErrchk(mxInitGPU());
const mxGPUArray* in1_gpu = mxGPUCreateFromMxArray(prhs[0]);
size_t ndims = mxGPUGetNumberOfDimensions(in1_gpu);
const size_t* dim = (const size_t*) mxGPUGetDimensions(in1_gpu);
if (ndims != 3) { /* end if input arguments are of different dimensions */
mexErrMsgTxt("The input argument must be a 3-dimensional array.");
return;
}
cublasHandle_t handle;
cublasErrchk(cublasCreate(&handle));
int no_matrices = dim[2];
int nrow = dim[0];
int ncol = dim[1];
int matrix_size = nrow * ncol;
size_t i;
std::stringstream ss;
ss << "dim[2] = " << dim[2] << "\nno_matrices = " << no_matrices << "\nnrow = " << nrow << "\nmatrix_size = " << nrow << " x " << ncol << " = " << matrix_size << std::endl;
mexPrintf(ss.str().c_str());
mxGPUArray* gpu_array_inout = mxGPUCopyFromMxArray(prhs[0]);
double* inout_storage = (double*) mxGPUGetData(gpu_array_inout);
size_t info_dimensions[1] = { no_matrices };
mxGPUArray* gpu_array_info = mxGPUCreateGPUArray(1, (mwSize*) info_dimensions, mxINT32_CLASS, mxREAL,
MX_GPU_INITIALIZE_VALUES);
int* out_info = (int*) mxGPUGetData(gpu_array_info);
mexPrintf("after defining gpu_array_info\n");
size_t pivot_dimensions[2] = { nrow, no_matrices };
mxGPUArray* gpu_array_pivot = mxGPUCreateGPUArray(2, (mwSize*) pivot_dimensions, mxINT32_CLASS, mxREAL,
MX_GPU_DO_NOT_INITIALIZE);
int* out_pivot = (int*) mxGPUGetData(gpu_array_pivot);
mexPrintf("after defining gpu_array_pivot\n");
double** inout_pointers_CPU = (double**) malloc(no_matrices * sizeof(double*));
for (i = 0; i < no_matrices; i++) {
inout_pointers_CPU[i] = (double*) ((char*) inout_storage + i * ((size_t) matrix_size) * sizeof(double));
}
double** inout_pointers_GPU;
cudaErrchk(cudaMalloc((void** )&inout_pointers_GPU, no_matrices * sizeof(double*)));
cudaErrchk(
cudaMemcpy(inout_pointers_GPU, inout_pointers_CPU, no_matrices * sizeof(double*), cudaMemcpyHostToDevice));
free(inout_pointers_CPU);
ss.clear();
ss << "check again before calling cublasDgetrfBatched:\nnrow = " << nrow << "\nno_matrices = " << no_matrices << std::endl;
mexPrintf(ss.str().c_str());
cublasErrchk(cublasDgetrfBatched(handle, nrow, inout_pointers_GPU, nrow, out_pivot, out_info, no_matrices));
cublasErrchk(cublasDestroy(handle));
cudaErrchk(cudaFree(inout_pointers_GPU));
if (mxIsGPUArray(prhs[0])) {
plhs[0] = mxGPUCreateMxArrayOnGPU(gpu_array_inout);
if (nlhs > 1) {
plhs[1] = mxGPUCreateMxArrayOnGPU(gpu_array_pivot);
if (nlhs > 2) {
plhs[2] = mxGPUCreateMxArrayOnGPU(gpu_array_info);
}
}
} else {
plhs[0] = mxGPUCreateMxArrayOnCPU(gpu_array_inout);
if (nlhs > 1) {
plhs[1] = mxGPUCreateMxArrayOnCPU(gpu_array_pivot);
if (nlhs > 2) {
plhs[2] = mxGPUCreateMxArrayOnCPU(gpu_array_info);
}
}
}
mxGPUDestroyGPUArray(gpu_array_inout);
mxGPUDestroyGPUArray(gpu_array_pivot);
mxGPUDestroyGPUArray(gpu_array_info);
mxFree((void*) dim);
return;
}
I compile as follows:
mex -L/usr/local/cuda/lib64 -lcudart -lcublas gpuBatchedLU.cu
And I call from MATLAB:
[a1,b1,c1]=gpuBatchedLU(randn(32,32,5)); %no problem
[a2,b2,c2]=gpuBatchedLU(randn(33,33,5)); %produces CUBLAS_STATUS_INVALID_VALUE
I use Matlab R2013b with the parallel toolbox, Cuda 5.5, and a NVS 5200M graphics chip.
Can anyone replicate this problem? I would appreciate any suggestions on how to solve this problem.

The problem seems to be with Matlab R2013b using libcublas.so in version 5.0. The file link is in /MATLAB/R2013b/bin/glnxa64/. Once I changed the link to the libcublas.so of my Cuda 5.5 installation it worked fine.

Why the audio recorder code can work for 8 bit, but cannot work for 16 bit??

I am trying to record the audio at windows, here is my code. it works well for 8 bit, but it cannot work for 16 bit. Can anyone help me?
#include
#include
#include
#pragma comment(lib,"winmm.lib")
using namespace std;
int test(){
HWAVEIN microHandle;
WAVEHDR waveHeader;
MMRESULT result = 0;
WAVEFORMATEX waveformat;
waveformat.wFormatTag = WAVE_FORMAT_PCM;
waveformat.wBitsPerSample=8;
waveformat.nSamplesPerSec=16000;//8000;
waveformat.nAvgBytesPerSec=waveformat.nSamplesPerSec*waveformat.nSamplesPerSec/8;
waveformat.nChannels=1;
waveformat.nBlockAlign=waveformat.nChannels*waveformat.wBitsPerSample/8;
waveformat.cbSize=0;
result = waveInOpen(&microHandle, WAVE_MAPPER, &waveformat, 0L, 0L, CALLBACK_EVENT);
if (result)
{
cout << "Fail step 1" << endl;
cout << result << endl;
Sleep(10000);
return 0;
}
const int BUFSIZE = 16000*4;
char * buf = (char *)malloc(BUFSIZE);
// Set up and prepare header for input
waveHeader.lpData = (LPSTR)buf;
waveHeader.dwBufferLength = BUFSIZE;
waveHeader.dwBytesRecorded=0;
waveHeader.dwUser = 0L;
waveHeader.dwFlags = 0L;
waveHeader.dwLoops = 0L;
waveInPrepareHeader(microHandle, &waveHeader, sizeof(WAVEHDR));
// Insert a wave input buffer
result = waveInAddBuffer(microHandle, &waveHeader, sizeof(WAVEHDR));
if (result)
{
cout << "Fail step 2" << endl;
cout << result << endl;
Sleep(10000);
return 0;
}
result = waveInStart(microHandle);
if (result)
{
cout << "Fail step 3" << endl;
cout << result << endl;
Sleep(10000);
return 0;
}
// Wait until finished recording
do {} while (waveInUnprepareHeader(microHandle, &waveHeader, sizeof(WAVEHDR))==WAVERR_STILLPLAYING);
FILE *fp = fopen("output.pcm","w");
fwrite(buf,1,BUFSIZE,fp);
fclose(fp);
waveInClose(microHandle);
return 0;
}
void main()
{
test();
}
If I set the parameter waveformat.wBitsPerSample = 8, it can record the audio correctly,
but if i set it waveformat.wBitsPerSample = 16, it record the Noise!!!
Can anyone help me?
thanks.

it should bu FILE *fp = fopen("output.pcm","wb"); NOT FILE *fp = fopen("output.pcm","w");

Develop Reference

ruby bash windows laravel spring algorithm oracle macos go visual-studio

Pthread program always run slower than normal - parallel-processing

Related

CUDA which is faster? Memory coalescing vs caching?

How to iterate a string using while loop in C++?

Finding incorrect implementation of JudyArray

cublas cublasDgetrfBatched() batched LU factorization doesn't work with matrices bigger than 32x32

Why the audio recorder code can work for 8 bit, but cannot work for 16 bit??

Categories

Resources