Rows represents the number of elements which were sorted and time is in milliseconds:
I have set thread using export OMP_NUM_THREADS=n
There is a constant increasing in execution time irrespective of the number of elements I am taking. Where am I going wrong?
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include "omp.h"
/*
OpenMP implementation example
Details of implementation/tutorial can be found here:
http://madhugnadig.com/articles/parallel-
processing/2017/02/25/parallel-computing-in-c-using-openMP.html
*/
void mergeSort( int a[], int i, int j);
void merge( int a[], int i1, int j1, int i2, int j2);
int main()
{ clock_t t;
t = clock();
int *a, num, i;
scanf("%d",&num);
a = ( int *)malloc(sizeof(int) * num);
for(i=0;i<num;i++)
scanf("%d",&a[i]);
mergeSort(a, 0, num-1);
printf("\nsorted array :\n");
for(i=0;i<num;i++)
printf("%d ",a[i]);
t = clock() - t;
double time_taken = ((double)t)/CLOCKS_PER_SEC; // in seconds
printf("\n\n\nYour sorting took %f seconds to execute \n", time_taken);
return 0;
}
void mergeSort( int a[], int i, int j)
{
int mid;
if(i<j)
{
mid=(i+j)/2;
#pragma omp parallel sections
{
#pragma omp section
{
mergeSort(a,i,mid); //left recursion
}
#pragma omp section
{
mergeSort(a,mid+1,j); //right recursion
}
}
merge(a,i,mid,mid+1,j); //merging of two sorted sub-arrays
}
}
void merge( int a[], int i1, int j1, int i2, int j2)
{
int temp[1001000]; //array used for merging
int i,j,k;
i=i1; //beginning of the first list
j=i2; //beginning of the second list
k=0;
while(i<=j1 && j<=j2) //while elements in both lists
{
if(a[i]<a[j])
temp[k++]=a[i++];
else
temp[k++]=a[j++];
}
while(i<=j1) //copy remaining elements of the first list
temp[k++]=a[i++];
while(j<=j2) //copy remaining elements of the second list
temp[k++]=a[j++];
//Transfer elements from temp[] back to a[]
for(i=i1,j=0;i<=j2;i++,j++)
a[i]=temp[j];
}
This is how I have run the code on my macbook:
#include <cstdint>
#include <cstring>
template<typename T>
T oph_(const char *s){
constexpr std::size_t MAX = sizeof(T);
const std::size_t size = strnlen(s, MAX);
T r = 0;
for(auto it = s; it - s < size; ++it)
r = r << 8 | *it;
return r;
}
inline uint64_t oph(const char *s){
return oph_<uint64_t>(s);
}
int main(){
uint64_t const a = oph("New York City");
uint64_t const b = oph("Boston International");
return a > b;
}
I want to convert first 8 characters from const char * to uint64_t so I can easily compare if two strings are greater / lesser.
I am aware that equals will semi-work.
However I am not sure if this is most efficient implementation.
I want the implementation to work on both little and big endian machines.
This is a C implementation, that should be faster that your implementation, but I still need to use strncpy which should be the bottleneck
#include <string.h>
#include <stdio.h>
#include <stdint.h>
#include <byteswap.h>
union small_str {
uint64_t v;
char buf[8];
};
static uint64_t fill_small_str(const char *str)
{
union small_str ss = { 0 };
strncpy(ss.buf, str, 8);
#if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
return ss.v;
#else
return bswap_64(ss.v);
#endif
}
int main(void)
{
uint64_t const a = fill_small_str("Aew York City");
uint64_t const b = fill_small_str("Boston International");
printf("%lu ; %lu ; %d\n", a, b, (a < b));
return 0;
}
Below is the link to problem
https://www.hackerrank.com/challenges/and-xor-or/copy-from/16519519
I have implemented a O(n) solution which seems ok (no TLE and 29 testcaes passed out of 32). But my solution is failing for some testcaes and I am not able to find the error, surely I am missing some corner cases. Any hint would be great help. I have posted my code below which I have ran and submitted.
#include <cmath>
#include <cstdio>
#include <vector>
#include <iostream>
#include <stack>
#define ul unsigned long
using namespace std;
ul comp(ul a, ul b){
ul result = ((a&b)^(a|b))&(a^b);
return result;
}
int main() {
/* Enter your code here. Read input from STDIN. Print output to STDOUT */
int n;
cin>>n;
vector<ul>ip(n);
for(int i=0;i<n;i++)
cin>>ip[i];
stack<ul>st;
ul result=0;
for(int i=0;i<n;i++){
if(st.empty())
st.push(ip[i]);
else{
result=max(result,comp(st.top(),ip[i]));
while(!st.empty() && st.top()>ip[i]){
result=max(result,comp(st.top(),ip[i]));
st.pop();
}
st.push(ip[i]);
}
}
cout<<result;
return 0;
}
I'm trying to write a C program that changes colors of picture and than save it. And it also do this for more than one colors separately. For example, i have picture that has red, blue, purple, and green colors. Program should change all colors to black except red and save picture. Than in original picture, it should change colors to black except blue and save another picture. And than do this for purple and green separately as well. Before saving file, picture would be shown in window.
Here is my code:
#include <iostream>
#include <iostream>
#include <iostream>
#include <vector>
#include<time.h>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
int main(int argc, char **argv)
{
//Files' name and windows' name
char *resim[3] = {"112.png", "116.png", "113.png"};
//b,g,r values; 1-Min, 2-Max, 3-Wil be
int imgB1[] ={150, 190, 0};
int imgB2[] ={220 ,255, 80};
int imgB3[] ={168,190, 66};
int imgG1[] ={100,180, 90};
int imgG2[] ={150,255, 150};
int imgG3[] ={119, 200, 136};
int imgR1[] ={150, 0, 200};
int imgR2[] ={235, 204, 250};
int imgR3[] ={103, 25, 250};
for (int m =0; m<3; m++){
cv::Mat img = cv::imread("62.png",0);
for(int i = 0; i < img.rows; ++i) {
for(int j = 0; j < img.cols; ++j) {
int b=int(img.at<cv::Vec3b>(i,j)[0]);
int g=int(img.at<cv::Vec3b>(i,j)[1]);
int r=int(img.at<cv::Vec3b>(i,j)[2]);
int degis=0;
if(b >= imgB1[m] && b < imgB2[m] && g >= imgG1[m] && g <imgG2[m] && r >= imgR1[m] && r < imgR2[m])
{
img.at<cv::Vec3b>(i,j)[0] = imgB3[m];
img.at<cv::Vec3b>(i,j)[1] = imgG3[m];
img.at<cv::Vec3b>(i,j)[2] = imgR3[m];
}
else{
img.at<cv::Vec3b>(i,j)[0] = 0;
img.at<cv::Vec3b>(i,j)[1] = 0;
img.at<cv::Vec3b>(i,j)[2] = 0;
}
}
}
cv::namedWindow(resim[m]);
imshow(resim[m],img);
cv::waitKey(0);
imwrite(resim[m], img);
}
return 0;
}
Program stops. There is only "program has stopped" message in black window. I couldn't find any solution.
#berak
I'm not familiar with opencv. I didn't know inrange.
I rewrite code with inrange. Now it works. I will rearrange it later but i share it if someone needs an example.
#include <iostream>
#include <iostream>
#include <iostream>
#include "opencv2/highgui/highgui.hpp"
#include "opencv2/imgproc/imgproc.hpp"
#include "opencv2/contrib/contrib.hpp"
using namespace std;
using namespace cv;
int main()
{
Mat src =imread("atlas40.png", CV_LOAD_IMAGE_COLOR);
Mat mask,mask2,mask3,mask4,mask5,mask6,mask7;
// Min values
int blueMin =190;
int greenMin=180;
int redMin =0;
// Max values
int blueMax =255;
int greenMax=255;
int redMax =204;
int blueMin2=0;
int greenMin2=128;
int redMin2=0;
int blueMax2=0;
int greenMax2=255;
int redMax2=191;
int blueMin3=0;
int greenMin3=8;
int redMin3=175;
int blueMax3=92;
int greenMax3=86;
int redMax3=205;
int blueMin4=0;
int greenMin4=90;
int redMin4=200;
int blueMax4=80;
int greenMax4=150;
int redMax4=255;
int blueMin5=190;
int greenMin5=90;
int redMin5=230;
int blueMax5=220;
int greenMax5=150;
int redMax5=255;
int blueMin6=50;
int greenMin6=0;
int redMin6=50;
int blueMax6=255;
int greenMax6=0;
int redMax6=255;
int blueMin7=0;
int greenMin7=200;
int redMin7=200;
int blueMax7=180;
int greenMax7=255;
int redMax7=255;
inRange(src, Scalar(blueMin, greenMin, redMin), Scalar(blueMax, greenMax, redMax), mask);
inRange(src, Scalar(blueMin2, greenMin2, redMin2), Scalar(blueMax2, greenMax2, redMax2), mask2);
inRange(src, Scalar(blueMin3, greenMin3, redMin3), Scalar(blueMax3, greenMax3, redMax3), mask3);
inRange(src, Scalar(blueMin4, greenMin4, redMin4), Scalar(blueMax4, greenMax4, redMax4), mask4);
inRange(src, Scalar(blueMin5, greenMin5, redMin5), Scalar(blueMax5, greenMax5, redMax5), mask5);
inRange(src, Scalar(blueMin6, greenMin6, redMin6), Scalar(blueMax6, greenMax6, redMax6), mask6);
inRange(src, Scalar(blueMin7, greenMin7, redMin7), Scalar(blueMax7, greenMax7, redMax7), mask7);
imshow("Color 1",mask);
imshow("Color 2",mask2);
imshow("Color 3",mask3);
imshow("Color 4",mask4);
imshow("Color 5",mask5);
imshow("Color 6",mask6);
imshow("Color 7",mask7);
int k = waitKey();
imwrite("401.png",mask);
imwrite("402.png",mask2);
imwrite("403.png",mask3);
imwrite("404.png",mask4);
imwrite("405.png",mask5);
imwrite("406.png",mask6);
imwrite("407.png",mask7);
return 0;
}
Thanks for your helps.
I have a CUDA C code, when I try to compile it, nvcc gives me an error with an undefined identifier error: identifier "cudamalloc" is undefined, identifier "cudamemcpy" is undefined.
I'm running Windows 7 with Visual Studio 10 and CUDA Toolkit 4.0
I have installed Cuda on drive "C" and Visual Studio on drive "E" but im not sure that it is the problem.
I use this command to compile:
nvcc -o ej1b ej1b.cu
and this is my program:
#include <cuda.h>
#include <cstdio>
#include <cuda_runtime_api.h>
#include <device_functions.h>
#include "device_launch_parameters.h"
#include <stdio.h>
#include <stdlib.h>
const int N = 512;
const int C = 5;
void init_CPU_array(int vec[],const int N){
unsigned int i;
for(i = 0; i < N; i++) {
vec[i] = i;
}
}
__global__ void kernel(int vec[],const int N, const int C){
int id = blockIdx.x * blockDim.x + threadIdx.x;
if(id<N)
vec[id] = vec[id] * C;
}
int main(){
int vec[N];
int vecRES[N];
int *vecGPU;
unsigned int cantaloc=N*sizeof(int);
init_CPU_array(vec,N);
cudamalloc((void**)&vecGPU,cantaloc);
cudamemcpy(vecGPU,vec,cantaloc,cudaMemcpyHostToDevice);
dim3 dimBlock(64);
dim3 dimGrid((N + dimBlock.x - 1) / dimBlock.x);
printf("-> Variable dimBlock.x = %d\n",dimBlock.x);
kernel<<<dimGrid, dimBlock>>>(vecGPU, N, C);
cudaThreadSynchronize();
cudamemcpy(vecRES,vecGPU,cantaloc,cudaMemcpyDeviceToHost);
cudaFree(vecGPU);
printf("%s \n","-> Resultados");
int i;
for(i=0;i<10;i++){
printf("%d ",vecRES[i]);
printf("%d \n",vec[i]);
}
return 0;
I used all those #include because I don't know where the problem is.
If you read the documentation, you will find the API calls are cudaMalloc and cudaMemcpy. C and C++ are case sensitive languages and you have the names incorrect.