c++ - Segmentation fault on Cuda -


i'm writing cuda program processing images. got segmentation fault problem , i’ve no no idea why. i'm miss minor thing, after hours of trying correct myself couldn’t make running.
i'm setting correct grid,block , shred memory values( @ least think so), according devicequery on hardware(geforce310m total shmem:16384,max threads per block:512 , max block dim 521). here output before segmentation fault:

cols , rows: 256 384 total:98304 // rows*cols block size:512 grid size:192 shared mem:2048 

below kernel code

__global__ void reduce_min(float *minvar,float* d_logluminance) {     extern __shared__ float s_data[];   //shared memeory     unsigned int tid = threadidx.x;     unsigned int global_id = blockidx.x*blockdim.x + tid;      //copy shared mem     s_data[tid] = d_logluminance[global_id];     __syncthreads();      for(unsigned int = 1;i<blockdim.x;i*=2)     {         if(tid%(2*i) == 0)         {             s_data[tid] = min(s_data[tid],s_data[tid+i]);            }         __syncthreads();     }     if(tid == 0)         d_logluminance[blockidx.x] = s_data[0]; } 

and part allocate memory , invoke kernel

size_t size = 512;     size_t sizeofgrid = (numrows*numcols)/size;     const dim3 blocksize(size,1,1);      const dim3 gridsize(sizeofgrid,1,1);     unsigned int sharedmem =(sizeof(float)*size); //shared memory per block      printf("%s\n%d\n%d\n%s%d\n","cols , rows:",numcols,numrows,"total:",numrows*numcols);     printf("%s%d\n%s%d\n%s%d\n","block size:",size,"grid size:",sizeofgrid,"shared mem:",sharedmem);      float* d_lum;     float* outdata;     checkcudaerrors(cudamalloc(&d_lum,sizeof(float)*numrows*numcols));         checkcudaerrors(cudamemcpy(d_lum,d_logluminance,sizeof(float)*numrows*numcols,cudamemcpydevicetodevice));        checkcudaerrors(cudamalloc(&outdata,sizeof(float)*numrows*numcols));      reduce_min<<<gridsize,blocksize,sharedmem>>>(outdata,d_lum);     cudadevicesynchronize();     checkcudaerrors(cudagetlasterror());             checkcudaerrors(cudamemcpy(&min_loglum,outdata,sizeof(float)*numrows*numcols,cudamemcpydevicetohost)); 

thanks in advance help.

checkcudaerrors(cudamemcpy(&min_loglum,outdata,sizeof(float)*numrows*numcols,cudamemcpydevicetohost)); function generating segmentation fault, beacause fogrot mention(and missed when writing code) min_loglum float, not float* of size numrows*numcols. in end trying copy more memory had allocated.


Comments

Popular posts from this blog

c++ - Creating new partition disk winapi -

Android Prevent Bluetooth Pairing Dialog -

VBA function to include CDATA -