Changeset 61


Ignore:
Timestamp:
04/27/10 06:27:17 (3 years ago)
Author:
faltet
Message:

Temporary buffers for compression/decompression are now only reset when needed.

This optimization makes decompression quite a bit faster, especially
for a moderately large number of threads (4) and buffer sizes (1 MB).

Location:
branches/threaded/src
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • branches/threaded/src/bench.c

    r60 r61  
    144144  unsigned int elsize = 8;        /* Datatype size */ 
    145145  int rshift = 12;                /* For random data */ 
    146   int nthreads = 4;               /* The number of threads */ 
     146  int nthreads = 2;               /* The number of threads */ 
    147147  int doshuffle = 1;              /* Shuffle? */ 
    148148  unsigned char *orig, *round; 
  • branches/threaded/src/blosc.c

    r60 r61  
    4444int init_threads_done = 0;      /* pool of threads initialized? */ 
    4545int end_threads = 0;            /* should exisiting threads end? */ 
    46 int init_mem = 1;               /* shoudl memory be initialized? */ 
     46int init_temps_done = 0;        /* temporaries in threads initialized? */ 
    4747int giveup;                     /* should (de-)compression give up? */ 
    4848int nblock = -1;                /* block counter */ 
    4949pthread_t threads[MAX_THREADS]; /* opaque structure for threads */ 
    50 int tids[MAX_THREADS];          /* ID per each threads */ 
     50int tids[MAX_THREADS];          /* ID per each thread */ 
    5151pthread_attr_t ct_attr;         /* creation time attributes for threads */ 
    5252 
     
    7676 
    7777 
    78 /* Convenience functions for creating and releasing temporaries */ 
    79 void 
    80 create_temporaries(void) 
    81 { 
    82   int tid; 
    83   size_t blocksize = params.blocksize; 
    84   /* Extended blocksize for temporary destination.  Extended blocksize 
    85    is only useful for compression in parallel mode, but it doesn't 
    86    hurt other modes either. */ 
    87   size_t ebsize = blocksize + params.typesize*sizeof(int); 
    88   unsigned char *tmp, *tmp2; 
    89  
    90   /* Create temporary area for each thread */ 
    91   for (tid = 0; tid < nthreads; tid++) { 
    92 #ifdef _WIN32 
    93     tmp = (unsigned char *)_aligned_malloc(blocksize, 16); 
    94     tmp2 = (unsigned char *)_aligned_malloc(ebsize, 16); 
    95 #elif defined __APPLE__ 
    96     /* Mac OS X guarantees 16-byte alignment in small allocs */ 
    97     tmp = (unsigned char *)malloc(blocksize); 
    98     tmp2 = (unsigned char *)malloc(ebsize); 
    99 #else 
    100     posix_memalign((void **)&tmp, 16, blocksize); 
    101     posix_memalign((void **)&tmp2, 16, ebsize); 
    102 #endif  /* _WIN32 */ 
    103     params.tmp[tid] = tmp; 
    104     params.tmp2[tid] = tmp2; 
    105   } 
    106 } 
    107  
    108  
    109 void 
    110 release_temporaries(void) 
    111 { 
    112   int tid; 
    113   unsigned char *tmp, *tmp2; 
    114  
    115   /* Release buffers */ 
    116   for (tid = 0; tid < nthreads; tid++) { 
    117     tmp = params.tmp[tid]; 
    118     tmp2 = params.tmp2[tid]; 
    119 #ifdef _WIN32 
    120     _aligned_free(tmp); 
    121     _aligned_free(tmp2); 
    122 #else 
    123     free(tmp); 
    124     free(tmp2); 
    125 #endif  /* _WIN32 */ 
    126   } 
    127 } 
     78/* Structure for parameters meant for keeping track of current temporaries */ 
     79struct temp_data { 
     80  int nthreads; 
     81  size_t typesize; 
     82  size_t blocksize; 
     83} current_temp; 
     84 
    12885 
    12986 
     
    447404 
    448405 
     406/* Convenience functions for creating and releasing temporaries */ 
     407void 
     408create_temporaries(void) 
     409{ 
     410  int tid; 
     411  size_t typesize = params.typesize; 
     412  size_t blocksize = params.blocksize; 
     413  /* Extended blocksize for temporary destination.  Extended blocksize 
     414   is only useful for compression in parallel mode, but it doesn't 
     415   hurt other modes either. */ 
     416  size_t ebsize = blocksize + typesize*sizeof(int); 
     417  unsigned char *tmp, *tmp2; 
     418 
     419  /* Create temporary area for each thread */ 
     420  for (tid = 0; tid < nthreads; tid++) { 
     421#ifdef _WIN32 
     422    tmp = (unsigned char *)_aligned_malloc(blocksize, 16); 
     423    tmp2 = (unsigned char *)_aligned_malloc(ebsize, 16); 
     424#elif defined __APPLE__ 
     425    /* Mac OS X guarantees 16-byte alignment in small allocs */ 
     426    tmp = (unsigned char *)malloc(blocksize); 
     427    tmp2 = (unsigned char *)malloc(ebsize); 
     428#else 
     429    posix_memalign((void **)&tmp, 16, blocksize); 
     430    posix_memalign((void **)&tmp2, 16, ebsize); 
     431#endif  /* _WIN32 */ 
     432    params.tmp[tid] = tmp; 
     433    params.tmp2[tid] = tmp2; 
     434  } 
     435 
     436  init_temps_done = 1; 
     437  /* Update params for current temporaries */ 
     438  current_temp.nthreads = nthreads; 
     439  current_temp.typesize = typesize; 
     440  current_temp.blocksize = blocksize; 
     441 
     442} 
     443 
     444 
     445void 
     446release_temporaries(void) 
     447{ 
     448  int tid; 
     449  unsigned char *tmp, *tmp2; 
     450 
     451  /* Release buffers */ 
     452  for (tid = 0; tid < nthreads; tid++) { 
     453    tmp = params.tmp[tid]; 
     454    tmp2 = params.tmp2[tid]; 
     455#ifdef _WIN32 
     456    _aligned_free(tmp); 
     457    _aligned_free(tmp2); 
     458#else 
     459    free(tmp); 
     460    free(tmp2); 
     461#endif  /* _WIN32 */ 
     462  } 
     463 
     464  init_temps_done = 0; 
     465 
     466} 
     467 
     468 
    449469/* Do the compression or decompression of the buffer depending on the 
    450470   global params. */ 
     
    453473  int ntbytes; 
    454474 
    455   create_temporaries(); 
     475  /* Initialize/reset temporaries if needed */ 
     476  if (!init_temps_done) { 
     477    create_temporaries(); 
     478  } 
     479  else if (current_temp.nthreads != nthreads || 
     480           current_temp.typesize != params.typesize || 
     481           current_temp.blocksize != params.blocksize) { 
     482    release_temporaries(); 
     483    create_temporaries(); 
     484  } 
    456485 
    457486  if (nthreads == 1) { 
     
    462491  } 
    463492 
    464   release_temporaries(); 
    465493 
    466494  return ntbytes; 
     
    784812      init_threads_done = 0; 
    785813      end_threads = 0; 
    786       printf("Pool de %d threads acabat!\n", nthreads); 
    787814    } 
    788815    nthreads = nthreads_new; 
    789     init_mem = 1; 
    790816    if (nthreads > 1) { 
    791817      /* Launch a new pool of threads */ 
    792818      init_threads(); 
    793       printf("Pool de %d threads començat!\n", nthreads); 
    794819    } 
    795820    return nthreads_old; 
Note: See TracChangeset for help on using the changeset viewer.