source: trunk/src/blosc.c @ 52

Revision 52, 12.8 KB checked in by faltet, 3 years ago (diff)

Forgot to update the _blosc_d failure condition.

Line 
1/*********************************************************************
2  Blosc - Blocked Shuffling and Compression Library
3
4  Author: Francesc Alted (faltet@pytables.org)
5  Creation date: 2009-05-20
6
7  See LICENSES/BLOSC.txt for details about copyright and rights to use.
8**********************************************************************/
9
10
11#include <stdlib.h>
12#include <stdio.h>
13#include <string.h>
14#include <sys/types.h>
15#include <sys/stat.h>
16#include "blosc.h"
17#include "blosclz.h"
18#include "shuffle.h"
19#ifdef _WIN32
20  #include <windows.h>
21#else
22  #include <stdint.h>
23  #include <unistd.h>
24#endif  /* _WIN32 */
25
26
27/* Starting point for the blocksize computation.  blosc_compress() doubles
     it once per compression level above 3, as long as the doubled value does
     not exceed the size of the input buffer. */
28#define BLOCKSIZE (4*1024)      /* 4 KB (page size) */
29
30/* Maximum typesize before considering buffer as a stream of bytes.  The
     typesize is stored in a single byte of the header, where the value 0
     stands for MAXTYPESIZE. */
31#define MAXTYPESIZE 256         /* Cannot be larger than 256 */
32
33/* The maximum number of splits in a block for compression.  Blocks whose
     typesize is larger than this are compressed as one single split. */
34#define MAXSPLITS 16         /* Cannot be larger than 128 */
35
36
37/* Global variables for compressing/shuffling actions.
     NOTE(review): no function visible in this file reads them (`clevel` is
     shadowed by same-named parameters and `do_shuffle` is never referenced);
     confirm whether any other translation unit uses them. */
38int clevel;                     /* Compression level */
39int do_shuffle;                 /* Shuffle is active? */
40
41
42
43/* Shuffle & Compress a single block */
44static size_t
45_blosc_c(int clevel, int doshuffle, size_t typesize, size_t blocksize,
46         int leftoverblock, unsigned int ctbytes, unsigned int nbytes,
47         unsigned char* _src, unsigned char* _dest, unsigned char *tmp) {
48  size_t j, neblock, nsplits;
49  int cbytes, maxout;
50  unsigned int btbytes = 0;
51  unsigned char* _tmp;
52
53  if (doshuffle && (typesize > 1)) {
54    /* Shuffle this block (this makes sense only if typesize > 1) */
55    shuffle(typesize, blocksize, _src, tmp);
56    _tmp = tmp;
57  }
58  else {
59    _tmp = _src;
60  }
61
62  /* Compress for each shuffled slice split for this block. */
63  /* If the number of bytes is too large, or we are in a leftover
64     block, do not split all. */
65  if ((typesize <= MAXSPLITS) && (!leftoverblock)) {
66    nsplits = typesize;
67  }
68  else {
69    nsplits = 1;
70  }
71  neblock = blocksize / nsplits;
72  for (j = 0; j < nsplits; j++) {
73    _dest += sizeof(int);
74    btbytes += sizeof(int);
75    ctbytes += sizeof(int);
76    maxout = neblock;
77    if (ctbytes+maxout > nbytes) {
78      maxout = nbytes - ctbytes;   /* avoid buffer overrun */
79      if (maxout <= 0) {
80        return 0;                  /* non-compressible block */
81      }
82    }
83    cbytes = blosclz_compress(clevel, _tmp+j*neblock, neblock,
84                              _dest, maxout);
85    if (cbytes > maxout) {
86      /* Buffer overrun caused by blosclz_compress (should never happen) */
87      return -1;
88    }
89    else if (cbytes < 0) {
90      /* cbytes should never be negative */
91      return -2;
92    }
93    else if ((cbytes == 0) || (cbytes == (int) neblock)) {
94      /* The compressor has been unable to compress data
95         significantly.  Also, it may happen that the compressed
96         buffer has exactly the same length than the buffer size, but
97         this means uncompressible data.  Before doing the copy, check
98         that we are not running into a buffer overflow. */
99      if ((ctbytes+neblock) > nbytes) {
100        return 0;    /* Non-compressible data */
101      }
102      memcpy(_dest, _tmp+j*neblock, neblock);
103      cbytes = neblock;
104    }
105    ((unsigned int *)(_dest))[-1] = cbytes;
106    _dest += cbytes;
107    btbytes += cbytes;
108    ctbytes += cbytes;
109  }  /* Closes j < nsplits */
110
111  return btbytes;
112}
113
114
115unsigned int
116blosc_compress(int clevel, int doshuffle, size_t typesize, size_t nbytes,
117               const void *src, void *dest)
118{
119  unsigned char *_src=NULL;        /* alias for source buffer */
120  unsigned char *_dest=NULL;       /* alias for destination buffer */
121  unsigned char *flags;            /* flags for header */
122  unsigned int *starts;            /* start pointers for each block */
123  size_t nblocks;                  /* number of complete blocks in buffer */
124  size_t tblocks;                  /* number of total blocks in buffer */
125  size_t leftover;                 /* extra bytes at end of buffer */
126  size_t blocksize;                /* length of the block in bytes */
127  size_t bsize;                    /* corrected blocksize for last block */
128  unsigned int ctbytes = 0;        /* the number of bytes in output buffer */
129  unsigned int *ctbytes_;          /* the number of bytes in output buffer */
130  unsigned char *tmp;              /* temporary buffer for data block */
131  int cbytes;                      /* temporary compressed buffer length */
132  int leftoverblock;               /* left over block? */
133  unsigned int i, j;               /* local index variables */
134  const char *too_long_message = "The impossible happened: buffer overflow!\n";
135
136  /* Compression level */
137  if (clevel < 0 || clevel > 9) {
138    /* If clevel not in 0..9, print an error */
139    fprintf(stderr, "`clevel` parameter must be between 0 and 9!\n");
140    return -10;
141  }
142  else if (clevel == 0) {
143    /* No compression wanted.  Just return without doing anything else. */
144    return 0;
145  }
146
147  /* Shuffle */
148  if (doshuffle != 0 && doshuffle != 1) {
149    /* If shuffle not in 0,1, print an error */
150    fprintf(stderr, "`doshuffle` parameter must be either 0 or 1!\n");
151    return -10;
152  }
153
154  /* Compute a blocksize depending on the optimization level */
155  blocksize = BLOCKSIZE;
156  /* 3 first optimization levels will not change blocksize */
157  for (i=4; i<=(unsigned int)clevel; i++) {
158    /* Escape if blocksize grows more than nbytes */
159    if (blocksize*2 > nbytes) break;
160    blocksize *= 2;
161  }
162
163  /* blocksize must be a multiple of the typesize */
164  blocksize = blocksize / typesize * typesize;
165
166  /* Create temporary area */
167#ifdef _WIN32
168  tmp = (unsigned char *)_aligned_malloc(blocksize, 16);
169#elif defined __APPLE__
170  /* Mac OS X guarantees 16-byte alignment in small allocs */
171  tmp = (unsigned char *)(malloc(blocksize));
172#else
173  posix_memalign((void **)&tmp, 16, blocksize);
174#endif  /* _WIN32 */
175
176  nblocks = nbytes / blocksize;
177  leftover = nbytes % blocksize;
178  _src = (unsigned char *)(src);
179  _dest = (unsigned char *)(dest);
180  tblocks = (leftover>0)? nblocks+1: nblocks;
181
182  /* Check typesize limits */
183  if (typesize == MAXTYPESIZE) {
184    typesize = 0;               /* zero means MAXTYPESIZE */
185  }
186  else if (typesize > MAXTYPESIZE) {
187    /* If typesize is too large, treat buffer as an 1-byte stream. */
188    typesize = 1;
189  }
190
191  /* Write header for this block */
192  _dest[0] = BLOSC_VERSION_FORMAT;         /* blosc format version */
193  _dest[1] = BLOSCLZ_VERSION_FORMAT;       /* blosclz format version */
194  flags = _dest+2;                         /* flags */
195  _dest[2] = 0;                            /* zeroes flags */
196  _dest[3] = (unsigned char)typesize;      /* type size */
197  _dest += 4;
198  ctbytes += 4;
199  ((unsigned int *)_dest)[0] = nbytes;     /* size of the chunk */
200  ((unsigned int *)_dest)[1] = blocksize;  /* block size */
201  ctbytes_ = (unsigned int *)(_dest+8);    /* compressed chunk size (pointer) */
202  _dest += sizeof(int)*3;
203  ctbytes += sizeof(int)*3;
204  starts = (unsigned int *)_dest;          /* starts for every block */
205  _dest += sizeof(int)*tblocks;            /* book space for pointers to */
206  ctbytes += sizeof(int)*tblocks;          /* every block in output */
207
208  if (doshuffle == 1) {
209    /* Shuffle is active */
210    *flags |= 0x1;                         /* bit 0 set to one in flags */
211  }
212
213  for (j = 0; j < tblocks; j++) {
214    starts[j] = ctbytes;
215    bsize = blocksize;
216    leftoverblock = 0;
217    if ((j == tblocks - 1) && (leftover > 0)) {
218      bsize = leftover;
219      leftoverblock = 1;
220    }
221    cbytes = _blosc_c(clevel, doshuffle, typesize, bsize, leftoverblock,
222                      ctbytes, nbytes, _src, _dest, tmp);
223    if (cbytes < 0) {
224      fprintf(stderr, too_long_message);
225      ctbytes = cbytes;         /* error in _blosc_c */
226      goto out;
227    }
228    if (cbytes == 0) {
229      ctbytes = 0;              /* uncompressible data */
230      goto out;
231    }
232    _dest += cbytes;
233    _src += blocksize;
234    ctbytes += cbytes;
235  }  /* Close j < tblocks */
236
237  if (ctbytes == nbytes) {
238    ctbytes = 0;               /* non-compressible data */
239    goto out;
240  }
241  else if (ctbytes > nbytes) {
242    fprintf(stderr, too_long_message);
243    ctbytes = -5;               /* too large buffer */
244    goto out;
245  }
246
247 out:
248#ifdef _WIN32
249  _aligned_free(tmp);
250#else
251  free(tmp);
252#endif  /* _WIN32 */
253
254  *ctbytes_ = ctbytes;   /* set the number of compressed bytes in header */
255  return ctbytes;
256
257}
258
259
260/* Decompress & unshuffle a single block */
261static size_t
262_blosc_d(int dounshuffle, size_t typesize, size_t blocksize, int leftoverblock,
263         unsigned char* _src, unsigned char* _dest,
264         unsigned char *tmp, unsigned char *tmp2)
265{
266  size_t j, neblock, nsplits;
267  size_t nbytes, cbytes, ctbytes = 0, ntbytes = 0;
268  unsigned char* _tmp;
269
270  if (dounshuffle && (typesize > 1)) {
271    _tmp = tmp;
272  }
273  else {
274    _tmp = _dest;
275  }
276
277  /* Compress for each shuffled slice split for this block. */
278  /* If the number of bytes is too large, do not split all. */
279  if ((typesize <= MAXSPLITS) && (!leftoverblock)) {
280    nsplits = typesize;
281  }
282  else {
283    nsplits = 1;
284  }
285  neblock = blocksize / nsplits;
286  for (j = 0; j < nsplits; j++) {
287    cbytes = ((unsigned int *)(_src))[0]; /* amount of compressed bytes */
288    _src += sizeof(int);
289    ctbytes += sizeof(int);
290    /* Uncompress */
291    if (cbytes == neblock) {
292      memcpy(_tmp, _src, neblock);
293      nbytes = neblock;
294    }
295    else {
296      nbytes = blosclz_decompress(_src, cbytes, _tmp, neblock);
297      if (nbytes != neblock) {
298        return -2;
299      }
300    }
301    _src += cbytes;
302    ctbytes += cbytes;
303    _tmp += neblock;
304    ntbytes += nbytes;
305  } /* Closes j < nsplits */
306
307  if (dounshuffle && (typesize > 1)) {
308    if ((uintptr_t)_dest % 16 == 0) {
309      /* 16-bytes aligned _dest.  SSE2 unshuffle will work. */
310      unshuffle(typesize, blocksize, tmp, _dest);
311    }
312    else {
313      /* _dest is not aligned.  Use tmp2, which is aligned, and copy. */
314      unshuffle(typesize, blocksize, tmp, tmp2);
315      memcpy(_dest, tmp2, blocksize);
316    }
317  }
318
319  return ctbytes;
320}
321
322
323unsigned int
324blosc_decompress(const void *src, void *dest, size_t dest_size)
325{
326  unsigned char *_src=NULL;          /* alias for source buffer */
327  unsigned char *_dest=NULL;         /* alias for destination buffer */
328  unsigned char version, versionlz;  /* versions for compressed header */
329  unsigned char flags;               /* flags for header */
330  size_t leftover;                   /* extra bytes at end of buffer */
331  size_t nblocks;                    /* number of complete blocks in buffer */
332  size_t tblocks;                    /* number of total blocks in buffer */
333  size_t j;
334  size_t nbytes, ntbytes = 0;
335  int cbytes;
336  unsigned char *tmp, *tmp2;
337  int dounshuffle = 0;
338  unsigned int typesize, blocksize, bsize, ctbytes_;
339  int leftoverblock;               /* left over block? */
340
341  _src = (unsigned char *)(src);
342  _dest = (unsigned char *)(dest);
343
344  /* Read the header block */
345  version = _src[0];                        /* blosc format version */
346  versionlz = _src[1];                      /* blosclz format version */
347  flags = _src[2];                          /* flags */
348  typesize = (unsigned int)_src[3];         /* typesize */
349  _src += 4;
350  nbytes = ((unsigned int *)_src)[0];       /* chunk size */
351  blocksize = ((unsigned int *)_src)[1];    /* block size */
352  ctbytes_ = ((unsigned int *)_src)[2];     /* compressed chunk size */
353  _src += sizeof(int)*3;
354  /* Compute some params */
355  nblocks = nbytes / blocksize;
356  leftover = nbytes % blocksize;
357  tblocks = (leftover>0)? nblocks+1: nblocks;
358  _src += sizeof(int)*tblocks;              /* skip starts of blocks */
359
360  /* Check zero typesizes */
361  if (typesize == 0) {
362    typesize = MAXTYPESIZE;
363  }
364
365  if (nbytes > dest_size) {
366    /* This should never happen, but just in case. */
367    return -1;
368  }
369
370  if ((flags & 0x1) == 1) {
371    /* Input is shuffled.  Unshuffle it. */
372    dounshuffle = 1;
373  }
374
375  /* Create temporary area */
376#ifdef _WIN32
377  tmp = (unsigned char*)_aligned_malloc(blocksize, 16);
378  tmp2 = (unsigned char*)_aligned_malloc(blocksize, 16);
379#elif defined __APPLE__
380  /* Mac OS X guarantees 16-byte alignment in small allocs */
381  tmp = (unsigned char *)(malloc(blocksize));
382  tmp2 = (unsigned char *)(malloc(blocksize));
383#else
384  posix_memalign((void **)&tmp, 16, blocksize);
385  posix_memalign((void **)&tmp2, 16, blocksize);
386#endif  /* _WIN32 */
387
388  for (j = 0; j < tblocks; j++) {
389    bsize = blocksize;
390    leftoverblock = 0;
391    if ((j == tblocks - 1) && (leftover > 0)) {
392      bsize = leftover;
393      leftoverblock = 1;
394    }
395    cbytes = _blosc_d(dounshuffle, typesize, bsize, leftoverblock,
396                      _src, _dest, tmp, tmp2);
397    if (cbytes < 0) {
398      nbytes = cbytes;          /* _blosc_d failure */
399      goto out;
400    }
401    _src += cbytes;
402    _dest += blocksize;
403    ntbytes += blocksize;
404  }
405
406 out:
407#ifdef _WIN32
408  _aligned_free(tmp);
409  _aligned_free(tmp2);
410#else
411  free(tmp);
412  free(tmp2);
413#endif  /* _WIN32 */
414  return nbytes;
415}
416
417
Note: See TracBrowser for help on using the repository browser.