1 /// D translation of stb_image-1.33 (http://nothings.org/stb_image.c)
2 ///
3 /// This port only supports:
4 /// $(UL
5 ///   $(LI PNG 8-bit-per-channel only.)
6 /// )
7 ///
8 //============================    Contributors    =========================
9 //
10 // Image formats                                Optimizations & bugfixes
11 // Sean Barrett (jpeg, png, bmp)                Fabian "ryg" Giesen
12 // Nicolas Schulz (hdr, psd)
13 // Jonathan Dummer (tga)                     Bug fixes & warning fixes
14 // Jean-Marc Lienher (gif)                      Marc LeBlanc
15 // Tom Seddon (pic)                             Christpher Lloyd
16 // Thatcher Ulrich (psd)                        Dave Moore
17 // Won Chun
18 // the Horde3D community
19 // Extensions, features                            Janez Zemva
20 // Jetro Lauha (stbi_info)                      Jonathan Blow
21 // James "moose2000" Brown (iPhone PNG)         Laurent Gomila
22 // Ben "Disch" Wenger (io callbacks)            Aruelien Pocheville
23 // Martin "SpartanJ" Golini                     Ryamond Barbiero
24 // David Woo
25 
26 /// PNG loader
27 module dplug.graphics.pngload;
28 
29 // This has been revived for the sake of loading PNG without too much memory usage.
30 // It turns out stb_image is more efficient than the loaders using std.zlib.
31 // https://github.com/lgvz/imageformats/issues/26
32 
import core.stdc.stdlib;
import core.stdc.string;
35 
/// Version tag exposed by this module.
enum STBI_VERSION = 1;

// Everything declared from here on is both nothrow and @nogc.
nothrow @nogc:

// Channel-count selectors; the numeric value equals the number of channels.
enum : int
{
   STBI_default    = 0, // only used for req_comp
   STBI_grey       = 1,
   STBI_grey_alpha = 2,
   STBI_rgb        = 3,
   STBI_rgb_alpha  = 4
}
49 
50 // define faster low-level operations (typically SIMD support)
51 
// stbi is the basic decoding context shared by all loaders: it carries the
// input cursor plus the basic image information read from the file.
struct stbi
{
   uint img_x;     // image width in pixels
   uint img_y;     // image height in pixels
   int img_n;      // number of channels to decompress/filter from the file
   int img_out_n;  // presumably the channel count of the decoded output; set outside the code visible here

   int buflen;              // not referenced by the code visible here
   ubyte[128] buffer_start; // not referenced by the code visible here

   const(ubyte)* img_buffer;          // read cursor into the input bytes
   const(ubyte)* img_buffer_end;      // one past the last input byte
   const(ubyte)* img_buffer_original; // start of the input, as given to start_mem
}
66 
67 
// Initialize a memory-decode context: the cursor and the remembered origin
// both point at `buffer`, and the end marker sits `len` bytes after it.
void start_mem(stbi *s, const(ubyte)*buffer, int len)
{
   s.img_buffer_original = buffer;
   s.img_buffer          = buffer;
   s.img_buffer_end      = buffer + len;
}
75 
/// Loads a PNG image from memory.
/// Params:
///   buffer = the encoded PNG bytes; the length is narrowed to `int`
///   width = receives the value written by `stbi_png_load`
///   height = receives the value written by `stbi_png_load`
///   components = receives the value written by `stbi_png_load`
///   requestedComponents = forwarded unchanged to `stbi_png_load`
/// Returns: the pointer returned by `stbi_png_load`.
/// Note: this function is `nothrow`, so it never throws an exception. The
///       decoding helpers visible in this module report corrupt input with
///       `assert(false, ...)` instead.
ubyte* stbi_load_png_from_memory(const(void)[] buffer, out int width, out int height, out int components, int requestedComponents)
{
   stbi s;
   start_mem(&s, cast(const(ubyte)*)buffer.ptr, cast(int)(buffer.length));
   return stbi_png_load(&s, &width, &height, &components, requestedComponents);
}
84 
85 
86 //
87 // Common code used by all image loaders
88 //
89 
// Scan modes accepted by the PNG parser's `scan` argument.
enum : int
{
   SCAN_load   = 0,
   SCAN_type   = 1,
   SCAN_header = 2
}
96 
97 
// Read the next input byte and advance the cursor.
// Returns 0 without advancing once the cursor has reached the end.
int get8(stbi *s)
{
   if (s.img_buffer >= s.img_buffer_end)
      return 0;

   const ubyte value = *s.img_buffer;
   ++s.img_buffer;
   return value;
}
105 
// Returns 1 when the cursor is at or past the end of the input, else 0.
int at_eof(stbi *s)
{
   return (s.img_buffer < s.img_buffer_end) ? 0 : 1;
}
110 
// Same as get8, with the result narrowed to an unsigned byte.
ubyte get8u(stbi *s)
{
   const int value = get8(s);
   return cast(ubyte) value;
}
115 
// Move the read cursor by n bytes; no bounds check is performed here.
void skip(stbi *s, int n)
{
   s.img_buffer = s.img_buffer + n;
}
120 
// Copy the next n input bytes into `buffer` and advance the cursor.
// Returns 1 on success. Returns 0, copying nothing and leaving the cursor
// untouched, when n is negative or fewer than n bytes remain.
int getn(stbi *s, ubyte *buffer, int n)
{
   // A negative count would be turned into an enormous size by memcpy.
   if (n < 0)
      return 0;

   // Compare against the remaining byte count instead of forming
   // `img_buffer + n`, which could point far outside the input.
   const ptrdiff_t remaining = s.img_buffer_end - s.img_buffer;
   if (n > remaining)
      return 0;

   memcpy(buffer, s.img_buffer, n);
   s.img_buffer += n;
   return 1;
}
130 
// Read a 16-bit big-endian value (most significant byte first).
int get16(stbi *s)
{
   const int hi = get8(s);
   const int lo = get8(s);
   return (hi << 8) + lo;
}
136 
// Read a 32-bit big-endian value as two big-endian 16-bit halves.
uint get32(stbi *s)
{
   const uint hi = get16(s);
   const int lo = get16(s);
   return (hi << 16) + lo;
}
142 
// Read a 16-bit little-endian value (least significant byte first).
int get16le(stbi *s)
{
   const int lo = get8(s);
   const int hi = get8(s);
   return lo + (hi << 8);
}
148 
// Read a 32-bit little-endian value as two little-endian 16-bit halves.
uint get32le(stbi *s)
{
   const uint lo = get16le(s);
   const int hi = get16le(s);
   return lo + (hi << 16);
}
154 
155 //
156 //  generic converter from built-in img_n to req_comp
157 //    individual types do this automatically as much as possible (e.g. jpeg
158 //    does all cases internally since it needs to colorspace convert anyway,
159 //    and it never has alpha, so very few cases ). png can automatically
160 //    interleave an alpha=255 channel, but falls back to this for other cases
161 //
162 //  assume data buffer is malloced, so malloc a new one and free that one
163 //  only failure mode is malloc failing
164 
// Grey level of an RGB triple: integer weights 77/150/29 (which sum to 256),
// followed by a shift right by 8.
ubyte compute_y(int r, int g, int b)
{
   const int weighted = 77 * r + 150 * g + 29 * b;
   return cast(ubyte) (weighted >> 8);
}
169 
// Convert an image with img_n channels per pixel into one with req_comp
// channels per pixel.
//
//   data     malloc'ed source pixels, x*y*img_n bytes; freed by this function
//            whenever a conversion takes place
//   img_n    channel count of `data`
//   req_comp requested channel count, 1..4
//   x, y     image width and height in pixels
//
// Returns `data` itself when no conversion is needed, otherwise a newly
// malloc'ed buffer of x*y*req_comp bytes. Allocation failure, or a size that
// does not fit in size_t, frees `data` and halts with assert(false).
ubyte *convert_format(ubyte *data, int img_n, int req_comp, uint x, uint y)
{
    if (req_comp == img_n) return data;
    assert(req_comp >= 1 && req_comp <= 4);

    // Compute the byte count in 64 bits: a 32-bit product could wrap around
    // and silently allocate a buffer that is too small.
    const ulong total = cast(ulong) req_comp * x * y;
    ubyte* good = null;
    if (total <= size_t.max)
        good = cast(ubyte*) malloc(cast(size_t) total);
    if (good == null) {
        free(data);
        assert(false);
    }

    // Row strides in size_t so the row offsets cannot wrap either.
    const size_t srcStride = cast(size_t) x * img_n;
    const size_t dstStride = cast(size_t) x * req_comp;

    for (uint j = 0; j < y; ++j) {
        ubyte* src  = data + j * srcStride;
        ubyte* dest = good + j * dstStride;

        // convert source image with img_n components to one with req_comp components;
        // the switch is per scanline so that no decision is taken per pixel
        switch (img_n * 8 + req_comp)
        {
            case 1 * 8 + 2: // grey -> grey + opaque alpha
                foreach (i; 0 .. x) {
                    dest[0] = src[0]; dest[1] = 255;
                    src += 1; dest += 2;
                }
                break;
            case 1 * 8 + 3: // grey -> rgb
                foreach (i; 0 .. x) {
                    dest[0] = dest[1] = dest[2] = src[0];
                    src += 1; dest += 3;
                }
                break;
            case 1 * 8 + 4: // grey -> rgb + opaque alpha
                foreach (i; 0 .. x) {
                    dest[0] = dest[1] = dest[2] = src[0]; dest[3] = 255;
                    src += 1; dest += 4;
                }
                break;
            case 2 * 8 + 1: // grey+alpha -> grey
                foreach (i; 0 .. x) {
                    dest[0] = src[0];
                    src += 2; dest += 1;
                }
                break;
            case 2 * 8 + 3: // grey+alpha -> rgb
                foreach (i; 0 .. x) {
                    dest[0] = dest[1] = dest[2] = src[0];
                    src += 2; dest += 3;
                }
                break;
            case 2 * 8 + 4: // grey+alpha -> rgba
                foreach (i; 0 .. x) {
                    dest[0] = dest[1] = dest[2] = src[0]; dest[3] = src[1];
                    src += 2; dest += 4;
                }
                break;
            case 3 * 8 + 4: // rgb -> rgb + opaque alpha
                foreach (i; 0 .. x) {
                    dest[0] = src[0]; dest[1] = src[1]; dest[2] = src[2]; dest[3] = 255;
                    src += 3; dest += 4;
                }
                break;
            case 3 * 8 + 1: // rgb -> grey
                foreach (i; 0 .. x) {
                    dest[0] = compute_y(src[0], src[1], src[2]);
                    src += 3; dest += 1;
                }
                break;
            case 3 * 8 + 2: // rgb -> grey + opaque alpha
                foreach (i; 0 .. x) {
                    dest[0] = compute_y(src[0], src[1], src[2]); dest[1] = 255;
                    src += 3; dest += 2;
                }
                break;
            case 4 * 8 + 1: // rgba -> grey
                foreach (i; 0 .. x) {
                    dest[0] = compute_y(src[0], src[1], src[2]);
                    src += 4; dest += 1;
                }
                break;
            case 4 * 8 + 2: // rgba -> grey + alpha
                foreach (i; 0 .. x) {
                    dest[0] = compute_y(src[0], src[1], src[2]); dest[1] = src[3];
                    src += 4; dest += 2;
                }
                break;
            case 4 * 8 + 3: // rgba -> rgb
                foreach (i; 0 .. x) {
                    dest[0] = src[0]; dest[1] = src[1]; dest[2] = src[2];
                    src += 4; dest += 3;
                }
                break;
            default: assert(0);
        }
    }

    free(data);
    return good;
}
247 
248 // public domain zlib decode    v0.2  Sean Barrett 2006-11-18
249 //    simple implementation
250 //      - all input must be provided in an upfront buffer
251 //      - all output is written to a single output buffer (can malloc/realloc)
252 //    performance
253 //      - fast huffman
254 
// fast-way is faster to check than jpeg huffman, but slow way is slower
enum ZFAST_BITS = 9; // accelerate all cases in default tables
enum ZFAST_MASK = (1 << ZFAST_BITS) - 1;

// zlib-style huffman encoding
// (jpegs packs from left, zlib from right, so can't share code)
struct zhuffman
{
   ushort[ZFAST_MASK + 1] fast; // indexed by the low ZFAST_BITS bits of the bit buffer; 0xFFFF when unresolved
   ushort[16] firstcode;        // first code of each code length
   int[17] maxcode;             // per-length upper bound, preshifted to 16 bits; [16] is a sentinel
   ushort[16] firstsymbol;      // index into size/value of the first symbol of each length
   ubyte[288] size;             // code length of each entry
   ushort[288] value;           // symbol of each entry
}
270 
// Reverse the order of the low 16 bits of n; higher bits are discarded.
// The swaps are independent, so they are applied from the widest group
// (bytes) down to single bits.
int bitreverse16(int n)
{
  int r = ((n & 0xFF00) >> 8) | ((n & 0x00FF) << 8);   // swap bytes
  r = ((r & 0xF0F0) >> 4) | ((r & 0x0F0F) << 4);       // swap nibbles
  r = ((r & 0xCCCC) >> 2) | ((r & 0x3333) << 2);       // swap bit pairs
  r = ((r & 0xAAAA) >> 1) | ((r & 0x5555) << 1);       // swap adjacent bits
  return r;
}
279 
// Reverse the low `bits` bits of v (bits <= 16): reverse all 16 bits, then
// shift away the unused ones. E.g. for 11 bits, reverse and drop 5.
int bit_reverse(int v, int bits)
{
   assert(bits <= 16);
   const int reversed = bitreverse16(v);
   return reversed >> (16 - bits);
}
287 
// Build the decoding tables of `z` from `num` code lengths in `sizelist`
// (one length per symbol, 0 meaning "symbol unused").
// Always returns 1; an over-subscribed length halts with assert(false).
int zbuild_huffman(zhuffman *z, ubyte *sizelist, int num)
{
   int[17] lengthCount; // number of symbols per code length (zero-initialised)
   int[16] nextCode;    // next code to hand out for each length

   // 0xFFFF marks "not resolvable through the fast table"
   z.fast[] = 0xFFFF;

   // DEFLATE spec for generating codes
   foreach (idx; 0 .. num)
      lengthCount[sizelist[idx]] += 1;
   lengthCount[0] = 0;

   foreach (bits; 1 .. 16)
      assert(lengthCount[bits] <= (1 << bits));

   int code = 0;
   int symbolCount = 0;
   foreach (bits; 1 .. 16) {
      const int count = lengthCount[bits];
      nextCode[bits] = code;
      z.firstcode[bits] = cast(ushort) code;
      z.firstsymbol[bits] = cast(ushort) symbolCount;
      code += count;
      if (count != 0 && code - 1 >= (1 << bits))
         assert(false);
      z.maxcode[bits] = code << (16 - bits); // preshift for inner loop
      code <<= 1;
      symbolCount += count;
   }
   z.maxcode[16] = 0x10000; // sentinel

   foreach (symbol; 0 .. num) {
      const int bits = sizelist[symbol];
      if (bits == 0)
         continue;

      const int slot = nextCode[bits] - z.firstcode[bits] + z.firstsymbol[bits];
      z.size[slot] = cast(ubyte) bits;
      z.value[slot] = cast(ushort) symbol;

      if (bits <= ZFAST_BITS) {
         // every fast-table index whose low `bits` bits equal the reversed code
         for (int entry = bit_reverse(nextCode[bits], bits);
              entry < (1 << ZFAST_BITS);
              entry += (1 << bits))
            z.fast[entry] = cast(ushort) slot;
      }
      nextCode[bits] += 1;
   }
   return 1;
}
335 
336 // zlib-from-memory implementation for PNG reading
337 //    because PNG allows splitting the zlib stream arbitrarily,
338 //    and it's annoying structurally to have PNG call ZLIB call PNG,
339 //    we require PNG read all the IDATs and combine them into a single
340 //    memory buffer
341 
// State of one zlib decode: input cursor, bit buffer, output buffer and the
// two Huffman tables of the block being decoded.
struct zbuf
{
   const(ubyte)* zbuffer;     // next compressed input byte
   const(ubyte)* zbuffer_end; // one past the last compressed input byte
   int num_bits;              // number of valid bits held in code_buffer
   uint code_buffer;          // bit buffer, consumed from the low end

   ubyte* zout;               // next output position
   ubyte* zout_start;         // start of the output buffer
   ubyte* zout_end;           // one past the end of the output buffer
   int z_expandable;          // non-zero when expand() may realloc the output

   zhuffman z_length;         // literal/length table
   zhuffman z_distance;       // distance table
}
356 
// Fetch the next compressed byte, or 0 once the input is exhausted.
int zget8(zbuf *z)
{
   if (z.zbuffer < z.zbuffer_end) {
      const ubyte next = *z.zbuffer;
      ++z.zbuffer;
      return next;
   }
   return 0;
}
362 
// Top up the bit buffer one byte at a time until it holds more than 24 bits.
// At least one byte is always appended.
void fill_bits(zbuf *z)
{
   for (;;) {
      assert(z.code_buffer < (1U << z.num_bits));
      const int incoming = zget8(z);
      z.code_buffer |= incoming << z.num_bits;
      z.num_bits += 8;
      if (z.num_bits > 24)
         break;
   }
}
371 
// Consume and return the next n bits of the stream (low bits first),
// refilling the bit buffer first when it holds fewer than n bits.
uint zreceive(zbuf *z, int n)
{
   if (z.num_bits < n)
      fill_bits(z);

   const uint mask = (1 << n) - 1;
   const uint bits = z.code_buffer & mask;
   z.code_buffer >>= n;
   z.num_bits -= n;
   return bits;
}
381 
// Decode one symbol from the bit stream using table `z`.
// Returns the symbol, or -1 when the bits do not form a valid code.
int zhuffman_decode(zbuf *a, zhuffman *z)
{
   if (a.num_bits < 16)
      fill_bits(a);

   int entry = z.fast[a.code_buffer & ZFAST_MASK];
   int codeLen;

   if (entry >= 0xffff) {
      // not resolved by fast table, so compute it the slow way
      // use jpeg approach, which requires MSbits at top
      const int msbFirst = bit_reverse(a.code_buffer, 16);
      codeLen = ZFAST_BITS + 1;
      while (msbFirst >= z.maxcode[codeLen])
         ++codeLen;
      if (codeLen == 16) return -1; // invalid code!
      // code size is codeLen, so:
      entry = (msbFirst >> (16 - codeLen)) - z.firstcode[codeLen] + z.firstsymbol[codeLen];
      assert(z.size[entry] == codeLen);
   } else {
      codeLen = z.size[entry];
   }

   a.code_buffer >>= codeLen;
   a.num_bits -= codeLen;
   return z.value[entry];
}
408 
// Grow the output buffer so that at least n more bytes fit after z.zout.
// The capacity is doubled until it is large enough; the data already written
// is preserved and zout/zout_start/zout_end are re-based on the new block.
// Returns 1. Halts with assert(false) when the buffer is not expandable, when
// the capacity would exceed int.max, or when realloc fails.
int expand(zbuf *z, int n)  // need to make room for n bytes
{
   if (!z.z_expandable)
      assert(false, "Output buffer limit, corrupt PNG");

   // 64-bit arithmetic so neither `cur + n` nor the doubling can wrap.
   const long cur = z.zout - z.zout_start;
   long limit = z.zout_end - z.zout_start;
   if (limit <= 0)
      limit = 1; // doubling zero would never make progress
   while (cur + n > limit)
      limit *= 2;

   // The rest of the decoder stores output sizes in `int`.
   if (limit > int.max)
      assert(false, "Out of memory");

   ubyte* q = cast(ubyte*) realloc(z.zout_start, cast(size_t) limit);
   if (q == null)
      assert(false, "Out of memory");

   z.zout_start = q;
   z.zout       = q + cur;
   z.zout_end   = q + limit;
   return 1;
}
427 
// Base match length for each length symbol (symbol - 257); the trailing
// zero entries pad the table for symbols that carry no length.
static immutable int[31] length_base = [
   3,4,5,6,7,8,9,10,11,13,
   15,17,19,23,27,31,35,43,51,59,
   67,83,99,115,131,163,195,227,258,0,0 ];

// Number of extra bits read after each length symbol.
static immutable int[31] length_extra =
[ 0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0,0,0 ];

// Base distance for each distance symbol.
static immutable int[32] dist_base = [ 1,2,3,4,5,7,9,13,17,25,33,49,65,97,129,193,
257,385,513,769,1025,1537,2049,3073,4097,6145,8193,12289,16385,24577,0,0];

// Number of extra bits read after each distance symbol. All 32 entries are
// spelled out (the last two are padding, as in dist_base) so the initializer
// matches the declared length.
static immutable int[32] dist_extra =
[ 0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13,0,0];
441 
// Decode one Huffman-coded block into the output buffer using the tables
// already stored in a.z_length / a.z_distance.
// Returns 1 at the end-of-block symbol (256), 0 when expand() reports
// failure; corrupt data halts with assert(false).
int parse_huffman_block(zbuf *a)
{
   while (true) {
      const int sym = zhuffman_decode(a, &a.z_length);
      if (sym < 0)
         assert(false, "Bad Huffman code, corrupt PNG");

      if (sym < 256) {
         // literal byte
         if (a.zout >= a.zout_end && !expand(a, 1)) return 0;
         *a.zout = cast(ubyte) sym;
         ++a.zout;
         continue;
      }

      if (sym == 256) return 1;

      // back-reference: length first, then distance
      const int lenIdx = sym - 257;
      int len = length_base[lenIdx];
      if (length_extra[lenIdx]) len += zreceive(a, length_extra[lenIdx]);

      const int distIdx = zhuffman_decode(a, &a.z_distance);
      if (distIdx < 0) assert(false, "Bad Huffman code, corrupt PNG");
      int dist = dist_base[distIdx];
      if (dist_extra[distIdx]) dist += zreceive(a, dist_extra[distIdx]);

      if (a.zout - a.zout_start < dist) assert(false, "Bad dist, corrupt PNG");
      if (a.zout + len > a.zout_end && !expand(a, len)) return 0;

      // byte-by-byte copy: source and destination may overlap
      ubyte* from = a.zout - dist;
      for (; len != 0; --len) {
         *a.zout = *from;
         ++a.zout;
         ++from;
      }
   }
}
470 
// Read the header of a dynamic-Huffman block and build a.z_length and
// a.z_distance from it.
// Returns 1 on success, 0 when zbuild_huffman reports failure; malformed
// code-length data halts with assert(false, "Bad codelengths, corrupt PNG").
int compute_huffman_codes(zbuf *a)
{
   static immutable ubyte[19] length_dezigzag = [ 16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15 ];
   zhuffman z_codelength;
   ubyte[286+32+137] lencodes;//padding for maximum single op
   ubyte[19] codelength_sizes;

   const int hlit  = zreceive(a,5) + 257;
   const int hdist = zreceive(a,5) + 1;
   const int hclen = zreceive(a,4) + 4;
   const int ntot  = hlit + hdist;

   // lengths of the code-length alphabet arrive in a permuted order
   memset(codelength_sizes.ptr, 0, codelength_sizes.sizeof);
   foreach (i; 0 .. hclen) {
      const int s = zreceive(a,3);
      codelength_sizes[length_dezigzag[i]] = cast(ubyte) s;
   }
   if (!zbuild_huffman(&z_codelength, codelength_sizes.ptr, 19)) return 0;

   int n = 0;
   while (n < ntot) {
      const int c = zhuffman_decode(a, &z_codelength);
      // Hard check rather than a plain assert: it must also hold in release
      // builds, where -1 (invalid code) would otherwise be stored as 255.
      if (c < 0 || c >= 19)
         assert(false, "Bad codelengths, corrupt PNG");

      if (c < 16) {
         lencodes[n++] = cast(ubyte) c;
         continue;
      }

      ubyte fill = 0;
      int repeat;
      if (c == 16) {
         // repeat the previous length 3..6 times
         repeat = zreceive(a,2)+3;
         if (n == 0) // there is no previous length to repeat
            assert(false, "Bad codelengths, corrupt PNG");
         fill = lencodes[n-1];
      } else if (c == 17) {
         // 3..10 zero lengths
         repeat = zreceive(a,3)+3;
      } else {
         // c == 18: 11..138 zero lengths
         repeat = zreceive(a,7)+11;
      }

      // A run must not extend past the announced number of lengths; this
      // also keeps the memset inside `lencodes`.
      if (ntot - n < repeat)
         assert(false, "Bad codelengths, corrupt PNG");
      memset(lencodes.ptr+n, fill, repeat);
      n += repeat;
   }
   if (n != ntot) assert(false, "Bad codelengths, corrupt PNG");
   if (!zbuild_huffman(&a.z_length, lencodes.ptr, hlit)) return 0;
   if (!zbuild_huffman(&a.z_distance, lencodes.ptr+hlit, hdist)) return 0;
   return 1;
}
516 
// Copy one stored (uncompressed) block to the output.
// Returns 1 on success, 0 when expand() reports failure; a bad header or a
// block running past the input halts with assert(false).
int parse_uncompressed_block(zbuf *a)
{
   ubyte[4] header;

   // discard the bits left over in the current byte
   const int leftover = a.num_bits & 7;
   if (leftover != 0)
      zreceive(a, leftover);

   // drain the whole bytes still sitting in the bit buffer into header
   int filled = 0;
   for (; a.num_bits > 0; a.num_bits -= 8) {
      header[filled++] = cast(ubyte) (a.code_buffer & 255);
      a.code_buffer >>= 8;
   }
   assert(a.num_bits == 0);

   // now fill the rest of the header the normal way
   for (; filled < 4; ++filled)
      header[filled] = cast(ubyte) zget8(a);

   // LEN, then NLEN, both little-endian
   const int len  = (header[1] << 8) | header[0];
   const int nlen = (header[3] << 8) | header[2];
   if ((len ^ 0xffff) != nlen) assert(false, "Zlib corrupt, corrupt PNG");
   if (a.zbuffer + len > a.zbuffer_end) assert(false, "Read past buffer, corrupt PNG");
   if (a.zout + len > a.zout_end && !expand(a, len)) return 0;

   memcpy(a.zout, a.zbuffer, len);
   a.zbuffer += len;
   a.zout += len;
   return 1;
}
545 
// Consume and validate the two-byte zlib header.
// Returns 1; an invalid header halts with assert(false).
int parse_zlib_header(zbuf *a)
{
   const int cmf = zget8(a);
   const int flg = zget8(a);
   const int method = cmf & 15;
   /* cinfo (cmf >> 4) is ignored */

   if (((cmf << 8) | flg) % 31 != 0) assert(false, "Bad zlib header, corrupt PNG"); // zlib spec
   if (flg & 32) assert(false, "No preset dict, corrupt PNG"); // preset dictionary not allowed in png
   if (method != 8) assert(false, "Bad compression, corrupt PNG");  // DEFLATE required for png
   // window = 1 << (8 + cinfo)... but who cares, we fully buffer output
   return 1;
}
558 
// MAYDO: should statically initialize these for optimal thread safety
__gshared ubyte[288] default_length;
__gshared ubyte[32] default_distance;

// Fill the two tables above with the code lengths used for "fixed code
// lengths" blocks (block type 1 in parse_zlib).
void init_defaults()
{
   default_length[  0 .. 144] = 8; // symbols   0..143
   default_length[144 .. 256] = 9; // symbols 144..255
   default_length[256 .. 280] = 7; // symbols 256..279
   default_length[280 .. 288] = 8; // symbols 280..287

   default_distance[] = 5;
}
573 
__gshared int stbi_png_partial; // a quick hack to only allow decoding some of a PNG... I should implement real streaming support instead

// Decode a whole DEFLATE stream (optionally preceded by a zlib header) from
// a.zbuffer into the output buffer.
// Returns 1 on success, 0 on failure (block type 3 or a failing helper).
int parse_zlib(zbuf *a, int parse_header)
{
   if (parse_header && !parse_zlib_header(a))
      return 0;

   a.num_bits = 0;
   a.code_buffer = 0;

   for (;;) {
      const int isFinal   = zreceive(a,1);
      const int blockType = zreceive(a,2);

      if (blockType == 3)
         return 0;

      if (blockType == 0) {
         if (!parse_uncompressed_block(a)) return 0;
      } else {
         if (blockType != 1) {
            if (!compute_huffman_codes(a)) return 0;
         } else {
            // use fixed code lengths; the tables are filled on first use
            if (default_distance[31] == 0) init_defaults();
            if (!zbuild_huffman(&a.z_length  , default_length.ptr  , 288)) return 0;
            if (!zbuild_huffman(&a.z_distance, default_distance.ptr,  32)) return 0;
         }
         if (!parse_huffman_block(a)) return 0;
      }

      // partial mode: stop once more than 64 KiB have been produced
      if (stbi_png_partial && a.zout - a.zout_start > 65536)
         break;
      if (isFinal)
         break;
   }
   return 1;
}
605 
// Attach the output buffer `obuf` (olen bytes, growable when exp is
// non-zero) to `a`, then run the decoder. Returns parse_zlib's result.
int do_zlib(zbuf *a, ubyte *obuf, int olen, int exp, int parse_header)
{
   a.z_expandable = exp;
   a.zout_start   = obuf;
   a.zout_end     = obuf + olen;
   a.zout         = obuf;

   const int ok = parse_zlib(a, parse_header);
   return ok;
}
615 
// Inflate a zlib stream (header included) into a malloc'ed buffer that
// starts at initial_size bytes and grows as needed.
// Returns the buffer (and its length through outlen when non-null), or null
// on failure. This is the header-parsing case of the headerflag variant.
ubyte *stbi_zlib_decode_malloc_guesssize(const(ubyte) *buffer, int len, int initial_size, int *outlen)
{
   return stbi_zlib_decode_malloc_guesssize_headerflag(buffer, len, initial_size, outlen, 1);
}
631 
// Same as stbi_zlib_decode_malloc_guesssize with a 16 KiB initial guess.
ubyte *stbi_zlib_decode_malloc(const(ubyte) *buffer, int len, int *outlen)
{
   enum int initialGuess = 16384;
   return stbi_zlib_decode_malloc_guesssize(buffer, len, initialGuess, outlen);
}
636 
// Inflate `len` bytes at `buffer` into a malloc'ed, growable output buffer
// of initially initial_size bytes. parse_header selects whether a zlib
// header is expected in front of the DEFLATE data.
// Returns the output buffer (length stored through outlen when non-null),
// or null when the allocation or the decode fails.
ubyte *stbi_zlib_decode_malloc_guesssize_headerflag(const(ubyte) *buffer, int len, int initial_size, int *outlen, int parse_header)
{
   ubyte* initial = cast(ubyte*) malloc(initial_size);
   if (initial is null)
      return null;

   zbuf a;
   a.zbuffer     = buffer;
   a.zbuffer_end = buffer + len;

   if (!do_zlib(&a, initial, initial_size, 1, parse_header)) {
      // zout_start is the current block even if it was reallocated
      free(a.zout_start);
      return null;
   }

   if (outlen)
      *outlen = cast(int) (a.zout - a.zout_start);
   return a.zout_start;
}
652 
// Inflate a zlib stream (header included) into the caller's fixed buffer
// `obuffer` of olen bytes. Returns the number of bytes written, or -1 when
// do_zlib reports failure.
int stbi_zlib_decode_buffer(ubyte* obuffer, int olen, const(ubyte)* ibuffer, int ilen)
{
   zbuf a;
   a.zbuffer     = ibuffer;
   a.zbuffer_end = ibuffer + ilen;

   const int ok = do_zlib(&a, obuffer, olen, 0, 1);
   return ok ? cast(int) (a.zout - a.zout_start) : -1;
}
663 
// Inflate raw DEFLATE data (no zlib header) into a malloc'ed, growable
// buffer that starts at 16 KiB. Returns the buffer (length through outlen
// when non-null) or null on failure. This is the header-less case of the
// headerflag variant.
ubyte *stbi_zlib_decode_noheader_malloc(const(ubyte) *buffer, int len, int *outlen)
{
   return stbi_zlib_decode_malloc_guesssize_headerflag(buffer, len, 16384, outlen, 0);
}
679 
// Inflate raw DEFLATE data (no zlib header) into the caller's fixed buffer
// `obuffer` of olen bytes. Returns the number of bytes written, or -1 when
// do_zlib reports failure.
int stbi_zlib_decode_noheader_buffer(ubyte *obuffer, int olen, const(ubyte) *ibuffer, int ilen)
{
   zbuf a;
   a.zbuffer     = ibuffer;
   a.zbuffer_end = ibuffer + ilen;

   const int ok = do_zlib(&a, obuffer, olen, 0, 0);
   return ok ? cast(int) (a.zout - a.zout_start) : -1;
}
690 
691 // public domain "baseline" PNG decoder   v0.10  Sean Barrett 2006-11-18
692 //    simple implementation
693 //      - only 8-bit samples
694 //      - no CRC checking
695 //      - allocates lots of intermediate memory
696 //        - avoids problem of streaming data between subsystems
697 //        - avoids explicit window management
698 //    performance
699 //      - uses stb_zlib, a PD zlib implementation with fast huffman decoding
700 
701 
// Header of one PNG chunk, in the order it is read from the file.
struct chunk
{
   uint length; // first 32-bit field of the header
   uint type;   // second 32-bit field, compared against PNG_TYPE(...) tags
}
707 
// Pack four bytes into one 32-bit tag, `a` being the most significant byte.
uint PNG_TYPE(ubyte a, ubyte b, ubyte c, ubyte d)
{
   uint tag = a;
   tag = (tag << 8) | b;
   tag = (tag << 8) | c;
   tag = (tag << 8) | d;
   return tag;
}
712 
// Read a chunk header: two big-endian 32-bit values, length then type.
chunk get_chunk_header(stbi *s)
{
   const uint len = get32(s);
   const uint tag = get32(s);
   return chunk(len, tag);
}
720 
// Consume the 8-byte PNG signature. Returns 1 when it matches; the first
// mismatching byte halts with assert(false).
static int check_png_header(stbi *s)
{
   static immutable ubyte[8] png_sig = [ 137, 80, 78, 71, 13, 10, 26, 10 ];
   foreach (expected; png_sig)
   {
       const ubyte headerByte = get8u(s);
       if (headerByte != expected)
           assert(false, "Bad PNG sig, not a PNG");
   }
   return 1;
}
733 
// Working state of one PNG decode.
struct png
{
   stbi* s;         // shared I/O and image context
   ubyte* idata;    // reset to null when parsing starts
   ubyte* expanded; // reset to null when parsing starts
   ubyte* out_;     // decoded pixels, malloc'ed by the image-creation routines
}
741 
742 
// Scanline filter identifiers. 0..4 are the values stored in the file; the
// two *_first entries are internal variants that never read the previous row.
enum : int
{
   F_none        = 0,
   F_sub         = 1,
   F_up          = 2,
   F_avg         = 3,
   F_paeth       = 4,
   F_avg_first   = 5,
   F_paeth_first = 6
}

// Filter to apply on the first row, indexed by the filter byte from the file.
static immutable ubyte[5] first_row_filter =
[
   F_none,        // none
   F_sub,         // sub
   F_none,        // up
   F_avg_first,   // avg
   F_paeth_first  // paeth
];
753 
// Paeth predictor: of a, b and c, return the one closest to a + b - c,
// preferring a, then b, on ties.
static int paeth(int a, int b, int c)
{
   const int estimate = a + b - c;
   const int distA = abs(estimate - a);
   const int distB = abs(estimate - b);
   const int distC = abs(estimate - c);

   if (distA <= distB && distA <= distC)
      return a;
   return (distB <= distC) ? b : c;
}
764 
// create the png data from post-deflated data
//
// Un-filters y scanlines of x pixels from `raw` into a freshly malloc'ed
// a.out_ holding out_n channels per pixel. out_n is either the file's channel
// count (s.img_n) or one more; in the latter case the extra channel of every
// pixel is set to 255. Each input scanline is one filter byte followed by
// x * img_n data bytes.
// Returns 1; out-of-memory and corrupt input halt with assert(false).
static int create_png_image_raw(png *a, ubyte *raw, uint raw_len, int out_n, uint x, uint y)
{
   stbi *s = a.s;
   uint i,j,stride = x*out_n; // stride = bytes per output row
   int k;
   int img_n = s.img_n; // copy it into a local for later
   assert(out_n == s.img_n || out_n == s.img_n+1);
   // partial mode decodes the first row only
   if (stbi_png_partial) y = 1;
   a.out_ = cast(ubyte*) malloc(x * y * out_n);
   if (!a.out_) assert(false, "Out of memory");
   if (!stbi_png_partial) {
      // a full-size image must match the input length exactly; any other
      // size only needs enough input for its own rows
      if (s.img_x == x && s.img_y == y) {
         if (raw_len != (img_n * x + 1) * y) assert(false, "Not enough pixels, corrupt PNG");
      } else { // interlaced:
         if (raw_len < (img_n * x + 1) * y) assert(false, "Not enough pixels, corrupt PNG");
      }
   }
   for (j=0; j < y; ++j) {
      ubyte *cur = a.out_ + stride*j;
      // for j == 0 this points before the buffer; it is never read there
      // because the first-row filters do not sample the previous row
      ubyte *prior = cur - stride;
      int filter = *raw++;
      if (filter > 4) assert(false, "Invalid filter, corrupt PNG");
      // if first row, use special filter that doesn't sample previous row
      if (j == 0) filter = first_row_filter[filter];
      // handle first pixel explicitly
      // (there is no pixel to its left, so the "left" term is treated as 0)
      for (k=0; k < img_n; ++k) {
         switch (filter) {
            case F_none       : cur[k] = raw[k]; break;
            case F_sub        : cur[k] = raw[k]; break;
            case F_up         : cur[k] = cast(ubyte)(raw[k] + prior[k]); break;
            case F_avg        : cur[k] = cast(ubyte)(raw[k] + (prior[k]>>1)); break;
            case F_paeth      : cur[k] = cast(ubyte) (raw[k] + paeth(0,prior[k],0)); break;
            case F_avg_first  : cur[k] = raw[k]; break;
            case F_paeth_first: cur[k] = raw[k]; break;
            default: break;
         }
      }
      // opaque alpha for the first pixel when a channel is being added
      if (img_n != out_n) cur[img_n] = 255;
      raw += img_n;
      cur += out_n;
      prior += out_n;
      // this is a little gross, so that we don't switch per-pixel or per-component
      if (img_n == out_n) {

         // remaining x-1 pixels, input and output have the same pixel size
         for (i=x-1; i >= 1; --i, raw+=img_n,cur+=img_n,prior+=img_n)
            for (k=0; k < img_n; ++k)
            {
               switch (filter) {
                  case F_none:  cur[k] = raw[k]; break;
                  case F_sub:   cur[k] = cast(ubyte)(raw[k] + cur[k-img_n]); break;
                  case F_up:    cur[k] = cast(ubyte)(raw[k] + prior[k]); break;
                  case F_avg:   cur[k] = cast(ubyte)(raw[k] + ((prior[k] + cur[k-img_n])>>1)); break;
                  case F_paeth:  cur[k] = cast(ubyte) (raw[k] + paeth(cur[k-img_n],prior[k],prior[k-img_n])); break;
                  case F_avg_first:    cur[k] = cast(ubyte)(raw[k] + (cur[k-img_n] >> 1)); break;
                  case F_paeth_first:  cur[k] = cast(ubyte) (raw[k] + paeth(cur[k-img_n],0,0)); break;
                  default: break;
               }
            }
      } else {
         assert(img_n+1 == out_n);

         // remaining x-1 pixels, output pixels are one byte wider: neighbours
         // are out_n bytes apart and the extra channel is set to 255 after
         // each pixel
         for (i=x-1; i >= 1; --i, cur[img_n]=255,raw+=img_n,cur+=out_n,prior+=out_n)
            for (k=0; k < img_n; ++k)
            {
               switch (filter) {
                  case F_none:  cur[k] = raw[k]; break;
                  case F_sub:   cur[k] = cast(ubyte)(raw[k] + cur[k-out_n]); break;
                  case F_up:    cur[k] = cast(ubyte)(raw[k] + prior[k]); break;
                  case F_avg:   cur[k] = cast(ubyte)(raw[k] + ((prior[k] + cur[k-out_n])>>1)); break;
                  case F_paeth:  cur[k] = cast(ubyte) (raw[k] + paeth(cur[k-out_n],prior[k],prior[k-out_n])); break;
                  case F_avg_first:    cur[k] = cast(ubyte)(raw[k] + (cur[k-out_n] >> 1)); break;
                  case F_paeth_first:  cur[k] = cast(ubyte) (raw[k] + paeth(cur[k-out_n],0,0)); break;
                  default: break;
               }
            }
      }
   }
   return 1;
}
845 
// Build the final image in a.out_ from the inflated data `raw`.
//
// Non-interlaced images are handed straight to create_png_image_raw.
// Interlaced images are decoded as seven reduced passes, each of which is
// un-filtered on its own and then scattered into the full-size image.
//
//   raw, raw_len  inflated image data and its length in bytes
//   out_n         channels per output pixel
//   interlaced    non-zero for an interlaced image
//
// Returns 1 on success, 0 when a pass fails; out-of-memory halts with
// assert(false).
int create_png_image(png *a, ubyte *raw, uint raw_len, int out_n, int interlaced)
{
   // origin and spacing of the pixels belonging to each of the seven passes
   static immutable int[7] xorig = [ 0,4,0,2,0,1,0 ];
   static immutable int[7] yorig = [ 0,0,4,0,2,0,1 ];
   static immutable int[7] xspc = [ 8,8,4,4,2,2,1 ];
   static immutable int[7] yspc = [ 8,8,8,4,4,2,2 ];

   if (!interlaced)
      return create_png_image_raw(a, raw, raw_len, out_n, a.s.img_x, a.s.img_y);

   // every pass has to be decoded in full, so partial mode is suspended
   const int save = stbi_png_partial;
   stbi_png_partial = 0;

   // de-interlacing
   ubyte* final_ = cast(ubyte*) malloc(a.s.img_x * a.s.img_y * out_n);
   if (final_ == null)
      assert(false, "Out of memory");

   for (int p = 0; p < 7; ++p) {
      int i,j,x,y;
      // pass1_x[4] = 0, pass1_x[5] = 1, pass1_x[12] = 1
      x = (a.s.img_x - xorig[p] + xspc[p]-1) / xspc[p];
      y = (a.s.img_y - yorig[p] + yspc[p]-1) / yspc[p];
      if (x && y) {
         if (!create_png_image_raw(a, raw, raw_len, out_n, x, y)) {
            free(final_);
            stbi_png_partial = save;
            return 0;
         }
         // scatter the pass pixels to their positions in the full image
         for (j=0; j < y; ++j)
            for (i=0; i < x; ++i)
               memcpy(final_ + (j*yspc[p]+yorig[p])*a.s.img_x*out_n + (i*xspc[p]+xorig[p])*out_n,
                      a.out_ + (j*x+i)*out_n, out_n);
         free(a.out_);

         // Each input row is one filter byte plus x pixels of img_n bytes,
         // the channel count stored in the file. out_n may be one larger
         // (added alpha channel) and must not be used to advance `raw`.
         const uint consumed = (x * a.s.img_n + 1) * y;
         raw += consumed;
         raw_len -= consumed;
      }
   }
   a.out_ = final_;

   stbi_png_partial = save;
   return 1;
}
886 
// Apply colour-key transparency to z.out_ in place, assuming the alpha
// channel already holds 255.
// With 2 channels the alpha becomes 0 where the grey value equals tc[0] and
// 255 elsewhere; otherwise the alpha becomes 0 where the first three channels
// equal tc and is left untouched elsewhere. Returns 1.
static int compute_transparency(png *z, ubyte[3] tc, int out_n)
{
   const uint pixel_count = z.s.img_x * z.s.img_y;
   ubyte* px = z.out_;

   assert(out_n == 2 || out_n == 4);

   if (out_n == 2) {
      foreach (idx; 0 .. pixel_count) {
         px[1] = (px[0] == tc[0] ? 0 : 255);
         px += 2;
      }
      return 1;
   }

   foreach (idx; 0 .. pixel_count) {
      const bool isKey = px[0] == tc[0] && px[1] == tc[1] && px[2] == tc[2];
      if (isKey)
         px[3] = 0;
      px += 4;
   }
   return 1;
}
911 
// Replace the palette indices in a.out_ (one byte per pixel) with the
// colours they refer to. `palette` holds 4 bytes per entry; pal_img_n == 3
// copies the first three of them per pixel, any other value copies all four.
// The old a.out_ is freed and replaced by the new buffer. `len` is unused.
// Returns 1; allocation failure halts with assert(false).
int expand_palette(png *a, ubyte *palette, int len, int pal_img_n)
{
   const uint pixel_count = a.s.img_x * a.s.img_y;
   ubyte* indices = a.out_;

   ubyte* expanded = cast(ubyte*) malloc(pixel_count * pal_img_n);
   if (expanded is null)
      assert(false, "Out of memory");

   // between here and the free() below, exiting would leak
   ubyte* dst = expanded;

   if (pal_img_n == 3) {
      foreach (i; 0 .. pixel_count) {
         const ubyte* entry = palette + indices[i] * 4;
         dst[0] = entry[0];
         dst[1] = entry[1];
         dst[2] = entry[2];
         dst += 3;
      }
   } else {
      foreach (i; 0 .. pixel_count) {
         const ubyte* entry = palette + indices[i] * 4;
         dst[0] = entry[0];
         dst[1] = entry[1];
         dst[2] = entry[2];
         dst[3] = entry[3];
         dst += 4;
      }
   }

   free(a.out_);
   a.out_ = expanded;

   return 1;
}
949 
/// Walks the chunks of a PNG stream and, depending on `scan`, either stops
/// early or decodes the whole image into `z.out_`.
///
/// Params:
///   z        = decoder state. `z.expanded`, `z.idata` and `z.out_` are reset to
///              null on entry; `z.s` is the input stream / image info.
///   scan     = `SCAN_type`: return right after the PNG signature check.
///              `SCAN_header`: return as soon as `s.img_n` is known.
///              `SCAN_load`: read all IDAT data and decode at IEND.
///   req_comp = requested number of output components (0 = keep the file's).
/// Returns: 1 on success, 0 when the signature check, zlib decoding, or one of
///          the image-building helpers fails. Structural errors in the file
///          end in `assert(false, ...)`.
int parse_png_file(png *z, int scan, int req_comp)
{
   // Upper bound for the accumulated IDAT payload. Keeping the total at or
   // below 2^30 means `ioff + c.length` cannot wrap and the buffer-doubling
   // loop cannot overflow `idata_limit`.
   enum uint IDAT_LIMIT = 1u << 30;

   ubyte[1024] palette;       // 256 entries, 4 bytes each (r, g, b, a)
   ubyte pal_img_n=0;         // 0 = not paletted, else 3 (no tRNS) or 4 (tRNS seen)
   ubyte has_trans=0;         // set when a tRNS chunk applies to a non-paletted image
   ubyte[3] tc;               // transparent color key for non-paletted images
   uint ioff=0, idata_limit=0, i, pal_len=0;
   int first=1,k,interlace=0;
   stbi *s = z.s;

   z.expanded = null;
   z.idata = null;
   z.out_ = null;

   if (!check_png_header(s)) return 0;

   if (scan == SCAN_type) return 1;

   for (;;) {
      chunk c = get_chunk_header(s);
      switch (c.type) {
         case PNG_TYPE('I','H','D','R'): {
            int depth,color,comp,filter;
            if (!first) assert(false, "Multiple IHDR, corrupt PNG");
            first = 0;
            if (c.length != 13) assert(false, "Bad IHDR len, corrupt PNG");
            s.img_x = get32(s); if (s.img_x > (1 << 24)) assert(false, "Very large image (corrupt?)");
            s.img_y = get32(s); if (s.img_y > (1 << 24)) assert(false, "Very large image (corrupt?)");
            depth = get8(s);  if (depth != 8)        assert(false, "8bit only, PNG not supported: 8-bit only");
            color = get8(s);  if (color > 6)         assert(false, "Bad ctype, corrupt PNG");
            if (color == 3) pal_img_n = 3; else if (color & 1) assert(false, "Bad ctype, corrupt PNG");
            comp  = get8(s);  if (comp) assert(false, "Bad comp method, corrupt PNG");
            filter= get8(s);  if (filter) assert(false, "Bad filter method, corrupt PNG");
            interlace = get8(s); if (interlace>1) assert(false, "Bad interlace method, corrupt PNG");
            if (!s.img_x || !s.img_y) assert(false, "0-pixel image, corrupt PNG");
            if (!pal_img_n) {
               // color bit 2 selects RGB vs grey, bit 4 adds an alpha channel
               s.img_n = (color & 2 ? 3 : 1) + (color & 4 ? 1 : 0);
               if ((1 << 30) / s.img_x / s.img_n < s.img_y) assert(false, "Image too large to decode");
               if (scan == SCAN_header) return 1;
            } else {
               // if paletted, then pal_n is our final components, and
               // img_n is # components to decompress/filter.
               s.img_n = 1;
               if ((1 << 30) / s.img_x / 4 < s.img_y) assert(false, "Too large, corrupt PNG");
               // if SCAN_header, have to scan to see if we have a tRNS
            }
            break;
         }

         case PNG_TYPE('P','L','T','E'):  {
            if (first) assert(false, "first not IHDR, corrupt PNG");
            if (c.length > 256*3) assert(false, "invalid PLTE, corrupt PNG");
            pal_len = c.length / 3;
            if (pal_len * 3 != c.length) assert(false, "invalid PLTE, corrupt PNG");
            // Store RGB triples as RGBA with opaque alpha; tRNS may overwrite alpha later.
            for (i=0; i < pal_len; ++i) {
               palette[i*4+0] = get8u(s);
               palette[i*4+1] = get8u(s);
               palette[i*4+2] = get8u(s);
               palette[i*4+3] = 255;
            }
            break;
         }

         case PNG_TYPE('t','R','N','S'): {
            if (first) assert(false, "first not IHDR, corrupt PNG");
            if (z.idata) assert(false, "tRNS after IDAT, corrupt PNG");
            if (pal_img_n) {
               // Paletted image: tRNS carries one alpha byte per palette entry.
               if (scan == SCAN_header) { s.img_n = 4; return 1; }
               if (pal_len == 0) assert(false, "tRNS before PLTE, corrupt PNG");
               if (c.length > pal_len) assert(false, "bad tRNS len, corrupt PNG");
               pal_img_n = 4;
               for (i=0; i < c.length; ++i)
                  palette[i*4+3] = get8u(s);
            } else {
               // Non-paletted image: tRNS carries a single color key, 16 bits per channel.
               if (!(s.img_n & 1)) assert(false, "tRNS with alpha, corrupt PNG");
               if (c.length != cast(uint) s.img_n*2) assert(false, "bad tRNS len, corrupt PNG");
               has_trans = 1;
               for (k=0; k < s.img_n; ++k)
                  tc[k] = cast(ubyte) get16(s); // non 8-bit images will be larger
            }
            break;
         }

         case PNG_TYPE('I','D','A','T'): {
            if (first) assert(false, "first not IHDR, corrupt PNG");
            if (pal_img_n && !pal_len) assert(false, "no PLTE, corrupt PNG");
            if (scan == SCAN_header) { s.img_n = pal_img_n; return 1; }
            // c.length comes straight from the file. Reject anything that would
            // push the accumulated size past IDAT_LIMIT before doing unsigned
            // arithmetic with it: a wrapped `ioff + c.length` would skip the
            // realloc below and let getn() write past the end of z.idata.
            if (c.length > IDAT_LIMIT || ioff > IDAT_LIMIT - c.length)
               assert(false, "IDAT size limit, corrupt PNG");
            if (ioff + c.length > idata_limit) {
               ubyte *p;
               if (idata_limit == 0) idata_limit = c.length > 4096 ? c.length : 4096;
               // Terminates without overflow: the target is at most 2^30.
               while (ioff + c.length > idata_limit)
                  idata_limit *= 2;
               p = cast(ubyte*) realloc(z.idata, idata_limit); if (p == null) assert(false, "Out of memory");
               z.idata = p;
            }
            if (!getn(s, z.idata+ioff,c.length)) assert(false, "outofdata, corrupt PNG");
            ioff += c.length;
            break;
         }

         case PNG_TYPE('I','E','N','D'): {
            uint raw_len;
            if (first) assert(false, "first not IHDR, corrupt PNG");
            if (scan != SCAN_load) return 1;
            if (z.idata == null) assert(false, "no IDAT, corrupt PNG");
            z.expanded = stbi_zlib_decode_malloc_guesssize_headerflag(z.idata, ioff, 16384, cast(int *) &raw_len, 1);
            if (z.expanded == null) return 0; // zlib should set error
            free(z.idata); z.idata = null;
            // Decode with one extra channel when the caller asked for alpha on a
            // non-paletted image, or when a tRNS color key has to be applied.
            if ((req_comp == s.img_n+1 && req_comp != 3 && !pal_img_n) || has_trans)
               s.img_out_n = s.img_n+1;
            else
               s.img_out_n = s.img_n;
            if (!create_png_image(z, z.expanded, raw_len, s.img_out_n, interlace)) return 0;
            if (has_trans)
               if (!compute_transparency(z, tc, s.img_out_n)) return 0;
            if (pal_img_n) {
               // pal_img_n == 3 or 4
               s.img_n = pal_img_n; // record the actual colors we had
               s.img_out_n = pal_img_n;
               if (req_comp >= 3) s.img_out_n = req_comp;
               if (!expand_palette(z, palette.ptr, pal_len, s.img_out_n))
                  return 0;
            }
            free(z.expanded); z.expanded = null;
            return 1;
         }

         default:
            // if critical, fail
            if (first) assert(false, "first not IHDR, corrupt PNG");
            // Bit 29 of the type is the lowercase bit of the first letter:
            // clear means the chunk is critical and cannot be skipped.
            if ((c.type & (1 << 29)) == 0) {
               assert(false, "PNG not supported: unknown chunk type");
            }
            skip(s, c.length);
            break;
      }
      // end of chunk, read and skip CRC
      get32(s);
   }
}
1090 
/// Decodes a whole PNG through `parse_png_file` and hands the pixel buffer to
/// the caller.
///
/// Params:
///   p        = decoder state; its `out_`, `expanded` and `idata` buffers are
///              freed and nulled before the normal return.
///   x, y     = receive the image width and height on success.
///   n        = if non-null, receives `p.s.img_n` on success.
///   req_comp = requested component count, 0..4 (0 = keep what was decoded);
///              any other value ends in `assert(false)`.
/// Returns: the pixel buffer taken from `p.out_` (passed through
///          `convert_format` when `req_comp` differs from the decoded
///          component count), or null on failure.
ubyte *do_png(png *p, int *x, int *y, int *n, int req_comp)
{
   if (req_comp < 0 || req_comp > 4)
      assert(false, "Internal error: bad req_comp");

   ubyte *pixels = null;

   const bool parsed = parse_png_file(p, SCAN_load, req_comp) != 0;
   if (parsed) {
      // Take ownership of the decoded buffer so the cleanup below leaves it alone.
      pixels = p.out_;
      p.out_ = null;

      const bool needs_conversion = (req_comp != 0) && (req_comp != p.s.img_out_n);
      if (needs_conversion) {
         pixels = convert_format(pixels, p.s.img_out_n, req_comp, p.s.img_x, p.s.img_y);
         p.s.img_out_n = req_comp;
         if (pixels is null)
            return pixels;
      }

      *x = p.s.img_x;
      *y = p.s.img_y;
      if (n !is null)
         *n = p.s.img_n;
   }

   // Release whatever the parser left behind.
   free(p.out_);
   p.out_ = null;
   free(p.expanded);
   p.expanded = null;
   free(p.idata);
   p.idata = null;

   return pixels;
}
1114 
/// Loads a PNG from the stream `s` by running `do_png` on a fresh `png`
/// decoder state bound to that stream.
///
/// Params:
///   s        = input stream / image info context.
///   x, y     = forwarded to `do_png` as the width and height outputs.
///   comp     = forwarded to `do_png` as its component-count output.
///   req_comp = forwarded to `do_png` as the requested component count.
/// Returns: whatever `do_png` returns.
ubyte *stbi_png_load(stbi *s, int *x, int *y, int *comp, int req_comp)
{
   png decoder;
   decoder.s = s;

   ubyte *pixels = do_png(&decoder, x, y, comp, req_comp);
   return pixels;
}