1 /// D translation of stb_image-1.33 (http://nothings.org/stb_image.c)
2 ///
3 /// This port only supports:
4 /// $(UL
5 ///   $(LI PNG 8-bit-per-channel only.)
6 /// )
7 ///
8 //============================    Contributors    =========================
9 //
10 // Image formats                                Optimizations & bugfixes
11 // Sean Barrett (jpeg, png, bmp)                Fabian "ryg" Giesen
12 // Nicolas Schulz (hdr, psd)
13 // Jonathan Dummer (tga)                     Bug fixes & warning fixes
14 // Jean-Marc Lienher (gif)                      Marc LeBlanc
15 // Tom Seddon (pic)                             Christpher Lloyd
16 // Thatcher Ulrich (psd)                        Dave Moore
17 // Won Chun
18 // the Horde3D community
19 // Extensions, features                            Janez Zemva
20 // Jetro Lauha (stbi_info)                      Jonathan Blow
21 // James "moose2000" Brown (iPhone PNG)         Laurent Gomila
22 // Ben "Disch" Wenger (io callbacks)            Aruelien Pocheville
23 // Martin "SpartanJ" Golini                     Ryamond Barbiero
24 // David Woo
25 
26 module dplug.graphics.pngload;
27 
28 // This has been revived for the sake of loading PNG without too much memory usage.
29 // It turns out stb_image is more efficient than the loaders using std.zlib.
30 // https://github.com/lgvz/imageformats/issues/26
31 
32 import core.stdc.stdlib;
33 import core.stdc.string;
34 
35 enum STBI_VERSION = 1;
36 
37 nothrow:
38 @nogc:
39 
// Component-count selectors. Each value equals the number of channels it names;
// STBI_default (0) is only meaningful as a requested component count.
enum : int
{
   STBI_default = 0, // only used for req_comp
   STBI_grey,        // 1
   STBI_grey_alpha,  // 2
   STBI_rgb,         // 3
   STBI_rgb_alpha    // 4
}
48 
49 // define faster low-level operations (typically SIMD support)
50 
// Decoding context shared by the loaders in this module: the input cursor
// plus the basic image description filled in while parsing.
struct stbi
{
   // image dimensions in pixels
   uint img_x, img_y;
   // img_n: channels stored in the file; img_out_n: channels of the produced image
   int img_n, img_out_n;

   int buflen;
   ubyte[128] buffer_start;

   const(ubyte) *img_buffer;          // current read position
   const(ubyte) *img_buffer_end;      // one past the last readable byte
   const(ubyte) *img_buffer_original; // start of the input, as given to start_mem
}
65 
66 
// Point the context `s` at an in-memory input of `len` bytes starting at `buffer`.
// The read cursor and the remembered origin both start at `buffer`.
void start_mem(stbi *s, const(ubyte)*buffer, int len)
{
   const(ubyte)* first = buffer;
   const(ubyte)* pastEnd = buffer + len;

   s.img_buffer_original = first;
   s.img_buffer          = first;
   s.img_buffer_end      = pastEnd;
}
74 
/// Loads a PNG image from memory.
///
/// Params:
///   buffer              = the complete PNG file contents.
///   width               = receives the image width (written by `stbi_png_load`).
///   height              = receives the image height (written by `stbi_png_load`).
///   components          = receives the component count (written by `stbi_png_load`).
///   requestedComponents = desired number of channels, forwarded to `stbi_png_load`.
///
/// Returns: whatever `stbi_png_load` returns for this input.
///
/// Note: this module is `nothrow @nogc`, so no exception is ever thrown.
/// The decoding routines in this file report corrupt input through
/// `assert(false, ...)`.
ubyte* stbi_load_png_from_memory(const(void)[] buffer, out int width, out int height, out int components, int requestedComponents)
{
   // The decoder tracks the input size in an `int`; refuse inputs whose
   // length would be silently truncated by the cast below.
   if (buffer.length > int.max)
      assert(false, "PNG buffer too large");

   stbi s;
   start_mem(&s, cast(const(ubyte)*)buffer.ptr, cast(int)(buffer.length));
   return stbi_png_load(&s, &width, &height, &components, requestedComponents);
}
83 
84 
85 //
86 // Common code used by all image loaders
87 //
88 
// Scan modes passed as the `scan` argument of the PNG parser.
enum : int
{
   SCAN_load   = 0,
   SCAN_type   = 1,
   SCAN_header = 2
}
95 
96 
// Read one byte from the input and advance the cursor.
// Returns 0 without advancing once the cursor has reached the end.
int get8(stbi *s)
{
   if (s.img_buffer >= s.img_buffer_end)
      return 0;

   int value = *s.img_buffer;
   ++s.img_buffer;
   return value;
}
104 
// Returns 1 when the read cursor is at or past the end of the input, else 0.
int at_eof(stbi *s)
{
   return (s.img_buffer < s.img_buffer_end) ? 0 : 1;
}
109 
// Same as get8, with the result narrowed to ubyte.
ubyte get8u(stbi *s)
{
   int value = get8(s);
   return cast(ubyte) value;
}
114 
// Advance the read cursor by `n` bytes.
//
// `n` ultimately comes from chunk lengths in the file, so it cannot be
// trusted: a negative value, or one larger than the bytes left, would move
// the cursor outside the input (and a cursor before the start would let
// get8 read out of bounds). In both cases the cursor is parked at the end
// of the input, so subsequent reads see end-of-file.
void skip(stbi *s, int n)
{
   ptrdiff_t remaining = s.img_buffer_end - s.img_buffer;
   if (n < 0 || n > remaining)
   {
      s.img_buffer = s.img_buffer_end;
      return;
   }
   s.img_buffer += n;
}
119 
// Copy exactly `n` bytes from the input into `buffer` and advance the cursor.
// Returns 1 on success; returns 0 (copying nothing, cursor unchanged) when
// `n` is negative or fewer than `n` bytes remain.
int getn(stbi *s, ubyte *buffer, int n)
{
   // Compare against the remaining byte count instead of forming
   // `img_buffer + n`, which is out of range for hostile values of n.
   // A negative n would otherwise be converted to a huge memcpy size.
   if (n < 0)
      return 0;
   if (n > s.img_buffer_end - s.img_buffer)
      return 0;

   memcpy(buffer, s.img_buffer, n);
   s.img_buffer += n;
   return 1;
}
129 
// Read a 16-bit big-endian value (first byte is the high byte).
int get16(stbi *s)
{
   int hi = get8(s);
   int lo = get8(s);
   return (hi << 8) + lo;
}
135 
// Read a 32-bit big-endian value as two consecutive 16-bit big-endian halves.
uint get32(stbi *s)
{
   uint hi = get16(s);
   uint lo = get16(s);
   return (hi << 16) + lo;
}
141 
// Read a 16-bit little-endian value (first byte is the low byte).
int get16le(stbi *s)
{
   int lo = get8(s);
   int hi = get8(s);
   return lo + (hi << 8);
}
147 
// Read a 32-bit little-endian value as two consecutive 16-bit little-endian halves.
uint get32le(stbi *s)
{
   uint lo = get16le(s);
   uint hi = get16le(s);
   return lo + (hi << 16);
}
153 
154 //
155 //  generic converter from built-in img_n to req_comp
156 //    individual types do this automatically as much as possible (e.g. jpeg
157 //    does all cases internally since it needs to colorspace convert anyway,
158 //    and it never has alpha, so very few cases ). png can automatically
159 //    interleave an alpha=255 channel, but falls back to this for other cases
160 //
161 //  assume data buffer is malloced, so malloc a new one and free that one
162 //  only failure mode is malloc failing
163 
// Weighted grey value of an RGB triple: (77*r + 150*g + 29*b) / 256.
// The weights sum to 256, so (255,255,255) maps to 255.
ubyte compute_y(int r, int g, int b)
{
   int weighted = r*77 + g*150 + 29*b;
   return cast(ubyte) (weighted >> 8);
}
168 
// Convert an x*y image with img_n interleaved 8-bit components into one with
// req_comp components.
//
// Returns `data` itself when no conversion is needed. Otherwise a new buffer
// is malloc'ed, filled, and returned, and `data` is freed. If that
// allocation fails, `data` is freed and the function halts via assert(false).
//
// Conversions: grey is replicated into R, G and B; RGB is reduced to grey
// with compute_y; a missing alpha channel is filled with 255; an unwanted
// alpha channel is dropped.
ubyte *convert_format(ubyte *data, int img_n, int req_comp, uint x, uint y)
{
    if (req_comp == img_n) return data;
    assert(req_comp >= 1 && req_comp <= 4);

    ubyte *good = cast(ubyte*) malloc(req_comp * x * y);
    if (good == null) {
        free(data);
        assert(false);
    }

    for (int row = 0; row < cast(int) y; ++row) {
        ubyte *src  = data + row * x * img_n;
        ubyte *dest = good + row * x * req_comp;
        int last = x - 1; // index of the last pixel in the scanline

        // Select the conversion once per scanline rather than once per pixel.
        switch (img_n * 8 + req_comp)
        {
            case 1 * 8 + 2: // grey -> grey+alpha
                for (int i = last; i >= 0; --i) {
                    dest[0] = src[0];
                    dest[1] = 255;
                    src += 1; dest += 2;
                }
                break;
            case 1 * 8 + 3: // grey -> rgb
                for (int i = last; i >= 0; --i) {
                    ubyte g = src[0];
                    dest[2] = g; dest[1] = g; dest[0] = g;
                    src += 1; dest += 3;
                }
                break;
            case 1 * 8 + 4: // grey -> rgba
                for (int i = last; i >= 0; --i) {
                    ubyte g = src[0];
                    dest[2] = g; dest[1] = g; dest[0] = g;
                    dest[3] = 255;
                    src += 1; dest += 4;
                }
                break;
            case 2 * 8 + 1: // grey+alpha -> grey
                for (int i = last; i >= 0; --i) {
                    dest[0] = src[0];
                    src += 2; dest += 1;
                }
                break;
            case 2 * 8 + 3: // grey+alpha -> rgb
                for (int i = last; i >= 0; --i) {
                    ubyte g = src[0];
                    dest[2] = g; dest[1] = g; dest[0] = g;
                    src += 2; dest += 3;
                }
                break;
            case 2 * 8 + 4: // grey+alpha -> rgba
                for (int i = last; i >= 0; --i) {
                    ubyte g = src[0];
                    dest[2] = g; dest[1] = g; dest[0] = g;
                    dest[3] = src[1];
                    src += 2; dest += 4;
                }
                break;
            case 3 * 8 + 4: // rgb -> rgba
                for (int i = last; i >= 0; --i) {
                    dest[0] = src[0];
                    dest[1] = src[1];
                    dest[2] = src[2];
                    dest[3] = 255;
                    src += 3; dest += 4;
                }
                break;
            case 3 * 8 + 1: // rgb -> grey
                for (int i = last; i >= 0; --i) {
                    dest[0] = compute_y(src[0], src[1], src[2]);
                    src += 3; dest += 1;
                }
                break;
            case 3 * 8 + 2: // rgb -> grey+alpha
                for (int i = last; i >= 0; --i) {
                    dest[0] = compute_y(src[0], src[1], src[2]);
                    dest[1] = 255;
                    src += 3; dest += 2;
                }
                break;
            case 4 * 8 + 1: // rgba -> grey
                for (int i = last; i >= 0; --i) {
                    dest[0] = compute_y(src[0], src[1], src[2]);
                    src += 4; dest += 1;
                }
                break;
            case 4 * 8 + 2: // rgba -> grey+alpha
                for (int i = last; i >= 0; --i) {
                    dest[0] = compute_y(src[0], src[1], src[2]);
                    dest[1] = src[3];
                    src += 4; dest += 2;
                }
                break;
            case 4 * 8 + 3: // rgba -> rgb
                for (int i = last; i >= 0; --i) {
                    dest[0] = src[0];
                    dest[1] = src[1];
                    dest[2] = src[2];
                    src += 4; dest += 3;
                }
                break;
            default: assert(0);
        }
    }

    free(data);
    return good;
}
246 
247 // public domain zlib decode    v0.2  Sean Barrett 2006-11-18
248 //    simple implementation
249 //      - all input must be provided in an upfront buffer
250 //      - all output is written to a single output buffer (can malloc/realloc)
251 //    performance
252 //      - fast huffman
253 
// Width in bits of the direct-lookup ("fast") Huffman table, and the mask
// that extracts that many low bits from the bit buffer.
// The fast way is faster to check than the jpeg huffman, but the slow way is slower.
enum int ZFAST_BITS = 9; // accelerate all cases in default tables
enum int ZFAST_MASK = (1 << ZFAST_BITS) - 1;
257 
// zlib-style Huffman decoding tables
// (jpeg packs from the left, zlib from the right, so the code can't be shared)
struct zhuffman
{
   ushort[1 << ZFAST_BITS] fast; // direct lookup by the next ZFAST_BITS input bits; 0xffff = not resolved here
   ushort[16] firstcode;         // first code of each code length
   int[17] maxcode;              // per-length upper bound, pre-shifted to 16 bits; [16] is a sentinel
   ushort[16] firstsymbol;       // index into size/value of the first symbol of each length
   ubyte[288] size;              // code length of each entry
   ushort[288] value;            // symbol decoded by each entry
}
269 
// Reverse the order of the low 16 bits of n. Bits above bit 15 are discarded
// by the masks, so the result is always in 0 .. 0xFFFF.
int bitreverse16(int n)
{
  // Swap progressively smaller groups: bytes, nibbles, bit pairs, single bits.
  // The four swap stages commute, so their order does not affect the result.
  int swapped = ((n & 0xFF00) >> 8) | ((n & 0x00FF) << 8);
  swapped = ((swapped & 0xF0F0) >> 4) | ((swapped & 0x0F0F) << 4);
  swapped = ((swapped & 0xCCCC) >> 2) | ((swapped & 0x3333) << 2);
  swapped = ((swapped & 0xAAAA) >> 1) | ((swapped & 0x5555) << 1);
  return swapped;
}
278 
// Reverse the low `bits` bits of v (bits must be at most 16).
int bit_reverse(int v, int bits)
{
   assert(bits <= 16);
   // Reverse all 16 bits, then shift away the unwanted low part:
   // e.g. for 11 bits, reverse 16 and drop the bottom 5.
   int reversed16 = bitreverse16(v);
   int dropped = 16 - bits;
   return reversed16 >> dropped;
}
286 
// Build the decode tables in `z` from a list of `num` code lengths
// (`sizelist[i]` is the bit length of symbol i, 0 meaning "unused").
//
// Returns 1 on success. An inconsistent set of lengths halts through
// assert(false, ...), like the other corrupt-data checks in this file.
int zbuild_huffman(zhuffman *z, ubyte *sizelist, int num)
{
   int i,k=0;
   int code;
   int[16] next_code;
   int[17] sizes;

   // DEFLATE spec for generating codes
   memset(sizes.ptr, 0, sizes.sizeof);
   memset(z.fast.ptr, 255, z.fast.sizeof); // every fast entry starts as 0xffff = "unresolved"
   for (i=0; i < num; ++i)
      ++sizes[sizelist[i]];
   sizes[0] = 0;
   // There cannot be more codes of length i than there are i-bit patterns.
   // This validates file data, so it must not be a plain assert (plain
   // asserts are compiled out in release builds).
   for (i=1; i < 16; ++i)
      if (sizes[i] > (1 << i))
         assert(false, "Bad sizes, corrupt PNG");
   code = 0;
   for (i=1; i < 16; ++i) {
      next_code[i] = code;
      z.firstcode[i] = cast(ushort) code;
      z.firstsymbol[i] = cast(ushort) k;
      code = (code + sizes[i]);
      if (sizes[i])
         if (code-1 >= (1 << i))
             assert(false);
      z.maxcode[i] = code << (16-i); // preshift for inner loop
      code <<= 1;
      k += sizes[i];
   }
   z.maxcode[16] = 0x10000; // sentinel
   for (i=0; i < num; ++i) {
      int s = sizelist[i];
      if (s) {
         // c = slot of this symbol in the size/value arrays
         int c = next_code[s] - z.firstcode[s] + z.firstsymbol[s];
         z.size[c] = cast(ubyte)s;
         z.value[c] = cast(ushort)i;
         if (s <= ZFAST_BITS) {
            // Short codes: fill every fast-table index whose low s bits
            // equal the (bit-reversed) code.
            int k_ = bit_reverse(next_code[s],s);
            while (k_ < (1 << ZFAST_BITS)) {
               z.fast[k_] = cast(ushort) c;
               k_ += (1 << s);
            }
         }
         ++next_code[s];
      }
   }
   return 1;
}
334 
335 // zlib-from-memory implementation for PNG reading
336 //    because PNG allows splitting the zlib stream arbitrarily,
337 //    and it's annoying structurally to have PNG call ZLIB call PNG,
338 //    we require PNG read all the IDATs and combine them into a single
339 //    memory buffer
340 
// State of one zlib/DEFLATE decode: input cursor, bit buffer, output buffer
// and the two Huffman tables of the current block.
struct zbuf
{
   const(ubyte) *zbuffer;     // next input byte
   const(ubyte) *zbuffer_end; // one past the last input byte
   int num_bits;              // number of valid bits in code_buffer
   uint code_buffer;          // bit buffer, consumed from the low end

   ubyte *zout;               // next output byte
   ubyte *zout_start;         // start of the output buffer
   ubyte *zout_end;           // one past the end of the output buffer
   int   z_expandable;        // non-zero if the output buffer may be realloc'ed

   zhuffman z_length, z_distance;
}
355 
// Read the next compressed input byte and advance; yields 0 once the input is exhausted.
int zget8(zbuf *z)
{
   if (z.zbuffer < z.zbuffer_end)
   {
      int value = *z.zbuffer;
      ++z.zbuffer;
      return value;
   }
   return 0;
}
361 
// Append whole input bytes above the bits already held in the bit buffer.
// At least one byte is always added; filling stops once more than 24 bits are held.
void fill_bits(zbuf *z)
{
   for (;;) {
      assert(z.code_buffer < (1U << z.num_bits));
      int next = zget8(z);
      z.code_buffer |= next << z.num_bits;
      z.num_bits += 8;
      if (z.num_bits > 24)
         break;
   }
}
370 
// Consume and return the next n bits of the stream (low bits first),
// refilling the bit buffer first if it holds fewer than n bits.
uint zreceive(zbuf *z, int n)
{
   if (z.num_bits < n)
      fill_bits(z);

   uint mask = (1 << n) - 1;
   uint bits = z.code_buffer & mask;
   z.num_bits -= n;
   z.code_buffer >>= n;
   return bits;
}
380 
// Decode one symbol from the bit stream of `a` using table `z`.
// Returns the symbol value, or -1 if the upcoming bits do not form a valid code.
int zhuffman_decode(zbuf *a, zhuffman *z)
{
   int b,s,k;
   if (a.num_bits < 16) fill_bits(a);
   // Fast path: codes of at most ZFAST_BITS bits are resolved by direct lookup.
   b = z.fast[a.code_buffer & ZFAST_MASK];
   if (b < 0xffff) {
      s = z.size[b];
      a.code_buffer >>= s;
      a.num_bits -= s;
      return z.value[b];
   }

   // not resolved by fast table, so compute it the slow way
   // use jpeg approach, which requires MSbits at top
   k = bit_reverse(a.code_buffer, 16);
   // maxcode[16] is a sentinel above any 16-bit value, so this loop terminates.
   for (s=ZFAST_BITS+1; ; ++s)
      if (k < z.maxcode[s])
         break;
   if (s == 16) return -1; // invalid code!
   // code size is s, so:
   b = (k >> (16-s)) - z.firstcode[s] + z.firstsymbol[s];
   // With a corrupt table the computed slot can lie outside size/value or
   // belong to a code of another length. Report that as an invalid code
   // rather than relying on an assert that release builds compile out.
   if (b < 0 || b >= cast(int) z.size.length) return -1;
   if (z.size[b] != s) return -1;
   a.code_buffer >>= s;
   a.num_bits -= s;
   return z.value[b];
}
407 
// Grow the output buffer of `z` so that at least n more bytes fit after the
// current write position. The capacity is doubled until it is large enough
// and the output pointers are re-based onto the reallocated block.
//
// Returns 1 on success. Halts through assert(false, ...) when the buffer is
// not expandable, when the required size does not fit in an int, or when
// realloc fails.
int expand(zbuf *z, int n)  // need to make room for n bytes
{
   ubyte *q;
   int cur, limit;
   if (!z.z_expandable)
      assert(false, "Output buffer limit, corrupt PNG");
   cur   = cast(int) (z.zout     - z.zout_start);
   limit = cast(int) (z.zout_end - z.zout_start);
   // cur + n must be representable, otherwise the comparison below is meaningless.
   if (n < 0 || cur > int.max - n)
      assert(false, "Out of memory");
   // Doubling a zero capacity would never terminate.
   if (limit <= 0)
      limit = 1;
   while (cur + n > limit) {
      if (limit > int.max / 2)
         assert(false, "Out of memory");
      limit *= 2;
   }
   q = cast(ubyte*) realloc(z.zout_start, limit);
   if (q == null)
      assert(false, "Out of memory");
   z.zout_start = q;
   z.zout       = q + cur;
   z.zout_end   = q + limit;
   return 1;
}
426 
// Base match length for each length symbol (indexed by symbol - 257).
// The trailing zeros pad the table for symbols that carry no valid length.
static immutable int[31] length_base = [
     3,   4,   5,   6,   7,   8,   9,  10,
    11,  13,  15,  17,  19,  23,  27,  31,
    35,  43,  51,  59,  67,  83,  99, 115,
   131, 163, 195, 227, 258,   0,   0 ];

// Number of extra bits to read after each length symbol.
static immutable int[31] length_extra = [
   0, 0, 0, 0, 0, 0, 0, 0,
   1, 1, 1, 1, 2, 2, 2, 2,
   3, 3, 3, 3, 4, 4, 4, 4,
   5, 5, 5, 5, 0, 0, 0 ];

// Base match distance for each distance symbol.
static immutable int[32] dist_base = [
       1,     2,     3,     4,     5,     7,     9,    13,
      17,    25,    33,    49,    65,    97,   129,   193,
     257,   385,   513,   769,  1025,  1537,  2049,  3073,
    4097,  6145,  8193, 12289, 16385, 24577,     0,     0 ];

// Number of extra bits to read after each distance symbol.
// The last two entries are written out explicitly as the zero padding.
static immutable int[32] dist_extra = [
    0,  0,  0,  0,  1,  1,  2,  2,
    3,  3,  4,  4,  5,  5,  6,  6,
    7,  7,  8,  8,  9,  9, 10, 10,
   11, 11, 12, 12, 13, 13,  0,  0 ];
440 
// Decode one Huffman-coded DEFLATE block into the output buffer of `a`,
// using the tables already stored in a.z_length and a.z_distance.
// Returns 1 when the end-of-block symbol (256) is reached, 0 if the output
// could not be expanded. Corrupt data halts through assert(false, ...).
int parse_huffman_block(zbuf *a)
{
   for(;;) {
      int z = zhuffman_decode(a, &a.z_length);
      if (z < 256) {
         // literal byte
         if (z < 0)
             assert(false, "Bad Huffman code, corrupt PNG");
         if (a.zout >= a.zout_end) if (!expand(a, 1)) return 0;
         *a.zout++ = cast(ubyte) z;
      } else {
         ubyte *p;
         int len,dist;
         if (z == 256) return 1;
         // Symbols 286 and 287 exist in the fixed code but denote no length;
         // their length_base entries are 0.
         if (z >= 286)
            assert(false, "Bad Huffman code, corrupt PNG");
         z -= 257;
         len = length_base[z];
         if (length_extra[z]) len += zreceive(a, length_extra[z]);
         z = zhuffman_decode(a, &a.z_distance);
         // Distance symbols 30 and 31 denote no distance (dist_base is 0 for
         // them); accepting them would copy from the unwritten output position.
         if (z < 0 || z >= 30) assert(false, "Bad Huffman code, corrupt PNG");
         dist = dist_base[z];
         if (dist_extra[z]) dist += zreceive(a, dist_extra[z]);
         if (a.zout - a.zout_start < dist) assert(false, "Bad dist, corrupt PNG");
         if (a.zout + len > a.zout_end) if (!expand(a, len)) return 0;
         // Byte-by-byte copy on purpose: source and destination overlap when dist < len.
         p = a.zout - dist;
         while (len--)
            *a.zout++ = *p++;
      }
   }
}
469 
// Read the header of a dynamic-Huffman DEFLATE block and build the
// literal/length and distance tables of `a` from it.
// Returns 1 on success, 0 if a table could not be built. Corrupt code-length
// data halts through assert(false, ...).
int compute_huffman_codes(zbuf *a)
{
   static immutable ubyte[19] length_dezigzag = [ 16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15 ];
   zhuffman z_codelength;
   ubyte[286+32+137] lencodes;//padding for maximum single op
   ubyte[19] codelength_sizes;
   int i,n;

   int hlit  = zreceive(a,5) + 257;
   int hdist = zreceive(a,5) + 1;
   int hclen = zreceive(a,4) + 4;
   int ntot  = hlit + hdist; // total number of code lengths to read

   // The code-length code lengths are stored in a permuted order.
   memset(codelength_sizes.ptr, 0, codelength_sizes.sizeof);
   for (i=0; i < hclen; ++i) {
      int s = zreceive(a,3);
      codelength_sizes[length_dezigzag[i]] = cast(ubyte) s;
   }
   if (!zbuild_huffman(&z_codelength, codelength_sizes.ptr, 19)) return 0;

   n = 0;
   while (n < ntot) {
      int c = zhuffman_decode(a, &z_codelength);
      // Must be a hard check: zhuffman_decode returns -1 on an invalid code
      // and a plain assert would be compiled out in release builds.
      if (c < 0 || c >= 19) assert(false, "Bad codelengths, corrupt PNG");
      if (c < 16)
         lencodes[n++] = cast(ubyte) c;
      else {
         ubyte fill = 0;
         if (c == 16) {
            // repeat the previous length 3..6 times; needs a previous entry
            c = zreceive(a,2)+3;
            if (n == 0) assert(false, "Bad codelengths, corrupt PNG");
            fill = lencodes[n-1];
         } else if (c == 17) {
            c = zreceive(a,3)+3;   // 3..10 zeros
         } else {
            c = zreceive(a,7)+11;  // 11..138 zeros
         }
         // A run must not extend past the announced number of lengths.
         // Without this check a maximal run starting at the last slot writes
         // past the end of lencodes.
         if (ntot - n < c) assert(false, "Bad codelengths, corrupt PNG");
         memset(lencodes.ptr+n, fill, c);
         n += c;
      }
   }
   if (n != ntot) assert(false, "Bad codelengths, corrupt PNG");
   if (!zbuild_huffman(&a.z_length, lencodes.ptr, hlit)) return 0;
   if (!zbuild_huffman(&a.z_distance, lencodes.ptr+hlit, hdist)) return 0;
   return 1;
}
515 
// Copy one stored (uncompressed) DEFLATE block to the output of `a`.
// The 4-byte block header holds LEN and its one's complement NLEN, both
// little-endian. Returns 1 on success, 0 if the output could not be expanded;
// corrupt data halts through assert(false, ...).
int parse_uncompressed_block(zbuf *a)
{
   ubyte[4] hdr;
   int filled = 0;

   // Drop the bits left over in the current partial byte.
   if (a.num_bits & 7)
      zreceive(a, a.num_bits & 7); // discard

   // Whole bytes already sitting in the bit buffer are the start of the header.
   for (; a.num_bits > 0; a.num_bits -= 8) {
      hdr[filled++] = cast(ubyte) (a.code_buffer & 255);
      a.code_buffer >>= 8;
   }
   assert(a.num_bits == 0);

   // The rest of the header comes straight from the input.
   for (; filled < 4; ++filled)
      hdr[filled] = cast(ubyte) zget8(a);

   int len  = hdr[1] * 256 + hdr[0];
   int nlen = hdr[3] * 256 + hdr[2];
   if (nlen != (len ^ 0xffff)) assert(false, "Zlib corrupt, corrupt PNG");
   if (a.zbuffer + len > a.zbuffer_end) assert(false, "Read past buffer, corrupt PNG");
   if (a.zout + len > a.zout_end) {
      if (!expand(a, len))
         return 0;
   }
   memcpy(a.zout, a.zbuffer, len);
   a.zout += len;
   a.zbuffer += len;
   return 1;
}
544 
// Read and validate the two-byte zlib stream header (CMF, FLG).
// Returns 1 when the header is acceptable; anything else halts through
// assert(false, ...).
int parse_zlib_header(zbuf *a)
{
   int cmf = zget8(a);
   int flg = zget8(a);
   int method = cmf & 15;
   // the window size (cmf >> 4) is ignored: output is fully buffered
   int checked = cmf*256 + flg;

   if (checked % 31 != 0) assert(false, "Bad zlib header, corrupt PNG"); // zlib spec
   if (flg & 32) assert(false, "No preset dict, corrupt PNG"); // preset dictionary not allowed in png
   if (method != 8) assert(false, "Bad compression, corrupt PNG");  // DEFLATE required for png
   return 1;
}
557 
// Code lengths of the fixed DEFLATE Huffman codes.
//
// The tables are initialized at compile time, so they are valid before any
// code runs and no thread can ever observe them half-filled (they used to be
// filled lazily, without synchronization, on first use).

// Literal/length code lengths: 0..143 -> 8, 144..255 -> 9, 256..279 -> 7, 280..287 -> 8.
private ubyte[288] make_default_length()
{
   ubyte[288] t;
   foreach (i; 0 .. 288)
      t[i] = cast(ubyte) (i <= 143 ? 8 : i <= 255 ? 9 : i <= 279 ? 7 : 8);
   return t;
}

// Distance code lengths: all 32 entries are 5.
private ubyte[32] make_default_distance()
{
   ubyte[32] t;
   foreach (i; 0 .. 32)
      t[i] = 5;
   return t;
}

__gshared ubyte[288] default_length = make_default_length();
__gshared ubyte[32] default_distance = make_default_distance();

// Kept for callers that still invoke it: rewrites both tables with the same
// values they are statically initialized to.
void init_defaults()
{
   int i;   // use <= to match clearly with spec
   for (i=0; i <= 143; ++i)     default_length[i]   = 8;
   for (   ; i <= 255; ++i)     default_length[i]   = 9;
   for (   ; i <= 279; ++i)     default_length[i]   = 7;
   for (   ; i <= 287; ++i)     default_length[i]   = 8;

   for (i=0; i <=  31; ++i)     default_distance[i] = 5;
}
572 
// A quick hack to only allow decoding some of a PNG; real streaming support
// would be the proper solution. When non-zero, parse_zlib stops after the
// first block that brings the output above 65536 bytes.
__gshared int stbi_png_partial;

// Decode a DEFLATE stream (preceded by a zlib header when parse_header is
// non-zero) from the input of `a` into its output buffer.
// Returns 1 on success and 0 on failure.
int parse_zlib(zbuf *a, int parse_header)
{
   if (parse_header && !parse_zlib_header(a))
      return 0;

   a.num_bits = 0;
   a.code_buffer = 0;

   for (;;) {
      int lastBlock = zreceive(a,1);
      int blockType = zreceive(a,2);

      if (blockType == 3)
         return 0; // reserved block type

      if (blockType == 0) {
         if (!parse_uncompressed_block(a)) return 0;
      } else {
         if (blockType == 1) {
            // use fixed code lengths
            if (!default_distance[31]) init_defaults();
            if (!zbuild_huffman(&a.z_length  , default_length.ptr  , 288)) return 0;
            if (!zbuild_huffman(&a.z_distance, default_distance.ptr,  32)) return 0;
         } else {
            // code lengths are transmitted in the block itself
            if (!compute_huffman_codes(a)) return 0;
         }
         if (!parse_huffman_block(a)) return 0;
      }

      if (stbi_png_partial && a.zout - a.zout_start > 65536)
         break;
      if (lastBlock)
         break;
   }
   return 1;
}
604 
// Install obuf (olen bytes, growable when exp is non-zero) as the output
// buffer of `a`, then run the decoder. Returns the result of parse_zlib.
int do_zlib(zbuf *a, ubyte *obuf, int olen, int exp, int parse_header)
{
   a.z_expandable = exp;
   a.zout_start   = obuf;
   a.zout_end     = obuf + olen;
   a.zout         = obuf;

   int ok = parse_zlib(a, parse_header);
   return ok;
}
614 
// Decode a zlib stream (with header) into a malloc'ed buffer whose initial
// capacity is initial_size. On success returns the buffer and, when outlen
// is non-null, stores the decoded size there. Returns null on failure.
ubyte *stbi_zlib_decode_malloc_guesssize(const(ubyte) *buffer, int len, int initial_size, int *outlen)
{
   // Exactly the header-flag variant with header parsing switched on.
   return stbi_zlib_decode_malloc_guesssize_headerflag(buffer, len, initial_size, outlen, 1);
}
630 
// Decode a zlib stream (with header) into a malloc'ed buffer, starting from
// a 16 KiB output guess.
ubyte *stbi_zlib_decode_malloc(const(ubyte) *buffer, int len, int *outlen)
{
   enum int initialGuess = 16384;
   return stbi_zlib_decode_malloc_guesssize(buffer, len, initialGuess, outlen);
}
635 
// Decode a DEFLATE stream into a malloc'ed buffer whose initial capacity is
// initial_size; a zlib header is expected only when parse_header is non-zero.
// On success returns the buffer and, when outlen is non-null, stores the
// decoded size there. On failure the buffer is freed and null is returned.
ubyte *stbi_zlib_decode_malloc_guesssize_headerflag(const(ubyte) *buffer, int len, int initial_size, int *outlen, int parse_header)
{
   ubyte *initial = cast(ubyte*) malloc(initial_size);
   if (initial == null)
      return null;

   zbuf ctx;
   ctx.zbuffer = buffer;
   ctx.zbuffer_end = buffer + len;

   if (!do_zlib(&ctx, initial, initial_size, 1, parse_header)) {
      // the buffer may have been reallocated, so free the current start
      free(ctx.zout_start);
      return null;
   }

   if (outlen)
      *outlen = cast(int) (ctx.zout - ctx.zout_start);
   return ctx.zout_start;
}
651 
// Decode a zlib stream (with header) into the caller's fixed-size buffer.
// Returns the number of bytes produced, or -1 on failure.
int stbi_zlib_decode_buffer(ubyte* obuffer, int olen, const(ubyte)* ibuffer, int ilen)
{
   zbuf ctx;
   ctx.zbuffer = ibuffer;
   ctx.zbuffer_end = ibuffer + ilen;

   if (!do_zlib(&ctx, obuffer, olen, 0, 1))
      return -1;
   return cast(int) (ctx.zout - ctx.zout_start);
}
662 
// Decode a raw DEFLATE stream (no zlib header) into a malloc'ed buffer,
// starting from a 16 KiB output guess. On success returns the buffer and,
// when outlen is non-null, stores the decoded size there. Returns null on failure.
ubyte *stbi_zlib_decode_noheader_malloc(const(ubyte) *buffer, int len, int *outlen)
{
   // Exactly the header-flag variant with a 16384-byte guess and no header.
   return stbi_zlib_decode_malloc_guesssize_headerflag(buffer, len, 16384, outlen, 0);
}
678 
// Decode a raw DEFLATE stream (no zlib header) into the caller's fixed-size
// buffer. Returns the number of bytes produced, or -1 on failure.
int stbi_zlib_decode_noheader_buffer(ubyte *obuffer, int olen, const(ubyte) *ibuffer, int ilen)
{
   zbuf ctx;
   ctx.zbuffer = ibuffer;
   ctx.zbuffer_end = ibuffer + ilen;

   if (!do_zlib(&ctx, obuffer, olen, 0, 0))
      return -1;
   return cast(int) (ctx.zout - ctx.zout_start);
}
689 
690 // public domain "baseline" PNG decoder   v0.10  Sean Barrett 2006-11-18
691 //    simple implementation
692 //      - only 8-bit samples
693 //      - no CRC checking
694 //      - allocates lots of intermediate memory
695 //        - avoids problem of streaming data between subsystems
696 //        - avoids explicit window management
697 //    performance
698 //      - uses stb_zlib, a PD zlib implementation with fast huffman decoding
699 
700 
// Header of one PNG chunk, as read by get_chunk_header.
struct chunk
{
   uint length; // first 32-bit big-endian field of the header
   uint type;   // second 32-bit big-endian field (four packed type characters)
}
706 
// Pack four bytes into a 32-bit value with `a` as the most significant byte,
// matching how get32 reads a chunk type from the file.
uint PNG_TYPE(ubyte a, ubyte b, ubyte c, ubyte d)
{
   uint packed = a;
   packed = (packed << 8) | b;
   packed = (packed << 8) | c;
   packed = (packed << 8) | d;
   return packed;
}
711 
// Read a chunk header from the input: 32-bit length first, then 32-bit type.
chunk get_chunk_header(stbi *s)
{
   uint len = get32(s);
   uint type = get32(s);
   return chunk(len, type);
}
719 
// Consume the 8-byte PNG signature from the input.
// Returns 1 when it matches; a mismatch halts through assert(false, ...).
static int check_png_header(stbi *s)
{
   static immutable ubyte[8] png_sig = [ 137, 80, 78, 71, 13, 10, 26, 10 ];
   foreach (expected; png_sig)
   {
      ubyte actual = get8u(s);
      if (actual != expected)
         assert(false, "Bad PNG sig, not a PNG");
   }
   return 1;
}
732 
// Per-image PNG decoding state.
struct png
{
   stbi *s;         // input context and image description
   ubyte *idata;
   ubyte *expanded;
   ubyte *out_;     // decoded pixels (malloc'ed by the image-creation routines)
}
740 
741 
// Scanline filter types. 0..4 are the values stored in the file; the two
// *_first variants are internal substitutes used on the first row, where
// there is no previous row to sample.
enum : int
{
   F_none        = 0,
   F_sub         = 1,
   F_up          = 2,
   F_avg         = 3,
   F_paeth       = 4,
   F_avg_first   = 5,
   F_paeth_first = 6
}

// Filter to apply on the first row, indexed by the filter type stored in the file.
static immutable ubyte[5] first_row_filter =
[
   F_none,        // none
   F_sub,         // sub
   F_none,        // up
   F_avg_first,   // avg
   F_paeth_first  // paeth
];
752 
// Paeth predictor: of a, b and c, return the one closest to a + b - c.
// Ties are resolved in the order a, then b, then c.
static int paeth(int a, int b, int c)
{
   int estimate = a + b - c;
   int distA = abs(estimate - a);
   int distB = abs(estimate - b);
   int distC = abs(estimate - c);

   if (distA <= distB && distA <= distC)
      return a;
   return (distB <= distC) ? b : c;
}
763 
// Undo the scanline filters of inflated PNG data.
//
// `raw` holds y scanlines, each made of one filter-type byte followed by x
// pixels of s.img_n bytes. The result is written to a freshly malloc'ed
// a.out_ with out_n bytes per pixel; when out_n == img_n + 1 the extra
// component of every pixel is set to 255. Returns 1; corrupt data and
// allocation failure halt through assert(false, ...).
static int create_png_image_raw(png *a, ubyte *raw, uint raw_len, int out_n, uint x, uint y)
{
   stbi *s = a.s;
   int img_n = s.img_n; // copy it into a local for later
   uint stride = x*out_n;
   bool addAlpha = (img_n != out_n);

   assert(out_n == s.img_n || out_n == s.img_n+1);
   if (stbi_png_partial) y = 1;
   a.out_ = cast(ubyte*) malloc(x * y * out_n);
   if (!a.out_) assert(false, "Out of memory");

   if (!stbi_png_partial) {
      uint needed = (img_n * x + 1) * y;
      bool wholeImage = (s.img_x == x && s.img_y == y);
      // a full image must match exactly; an interlace pass only needs enough data
      bool bad = wholeImage ? (raw_len != needed) : (raw_len < needed);
      if (bad) assert(false, "Not enough pixels, corrupt PNG");
   }

   for (uint row = 0; row < y; ++row) {
      ubyte *cur = a.out_ + stride*row;
      ubyte *prior = cur - stride; // previous output row; never read on row 0
      int filter = *raw++;
      if (filter > 4) assert(false, "Invalid filter, corrupt PNG");
      // if first row, use special filter that doesn't sample previous row
      if (row == 0) filter = first_row_filter[filter];

      // first pixel of the row: there is no pixel to its left
      for (int k = 0; k < img_n; ++k) {
         switch (filter) {
            case F_none, F_sub, F_avg_first, F_paeth_first:
               cur[k] = raw[k];
               break;
            case F_up:
               cur[k] = cast(ubyte)(raw[k] + prior[k]);
               break;
            case F_avg:
               cur[k] = cast(ubyte)(raw[k] + (prior[k]>>1));
               break;
            case F_paeth:
               cur[k] = cast(ubyte) (raw[k] + paeth(0,prior[k],0));
               break;
            default:
               break;
         }
      }
      if (addAlpha) cur[img_n] = 255;
      raw += img_n;
      cur += out_n;
      prior += out_n;

      // remaining pixels: the left neighbour is out_n bytes back in the output
      for (uint left = x-1; left >= 1; --left) {
         for (int k = 0; k < img_n; ++k) {
            switch (filter) {
               case F_none:
                  cur[k] = raw[k];
                  break;
               case F_sub:
                  cur[k] = cast(ubyte)(raw[k] + cur[k-out_n]);
                  break;
               case F_up:
                  cur[k] = cast(ubyte)(raw[k] + prior[k]);
                  break;
               case F_avg:
                  cur[k] = cast(ubyte)(raw[k] + ((prior[k] + cur[k-out_n])>>1));
                  break;
               case F_paeth:
                  cur[k] = cast(ubyte) (raw[k] + paeth(cur[k-out_n],prior[k],prior[k-out_n]));
                  break;
               case F_avg_first:
                  cur[k] = cast(ubyte)(raw[k] + (cur[k-out_n] >> 1));
                  break;
               case F_paeth_first:
                  cur[k] = cast(ubyte) (raw[k] + paeth(cur[k-out_n],0,0));
                  break;
               default:
                  break;
            }
         }
         if (addAlpha) cur[img_n] = 255;
         raw += img_n;
         cur += out_n;
         prior += out_n;
      }
   }
   return 1;
}
844 
// Build the final image a.out_ (out_n bytes per pixel) from inflated PNG data.
//
// Non-interlaced data is handed straight to create_png_image_raw. Interlaced
// data is decoded as seven reduced images, one per pass, whose pixels are
// then scattered to their positions in the full-size image.
// Returns 1 on success, 0 on failure; allocation failure halts through
// assert(false, ...).
int create_png_image(png *a, ubyte *raw, uint raw_len, int out_n, int interlaced)
{
   ubyte *final_;
   int p;
   int save;
   if (!interlaced)
      return create_png_image_raw(a, raw, raw_len, out_n, a.s.img_x, a.s.img_y);
   // every pass has to be decoded completely, so partial mode is suspended
   save = stbi_png_partial;
   stbi_png_partial = 0;

   // de-interlacing
   final_ = cast(ubyte*) malloc(a.s.img_x * a.s.img_y * out_n);
   if (final_ == null) {
      stbi_png_partial = save;
      assert(false, "Out of memory");
   }
   for (p=0; p < 7; ++p) {
      static immutable int[7] xorig = [ 0,4,0,2,0,1,0 ];
      static immutable int[7] yorig = [ 0,0,4,0,2,0,1 ];
      static immutable int[7] xspc = [ 8,8,4,4,2,2,1 ];
      static immutable int[7] yspc = [ 8,8,8,4,4,2,2 ];
      int i,j,x,y;
      // dimensions of this pass's reduced image
      x = (a.s.img_x - xorig[p] + xspc[p]-1) / xspc[p];
      y = (a.s.img_y - yorig[p] + yspc[p]-1) / yspc[p];
      if (x && y) {
         // Bytes this pass occupies in `raw`: per row, one filter byte plus
         // x pixels of img_n bytes (the stored channel count). out_n may be
         // img_n + 1 when an alpha channel is being added, so it must not be
         // used to step through the input.
         int pass_len = (x*a.s.img_n+1)*y;
         if (!create_png_image_raw(a, raw, raw_len, out_n, x, y)) {
            free(final_);
            stbi_png_partial = save;
            return 0;
         }
         for (j=0; j < y; ++j)
            for (i=0; i < x; ++i)
               memcpy(final_ + (j*yspc[p]+yorig[p])*a.s.img_x*out_n + (i*xspc[p]+xorig[p])*out_n,
                      a.out_ + (j*x+i)*out_n, out_n);
         free(a.out_);
         raw += pass_len;
         raw_len -= pass_len;
      }
   }
   a.out_ = final_;

   stbi_png_partial = save;
   return 1;
}
885 
// Apply colour-key transparency to z.out_, which must already carry an alpha
// component (out_n is 2 or 4).
// For 2 components, alpha becomes 0 where the grey value equals tc[0] and 255
// elsewhere. For 4 components, alpha is set to 0 where RGB equals tc and left
// untouched elsewhere. Always returns 1.
static int compute_transparency(png *z, ubyte[3] tc, int out_n)
{
   stbi *s = z.s;
   uint pixel_count = s.img_x * s.img_y;
   ubyte *px = z.out_;

   // compute color-based transparency, assuming we've
   // already got 255 as the alpha value in the output
   assert(out_n == 2 || out_n == 4);

   if (out_n == 2) {
      for (uint n = 0; n < pixel_count; ++n, px += 2)
         px[1] = (px[0] == tc[0]) ? 0 : 255;
   } else {
      for (uint n = 0; n < pixel_count; ++n, px += 4) {
         bool keyed = px[0] == tc[0] && px[1] == tc[1] && px[2] == tc[2];
         if (keyed)
            px[3] = 0;
      }
   }
   return 1;
}
910 
// Replace the palette indices in a.out_ (one byte per pixel) with the colours
// they select. `palette` holds 4 bytes per entry; 3 bytes per pixel are
// written when pal_img_n is 3, otherwise 4. The old a.out_ is freed and
// replaced by the new buffer. `len` is not used. Returns 1; allocation
// failure halts through assert(false, ...).
int expand_palette(png *a, ubyte *palette, int len, int pal_img_n)
{
   uint pixel_count = a.s.img_x * a.s.img_y;
   ubyte *indices = a.out_;

   ubyte *expanded = cast(ubyte*) malloc(pixel_count * pal_img_n);
   if (expanded == null)
      assert(false, "Out of memory");

   // between here and the free below, exiting would leak `expanded`
   int comps = (pal_img_n == 3) ? 3 : 4;
   ubyte *dst = expanded;
   for (uint px = 0; px < pixel_count; ++px) {
      ubyte *entry = palette + indices[px]*4;
      for (int c = 0; c < comps; ++c)
         dst[c] = entry[c];
      dst += comps;
   }

   free(a.out_);
   a.out_ = expanded;

   return 1;
}
948 
// Walks the PNG chunk stream read through z.s and, depending on `scan`,
// either stops early or decodes the complete image into z.out_.
//
// Params:
//   z        = decoder state; z.expanded, z.idata and z.out_ are reset to
//              null on entry
//   scan     = SCAN_type:   only check the PNG signature;
//              SCAN_header: return as soon as s.img_x, s.img_y and s.img_n
//                           are known;
//              SCAN_load:   inflate and unfilter the pixels when IEND is
//                           reached
//   req_comp = requested number of components; it influences s.img_out_n
//
// Returns: 1 on success, 0 when check_png_header, the zlib decoder,
// create_png_image, compute_transparency or expand_palette report failure.
// On a 0 return any buffers already stored in z are left there for the
// caller to release. Structurally invalid files are rejected with
// assert(false, ...).
int parse_png_file(png *z, int scan, int req_comp)
{
   ubyte[1024] palette;        // 256 entries, 4 bytes each
   ubyte pal_img_n=0;          // 0 = not paletted, otherwise 3 or 4
   ubyte has_trans=0;          // set by a tRNS chunk on a non-paletted image
   ubyte[3] tc;                // transparent colour taken from tRNS
   uint ioff=0, idata_limit=0, i, pal_len=0;
   int first=1,k,interlace=0;
   stbi *s = z.s;

   z.expanded = null;
   z.idata = null;
   z.out_ = null;

   if (!check_png_header(s)) return 0;

   if (scan == SCAN_type) return 1;

   for (;;) {
      chunk c = get_chunk_header(s);
      switch (c.type) {
         case PNG_TYPE('I','H','D','R'): {
            int depth,color,comp,filter;
            if (!first) assert(false, "Multiple IHDR, corrupt PNG");
            first = 0;
            if (c.length != 13) assert(false, "Bad IHDR len, corrupt PNG");
            s.img_x = get32(s); if (s.img_x > (1 << 24)) assert(false, "Very large image (corrupt?)");
            s.img_y = get32(s); if (s.img_y > (1 << 24)) assert(false, "Very large image (corrupt?)");
            depth = get8(s);  if (depth != 8)        assert(false, "8bit only, PNG not supported: 8-bit only");
            color = get8(s);  if (color > 6)         assert(false, "Bad ctype, corrupt PNG");
            if (color == 3) pal_img_n = 3; else if (color & 1) assert(false, "Bad ctype, corrupt PNG");
            comp  = get8(s);  if (comp) assert(false, "Bad comp method, corrupt PNG");
            filter= get8(s);  if (filter) assert(false, "Bad filter method, corrupt PNG");
            interlace = get8(s); if (interlace>1) assert(false, "Bad interlace method, corrupt PNG");
            if (!s.img_x || !s.img_y) assert(false, "0-pixel image, corrupt PNG");
            if (!pal_img_n) {
               // colour bit 2 selects RGB over grey, bit 4 adds an alpha channel
               s.img_n = (color & 2 ? 3 : 1) + (color & 4 ? 1 : 0);
               if ((1 << 30) / s.img_x / s.img_n < s.img_y) assert(false, "Image too large to decode");
               if (scan == SCAN_header) return 1;
            } else {
               // if paletted, then pal_n is our final components, and
               // img_n is # components to decompress/filter.
               s.img_n = 1;
               if ((1 << 30) / s.img_x / 4 < s.img_y) assert(false, "Too large, corrupt PNG");
               // if SCAN_header, have to scan to see if we have a tRNS
            }
            break;
         }

         case PNG_TYPE('P','L','T','E'):  {
            if (first) assert(false, "first not IHDR, corrupt PNG");
            if (c.length > 256*3) assert(false, "invalid PLTE, corrupt PNG");
            pal_len = c.length / 3;
            if (pal_len * 3 != c.length) assert(false, "invalid PLTE, corrupt PNG");
            // Stored as 4 bytes per entry; the fourth byte defaults to opaque.
            for (i=0; i < pal_len; ++i) {
               palette[i*4+0] = get8u(s);
               palette[i*4+1] = get8u(s);
               palette[i*4+2] = get8u(s);
               palette[i*4+3] = 255;
            }
            break;
         }

         case PNG_TYPE('t','R','N','S'): {
            if (first) assert(false, "first not IHDR, corrupt PNG");
            if (z.idata) assert(false, "tRNS after IDAT, corrupt PNG");
            if (pal_img_n) {
               if (scan == SCAN_header) { s.img_n = 4; return 1; }
               if (pal_len == 0) assert(false, "tRNS before PLTE, corrupt PNG");
               if (c.length > pal_len) assert(false, "bad tRNS len, corrupt PNG");
               pal_img_n = 4;
               // Overwrite the fourth byte of the first c.length palette entries.
               for (i=0; i < c.length; ++i)
                  palette[i*4+3] = get8u(s);
            } else {
               if (!(s.img_n & 1)) assert(false, "tRNS with alpha, corrupt PNG");
               if (c.length != cast(uint) s.img_n*2) assert(false, "bad tRNS len, corrupt PNG");
               has_trans = 1;
               for (k=0; k < s.img_n; ++k)
                  tc[k] = cast(ubyte) get16(s); // non 8-bit images will be larger
            }
            break;
         }

         case PNG_TYPE('I','D','A','T'): {
            if (first) assert(false, "first not IHDR, corrupt PNG");
            if (pal_img_n && !pal_len) assert(false, "no PLTE, corrupt PNG");
            if (scan == SCAN_header) { s.img_n = pal_img_n; return 1; }
            // Keep the accumulated IDAT size within int.max. Without this
            // bound, ioff + c.length can wrap around 32 bits, which either
            // skips the buffer growth below or makes `idata_limit *= 2` wrap
            // to 0 and loop forever. ioff never exceeds int.max, so the
            // subtraction itself cannot wrap.
            if (c.length > cast(uint) int.max - ioff) assert(false, "IDAT too large, corrupt PNG");
            if (ioff + c.length > idata_limit) {
               ubyte *p;
               if (idata_limit == 0) idata_limit = c.length > 4096 ? c.length : 4096;
               while (ioff + c.length > idata_limit)
                  idata_limit *= 2;
               p = cast(ubyte*) realloc(z.idata, idata_limit); if (p == null) assert(false, "Out of memory");
               z.idata = p;
            }
            if (!getn(s, z.idata+ioff,c.length)) assert(false, "outofdata, corrupt PNG");
            ioff += c.length;
            break;
         }

         case PNG_TYPE('I','E','N','D'): {
            uint raw_len;
            if (first) assert(false, "first not IHDR, corrupt PNG");
            if (scan != SCAN_load) return 1;
            if (z.idata == null) assert(false, "no IDAT, corrupt PNG");
            z.expanded = stbi_zlib_decode_malloc_guesssize_headerflag(z.idata, ioff, 16384, cast(int *) &raw_len, 1);
            if (z.expanded == null) return 0; // zlib should set error
            free(z.idata); z.idata = null;
            // Decode with one extra channel when the caller asked for exactly
            // that (never for paletted input) or when tRNS supplied a
            // transparent colour.
            if ((req_comp == s.img_n+1 && req_comp != 3 && !pal_img_n) || has_trans)
               s.img_out_n = s.img_n+1;
            else
               s.img_out_n = s.img_n;
            if (!create_png_image(z, z.expanded, raw_len, s.img_out_n, interlace)) return 0;
            if (has_trans)
               if (!compute_transparency(z, tc, s.img_out_n)) return 0;
            if (pal_img_n) {
               // pal_img_n == 3 or 4
               s.img_n = pal_img_n; // record the actual colors we had
               s.img_out_n = pal_img_n;
               if (req_comp >= 3) s.img_out_n = req_comp;
               if (!expand_palette(z, palette.ptr, pal_len, s.img_out_n))
                  return 0;
            }
            free(z.expanded); z.expanded = null;
            return 1;
         }

         default:
            // if critical, fail
            if (first) assert(false, "first not IHDR, corrupt PNG");
            if ((c.type & (1 << 29)) == 0) {
               assert(false, "PNG not supported: unknown chunk type");
            }
            skip(s, c.length);
            break;
      }
      // end of chunk, read and skip CRC
      get32(s);
   }
}
1089 
// Decodes the PNG attached to `p` and hands the pixel buffer to the caller.
//
// Params:
//   p        = decoder state; its out_, expanded and idata buffers are freed
//              and nulled before a normal return
//   x, y     = receive s.img_x / s.img_y when parsing succeeds
//   n        = optional; when non-null receives s.img_n
//   req_comp = 0 to keep the decoded component count, otherwise 1..4 to have
//              the result passed through convert_format
//
// Returns: the pixel buffer, or null when parse_png_file or convert_format
// fails. A req_comp outside 0..4 trips assert(false).
ubyte *do_png(png *p, int *x, int *y, int *n, int req_comp)
{
   if (req_comp < 0 || req_comp > 4)
      assert(false, "Internal error: bad req_comp");

   ubyte *pixels = null;

   if (parse_png_file(p, SCAN_load, req_comp) != 0)
   {
      // Take ownership of the decoded image so the cleanup below leaves it alone.
      pixels = p.out_;
      p.out_ = null;

      const bool wants_conversion = (req_comp != 0) && (req_comp != p.s.img_out_n);
      if (wants_conversion)
      {
         pixels = convert_format(pixels, p.s.img_out_n, req_comp, p.s.img_x, p.s.img_y);
         p.s.img_out_n = req_comp;
         if (pixels is null)
            return pixels;
      }

      *x = p.s.img_x;
      *y = p.s.img_y;
      if (n !is null)
         *n = p.s.img_n;
   }

   // Release whatever the parser still holds, on success and failure alike.
   free(p.out_);
   p.out_ = null;
   free(p.expanded);
   p.expanded = null;
   free(p.idata);
   p.idata = null;

   return pixels;
}
1113 
// Entry point for PNG loading: sets up a default-initialised `png` decoder
// state bound to the I/O context `s` and forwards to do_png.
//
// Params:
//   s        = I/O context to read from
//   x, y     = forwarded to do_png as the width/height outputs
//   comp     = forwarded to do_png as the component-count output
//   req_comp = forwarded to do_png as the requested component count
//
// Returns: whatever do_png returns.
ubyte *stbi_png_load(stbi *s, int *x, int *y, int *comp, int req_comp)
{
   png decoder;
   decoder.s = s;

   ubyte *pixels = do_png(&decoder, x, y, comp, req_comp);
   return pixels;
}