1 // jpgd.h - C++ class for JPEG decompression.
2 // Rich Geldreich <richgel99@gmail.com>
3 // Alex Evans: Linear memory allocator (taken from jpge.h).
4 // v1.04, May. 19, 2012: Code tweaks to fix VS2008 static code analysis warnings (all looked harmless)
5 // D translation by Ketmar // Invisible Vector
6 //
7 // This is free and unencumbered software released into the public domain.
8 //
9 // Anyone is free to copy, modify, publish, use, compile, sell, or
10 // distribute this software, either in source code form or as a compiled
11 // binary, for any purpose, commercial or non-commercial, and by any
12 // means.
13 //
14 // In jurisdictions that recognize copyright laws, the author or authors
15 // of this software dedicate any and all copyright interest in the
16 // software to the public domain. We make this dedication for the benefit
17 // of the public at large and to the detriment of our heirs and
18 // successors. We intend this dedication to be an overt act of
19 // relinquishment in perpetuity of all present and future rights to this
20 // software under copyright law.
21 //
22 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
23 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
24 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
25 // IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
26 // OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
27 // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
28 // OTHER DEALINGS IN THE SOFTWARE.
29 //
30 // For more information, please refer to <http://unlicense.org/>
31 //
32 // Supports progressive and baseline sequential JPEG image files, and the most common chroma subsampling factors: Y, H1V1, H2V1, H1V2, and H2V2.
33 //
34 // Chroma upsampling quality: H2V2 is upsampled in the frequency domain, H2V1 and H1V2 are upsampled using point sampling.
35 // Chroma upsampling reference: "Fast Scheme for Image Size Change in the Compressed Domain"
36 // http://vision.ai.uiuc.edu/~dugad/research/dct/index.html
37 /**
38  * Loads a JPEG image from a memory buffer or a file.
39  * req_comps can be 1 (grayscale), 3 (RGB), or 4 (RGBA).
 * On return, width/height will be set to the image's dimensions, and actual_comps will be set to either 1 (grayscale) or 3 (RGB).
41  * Requesting a 8 or 32bpp image is currently a little faster than 24bpp because the jpeg_decoder class itself currently always unpacks to either 8 or 32bpp.
42  */
43 module dplug.graphics.jpegload;
44 
45 nothrow:
46 @nogc:
47 
48 // arsd.color stripped down
/// Minimal RGBA image container (stripped-down arsd.color TrueColorImage).
/// The pixel slice passed to the constructor is stored as-is; this class neither copies nor frees it.
class TrueColorImage
{
nothrow:
@nogc:
	//ubyte[] data; // stored as rgba quads, upper left to right to bottom
	/// Wrapper around the raw pixel storage.
	struct Data {
		ubyte[] bytes; /// the data as rgba bytes. Stored left to right, top to bottom, no padding.
		// the union is no good because the length of the struct is wrong!
	}

	/// Pixel storage of this image.
	Data imageData;
	/// Alias for `imageData.bytes`.
	alias imageData.bytes data;

	int _width;  // image width in pixels, as given to the constructor
	int _height; // image height in pixels, as given to the constructor

	/// Image width in pixels.
	int width() const { return _width; }
	/// Image height in pixels.
	int height() const { return _height; }

	/// Creates with existing data. The data slice is stored here (not copied).
	/// Params:
	///   w = width in pixels, must be non-negative
	///   h = height in pixels, must be non-negative
	///   data = exactly `w * h * 4` bytes of RGBA pixels
	this(int w, int h, ubyte[] data) {
		assert(w >= 0 && h >= 0);
		// The expected size is computed in 64-bit arithmetic: `w * h * 4` evaluated
		// as `int` wraps around for large images and would make the check meaningless.
		assert(data.length == cast(ulong)w * cast(ulong)h * 4);
		_width = w;
		_height = h;
		imageData.bytes = data;
	}
}
80 
81 
// Uncomment the version declaration below to enable freq. domain chroma upsampling on images using H2V2 subsampling (when disabled, faster nearest neighbor sampling is used).
83 // This is slower, but results in higher quality on images with highly saturated colors.
84 //version = JPGD_SUPPORT_FREQ_DOMAIN_UPSAMPLING;
85 
86 /// Input stream interface.
87 /// This delegate is called when the internal input buffer is empty.
88 /// Parameters:
89 ///   pBuf - input buffer
90 ///   max_bytes_to_read - maximum bytes that can be written to pBuf
91 ///   pEOF_flag - set this to true if at end of stream (no more bytes remaining)
92 ///   Returns -1 on error, otherwise return the number of bytes actually written to the buffer (which may be 0).
93 ///   Notes: This delegate will be called in a loop until you set *pEOF_flag to true or the internal buffer is full.
94 alias JpegStreamReadFunc = int delegate (void* pBuf, int max_bytes_to_read, bool* pEOF_flag);
95 
96 
97 // ////////////////////////////////////////////////////////////////////////// //
98 private:
/// Allocates `nSize` bytes with the C runtime `malloc` and returns its result unchanged.
void *jpgd_malloc (size_t nSize) {
  import core.stdc.stdlib : malloc;
  void* block = malloc(nSize);
  return block;
}
/// Releases a block with the C runtime `free`; a null pointer is ignored.
void jpgd_free (void *p) {
  import core.stdc.stdlib : free;
  if (p is null) return;
  free(p);
}
101 
// Success/failure status codes.
// The specific error codes are consecutive integers counting up from -256.
alias jpgd_status = int;
enum /*jpgd_status*/ {
  // Generic results.
  JPGD_SUCCESS = 0,
  JPGD_FAILED = -1,
  JPGD_DONE = 1,

  // Specific error codes (each one is the previous value plus one).
  JPGD_BAD_DHT_COUNTS = -256,
  JPGD_BAD_DHT_INDEX,
  JPGD_BAD_DHT_MARKER,
  JPGD_BAD_DQT_MARKER,
  JPGD_BAD_DQT_TABLE,
  JPGD_BAD_PRECISION,
  JPGD_BAD_HEIGHT,
  JPGD_BAD_WIDTH,
  JPGD_TOO_MANY_COMPONENTS,
  JPGD_BAD_SOF_LENGTH,
  JPGD_BAD_VARIABLE_MARKER,
  JPGD_BAD_DRI_LENGTH,
  JPGD_BAD_SOS_LENGTH,
  JPGD_BAD_SOS_COMP_ID,
  JPGD_W_EXTRA_BYTES_BEFORE_MARKER,
  JPGD_NO_ARITHMITIC_SUPPORT,
  JPGD_UNEXPECTED_MARKER,
  JPGD_NOT_JPEG,
  JPGD_UNSUPPORTED_MARKER,
  JPGD_BAD_DQT_LENGTH,
  JPGD_TOO_MANY_BLOCKS,
  JPGD_UNDEFINED_QUANT_TABLE,
  JPGD_UNDEFINED_HUFF_TABLE,
  JPGD_NOT_SINGLE_SCAN,
  JPGD_UNSUPPORTED_COLORSPACE,
  JPGD_UNSUPPORTED_SAMP_FACTORS,
  JPGD_DECODE_ERROR,
  JPGD_BAD_RESTART_MARKER,
  JPGD_ASSERTION_ERROR,
  JPGD_BAD_SOS_SPECTRAL,
  JPGD_BAD_SOS_SUCCESSIVE,
  JPGD_STREAM_READ,
  JPGD_NOTENOUGHMEM,
}
115 
// Decoder limits and buffer sizes.
enum {
  JPGD_IN_BUF_SIZE = 8192,
  JPGD_MAX_BLOCKS_PER_MCU = 10,
  JPGD_MAX_HUFF_TABLES = 8,
  JPGD_MAX_QUANT_TABLES = 4,
  JPGD_MAX_COMPONENTS = 4,
  JPGD_MAX_COMPS_IN_SCAN = 4,
  JPGD_MAX_BLOCKS_PER_ROW = 8192,
  JPGD_MAX_HEIGHT = 16384,
  JPGD_MAX_WIDTH = 16384,
}
120 
// DCT coefficients are stored in this sequence (64 entries, shown here eight per line).
static immutable int[64] g_ZAG = [
   0,  1,  8, 16,  9,  2,  3, 10,
  17, 24, 32, 25, 18, 11,  4,  5,
  12, 19, 26, 33, 40, 48, 41, 34,
  27, 20, 13,  6,  7, 14, 21, 28,
  35, 42, 49, 56, 57, 50, 43, 36,
  29, 22, 15, 23, 30, 37, 44, 51,
  58, 59, 52, 45, 38, 31, 39, 46,
  53, 60, 61, 54, 47, 55, 62, 63,
];
123 
// JPEG marker codes (the byte that follows 0xFF in the stream), plus the M_ERROR sentinel.
alias JPEG_MARKER = int;
enum /*JPEG_MARKER*/ {
  // Start-of-frame markers.
  M_SOF0  = 0xC0,
  M_SOF1  = 0xC1,
  M_SOF2  = 0xC2,
  M_SOF3  = 0xC3,
  M_SOF5  = 0xC5,
  M_SOF6  = 0xC6,
  M_SOF7  = 0xC7,
  M_JPG   = 0xC8,
  M_SOF9  = 0xC9,
  M_SOF10 = 0xCA,
  M_SOF11 = 0xCB,
  M_SOF13 = 0xCD,
  M_SOF14 = 0xCE,
  M_SOF15 = 0xCF,

  // Table definition markers.
  M_DHT   = 0xC4,
  M_DAC   = 0xCC,

  // Restart markers.
  M_RST0  = 0xD0,
  M_RST1  = 0xD1,
  M_RST2  = 0xD2,
  M_RST3  = 0xD3,
  M_RST4  = 0xD4,
  M_RST5  = 0xD5,
  M_RST6  = 0xD6,
  M_RST7  = 0xD7,

  // Remaining markers.
  M_SOI   = 0xD8,
  M_EOI   = 0xD9,
  M_SOS   = 0xDA,
  M_DQT   = 0xDB,
  M_DNL   = 0xDC,
  M_DRI   = 0xDD,
  M_DHP   = 0xDE,
  M_EXP   = 0xDF,
  M_APP0  = 0xE0,
  M_APP15 = 0xEF,
  M_JPG0  = 0xF0,
  M_JPG13 = 0xFD,
  M_COM   = 0xFE,
  M_TEM   = 0x01,

  // Value outside the single-byte marker range.
  M_ERROR = 0x100,

  // Same value as M_RST0.
  RST0    = 0xD0,
}
132 
// Scan/subsampling layouts, numbered consecutively from 0.
alias JPEG_SUBSAMPLING = int;
enum /*JPEG_SUBSAMPLING*/ {
  JPGD_GRAYSCALE = 0,
  JPGD_YH1V1,
  JPGD_YH2V1,
  JPGD_YH1V2,
  JPGD_YH2V2,
}
135 
// Fixed-point parameters of the integer IDCT.
enum int CONST_BITS = 13; // fractional bits carried by the FIX_* multipliers
enum int PASS1_BITS = 2;  // extra fractional bits kept between the row and column passes
enum int SCALEDONE = 1;   // the integer 1, shifted to form rounding terms in DESCALE*

// FIX(x): x scaled by 2^CONST_BITS (8192) and rounded to an integer.
enum int FIX_0_298631336 = 2446;  /* FIX(0.298631336) */
enum int FIX_0_390180644 = 3196;  /* FIX(0.390180644) */
enum int FIX_0_541196100 = 4433;  /* FIX(0.541196100) */
enum int FIX_0_765366865 = 6270;  /* FIX(0.765366865) */
enum int FIX_0_899976223 = 7373;  /* FIX(0.899976223) */
enum int FIX_1_175875602 = 9633;  /* FIX(1.175875602) */
enum int FIX_1_501321110 = 12299; /* FIX(1.501321110) */
enum int FIX_1_847759065 = 15137; /* FIX(1.847759065) */
enum int FIX_1_961570560 = 16069; /* FIX(1.961570560) */
enum int FIX_2_053119869 = 16819; /* FIX(2.053119869) */
enum int FIX_2_562915447 = 20995; /* FIX(2.562915447) */
enum int FIX_3_072711026 = 25172; /* FIX(3.072711026) */
152 
// Shifts `x` right by `n` bits after adding half of 2^n, i.e. divides by 2^n with rounding.
int DESCALE() (int x, int n) {
  pragma(inline, true);
  immutable int rounding = SCALEDONE << (n - 1);
  return (x + rounding) >> n;
}
// Like DESCALE, but additionally adds 128 (pre-scaled by 2^n) so the result is offset by +128.
int DESCALE_ZEROSHIFT() (int x, int n) {
  pragma(inline, true);
  immutable int levelShift = 128 << n;
  immutable int rounding = SCALEDONE << (n - 1);
  return (x + levelShift + rounding) >> n;
}
// Saturates `i` to the range 0..255: negative values give 0, values above 255 give 255.
ubyte CLAMP() (int i) {
  pragma(inline, true);
  if (i < 0) return 0;
  if (i > 255) return 255;
  return cast(ubyte)i;
}
156 
157 
// Row pass of the 2-D IDCT, specialised at compile time on the number of
// leading columns (NONZERO_COLS) that may hold non-zero coefficients.
struct Row(int NONZERO_COLS) {
pure nothrow @trusted @nogc:
  // Coefficient `x` of the row, or the constant 0 when `x` lies outside the
  // non-zero columns of this instantiation (decided at compile time).
  private static int column(int x) (const(jpeg_decoder.jpgd_block_t)* pSrc) {
    static if (x < NONZERO_COLS) return pSrc[x]; else return 0;
  }

  // Transforms the 8 coefficients at `pSrc` into 8 intermediate values at `pTemp`,
  // which keep PASS1_BITS extra fractional bits. With NONZERO_COLS == 0 nothing is written.
  static void idct(int* pTemp, const(jpeg_decoder.jpgd_block_t)* pSrc) {
    static if (NONZERO_COLS == 0) {
      // Row is entirely zero: leave pTemp untouched.
    } else static if (NONZERO_COLS == 1) {
      // Only the first coefficient is present: every output equals its scaled value.
      immutable int flat = (pSrc[0] << PASS1_BITS);
      pTemp[0 .. 8] = flat;
    } else {
      // Part computed from columns 0, 2, 4 and 6.
      immutable int c2 = column!2(pSrc);
      immutable int c6 = column!6(pSrc);

      immutable int shared26 = (c2 + c6)*FIX_0_541196100;
      immutable int even2 = shared26 + c6*(-FIX_1_847759065);
      immutable int even3 = shared26 + c2*FIX_0_765366865;

      immutable int even0 = (column!0(pSrc) + column!4(pSrc)) << CONST_BITS;
      immutable int even1 = (column!0(pSrc) - column!4(pSrc)) << CONST_BITS;

      immutable int e0 = even0 + even3;
      immutable int e3 = even0 - even3;
      immutable int e1 = even1 + even2;
      immutable int e2 = even1 - even2;

      // Part computed from columns 7, 5, 3 and 1.
      immutable int c7 = column!7(pSrc);
      immutable int c5 = column!5(pSrc);
      immutable int c3 = column!3(pSrc);
      immutable int c1 = column!1(pSrc);

      immutable int s71 = c7 + c1;
      immutable int s53 = c5 + c3;
      immutable int s73 = c7 + c3;
      immutable int s51 = c5 + c1;
      immutable int sharedOdd = (s73 + s51)*FIX_1_175875602;

      immutable int k71 = s71*(-FIX_0_899976223);
      immutable int k53 = s53*(-FIX_2_562915447);
      immutable int k73 = s73*(-FIX_1_961570560) + sharedOdd;
      immutable int k51 = s51*(-FIX_0_390180644) + sharedOdd;

      immutable int o0 = c7*FIX_0_298631336 + k71 + k73;
      immutable int o1 = c5*FIX_2_053119869 + k53 + k51;
      immutable int o2 = c3*FIX_3_072711026 + k53 + k73;
      immutable int o3 = c1*FIX_1_501321110 + k71 + k51;

      // Combine both parts; outputs k and 7-k share the same pair of terms.
      enum int SHIFT = CONST_BITS-PASS1_BITS;
      pTemp[0] = DESCALE(e0 + o3, SHIFT);
      pTemp[1] = DESCALE(e1 + o2, SHIFT);
      pTemp[2] = DESCALE(e2 + o1, SHIFT);
      pTemp[3] = DESCALE(e3 + o0, SHIFT);
      pTemp[4] = DESCALE(e3 - o0, SHIFT);
      pTemp[5] = DESCALE(e2 - o1, SHIFT);
      pTemp[6] = DESCALE(e1 - o2, SHIFT);
      pTemp[7] = DESCALE(e0 - o3, SHIFT);
    }
  }
}
218 
219 
// Column pass of the 2-D IDCT, specialised at compile time on the number of
// leading rows (NONZERO_ROWS, at least 1) that may hold non-zero values.
struct Col (int NONZERO_ROWS) {
pure nothrow @trusted @nogc:
  // Intermediate value in row `y` of the column (stride 8), or the constant 0 when
  // `y` lies outside the non-zero rows of this instantiation (decided at compile time).
  private static int cell(int y) (const(int)* pTemp) {
    static if (y < NONZERO_ROWS) return pTemp[y*8]; else return 0;
  }

  // Transforms one column of the intermediate block at `pTemp` (stride 8) into
  // 8 clamped output bytes written to `pDst_ptr` with stride 8.
  static void idct(ubyte* pDst_ptr, const(int)* pTemp) {
    static assert(NONZERO_ROWS > 0);
    static if (NONZERO_ROWS == 1) {
      // Only the first row is present: the whole column gets one value.
      immutable ubyte flat = CLAMP(DESCALE_ZEROSHIFT(pTemp[0], PASS1_BITS+3));
      foreach (r; 0 .. 8) pDst_ptr[r*8] = flat;
    } else {
      // Part computed from rows 0, 2, 4 and 6.
      immutable int r2 = cell!2(pTemp);
      immutable int r6 = cell!6(pTemp);

      immutable int shared26 = (r2 + r6)*FIX_0_541196100;
      immutable int even2 = shared26 + r6*(-FIX_1_847759065);
      immutable int even3 = shared26 + r2*FIX_0_765366865;

      immutable int even0 = (cell!0(pTemp) + cell!4(pTemp)) << CONST_BITS;
      immutable int even1 = (cell!0(pTemp) - cell!4(pTemp)) << CONST_BITS;

      immutable int e0 = even0 + even3;
      immutable int e3 = even0 - even3;
      immutable int e1 = even1 + even2;
      immutable int e2 = even1 - even2;

      // Part computed from rows 7, 5, 3 and 1.
      immutable int r7 = cell!7(pTemp);
      immutable int r5 = cell!5(pTemp);
      immutable int r3 = cell!3(pTemp);
      immutable int r1 = cell!1(pTemp);

      immutable int s71 = r7 + r1;
      immutable int s53 = r5 + r3;
      immutable int s73 = r7 + r3;
      immutable int s51 = r5 + r1;
      immutable int sharedOdd = (s73 + s51)*FIX_1_175875602;

      immutable int k71 = s71*(-FIX_0_899976223);
      immutable int k53 = s53*(-FIX_2_562915447);
      immutable int k73 = s73*(-FIX_1_961570560) + sharedOdd;
      immutable int k51 = s51*(-FIX_0_390180644) + sharedOdd;

      immutable int o0 = r7*FIX_0_298631336 + k71 + k73;
      immutable int o1 = r5*FIX_2_053119869 + k53 + k51;
      immutable int o2 = r3*FIX_3_072711026 + k53 + k73;
      immutable int o3 = r1*FIX_1_501321110 + k71 + k51;

      // Descale, add the +128 offset, clamp to a byte and store.
      enum int SHIFT = CONST_BITS+PASS1_BITS+3;
      pDst_ptr[8*0] = CLAMP(DESCALE_ZEROSHIFT(e0 + o3, SHIFT));
      pDst_ptr[8*1] = CLAMP(DESCALE_ZEROSHIFT(e1 + o2, SHIFT));
      pDst_ptr[8*2] = CLAMP(DESCALE_ZEROSHIFT(e2 + o1, SHIFT));
      pDst_ptr[8*3] = CLAMP(DESCALE_ZEROSHIFT(e3 + o0, SHIFT));
      pDst_ptr[8*4] = CLAMP(DESCALE_ZEROSHIFT(e3 - o0, SHIFT));
      pDst_ptr[8*5] = CLAMP(DESCALE_ZEROSHIFT(e2 - o1, SHIFT));
      pDst_ptr[8*6] = CLAMP(DESCALE_ZEROSHIFT(e1 - o2, SHIFT));
      pDst_ptr[8*7] = CLAMP(DESCALE_ZEROSHIFT(e0 - o3, SHIFT));
    }
  }
}
296 
297 
// For each block_max_zag value 1..64 (one group of 8 entries, groups in ascending order):
// the number of leading non-zero columns to assume for each of the 8 rows of the block.
static immutable ubyte[512] s_idct_row_table = [
  1,0,0,0,0,0,0,0, 2,0,0,0,0,0,0,0, 2,1,0,0,0,0,0,0, 2,1,1,0,0,0,0,0,
  2,2,1,0,0,0,0,0, 3,2,1,0,0,0,0,0, 4,2,1,0,0,0,0,0, 4,3,1,0,0,0,0,0,
  4,3,2,0,0,0,0,0, 4,3,2,1,0,0,0,0, 4,3,2,1,1,0,0,0, 4,3,2,2,1,0,0,0,
  4,3,3,2,1,0,0,0, 4,4,3,2,1,0,0,0, 5,4,3,2,1,0,0,0, 6,4,3,2,1,0,0,0,
  6,5,3,2,1,0,0,0, 6,5,4,2,1,0,0,0, 6,5,4,3,1,0,0,0, 6,5,4,3,2,0,0,0,
  6,5,4,3,2,1,0,0, 6,5,4,3,2,1,1,0, 6,5,4,3,2,2,1,0, 6,5,4,3,3,2,1,0,
  6,5,4,4,3,2,1,0, 6,5,5,4,3,2,1,0, 6,6,5,4,3,2,1,0, 7,6,5,4,3,2,1,0,
  8,6,5,4,3,2,1,0, 8,7,5,4,3,2,1,0, 8,7,6,4,3,2,1,0, 8,7,6,5,3,2,1,0,
  8,7,6,5,4,2,1,0, 8,7,6,5,4,3,1,0, 8,7,6,5,4,3,2,0, 8,7,6,5,4,3,2,1,
  8,7,6,5,4,3,2,2, 8,7,6,5,4,3,3,2, 8,7,6,5,4,4,3,2, 8,7,6,5,5,4,3,2,
  8,7,6,6,5,4,3,2, 8,7,7,6,5,4,3,2, 8,8,7,6,5,4,3,2, 8,8,8,6,5,4,3,2,
  8,8,8,7,5,4,3,2, 8,8,8,7,6,4,3,2, 8,8,8,7,6,5,3,2, 8,8,8,7,6,5,4,2,
  8,8,8,7,6,5,4,3, 8,8,8,7,6,5,4,4, 8,8,8,7,6,5,5,4, 8,8,8,7,6,6,5,4,
  8,8,8,7,7,6,5,4, 8,8,8,8,7,6,5,4, 8,8,8,8,8,6,5,4, 8,8,8,8,8,7,5,4,
  8,8,8,8,8,7,6,4, 8,8,8,8,8,7,6,5, 8,8,8,8,8,7,6,6, 8,8,8,8,8,7,7,6,
  8,8,8,8,8,8,7,6, 8,8,8,8,8,8,8,6, 8,8,8,8,8,8,8,7, 8,8,8,8,8,8,8,8,
];
308 
// For each block_max_zag value 1..64 (index = block_max_zag - 1):
// the number of leading non-zero rows to assume in the column pass.
static immutable ubyte[64] s_idct_col_table = [
  1, 1, 2, 3, 3, 3, 3, 3,
  3, 4, 5, 5, 5, 5, 5, 5,
  5, 5, 5, 5, 6, 7, 7, 7,
  7, 7, 7, 7, 7, 7, 7, 7,
  7, 7, 7, 8, 8, 8, 8, 8,
  8, 8, 8, 8, 8, 8, 8, 8,
  8, 8, 8, 8, 8, 8, 8, 8,
  8, 8, 8, 8, 8, 8, 8, 8,
];
310 
// Inverse DCT of one 8x8 coefficient block.
//   pSrc_ptr      - 64 coefficients, row-major
//   pDst_ptr      - receives 64 output bytes, row-major
//   block_max_zag - must be in 1..64; selects, via the two lookup tables, how many
//                   columns per row and how many rows are treated as non-zero
void idct() (const(jpeg_decoder.jpgd_block_t)* pSrc_ptr, ubyte* pDst_ptr, int block_max_zag) {
  assert(block_max_zag >= 1);
  assert(block_max_zag <= 64);

  if (block_max_zag <= 1) {
    // Only the first coefficient is used: the whole block is one flat value.
    immutable ubyte flat = CLAMP(((pSrc_ptr[0] + 4) >> 3) + 128);
    pDst_ptr[0 .. 64] = flat;
    return;
  }

  // Intermediate block; zero-initialised, so rows skipped by Row!(0) stay zero.
  int[64] workspace;

  // Row pass: each row uses the specialisation picked from the row table.
  const(ubyte)* colsPerRow = &s_idct_row_table.ptr[(block_max_zag - 1) * 8];
  foreach (r; 0 .. 8) {
    int* rowOut = workspace.ptr + r*8;
    const(jpeg_decoder.jpgd_block_t)* rowIn = pSrc_ptr + r*8;
    switch (colsPerRow[r]) {
      case 0: Row!(0).idct(rowOut, rowIn); break;
      case 1: Row!(1).idct(rowOut, rowIn); break;
      case 2: Row!(2).idct(rowOut, rowIn); break;
      case 3: Row!(3).idct(rowOut, rowIn); break;
      case 4: Row!(4).idct(rowOut, rowIn); break;
      case 5: Row!(5).idct(rowOut, rowIn); break;
      case 6: Row!(6).idct(rowOut, rowIn); break;
      case 7: Row!(7).idct(rowOut, rowIn); break;
      case 8: Row!(8).idct(rowOut, rowIn); break;
      default: assert(0);
    }
  }

  // Column pass: all eight columns share one specialisation.
  immutable int nonzero_rows = s_idct_col_table.ptr[block_max_zag - 1];
  foreach (c; 0 .. 8) {
    ubyte* colOut = pDst_ptr + c;
    const(int)* colIn = workspace.ptr + c;
    switch (nonzero_rows) {
      case 1: Col!(1).idct(colOut, colIn); break;
      case 2: Col!(2).idct(colOut, colIn); break;
      case 3: Col!(3).idct(colOut, colIn); break;
      case 4: Col!(4).idct(colOut, colIn); break;
      case 5: Col!(5).idct(colOut, colIn); break;
      case 6: Col!(6).idct(colOut, colIn); break;
      case 7: Col!(7).idct(colOut, colIn); break;
      case 8: Col!(8).idct(colOut, colIn); break;
      default: assert(0);
    }
  }
}
380 
// Inverse DCT that uses only the top-left 4x4 coefficients of the block at `pSrc_ptr`
// (row-major, stride 8) and writes a full 8x8 block of bytes to `pDst_ptr`.
void idct_4x4() (const(jpeg_decoder.jpgd_block_t)* pSrc_ptr, ubyte* pDst_ptr) {
  int[64] workspace;

  // Row pass over the first four rows, four columns each.
  foreach (r; 0 .. 4) Row!(4).idct(workspace.ptr + r*8, pSrc_ptr + r*8);

  // Column pass over all eight columns, reading the four rows produced above.
  foreach (c; 0 .. 8) Col!(4).idct(pDst_ptr + c, workspace.ptr + c);
}
401 
402 
403 // ////////////////////////////////////////////////////////////////////////// //
404 struct jpeg_decoder {
405 nothrow:
406 @nogc:
407 
408 private import core.stdc.string : memcpy, memset;
409 private:
410   static auto JPGD_MIN(T) (T a, T b) pure nothrow @safe @nogc { pragma(inline, true); return (a < b ? a : b); }
411   static auto JPGD_MAX(T) (T a, T b) pure nothrow @safe @nogc { pragma(inline, true); return (a > b ? a : b); }
412 
413   alias jpgd_quant_t = short;
414   alias jpgd_block_t = short;
415   alias pDecode_block_func = void function (ref jpeg_decoder, int, int, int);
416 
  // Lookup data used by huff_decode() to decode Huffman symbols.
  static struct huff_tables {
    bool ac_table;         // presumably true for an AC table, false for DC - TODO confirm against the table builder
    uint[256] look_up;     // indexed by the top 8 bits of the bit buffer; an entry that is negative when read as int continues in `tree`
    uint[256] look_up2;    // same index; packed entry: bits 0-7 symbol, bits 8-12 bits to consume, bit 15 set when the extra bits are stored in bits 16 and up
    ubyte[256] code_size;  // code length in bits, indexed by symbol
    uint[512] tree;        // walked one bit at a time for codes longer than 8 bits; indexed by the negated node value
  }
424 
  // Describes a buffer of coefficient blocks (see m_dc_coeffs / m_ac_coeffs).
  // NOTE(review): field meanings below are inferred from the names only - confirm against the code that fills them.
  static struct coeff_buf {
    ubyte* pData;                  // start of the coefficient storage
    int block_num_x, block_num_y;  // presumably the number of blocks horizontally / vertically
    int block_len_x, block_len_y;  // presumably the dimensions of one block
    int block_size;                // presumably the size in bytes of one block
  }
431 
  // Node of the singly linked list headed by m_pMem_blocks (linear memory allocator).
  // NOTE(review): usage is not visible here; field meanings are inferred from the names - confirm against the allocator.
  static struct mem_block {
    mem_block* m_pNext;   // next block in the list
    size_t m_used_count;  // presumably the number of bytes already handed out from this block
    size_t m_size;        // presumably the capacity of this block in bytes
    char[1] m_data;       // presumably the first byte of the payload that follows the header - must stay the last field
  }
438 
439   mem_block* m_pMem_blocks;
440   int m_image_x_size;
441   int m_image_y_size;
442   JpegStreamReadFunc readfn;
443   int m_progressive_flag;
444   ubyte[JPGD_MAX_HUFF_TABLES] m_huff_ac;
445   ubyte*[JPGD_MAX_HUFF_TABLES] m_huff_num;      // pointer to number of Huffman codes per bit size
446   ubyte*[JPGD_MAX_HUFF_TABLES] m_huff_val;      // pointer to Huffman codes per bit size
447   jpgd_quant_t*[JPGD_MAX_QUANT_TABLES] m_quant; // pointer to quantization tables
448   int m_scan_type;                              // Gray, Yh1v1, Yh1v2, Yh2v1, Yh2v2 (CMYK111, CMYK4114 no longer supported)
449   int m_comps_in_frame;                         // # of components in frame
450   int[JPGD_MAX_COMPONENTS] m_comp_h_samp;       // component's horizontal sampling factor
451   int[JPGD_MAX_COMPONENTS] m_comp_v_samp;       // component's vertical sampling factor
452   int[JPGD_MAX_COMPONENTS] m_comp_quant;        // component's quantization table selector
453   int[JPGD_MAX_COMPONENTS] m_comp_ident;        // component's ID
454   int[JPGD_MAX_COMPONENTS] m_comp_h_blocks;
455   int[JPGD_MAX_COMPONENTS] m_comp_v_blocks;
456   int m_comps_in_scan;                          // # of components in scan
457   int[JPGD_MAX_COMPS_IN_SCAN] m_comp_list;      // components in this scan
458   int[JPGD_MAX_COMPONENTS] m_comp_dc_tab;       // component's DC Huffman coding table selector
459   int[JPGD_MAX_COMPONENTS] m_comp_ac_tab;       // component's AC Huffman coding table selector
460   int m_spectral_start;                         // spectral selection start
461   int m_spectral_end;                           // spectral selection end
462   int m_successive_low;                         // successive approximation low
463   int m_successive_high;                        // successive approximation high
464   int m_max_mcu_x_size;                         // MCU's max. X size in pixels
465   int m_max_mcu_y_size;                         // MCU's max. Y size in pixels
466   int m_blocks_per_mcu;
467   int m_max_blocks_per_row;
468   int m_mcus_per_row, m_mcus_per_col;
469   int[JPGD_MAX_BLOCKS_PER_MCU] m_mcu_org;
470   int m_total_lines_left;                       // total # lines left in image
471   int m_mcu_lines_left;                         // total # lines left in this MCU
472   int m_real_dest_bytes_per_scan_line;
473   int m_dest_bytes_per_scan_line;               // rounded up
474   int m_dest_bytes_per_pixel;                   // 4 (RGB) or 1 (Y)
475   huff_tables*[JPGD_MAX_HUFF_TABLES] m_pHuff_tabs;
476   coeff_buf*[JPGD_MAX_COMPONENTS] m_dc_coeffs;
477   coeff_buf*[JPGD_MAX_COMPONENTS] m_ac_coeffs;
478   int m_eob_run;
479   int[JPGD_MAX_COMPONENTS] m_block_y_mcu;
480   ubyte* m_pIn_buf_ofs;
481   int m_in_buf_left;
482   int m_tem_flag;
483   bool m_eof_flag;
484   ubyte[128] m_in_buf_pad_start;
485   ubyte[JPGD_IN_BUF_SIZE+128] m_in_buf;
486   ubyte[128] m_in_buf_pad_end;
487   int m_bits_left;
488   uint m_bit_buf;
489   int m_restart_interval;
490   int m_restarts_left;
491   int m_next_restart_num;
492   int m_max_mcus_per_row;
493   int m_max_blocks_per_mcu;
494   int m_expanded_blocks_per_mcu;
495   int m_expanded_blocks_per_row;
496   int m_expanded_blocks_per_component;
497   bool m_freq_domain_chroma_upsample;
498   int m_max_mcus_per_col;
499   uint[JPGD_MAX_COMPONENTS] m_last_dc_val;
500   jpgd_block_t* m_pMCU_coefficients;
501   int[JPGD_MAX_BLOCKS_PER_MCU] m_mcu_block_max_zag;
502   ubyte* m_pSample_buf;
503   int[256] m_crr;
504   int[256] m_cbb;
505   int[256] m_crg;
506   int[256] m_cbg;
507   ubyte* m_pScan_line_0;
508   ubyte* m_pScan_line_1;
509   jpgd_status m_error_code;
510   bool m_ready_flag;
511   int m_total_bytes_read;
512 
513 public:
  // Constructs a decoder that pulls its input through `rfn`; all setup is delegated to decode_init().
  // Inspect `error_code` after constructing to determine if the stream is valid or not. You may look at the `width`, `height`, etc.
  // methods after the constructor is called. You may then either destruct the object, or begin decoding the image by calling begin_decoding(), then decode() on each scanline.
  this (JpegStreamReadFunc rfn) { decode_init(rfn); }

  // Releases everything held by the decoder via free_all_blocks().
  ~this () { free_all_blocks(); }

  // The postblit is disabled, so a decoder cannot be copied.
  @disable this (this); // no copies
521 
522   // Call this method after constructing the object to begin decompression.
523   // If JPGD_SUCCESS is returned you may then call decode() on each scanline.
524   int begin_decoding () {
525     if (m_ready_flag) return JPGD_SUCCESS;
526     if (m_error_code) return JPGD_FAILED;
527 
528     decode_start();
529     m_ready_flag = true;
530     return JPGD_SUCCESS;
531   }
532 
533   // Returns the next scan line.
534   // For grayscale images, pScan_line will point to a buffer containing 8-bit pixels (`bytes_per_pixel` will return 1).
535   // Otherwise, it will always point to a buffer containing 32-bit RGBA pixels (A will always be 255, and `bytes_per_pixel` will return 4).
536   // Returns JPGD_SUCCESS if a scan line has been returned.
537   // Returns JPGD_DONE if all scan lines have been returned.
538   // Returns JPGD_FAILED if an error occurred. Inspect `error_code` for a more info.
539   int decode (/*const void** */void** pScan_line, uint* pScan_line_len) {
540     if (m_error_code || !m_ready_flag) return JPGD_FAILED;
541     if (m_total_lines_left == 0) return JPGD_DONE;
542 
543       if (m_mcu_lines_left == 0) {
544         if (m_progressive_flag) load_next_row(); else decode_next_row();
545         // Find the EOI marker if that was the last row.
546         if (m_total_lines_left <= m_max_mcu_y_size) find_eoi();
547         m_mcu_lines_left = m_max_mcu_y_size;
548       }
549       if (m_freq_domain_chroma_upsample) {
550         expanded_convert();
551         *pScan_line = m_pScan_line_0;
552       } else {
553         switch (m_scan_type) {
554           case JPGD_YH2V2:
555             if ((m_mcu_lines_left & 1) == 0) {
556               H2V2Convert();
557               *pScan_line = m_pScan_line_0;
558             } else {
559               *pScan_line = m_pScan_line_1;
560             }
561             break;
562           case JPGD_YH2V1:
563             H2V1Convert();
564             *pScan_line = m_pScan_line_0;
565             break;
566           case JPGD_YH1V2:
567             if ((m_mcu_lines_left & 1) == 0) {
568               H1V2Convert();
569               *pScan_line = m_pScan_line_0;
570             } else {
571               *pScan_line = m_pScan_line_1;
572             }
573             break;
574           case JPGD_YH1V1:
575             H1V1Convert();
576             *pScan_line = m_pScan_line_0;
577             break;
578           case JPGD_GRAYSCALE:
579             gray_convert();
580             *pScan_line = m_pScan_line_0;
581             break;
582           default:
583         }
584       }
585       *pScan_line_len = m_real_dest_bytes_per_scan_line;
586       --m_mcu_lines_left;
587       --m_total_lines_left;
588       return JPGD_SUCCESS;
589   }
590 
  // Read-only accessors for the decoder state.
  @property const pure nothrow @safe @nogc {
    // Error code currently recorded by the decoder (m_error_code).
    jpgd_status error_code () { pragma(inline, true); return m_error_code; }

    // Image width in pixels.
    int width () { pragma(inline, true); return m_image_x_size; }
    // Image height in pixels.
    int height () { pragma(inline, true); return m_image_y_size; }

    // Number of components in the frame.
    int num_components () { pragma(inline, true); return m_comps_in_frame; }

    // Bytes per output pixel: 4 (RGB) or 1 (Y).
    int bytes_per_pixel () { pragma(inline, true); return m_dest_bytes_per_pixel; }
    // Image width multiplied by bytes_per_pixel().
    int bytes_per_scan_line () { pragma(inline, true); return m_image_x_size * bytes_per_pixel(); }

    // Returns the total number of bytes actually consumed by the decoder (which should equal the actual size of the JPEG file).
    int total_bytes_read () { pragma(inline, true); return m_total_bytes_read; }
  }
605 
606 private:
607   // Retrieve one character from the input stream.
608   uint get_char () {
609     // Any bytes remaining in buffer?
610     if (!m_in_buf_left) {
611       // Try to get more bytes.
612       prep_in_buffer();
613       // Still nothing to get?
614       if (!m_in_buf_left) {
615         // Pad the end of the stream with 0xFF 0xD9 (EOI marker)
616         int t = m_tem_flag;
617         m_tem_flag ^= 1;
618         return (t ? 0xD9 : 0xFF);
619       }
620     }
621     uint c = *m_pIn_buf_ofs++;
622     --m_in_buf_left;
623     return c;
624   }
625 
626   // Same as previous method, except can indicate if the character is a pad character or not.
627   uint get_char (bool* pPadding_flag) {
628     if (!m_in_buf_left) {
629       prep_in_buffer();
630       if (!m_in_buf_left) {
631         *pPadding_flag = true;
632         int t = m_tem_flag;
633         m_tem_flag ^= 1;
634         return (t ? 0xD9 : 0xFF);
635       }
636     }
637     *pPadding_flag = false;
638     uint c = *m_pIn_buf_ofs++;
639     --m_in_buf_left;
640     return c;
641   }
642 
643   // Inserts a previously retrieved character back into the input buffer.
644   void stuff_char (ubyte q) {
645     *(--m_pIn_buf_ofs) = q;
646     m_in_buf_left++;
647   }
648 
649   // Retrieves one character from the input stream, but does not read past markers. Will continue to return 0xFF when a marker is encountered.
650   ubyte get_octet () {
651     bool padding_flag;
652     int c = get_char(&padding_flag);
653     if (c == 0xFF) {
654       if (padding_flag) return 0xFF;
655       c = get_char(&padding_flag);
656       if (padding_flag) { stuff_char(0xFF); return 0xFF; }
657       if (c == 0x00) return 0xFF;
658       stuff_char(cast(ubyte)(c));
659       stuff_char(0xFF);
660       return 0xFF;
661     }
662     return cast(ubyte)(c);
663   }
664 
665   // Retrieves a variable number of bits from the input stream. Does not recognize markers.
666   uint get_bits (int num_bits) {
667     if (!num_bits) return 0;
668     uint i = m_bit_buf >> (32 - num_bits);
669     if ((m_bits_left -= num_bits) <= 0) {
670       m_bit_buf <<= (num_bits += m_bits_left);
671       uint c1 = get_char();
672       uint c2 = get_char();
673       m_bit_buf = (m_bit_buf & 0xFFFF0000) | (c1 << 8) | c2;
674       m_bit_buf <<= -m_bits_left;
675       m_bits_left += 16;
676       assert(m_bits_left >= 0);
677     } else {
678       m_bit_buf <<= num_bits;
679     }
680     return i;
681   }
682 
683   // Retrieves a variable number of bits from the input stream. Markers will not be read into the input bit buffer. Instead, an infinite number of all 1's will be returned when a marker is encountered.
684   uint get_bits_no_markers (int num_bits) {
685     if (!num_bits) return 0;
686     uint i = m_bit_buf >> (32 - num_bits);
687     if ((m_bits_left -= num_bits) <= 0) {
688       m_bit_buf <<= (num_bits += m_bits_left);
689       if (m_in_buf_left < 2 || m_pIn_buf_ofs[0] == 0xFF || m_pIn_buf_ofs[1] == 0xFF) {
690         uint c1 = get_octet();
691         uint c2 = get_octet();
692         m_bit_buf |= (c1 << 8) | c2;
693       } else {
694         m_bit_buf |= (cast(uint)m_pIn_buf_ofs[0] << 8) | m_pIn_buf_ofs[1];
695         m_in_buf_left -= 2;
696         m_pIn_buf_ofs += 2;
697       }
698       m_bit_buf <<= -m_bits_left;
699       m_bits_left += 16;
700       assert(m_bits_left >= 0);
701     } else {
702       m_bit_buf <<= num_bits;
703     }
704     return i;
705   }
706 
707   // Decodes a Huffman encoded symbol.
708   int huff_decode (huff_tables *pH) {
709     int symbol;
710     // Check first 8-bits: do we have a complete symbol?
711     if ((symbol = pH.look_up.ptr[m_bit_buf >> 24]) < 0) {
712       // Decode more bits, use a tree traversal to find symbol.
713       int ofs = 23;
714       do {
715         symbol = pH.tree.ptr[-cast(int)(symbol + ((m_bit_buf >> ofs) & 1))];
716         --ofs;
717       } while (symbol < 0);
718       get_bits_no_markers(8 + (23 - ofs));
719     } else {
720       get_bits_no_markers(pH.code_size.ptr[symbol]);
721     }
722     return symbol;
723   }
724 
725   // Decodes a Huffman encoded symbol.
  // Variant that also fetches the extra bits that follow the code.
  // Params:
  //   pH = Huffman tables to decode with (look_up2, tree and code_size are read).
  //   extra_bits = receives the extra bits; their count is the low 4 bits of the symbol.
  // Returns: the decoded symbol (masked to 8 bits on the fast path).
  // Layout of a non-negative look_up2 entry, as used below:
  //   bits 0..7  symbol (its low 4 bits are the number of extra bits)
  //   bits 8..12 number of bits to consume
  //   bit 15     set when the extra bits are already stored in bits 16 and up
  int huff_decode (huff_tables *pH, ref int extra_bits) {
    int symbol;
    // Check first 8-bits: do we have a complete symbol?
    if ((symbol = pH.look_up2.ptr[m_bit_buf >> 24]) < 0) {
      // Use a tree traversal to find symbol.
      int ofs = 23;
      do {
        symbol = pH.tree.ptr[-cast(int)(symbol + ((m_bit_buf >> ofs) & 1))];
        --ofs;
      } while (symbol < 0);
      // Consume the code itself (8 bits plus one per tree step), then read the extra bits.
      get_bits_no_markers(8 + (23 - ofs));
      extra_bits = get_bits_no_markers(symbol & 0xF);
    } else {
      // Sanity check: the packed bit count must equal the code size, plus the extra bits when they are embedded.
      assert(((symbol >> 8) & 31) == pH.code_size.ptr[symbol & 255] + ((symbol & 0x8000) ? (symbol & 15) : 0));
      if (symbol & 0x8000) {
        // Code and extra bits both fit into the 8-bit lookup: consume them together,
        // the extra bits come straight from the table entry.
        get_bits_no_markers((symbol >> 8) & 31);
        extra_bits = symbol >> 16;
      } else {
        int code_size = (symbol >> 8) & 31;
        int num_extra_bits = symbol & 0xF;
        int bits = code_size + num_extra_bits;
        if (bits <= (m_bits_left + 16)) {
          // One fetch for code + extra bits; the mask strips the code bits from the result.
          extra_bits = get_bits_no_markers(bits) & ((1 << num_extra_bits) - 1);
        } else {
          // Otherwise fetch the code and the extra bits separately.
          get_bits_no_markers(code_size);
          extra_bits = get_bits_no_markers(num_extra_bits);
        }
      }
      symbol &= 0xFF;
    }
    return symbol;
  }
758 
759   // Tables and macro used to fully decode the DPCM differences.
760   static immutable int[16] s_extend_test = [ 0, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, 0x0080, 0x0100, 0x0200, 0x0400, 0x0800, 0x1000, 0x2000, 0x4000 ];
761   static immutable int[16] s_extend_offset = [ 0, ((-1)<<1) + 1, ((-1)<<2) + 1, ((-1)<<3) + 1, ((-1)<<4) + 1, ((-1)<<5) + 1, ((-1)<<6) + 1, ((-1)<<7) + 1, ((-1)<<8) + 1, ((-1)<<9) + 1, ((-1)<<10) + 1, ((-1)<<11) + 1, ((-1)<<12) + 1, ((-1)<<13) + 1, ((-1)<<14) + 1, ((-1)<<15) + 1 ];
762   static immutable int[18] s_extend_mask = [ 0, (1<<0), (1<<1), (1<<2), (1<<3), (1<<4), (1<<5), (1<<6), (1<<7), (1<<8), (1<<9), (1<<10), (1<<11), (1<<12), (1<<13), (1<<14), (1<<15), (1<<16) ];
763   // The logical AND's in this macro are to shut up static code analysis (aren't really necessary - couldn't find another way to do this)
764   //#define JPGD_HUFF_EXTEND(x, s) (((x) < s_extend_test[s & 15]) ? ((x) + s_extend_offset[s & 15]) : (x))
765   static JPGD_HUFF_EXTEND (int x, int s) nothrow @trusted @nogc { pragma(inline, true); return (((x) < s_extend_test.ptr[s & 15]) ? ((x) + s_extend_offset.ptr[s & 15]) : (x)); }
766 
767   // Clamps a value between 0-255.
768   //static ubyte clamp (int i) { if (cast(uint)(i) > 255) i = (((~i) >> 31) & 0xFF); return cast(ubyte)(i); }
  // `clamp` forwards to CLAMP, which is declared outside this part of the file.
  // NOTE(review): presumably CLAMP behaves like the commented-out implementation above — confirm.
  alias clamp = CLAMP;
770 
771   static struct DCT_Upsample {
772   static:
773     static struct Matrix44 {
774     pure nothrow @trusted @nogc:
775       alias Element_Type = int;
776       enum { NUM_ROWS = 4, NUM_COLS = 4 }
777 
778       Element_Type[NUM_COLS][NUM_ROWS] v;
779 
780       this() (in auto ref Matrix44 m) {
781         foreach (immutable r; 0..NUM_ROWS) v[r][] = m.v[r][];
782       }
783 
784       //@property int rows () const { pragma(inline, true); return NUM_ROWS; }
785       //@property int cols () const { pragma(inline, true); return NUM_COLS; }
786 
787       ref inout(Element_Type) at (int r, int c) inout { pragma(inline, true); return v.ptr[r].ptr[c]; }
788 
789       ref Matrix44 opOpAssign(string op:"+") (in auto ref Matrix44 a) {
790         foreach (int r; 0..NUM_ROWS) {
791           at(r, 0) += a.at(r, 0);
792           at(r, 1) += a.at(r, 1);
793           at(r, 2) += a.at(r, 2);
794           at(r, 3) += a.at(r, 3);
795         }
796         return this;
797       }
798 
799       ref Matrix44 opOpAssign(string op:"-") (in auto ref Matrix44 a) {
800         foreach (int r; 0..NUM_ROWS) {
801           at(r, 0) -= a.at(r, 0);
802           at(r, 1) -= a.at(r, 1);
803           at(r, 2) -= a.at(r, 2);
804           at(r, 3) -= a.at(r, 3);
805         }
806         return this;
807       }
808 
809       Matrix44 opBinary(string op:"+") (in auto ref Matrix44 b) const {
810         alias a = this;
811         Matrix44 ret;
812         foreach (int r; 0..NUM_ROWS) {
813           ret.at(r, 0) = a.at(r, 0) + b.at(r, 0);
814           ret.at(r, 1) = a.at(r, 1) + b.at(r, 1);
815           ret.at(r, 2) = a.at(r, 2) + b.at(r, 2);
816           ret.at(r, 3) = a.at(r, 3) + b.at(r, 3);
817         }
818         return ret;
819       }
820 
821       Matrix44 opBinary(string op:"-") (in auto ref Matrix44 b) const {
822         alias a = this;
823         Matrix44 ret;
824         foreach (int r; 0..NUM_ROWS) {
825           ret.at(r, 0) = a.at(r, 0) - b.at(r, 0);
826           ret.at(r, 1) = a.at(r, 1) - b.at(r, 1);
827           ret.at(r, 2) = a.at(r, 2) - b.at(r, 2);
828           ret.at(r, 3) = a.at(r, 3) - b.at(r, 3);
829         }
830         return ret;
831       }
832 
833       static void add_and_store() (jpgd_block_t* pDst, in auto ref Matrix44 a, in auto ref Matrix44 b) {
834         foreach (int r; 0..4) {
835           pDst[0*8 + r] = cast(jpgd_block_t)(a.at(r, 0) + b.at(r, 0));
836           pDst[1*8 + r] = cast(jpgd_block_t)(a.at(r, 1) + b.at(r, 1));
837           pDst[2*8 + r] = cast(jpgd_block_t)(a.at(r, 2) + b.at(r, 2));
838           pDst[3*8 + r] = cast(jpgd_block_t)(a.at(r, 3) + b.at(r, 3));
839         }
840       }
841 
842       static void sub_and_store() (jpgd_block_t* pDst, in auto ref Matrix44 a, in auto ref Matrix44 b) {
843         foreach (int r; 0..4) {
844           pDst[0*8 + r] = cast(jpgd_block_t)(a.at(r, 0) - b.at(r, 0));
845           pDst[1*8 + r] = cast(jpgd_block_t)(a.at(r, 1) - b.at(r, 1));
846           pDst[2*8 + r] = cast(jpgd_block_t)(a.at(r, 2) - b.at(r, 2));
847           pDst[3*8 + r] = cast(jpgd_block_t)(a.at(r, 3) - b.at(r, 3));
848         }
849       }
850     }
851 
    // Fixed-point format used by the P_Q / R_S calculations: FRACT_BITS fractional bits, so SCALE represents 1.0.
    enum FRACT_BITS = 10;
    enum SCALE = 1 << FRACT_BITS;

    alias Temp_Type = int;

    // D: converts a fixed-point value back to an integer; adding SCALE/2 before the shift rounds instead of truncating.
    static int D(T) (T i) { pragma(inline, true); return (((i) + (SCALE >> 1)) >> FRACT_BITS); }
    // F: compile-time conversion of a float constant to fixed point (multiply by SCALE, add 0.5, then cast to int).
    enum F(float i) = (cast(int)((i) * SCALE + 0.5f));
859 
860     // NUM_ROWS/NUM_COLS = # of non-zero rows/cols in input matrix
    // Computes the two 4x4 matrices P and Q from an 8x8 coefficient block.
    // Template params: coefficients at column >= NUM_COLS or row >= NUM_ROWS are treated as zero,
    // so those reads (and their multiplies) fold away at compile time.
    // NOTE(review): presumably part of the frequency-domain H2V2 chroma upsampling mentioned in the file header — confirm against the caller.
    static struct P_Q(int NUM_ROWS, int NUM_COLS) {
      // Params:
      //   P, Q = outputs; all 16 elements of each are overwritten.
      //   pSrc = source block, read as pSrc[c + r*8] (8 entries per row).
      static void calc (ref Matrix44 P, ref Matrix44 Q, const(jpgd_block_t)* pSrc) {
        //auto AT (int c, int r) nothrow @trusted @nogc { return (c >= NUM_COLS || r >= NUM_ROWS ? 0 : pSrc[c+r*8]); }
        // AT!(c, r) yields source text: the literal "0" outside the non-zero region, otherwise the pSrc read.
        // It is pasted into the expressions below with mixin().
        template AT(int c, int r) {
          static if (c >= NUM_COLS || r >= NUM_ROWS) enum AT = "0"; else enum AT = "pSrc["~c.stringof~"+"~r.stringof~"*8]";
        }
        // 4x8 = 4x8 times 8x8, matrix 0 is constant
        immutable Temp_Type X000 = mixin(AT!(0, 0));
        immutable Temp_Type X001 = mixin(AT!(0, 1));
        immutable Temp_Type X002 = mixin(AT!(0, 2));
        immutable Temp_Type X003 = mixin(AT!(0, 3));
        immutable Temp_Type X004 = mixin(AT!(0, 4));
        immutable Temp_Type X005 = mixin(AT!(0, 5));
        immutable Temp_Type X006 = mixin(AT!(0, 6));
        immutable Temp_Type X007 = mixin(AT!(0, 7));
        immutable Temp_Type X010 = D(F!(0.415735f) * mixin(AT!(1, 0)) + F!(0.791065f) * mixin(AT!(3, 0)) + F!(-0.352443f) * mixin(AT!(5, 0)) + F!(0.277785f) * mixin(AT!(7, 0)));
        immutable Temp_Type X011 = D(F!(0.415735f) * mixin(AT!(1, 1)) + F!(0.791065f) * mixin(AT!(3, 1)) + F!(-0.352443f) * mixin(AT!(5, 1)) + F!(0.277785f) * mixin(AT!(7, 1)));
        immutable Temp_Type X012 = D(F!(0.415735f) * mixin(AT!(1, 2)) + F!(0.791065f) * mixin(AT!(3, 2)) + F!(-0.352443f) * mixin(AT!(5, 2)) + F!(0.277785f) * mixin(AT!(7, 2)));
        immutable Temp_Type X013 = D(F!(0.415735f) * mixin(AT!(1, 3)) + F!(0.791065f) * mixin(AT!(3, 3)) + F!(-0.352443f) * mixin(AT!(5, 3)) + F!(0.277785f) * mixin(AT!(7, 3)));
        immutable Temp_Type X014 = D(F!(0.415735f) * mixin(AT!(1, 4)) + F!(0.791065f) * mixin(AT!(3, 4)) + F!(-0.352443f) * mixin(AT!(5, 4)) + F!(0.277785f) * mixin(AT!(7, 4)));
        immutable Temp_Type X015 = D(F!(0.415735f) * mixin(AT!(1, 5)) + F!(0.791065f) * mixin(AT!(3, 5)) + F!(-0.352443f) * mixin(AT!(5, 5)) + F!(0.277785f) * mixin(AT!(7, 5)));
        immutable Temp_Type X016 = D(F!(0.415735f) * mixin(AT!(1, 6)) + F!(0.791065f) * mixin(AT!(3, 6)) + F!(-0.352443f) * mixin(AT!(5, 6)) + F!(0.277785f) * mixin(AT!(7, 6)));
        immutable Temp_Type X017 = D(F!(0.415735f) * mixin(AT!(1, 7)) + F!(0.791065f) * mixin(AT!(3, 7)) + F!(-0.352443f) * mixin(AT!(5, 7)) + F!(0.277785f) * mixin(AT!(7, 7)));
        immutable Temp_Type X020 = mixin(AT!(4, 0));
        immutable Temp_Type X021 = mixin(AT!(4, 1));
        immutable Temp_Type X022 = mixin(AT!(4, 2));
        immutable Temp_Type X023 = mixin(AT!(4, 3));
        immutable Temp_Type X024 = mixin(AT!(4, 4));
        immutable Temp_Type X025 = mixin(AT!(4, 5));
        immutable Temp_Type X026 = mixin(AT!(4, 6));
        immutable Temp_Type X027 = mixin(AT!(4, 7));
        immutable Temp_Type X030 = D(F!(0.022887f) * mixin(AT!(1, 0)) + F!(-0.097545f) * mixin(AT!(3, 0)) + F!(0.490393f) * mixin(AT!(5, 0)) + F!(0.865723f) * mixin(AT!(7, 0)));
        immutable Temp_Type X031 = D(F!(0.022887f) * mixin(AT!(1, 1)) + F!(-0.097545f) * mixin(AT!(3, 1)) + F!(0.490393f) * mixin(AT!(5, 1)) + F!(0.865723f) * mixin(AT!(7, 1)));
        immutable Temp_Type X032 = D(F!(0.022887f) * mixin(AT!(1, 2)) + F!(-0.097545f) * mixin(AT!(3, 2)) + F!(0.490393f) * mixin(AT!(5, 2)) + F!(0.865723f) * mixin(AT!(7, 2)));
        immutable Temp_Type X033 = D(F!(0.022887f) * mixin(AT!(1, 3)) + F!(-0.097545f) * mixin(AT!(3, 3)) + F!(0.490393f) * mixin(AT!(5, 3)) + F!(0.865723f) * mixin(AT!(7, 3)));
        immutable Temp_Type X034 = D(F!(0.022887f) * mixin(AT!(1, 4)) + F!(-0.097545f) * mixin(AT!(3, 4)) + F!(0.490393f) * mixin(AT!(5, 4)) + F!(0.865723f) * mixin(AT!(7, 4)));
        immutable Temp_Type X035 = D(F!(0.022887f) * mixin(AT!(1, 5)) + F!(-0.097545f) * mixin(AT!(3, 5)) + F!(0.490393f) * mixin(AT!(5, 5)) + F!(0.865723f) * mixin(AT!(7, 5)));
        immutable Temp_Type X036 = D(F!(0.022887f) * mixin(AT!(1, 6)) + F!(-0.097545f) * mixin(AT!(3, 6)) + F!(0.490393f) * mixin(AT!(5, 6)) + F!(0.865723f) * mixin(AT!(7, 6)));
        immutable Temp_Type X037 = D(F!(0.022887f) * mixin(AT!(1, 7)) + F!(-0.097545f) * mixin(AT!(3, 7)) + F!(0.490393f) * mixin(AT!(5, 7)) + F!(0.865723f) * mixin(AT!(7, 7)));

        // 4x4 = 4x8 times 8x4, matrix 1 is constant
        // P: even columns copy an intermediate directly, odd columns are weighted sums of the odd-indexed intermediates of that row.
        P.at(0, 0) = X000;
        P.at(0, 1) = D(X001 * F!(0.415735f) + X003 * F!(0.791065f) + X005 * F!(-0.352443f) + X007 * F!(0.277785f));
        P.at(0, 2) = X004;
        P.at(0, 3) = D(X001 * F!(0.022887f) + X003 * F!(-0.097545f) + X005 * F!(0.490393f) + X007 * F!(0.865723f));
        P.at(1, 0) = X010;
        P.at(1, 1) = D(X011 * F!(0.415735f) + X013 * F!(0.791065f) + X015 * F!(-0.352443f) + X017 * F!(0.277785f));
        P.at(1, 2) = X014;
        P.at(1, 3) = D(X011 * F!(0.022887f) + X013 * F!(-0.097545f) + X015 * F!(0.490393f) + X017 * F!(0.865723f));
        P.at(2, 0) = X020;
        P.at(2, 1) = D(X021 * F!(0.415735f) + X023 * F!(0.791065f) + X025 * F!(-0.352443f) + X027 * F!(0.277785f));
        P.at(2, 2) = X024;
        P.at(2, 3) = D(X021 * F!(0.022887f) + X023 * F!(-0.097545f) + X025 * F!(0.490393f) + X027 * F!(0.865723f));
        P.at(3, 0) = X030;
        P.at(3, 1) = D(X031 * F!(0.415735f) + X033 * F!(0.791065f) + X035 * F!(-0.352443f) + X037 * F!(0.277785f));
        P.at(3, 2) = X034;
        P.at(3, 3) = D(X031 * F!(0.022887f) + X033 * F!(-0.097545f) + X035 * F!(0.490393f) + X037 * F!(0.865723f));
        // 40 muls 24 adds

        // 4x4 = 4x8 times 8x4, matrix 1 is constant
        // Q: odd columns copy an intermediate directly, even columns are weighted sums of the odd-indexed intermediates of that row.
        Q.at(0, 0) = D(X001 * F!(0.906127f) + X003 * F!(-0.318190f) + X005 * F!(0.212608f) + X007 * F!(-0.180240f));
        Q.at(0, 1) = X002;
        Q.at(0, 2) = D(X001 * F!(-0.074658f) + X003 * F!(0.513280f) + X005 * F!(0.768178f) + X007 * F!(-0.375330f));
        Q.at(0, 3) = X006;
        Q.at(1, 0) = D(X011 * F!(0.906127f) + X013 * F!(-0.318190f) + X015 * F!(0.212608f) + X017 * F!(-0.180240f));
        Q.at(1, 1) = X012;
        Q.at(1, 2) = D(X011 * F!(-0.074658f) + X013 * F!(0.513280f) + X015 * F!(0.768178f) + X017 * F!(-0.375330f));
        Q.at(1, 3) = X016;
        Q.at(2, 0) = D(X021 * F!(0.906127f) + X023 * F!(-0.318190f) + X025 * F!(0.212608f) + X027 * F!(-0.180240f));
        Q.at(2, 1) = X022;
        Q.at(2, 2) = D(X021 * F!(-0.074658f) + X023 * F!(0.513280f) + X025 * F!(0.768178f) + X027 * F!(-0.375330f));
        Q.at(2, 3) = X026;
        Q.at(3, 0) = D(X031 * F!(0.906127f) + X033 * F!(-0.318190f) + X035 * F!(0.212608f) + X037 * F!(-0.180240f));
        Q.at(3, 1) = X032;
        Q.at(3, 2) = D(X031 * F!(-0.074658f) + X033 * F!(0.513280f) + X035 * F!(0.768178f) + X037 * F!(-0.375330f));
        Q.at(3, 3) = X036;
        // 40 muls 24 adds
      }
    }
940 
    // Computes the two 4x4 matrices R and S from an 8x8 coefficient block.
    // Template params: coefficients at column >= NUM_COLS or row >= NUM_ROWS are treated as zero,
    // so those reads (and their multiplies) fold away at compile time.
    // NOTE(review): presumably the counterpart of P_Q in the frequency-domain H2V2 chroma upsampling — confirm against the caller.
    static struct R_S(int NUM_ROWS, int NUM_COLS) {
      // Params:
      //   R, S = outputs; all 16 elements of each are overwritten.
      //   pSrc = source block, read as pSrc[c + r*8] (8 entries per row).
      static void calc(ref Matrix44 R, ref Matrix44 S, const(jpgd_block_t)* pSrc) {
        //auto AT (int c, int r) nothrow @trusted @nogc { return (c >= NUM_COLS || r >= NUM_ROWS ? 0 : pSrc[c+r*8]); }
        // AT!(c, r) yields source text: the literal "0" outside the non-zero region, otherwise the pSrc read.
        // It is pasted into the expressions below with mixin().
        template AT(int c, int r) {
          static if (c >= NUM_COLS || r >= NUM_ROWS) enum AT = "0"; else enum AT = "pSrc["~c.stringof~"+"~r.stringof~"*8]";
        }
        // 4x8 = 4x8 times 8x8, matrix 0 is constant
        immutable Temp_Type X100 = D(F!(0.906127f) * mixin(AT!(1, 0)) + F!(-0.318190f) * mixin(AT!(3, 0)) + F!(0.212608f) * mixin(AT!(5, 0)) + F!(-0.180240f) * mixin(AT!(7, 0)));
        immutable Temp_Type X101 = D(F!(0.906127f) * mixin(AT!(1, 1)) + F!(-0.318190f) * mixin(AT!(3, 1)) + F!(0.212608f) * mixin(AT!(5, 1)) + F!(-0.180240f) * mixin(AT!(7, 1)));
        immutable Temp_Type X102 = D(F!(0.906127f) * mixin(AT!(1, 2)) + F!(-0.318190f) * mixin(AT!(3, 2)) + F!(0.212608f) * mixin(AT!(5, 2)) + F!(-0.180240f) * mixin(AT!(7, 2)));
        immutable Temp_Type X103 = D(F!(0.906127f) * mixin(AT!(1, 3)) + F!(-0.318190f) * mixin(AT!(3, 3)) + F!(0.212608f) * mixin(AT!(5, 3)) + F!(-0.180240f) * mixin(AT!(7, 3)));
        immutable Temp_Type X104 = D(F!(0.906127f) * mixin(AT!(1, 4)) + F!(-0.318190f) * mixin(AT!(3, 4)) + F!(0.212608f) * mixin(AT!(5, 4)) + F!(-0.180240f) * mixin(AT!(7, 4)));
        immutable Temp_Type X105 = D(F!(0.906127f) * mixin(AT!(1, 5)) + F!(-0.318190f) * mixin(AT!(3, 5)) + F!(0.212608f) * mixin(AT!(5, 5)) + F!(-0.180240f) * mixin(AT!(7, 5)));
        immutable Temp_Type X106 = D(F!(0.906127f) * mixin(AT!(1, 6)) + F!(-0.318190f) * mixin(AT!(3, 6)) + F!(0.212608f) * mixin(AT!(5, 6)) + F!(-0.180240f) * mixin(AT!(7, 6)));
        immutable Temp_Type X107 = D(F!(0.906127f) * mixin(AT!(1, 7)) + F!(-0.318190f) * mixin(AT!(3, 7)) + F!(0.212608f) * mixin(AT!(5, 7)) + F!(-0.180240f) * mixin(AT!(7, 7)));
        immutable Temp_Type X110 = mixin(AT!(2, 0));
        immutable Temp_Type X111 = mixin(AT!(2, 1));
        immutable Temp_Type X112 = mixin(AT!(2, 2));
        immutable Temp_Type X113 = mixin(AT!(2, 3));
        immutable Temp_Type X114 = mixin(AT!(2, 4));
        immutable Temp_Type X115 = mixin(AT!(2, 5));
        immutable Temp_Type X116 = mixin(AT!(2, 6));
        immutable Temp_Type X117 = mixin(AT!(2, 7));
        immutable Temp_Type X120 = D(F!(-0.074658f) * mixin(AT!(1, 0)) + F!(0.513280f) * mixin(AT!(3, 0)) + F!(0.768178f) * mixin(AT!(5, 0)) + F!(-0.375330f) * mixin(AT!(7, 0)));
        immutable Temp_Type X121 = D(F!(-0.074658f) * mixin(AT!(1, 1)) + F!(0.513280f) * mixin(AT!(3, 1)) + F!(0.768178f) * mixin(AT!(5, 1)) + F!(-0.375330f) * mixin(AT!(7, 1)));
        immutable Temp_Type X122 = D(F!(-0.074658f) * mixin(AT!(1, 2)) + F!(0.513280f) * mixin(AT!(3, 2)) + F!(0.768178f) * mixin(AT!(5, 2)) + F!(-0.375330f) * mixin(AT!(7, 2)));
        immutable Temp_Type X123 = D(F!(-0.074658f) * mixin(AT!(1, 3)) + F!(0.513280f) * mixin(AT!(3, 3)) + F!(0.768178f) * mixin(AT!(5, 3)) + F!(-0.375330f) * mixin(AT!(7, 3)));
        immutable Temp_Type X124 = D(F!(-0.074658f) * mixin(AT!(1, 4)) + F!(0.513280f) * mixin(AT!(3, 4)) + F!(0.768178f) * mixin(AT!(5, 4)) + F!(-0.375330f) * mixin(AT!(7, 4)));
        immutable Temp_Type X125 = D(F!(-0.074658f) * mixin(AT!(1, 5)) + F!(0.513280f) * mixin(AT!(3, 5)) + F!(0.768178f) * mixin(AT!(5, 5)) + F!(-0.375330f) * mixin(AT!(7, 5)));
        immutable Temp_Type X126 = D(F!(-0.074658f) * mixin(AT!(1, 6)) + F!(0.513280f) * mixin(AT!(3, 6)) + F!(0.768178f) * mixin(AT!(5, 6)) + F!(-0.375330f) * mixin(AT!(7, 6)));
        immutable Temp_Type X127 = D(F!(-0.074658f) * mixin(AT!(1, 7)) + F!(0.513280f) * mixin(AT!(3, 7)) + F!(0.768178f) * mixin(AT!(5, 7)) + F!(-0.375330f) * mixin(AT!(7, 7)));
        immutable Temp_Type X130 = mixin(AT!(6, 0));
        immutable Temp_Type X131 = mixin(AT!(6, 1));
        immutable Temp_Type X132 = mixin(AT!(6, 2));
        immutable Temp_Type X133 = mixin(AT!(6, 3));
        immutable Temp_Type X134 = mixin(AT!(6, 4));
        immutable Temp_Type X135 = mixin(AT!(6, 5));
        immutable Temp_Type X136 = mixin(AT!(6, 6));
        immutable Temp_Type X137 = mixin(AT!(6, 7));
        // 80 muls 48 adds

        // 4x4 = 4x8 times 8x4, matrix 1 is constant
        // R: even columns copy an intermediate directly, odd columns are weighted sums of the odd-indexed intermediates of that row.
        R.at(0, 0) = X100;
        R.at(0, 1) = D(X101 * F!(0.415735f) + X103 * F!(0.791065f) + X105 * F!(-0.352443f) + X107 * F!(0.277785f));
        R.at(0, 2) = X104;
        R.at(0, 3) = D(X101 * F!(0.022887f) + X103 * F!(-0.097545f) + X105 * F!(0.490393f) + X107 * F!(0.865723f));
        R.at(1, 0) = X110;
        R.at(1, 1) = D(X111 * F!(0.415735f) + X113 * F!(0.791065f) + X115 * F!(-0.352443f) + X117 * F!(0.277785f));
        R.at(1, 2) = X114;
        R.at(1, 3) = D(X111 * F!(0.022887f) + X113 * F!(-0.097545f) + X115 * F!(0.490393f) + X117 * F!(0.865723f));
        R.at(2, 0) = X120;
        R.at(2, 1) = D(X121 * F!(0.415735f) + X123 * F!(0.791065f) + X125 * F!(-0.352443f) + X127 * F!(0.277785f));
        R.at(2, 2) = X124;
        R.at(2, 3) = D(X121 * F!(0.022887f) + X123 * F!(-0.097545f) + X125 * F!(0.490393f) + X127 * F!(0.865723f));
        R.at(3, 0) = X130;
        R.at(3, 1) = D(X131 * F!(0.415735f) + X133 * F!(0.791065f) + X135 * F!(-0.352443f) + X137 * F!(0.277785f));
        R.at(3, 2) = X134;
        R.at(3, 3) = D(X131 * F!(0.022887f) + X133 * F!(-0.097545f) + X135 * F!(0.490393f) + X137 * F!(0.865723f));
        // 40 muls 24 adds
        // 4x4 = 4x8 times 8x4, matrix 1 is constant
        // S: odd columns copy an intermediate directly, even columns are weighted sums of the odd-indexed intermediates of that row.
        S.at(0, 0) = D(X101 * F!(0.906127f) + X103 * F!(-0.318190f) + X105 * F!(0.212608f) + X107 * F!(-0.180240f));
        S.at(0, 1) = X102;
        S.at(0, 2) = D(X101 * F!(-0.074658f) + X103 * F!(0.513280f) + X105 * F!(0.768178f) + X107 * F!(-0.375330f));
        S.at(0, 3) = X106;
        S.at(1, 0) = D(X111 * F!(0.906127f) + X113 * F!(-0.318190f) + X115 * F!(0.212608f) + X117 * F!(-0.180240f));
        S.at(1, 1) = X112;
        S.at(1, 2) = D(X111 * F!(-0.074658f) + X113 * F!(0.513280f) + X115 * F!(0.768178f) + X117 * F!(-0.375330f));
        S.at(1, 3) = X116;
        S.at(2, 0) = D(X121 * F!(0.906127f) + X123 * F!(-0.318190f) + X125 * F!(0.212608f) + X127 * F!(-0.180240f));
        S.at(2, 1) = X122;
        S.at(2, 2) = D(X121 * F!(-0.074658f) + X123 * F!(0.513280f) + X125 * F!(0.768178f) + X127 * F!(-0.375330f));
        S.at(2, 3) = X126;
        S.at(3, 0) = D(X131 * F!(0.906127f) + X133 * F!(-0.318190f) + X135 * F!(0.212608f) + X137 * F!(-0.180240f));
        S.at(3, 1) = X132;
        S.at(3, 2) = D(X131 * F!(-0.074658f) + X133 * F!(0.513280f) + X135 * F!(0.768178f) + X137 * F!(-0.375330f));
        S.at(3, 3) = X136;
        // 40 muls 24 adds
      }
    }
1020   } // end namespace DCT_Upsample
1021 
1022   // Unconditionally frees all allocated m_blocks.
1023   void free_all_blocks () {
1024     //m_pStream = null;
1025     readfn = null;
1026     for (mem_block *b = m_pMem_blocks; b; ) {
1027       mem_block* n = b.m_pNext;
1028       jpgd_free(b);
1029       b = n;
1030     }
1031     m_pMem_blocks = null;
1032   }
1033 
1034   // This method handles all errors. It will never return.
  // (The C++ original used longjmp here; this D port aborts via assert(false) instead. It could easily be changed to throw an exception.)
  // Params:
  //   status = error code, stored in m_error_code before aborting.
  // All decoder memory blocks are released (and readfn cleared) by free_all_blocks() first.
  /*JPGD_NORETURN*/ void stop_decoding (jpgd_status status) {
    m_error_code = status;
    free_all_blocks();
    //longjmp(m_jmp_state, status);
    // assert(false) is never compiled out in D, so control does not return to the caller.
    assert(false, "jpeg decoding error");
  }
1042 
1043   void* alloc (size_t nSize, bool zero=false) {
1044     nSize = (JPGD_MAX(nSize, 1) + 3) & ~3;
1045     char *rv = null;
1046     for (mem_block *b = m_pMem_blocks; b; b = b.m_pNext)
1047     {
1048       if ((b.m_used_count + nSize) <= b.m_size)
1049       {
1050         rv = b.m_data.ptr + b.m_used_count;
1051         b.m_used_count += nSize;
1052         break;
1053       }
1054     }
1055     if (!rv)
1056     {
1057       int capacity = cast(int) JPGD_MAX(32768 - 256, (nSize + 2047) & ~2047);
1058       mem_block *b = cast(mem_block*)jpgd_malloc(mem_block.sizeof + capacity);
1059       if (!b) { stop_decoding(JPGD_NOTENOUGHMEM); }
1060       b.m_pNext = m_pMem_blocks; m_pMem_blocks = b;
1061       b.m_used_count = nSize;
1062       b.m_size = capacity;
1063       rv = b.m_data.ptr;
1064     }
1065     if (zero) memset(rv, 0, nSize);
1066     return rv;
1067   }
1068 
1069   void word_clear (void *p, ushort c, uint n) {
1070     ubyte *pD = cast(ubyte*)p;
1071     immutable ubyte l = c & 0xFF, h = (c >> 8) & 0xFF;
1072     while (n)
1073     {
1074       pD[0] = l; pD[1] = h; pD += 2;
1075       n--;
1076     }
1077   }
1078 
1079   // Refill the input buffer.
  // This method will sit in a loop until (A) the buffer is full or (B)
  // the stream's read callback reports an end-of-file condition.
1082   void prep_in_buffer () {
1083     m_in_buf_left = 0;
1084     m_pIn_buf_ofs = m_in_buf.ptr;
1085 
1086     if (m_eof_flag)
1087       return;
1088 
1089     do
1090     {
1091       int bytes_read = readfn(m_in_buf.ptr + m_in_buf_left, JPGD_IN_BUF_SIZE - m_in_buf_left, &m_eof_flag);
1092       if (bytes_read == -1)
1093         stop_decoding(JPGD_STREAM_READ);
1094 
1095       m_in_buf_left += bytes_read;
1096     } while ((m_in_buf_left < JPGD_IN_BUF_SIZE) && (!m_eof_flag));
1097 
1098     m_total_bytes_read += m_in_buf_left;
1099 
1100     // Pad the end of the block with M_EOI (prevents the decompressor from going off the rails if the stream is invalid).
1101     // (This dates way back to when this decompressor was written in C/asm, and the all-asm Huffman decoder did some fancy things to increase perf.)
1102     word_clear(m_pIn_buf_ofs + m_in_buf_left, 0xD9FF, 64);
1103   }
1104 
1105   // Read a Huffman code table.
1106   void read_dht_marker () {
1107     int i, index, count;
1108     ubyte[17] huff_num;
1109     ubyte[256] huff_val;
1110 
1111     uint num_left = get_bits(16);
1112 
1113     if (num_left < 2)
1114       stop_decoding(JPGD_BAD_DHT_MARKER);
1115 
1116     num_left -= 2;
1117 
1118     while (num_left)
1119     {
1120       index = get_bits(8);
1121 
1122       huff_num.ptr[0] = 0;
1123 
1124       count = 0;
1125 
1126       for (i = 1; i <= 16; i++)
1127       {
1128         huff_num.ptr[i] = cast(ubyte)(get_bits(8));
1129         count += huff_num.ptr[i];
1130       }
1131 
1132       if (count > 255)
1133         stop_decoding(JPGD_BAD_DHT_COUNTS);
1134 
1135       for (i = 0; i < count; i++)
1136         huff_val.ptr[i] = cast(ubyte)(get_bits(8));
1137 
1138       i = 1 + 16 + count;
1139 
1140       if (num_left < cast(uint)i)
1141         stop_decoding(JPGD_BAD_DHT_MARKER);
1142 
1143       num_left -= i;
1144 
1145       if ((index & 0x10) > 0x10)
1146         stop_decoding(JPGD_BAD_DHT_INDEX);
1147 
1148       index = (index & 0x0F) + ((index & 0x10) >> 4) * (JPGD_MAX_HUFF_TABLES >> 1);
1149 
1150       if (index >= JPGD_MAX_HUFF_TABLES)
1151         stop_decoding(JPGD_BAD_DHT_INDEX);
1152 
1153       if (!m_huff_num.ptr[index])
1154         m_huff_num.ptr[index] = cast(ubyte*)alloc(17);
1155 
1156       if (!m_huff_val.ptr[index])
1157         m_huff_val.ptr[index] = cast(ubyte*)alloc(256);
1158 
1159       m_huff_ac.ptr[index] = (index & 0x10) != 0;
1160       memcpy(m_huff_num.ptr[index], huff_num.ptr, 17);
1161       memcpy(m_huff_val.ptr[index], huff_val.ptr, 256);
1162     }
1163   }
1164 
1165   // Read a quantization table.
1166   void read_dqt_marker () {
1167     int n, i, prec;
1168     uint num_left;
1169     uint temp;
1170 
1171     num_left = get_bits(16);
1172 
1173     if (num_left < 2)
1174       stop_decoding(JPGD_BAD_DQT_MARKER);
1175 
1176     num_left -= 2;
1177 
1178     while (num_left)
1179     {
1180       n = get_bits(8);
1181       prec = n >> 4;
1182       n &= 0x0F;
1183 
1184       if (n >= JPGD_MAX_QUANT_TABLES)
1185         stop_decoding(JPGD_BAD_DQT_TABLE);
1186 
1187       if (!m_quant.ptr[n])
1188         m_quant.ptr[n] = cast(jpgd_quant_t*)alloc(64 * jpgd_quant_t.sizeof);
1189 
1190       // read quantization entries, in zag order
1191       for (i = 0; i < 64; i++)
1192       {
1193         temp = get_bits(8);
1194 
1195         if (prec)
1196           temp = (temp << 8) + get_bits(8);
1197 
1198         m_quant.ptr[n][i] = cast(jpgd_quant_t)(temp);
1199       }
1200 
1201       i = 64 + 1;
1202 
1203       if (prec)
1204         i += 64;
1205 
1206       if (num_left < cast(uint)i)
1207         stop_decoding(JPGD_BAD_DQT_LENGTH);
1208 
1209       num_left -= i;
1210     }
1211   }
1212 
1213   // Read the start of frame (SOF) marker.
1214   void read_sof_marker () {
1215     int i;
1216     uint num_left;
1217 
1218     num_left = get_bits(16);
1219 
1220     if (get_bits(8) != 8)   /* precision: sorry, only 8-bit precision is supported right now */
1221       stop_decoding(JPGD_BAD_PRECISION);
1222 
1223     m_image_y_size = get_bits(16);
1224 
1225     if ((m_image_y_size < 1) || (m_image_y_size > JPGD_MAX_HEIGHT))
1226       stop_decoding(JPGD_BAD_HEIGHT);
1227 
1228     m_image_x_size = get_bits(16);
1229 
1230     if ((m_image_x_size < 1) || (m_image_x_size > JPGD_MAX_WIDTH))
1231       stop_decoding(JPGD_BAD_WIDTH);
1232 
1233     m_comps_in_frame = get_bits(8);
1234 
1235     if (m_comps_in_frame > JPGD_MAX_COMPONENTS)
1236       stop_decoding(JPGD_TOO_MANY_COMPONENTS);
1237 
1238     if (num_left != cast(uint)(m_comps_in_frame * 3 + 8))
1239       stop_decoding(JPGD_BAD_SOF_LENGTH);
1240 
1241     for (i = 0; i < m_comps_in_frame; i++)
1242     {
1243       m_comp_ident.ptr[i]  = get_bits(8);
1244       m_comp_h_samp.ptr[i] = get_bits(4);
1245       m_comp_v_samp.ptr[i] = get_bits(4);
1246       m_comp_quant.ptr[i]  = get_bits(8);
1247     }
1248   }
1249 
1250   // Used to skip unrecognized markers.
1251   void skip_variable_marker () {
1252     uint num_left;
1253 
1254     num_left = get_bits(16);
1255 
1256     if (num_left < 2)
1257       stop_decoding(JPGD_BAD_VARIABLE_MARKER);
1258 
1259     num_left -= 2;
1260 
1261     while (num_left)
1262     {
1263       get_bits(8);
1264       num_left--;
1265     }
1266   }
1267 
1268   // Read a define restart interval (DRI) marker.
1269   void read_dri_marker () {
1270     if (get_bits(16) != 4)
1271       stop_decoding(JPGD_BAD_DRI_LENGTH);
1272 
1273     m_restart_interval = get_bits(16);
1274   }
1275 
1276   // Read a start of scan (SOS) marker.
1277   void read_sos_marker () {
1278     uint num_left;
1279     int i, ci, n, c, cc;
1280 
1281     num_left = get_bits(16);
1282 
1283     n = get_bits(8);
1284 
1285     m_comps_in_scan = n;
1286 
1287     num_left -= 3;
1288 
1289     if ( (num_left != cast(uint)(n * 2 + 3)) || (n < 1) || (n > JPGD_MAX_COMPS_IN_SCAN) )
1290       stop_decoding(JPGD_BAD_SOS_LENGTH);
1291 
1292     for (i = 0; i < n; i++)
1293     {
1294       cc = get_bits(8);
1295       c = get_bits(8);
1296       num_left -= 2;
1297 
1298       for (ci = 0; ci < m_comps_in_frame; ci++)
1299         if (cc == m_comp_ident.ptr[ci])
1300           break;
1301 
1302       if (ci >= m_comps_in_frame)
1303         stop_decoding(JPGD_BAD_SOS_COMP_ID);
1304 
1305       m_comp_list.ptr[i]    = ci;
1306       m_comp_dc_tab.ptr[ci] = (c >> 4) & 15;
1307       m_comp_ac_tab.ptr[ci] = (c & 15) + (JPGD_MAX_HUFF_TABLES >> 1);
1308     }
1309 
1310     m_spectral_start  = get_bits(8);
1311     m_spectral_end    = get_bits(8);
1312     m_successive_high = get_bits(4);
1313     m_successive_low  = get_bits(4);
1314 
1315     if (!m_progressive_flag)
1316     {
1317       m_spectral_start = 0;
1318       m_spectral_end = 63;
1319     }
1320 
1321     num_left -= 3;
1322 
1323     /* read past whatever is num_left */
1324     while (num_left)
1325     {
1326       get_bits(8);
1327       num_left--;
1328     }
1329   }
1330 
1331   // Finds the next marker.
1332   int next_marker () {
1333     uint c, bytes;
1334 
1335     bytes = 0;
1336 
1337     do
1338     {
1339       do
1340       {
1341         bytes++;
1342         c = get_bits(8);
1343       } while (c != 0xFF);
1344 
1345       do
1346       {
1347         c = get_bits(8);
1348       } while (c == 0xFF);
1349 
1350     } while (c == 0);
1351 
1352     // If bytes > 0 here, there where extra bytes before the marker (not good).
1353 
1354     return c;
1355   }
1356 
1357   // Process markers. Returns when an SOFx, SOI, EOI, or SOS marker is
1358   // encountered.
1359   int process_markers () {
1360     int c;
1361 
1362     for ( ; ; ) {
1363       c = next_marker();
1364 
1365       switch (c)
1366       {
1367         case M_SOF0:
1368         case M_SOF1:
1369         case M_SOF2:
1370         case M_SOF3:
1371         case M_SOF5:
1372         case M_SOF6:
1373         case M_SOF7:
1374         //case M_JPG:
1375         case M_SOF9:
1376         case M_SOF10:
1377         case M_SOF11:
1378         case M_SOF13:
1379         case M_SOF14:
1380         case M_SOF15:
1381         case M_SOI:
1382         case M_EOI:
1383         case M_SOS:
1384           return c;
1385         case M_DHT:
1386           read_dht_marker();
1387           break;
1388         // No arithmitic support - dumb patents!
1389         case M_DAC:
1390           stop_decoding(JPGD_NO_ARITHMITIC_SUPPORT);
1391           break;
1392         case M_DQT:
1393           read_dqt_marker();
1394           break;
1395         case M_DRI:
1396           read_dri_marker();
1397           break;
1398         //case M_APP0:  /* no need to read the JFIF marker */
1399 
1400         case M_JPG:
1401         case M_RST0:    /* no parameters */
1402         case M_RST1:
1403         case M_RST2:
1404         case M_RST3:
1405         case M_RST4:
1406         case M_RST5:
1407         case M_RST6:
1408         case M_RST7:
1409         case M_TEM:
1410           stop_decoding(JPGD_UNEXPECTED_MARKER);
1411           break;
1412         default:    /* must be DNL, DHP, EXP, APPn, JPGn, COM, or RESn or APP0 */
1413           skip_variable_marker();
1414           break;
1415       }
1416     }
1417   }
1418 
1419   // Finds the start of image (SOI) marker.
  // This code is rather defensive: it only scans roughly the first 4096 bytes
  // (the original limit was 512) to avoid false positives.
1422   void locate_soi_marker () {
1423     uint lastchar, thischar;
1424     uint bytesleft;
1425 
1426     lastchar = get_bits(8);
1427 
1428     thischar = get_bits(8);
1429 
1430     /* ok if it's a normal JPEG file without a special header */
1431 
1432     if ((lastchar == 0xFF) && (thischar == M_SOI))
1433       return;
1434 
1435     bytesleft = 4096; //512;
1436 
1437     for ( ; ; )
1438     {
1439       if (--bytesleft == 0)
1440         stop_decoding(JPGD_NOT_JPEG);
1441 
1442       lastchar = thischar;
1443 
1444       thischar = get_bits(8);
1445 
1446       if (lastchar == 0xFF)
1447       {
1448         if (thischar == M_SOI)
1449           break;
1450         else if (thischar == M_EOI) // get_bits will keep returning M_EOI if we read past the end
1451           stop_decoding(JPGD_NOT_JPEG);
1452       }
1453     }
1454 
1455     // Check the next character after marker: if it's not 0xFF, it can't be the start of the next marker, so the file is bad.
1456     thischar = (m_bit_buf >> 24) & 0xFF;
1457 
1458     if (thischar != 0xFF)
1459       stop_decoding(JPGD_NOT_JPEG);
1460   }
1461 
1462   // Find a start of frame (SOF) marker.
1463   void locate_sof_marker () {
1464     locate_soi_marker();
1465 
1466     int c = process_markers();
1467 
1468     switch (c)
1469     {
1470       case M_SOF2:
1471         m_progressive_flag = true;
1472         goto case;
1473       case M_SOF0:  /* baseline DCT */
1474       case M_SOF1:  /* extended sequential DCT */
1475         read_sof_marker();
1476         break;
1477       case M_SOF9:  /* Arithmitic coding */
1478         stop_decoding(JPGD_NO_ARITHMITIC_SUPPORT);
1479         break;
1480       default:
1481         stop_decoding(JPGD_UNSUPPORTED_MARKER);
1482         break;
1483     }
1484   }
1485 
1486   // Find a start of scan (SOS) marker.
1487   int locate_sos_marker () {
1488     int c;
1489 
1490     c = process_markers();
1491 
1492     if (c == M_EOI)
1493       return false;
1494     else if (c != M_SOS)
1495       stop_decoding(JPGD_UNEXPECTED_MARKER);
1496 
1497     read_sos_marker();
1498 
1499     return true;
1500   }
1501 
1502   // Reset everything to default/uninitialized state.
1503   void initit (JpegStreamReadFunc rfn) {
1504     m_pMem_blocks = null;
1505     m_error_code = JPGD_SUCCESS;
1506     m_ready_flag = false;
1507     m_image_x_size = m_image_y_size = 0;
1508     readfn = rfn;
1509     m_progressive_flag = false;
1510 
1511     memset(m_huff_ac.ptr, 0, m_huff_ac.sizeof);
1512     memset(m_huff_num.ptr, 0, m_huff_num.sizeof);
1513     memset(m_huff_val.ptr, 0, m_huff_val.sizeof);
1514     memset(m_quant.ptr, 0, m_quant.sizeof);
1515 
1516     m_scan_type = 0;
1517     m_comps_in_frame = 0;
1518 
1519     memset(m_comp_h_samp.ptr, 0, m_comp_h_samp.sizeof);
1520     memset(m_comp_v_samp.ptr, 0, m_comp_v_samp.sizeof);
1521     memset(m_comp_quant.ptr, 0, m_comp_quant.sizeof);
1522     memset(m_comp_ident.ptr, 0, m_comp_ident.sizeof);
1523     memset(m_comp_h_blocks.ptr, 0, m_comp_h_blocks.sizeof);
1524     memset(m_comp_v_blocks.ptr, 0, m_comp_v_blocks.sizeof);
1525 
1526     m_comps_in_scan = 0;
1527     memset(m_comp_list.ptr, 0, m_comp_list.sizeof);
1528     memset(m_comp_dc_tab.ptr, 0, m_comp_dc_tab.sizeof);
1529     memset(m_comp_ac_tab.ptr, 0, m_comp_ac_tab.sizeof);
1530 
1531     m_spectral_start = 0;
1532     m_spectral_end = 0;
1533     m_successive_low = 0;
1534     m_successive_high = 0;
1535     m_max_mcu_x_size = 0;
1536     m_max_mcu_y_size = 0;
1537     m_blocks_per_mcu = 0;
1538     m_max_blocks_per_row = 0;
1539     m_mcus_per_row = 0;
1540     m_mcus_per_col = 0;
1541     m_expanded_blocks_per_component = 0;
1542     m_expanded_blocks_per_mcu = 0;
1543     m_expanded_blocks_per_row = 0;
1544     m_freq_domain_chroma_upsample = false;
1545 
1546     memset(m_mcu_org.ptr, 0, m_mcu_org.sizeof);
1547 
1548     m_total_lines_left = 0;
1549     m_mcu_lines_left = 0;
1550     m_real_dest_bytes_per_scan_line = 0;
1551     m_dest_bytes_per_scan_line = 0;
1552     m_dest_bytes_per_pixel = 0;
1553 
1554     memset(m_pHuff_tabs.ptr, 0, m_pHuff_tabs.sizeof);
1555 
1556     memset(m_dc_coeffs.ptr, 0, m_dc_coeffs.sizeof);
1557     memset(m_ac_coeffs.ptr, 0, m_ac_coeffs.sizeof);
1558     memset(m_block_y_mcu.ptr, 0, m_block_y_mcu.sizeof);
1559 
1560     m_eob_run = 0;
1561 
1562     memset(m_block_y_mcu.ptr, 0, m_block_y_mcu.sizeof);
1563 
1564     m_pIn_buf_ofs = m_in_buf.ptr;
1565     m_in_buf_left = 0;
1566     m_eof_flag = false;
1567     m_tem_flag = 0;
1568 
1569     memset(m_in_buf_pad_start.ptr, 0, m_in_buf_pad_start.sizeof);
1570     memset(m_in_buf.ptr, 0, m_in_buf.sizeof);
1571     memset(m_in_buf_pad_end.ptr, 0, m_in_buf_pad_end.sizeof);
1572 
1573     m_restart_interval = 0;
1574     m_restarts_left    = 0;
1575     m_next_restart_num = 0;
1576 
1577     m_max_mcus_per_row = 0;
1578     m_max_blocks_per_mcu = 0;
1579     m_max_mcus_per_col = 0;
1580 
1581     memset(m_last_dc_val.ptr, 0, m_last_dc_val.sizeof);
1582     m_pMCU_coefficients = null;
1583     m_pSample_buf = null;
1584 
1585     m_total_bytes_read = 0;
1586 
1587     m_pScan_line_0 = null;
1588     m_pScan_line_1 = null;
1589 
1590     // Ready the input buffer.
1591     prep_in_buffer();
1592 
1593     // Prime the bit buffer.
1594     m_bits_left = 16;
1595     m_bit_buf = 0;
1596 
1597     get_bits(16);
1598     get_bits(16);
1599 
1600     for (int i = 0; i < JPGD_MAX_BLOCKS_PER_MCU; i++)
1601       m_mcu_block_max_zag.ptr[i] = 64;
1602   }
1603 
1604   enum SCALEBITS = 16;
1605   enum ONE_HALF = (cast(int) 1 << (SCALEBITS-1));
1606   enum FIX(float x) = (cast(int)((x) * (1L<<SCALEBITS) + 0.5f));
1607 
1608   // Create a few tables that allow us to quickly convert YCbCr to RGB.
1609   void create_look_ups () {
1610     for (int i = 0; i <= 255; i++)
1611     {
1612       int k = i - 128;
1613       m_crr.ptr[i] = ( FIX!(1.40200f)  * k + ONE_HALF) >> SCALEBITS;
1614       m_cbb.ptr[i] = ( FIX!(1.77200f)  * k + ONE_HALF) >> SCALEBITS;
1615       m_crg.ptr[i] = (-FIX!(0.71414f)) * k;
1616       m_cbg.ptr[i] = (-FIX!(0.34414f)) * k + ONE_HALF;
1617     }
1618   }
1619 
1620   // This method throws back into the stream any bytes that where read
1621   // into the bit buffer during initial marker scanning.
1622   void fix_in_buffer () {
1623     // In case any 0xFF's where pulled into the buffer during marker scanning.
1624     assert((m_bits_left & 7) == 0);
1625 
1626     if (m_bits_left == 16)
1627       stuff_char(cast(ubyte)(m_bit_buf & 0xFF));
1628 
1629     if (m_bits_left >= 8)
1630       stuff_char(cast(ubyte)((m_bit_buf >> 8) & 0xFF));
1631 
1632     stuff_char(cast(ubyte)((m_bit_buf >> 16) & 0xFF));
1633     stuff_char(cast(ubyte)((m_bit_buf >> 24) & 0xFF));
1634 
1635     m_bits_left = 16;
1636     get_bits_no_markers(16);
1637     get_bits_no_markers(16);
1638   }
1639 
1640   void transform_mcu (int mcu_row) {
1641     jpgd_block_t* pSrc_ptr = m_pMCU_coefficients;
1642     ubyte* pDst_ptr = m_pSample_buf + mcu_row * m_blocks_per_mcu * 64;
1643 
1644     for (int mcu_block = 0; mcu_block < m_blocks_per_mcu; mcu_block++)
1645     {
1646       idct(pSrc_ptr, pDst_ptr, m_mcu_block_max_zag.ptr[mcu_block]);
1647       pSrc_ptr += 64;
1648       pDst_ptr += 64;
1649     }
1650   }
1651 
1652   static immutable ubyte[64] s_max_rc = [
1653     17, 18, 34, 50, 50, 51, 52, 52, 52, 68, 84, 84, 84, 84, 85, 86, 86, 86, 86, 86,
1654     102, 118, 118, 118, 118, 118, 118, 119, 120, 120, 120, 120, 120, 120, 120, 136,
1655     136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136,
1656     136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136
1657   ];
1658 
1659   void transform_mcu_expand (int mcu_row) {
1660     jpgd_block_t* pSrc_ptr = m_pMCU_coefficients;
1661     ubyte* pDst_ptr = m_pSample_buf + mcu_row * m_expanded_blocks_per_mcu * 64;
1662 
1663     // Y IDCT
1664     int mcu_block;
1665     for (mcu_block = 0; mcu_block < m_expanded_blocks_per_component; mcu_block++)
1666     {
1667       idct(pSrc_ptr, pDst_ptr, m_mcu_block_max_zag.ptr[mcu_block]);
1668       pSrc_ptr += 64;
1669       pDst_ptr += 64;
1670     }
1671 
1672     // Chroma IDCT, with upsampling
1673     jpgd_block_t[64] temp_block;
1674 
1675     for (int i = 0; i < 2; i++)
1676     {
1677       DCT_Upsample.Matrix44 P, Q, R, S;
1678 
1679       assert(m_mcu_block_max_zag.ptr[mcu_block] >= 1);
1680       assert(m_mcu_block_max_zag.ptr[mcu_block] <= 64);
1681 
1682       int max_zag = m_mcu_block_max_zag.ptr[mcu_block++] - 1;
1683       if (max_zag <= 0) max_zag = 0; // should never happen, only here to shut up static analysis
1684       switch (s_max_rc.ptr[max_zag])
1685       {
1686       case 1*16+1:
1687         DCT_Upsample.P_Q!(1, 1).calc(P, Q, pSrc_ptr);
1688         DCT_Upsample.R_S!(1, 1).calc(R, S, pSrc_ptr);
1689         break;
1690       case 1*16+2:
1691         DCT_Upsample.P_Q!(1, 2).calc(P, Q, pSrc_ptr);
1692         DCT_Upsample.R_S!(1, 2).calc(R, S, pSrc_ptr);
1693         break;
1694       case 2*16+2:
1695         DCT_Upsample.P_Q!(2, 2).calc(P, Q, pSrc_ptr);
1696         DCT_Upsample.R_S!(2, 2).calc(R, S, pSrc_ptr);
1697         break;
1698       case 3*16+2:
1699         DCT_Upsample.P_Q!(3, 2).calc(P, Q, pSrc_ptr);
1700         DCT_Upsample.R_S!(3, 2).calc(R, S, pSrc_ptr);
1701         break;
1702       case 3*16+3:
1703         DCT_Upsample.P_Q!(3, 3).calc(P, Q, pSrc_ptr);
1704         DCT_Upsample.R_S!(3, 3).calc(R, S, pSrc_ptr);
1705         break;
1706       case 3*16+4:
1707         DCT_Upsample.P_Q!(3, 4).calc(P, Q, pSrc_ptr);
1708         DCT_Upsample.R_S!(3, 4).calc(R, S, pSrc_ptr);
1709         break;
1710       case 4*16+4:
1711         DCT_Upsample.P_Q!(4, 4).calc(P, Q, pSrc_ptr);
1712         DCT_Upsample.R_S!(4, 4).calc(R, S, pSrc_ptr);
1713         break;
1714       case 5*16+4:
1715         DCT_Upsample.P_Q!(5, 4).calc(P, Q, pSrc_ptr);
1716         DCT_Upsample.R_S!(5, 4).calc(R, S, pSrc_ptr);
1717         break;
1718       case 5*16+5:
1719         DCT_Upsample.P_Q!(5, 5).calc(P, Q, pSrc_ptr);
1720         DCT_Upsample.R_S!(5, 5).calc(R, S, pSrc_ptr);
1721         break;
1722       case 5*16+6:
1723         DCT_Upsample.P_Q!(5, 6).calc(P, Q, pSrc_ptr);
1724         DCT_Upsample.R_S!(5, 6).calc(R, S, pSrc_ptr);
1725         break;
1726       case 6*16+6:
1727         DCT_Upsample.P_Q!(6, 6).calc(P, Q, pSrc_ptr);
1728         DCT_Upsample.R_S!(6, 6).calc(R, S, pSrc_ptr);
1729         break;
1730       case 7*16+6:
1731         DCT_Upsample.P_Q!(7, 6).calc(P, Q, pSrc_ptr);
1732         DCT_Upsample.R_S!(7, 6).calc(R, S, pSrc_ptr);
1733         break;
1734       case 7*16+7:
1735         DCT_Upsample.P_Q!(7, 7).calc(P, Q, pSrc_ptr);
1736         DCT_Upsample.R_S!(7, 7).calc(R, S, pSrc_ptr);
1737         break;
1738       case 7*16+8:
1739         DCT_Upsample.P_Q!(7, 8).calc(P, Q, pSrc_ptr);
1740         DCT_Upsample.R_S!(7, 8).calc(R, S, pSrc_ptr);
1741         break;
1742       case 8*16+8:
1743         DCT_Upsample.P_Q!(8, 8).calc(P, Q, pSrc_ptr);
1744         DCT_Upsample.R_S!(8, 8).calc(R, S, pSrc_ptr);
1745         break;
1746       default:
1747         assert(false);
1748       }
1749 
1750       auto a = DCT_Upsample.Matrix44(P + Q);
1751       P -= Q;
1752       DCT_Upsample.Matrix44* b = &P;
1753       auto c = DCT_Upsample.Matrix44(R + S);
1754       R -= S;
1755       DCT_Upsample.Matrix44* d = &R;
1756 
1757       DCT_Upsample.Matrix44.add_and_store(temp_block.ptr, a, c);
1758       idct_4x4(temp_block.ptr, pDst_ptr);
1759       pDst_ptr += 64;
1760 
1761       DCT_Upsample.Matrix44.sub_and_store(temp_block.ptr, a, c);
1762       idct_4x4(temp_block.ptr, pDst_ptr);
1763       pDst_ptr += 64;
1764 
1765       DCT_Upsample.Matrix44.add_and_store(temp_block.ptr, *b, *d);
1766       idct_4x4(temp_block.ptr, pDst_ptr);
1767       pDst_ptr += 64;
1768 
1769       DCT_Upsample.Matrix44.sub_and_store(temp_block.ptr, *b, *d);
1770       idct_4x4(temp_block.ptr, pDst_ptr);
1771       pDst_ptr += 64;
1772 
1773       pSrc_ptr += 64;
1774     }
1775   }
1776 
1777   // Loads and dequantizes the next row of (already decoded) coefficients.
1778   // Progressive images only.
1779   void load_next_row () {
1780     int i;
1781     jpgd_block_t *p;
1782     jpgd_quant_t *q;
1783     int mcu_row, mcu_block, row_block = 0;
1784     int component_num, component_id;
1785     int[JPGD_MAX_COMPONENTS] block_x_mcu;
1786 
1787     memset(block_x_mcu.ptr, 0, JPGD_MAX_COMPONENTS * int.sizeof);
1788 
1789     for (mcu_row = 0; mcu_row < m_mcus_per_row; mcu_row++)
1790     {
1791       int block_x_mcu_ofs = 0, block_y_mcu_ofs = 0;
1792 
1793       for (mcu_block = 0; mcu_block < m_blocks_per_mcu; mcu_block++)
1794       {
1795         component_id = m_mcu_org.ptr[mcu_block];
1796         q = m_quant.ptr[m_comp_quant.ptr[component_id]];
1797 
1798         p = m_pMCU_coefficients + 64 * mcu_block;
1799 
1800         jpgd_block_t* pAC = coeff_buf_getp(m_ac_coeffs.ptr[component_id], block_x_mcu.ptr[component_id] + block_x_mcu_ofs, m_block_y_mcu.ptr[component_id] + block_y_mcu_ofs);
1801         jpgd_block_t* pDC = coeff_buf_getp(m_dc_coeffs.ptr[component_id], block_x_mcu.ptr[component_id] + block_x_mcu_ofs, m_block_y_mcu.ptr[component_id] + block_y_mcu_ofs);
1802         p[0] = pDC[0];
1803         memcpy(&p[1], &pAC[1], 63 * jpgd_block_t.sizeof);
1804 
1805         for (i = 63; i > 0; i--)
1806           if (p[g_ZAG[i]])
1807             break;
1808 
1809         m_mcu_block_max_zag.ptr[mcu_block] = i + 1;
1810 
1811         for ( ; i >= 0; i--)
1812           if (p[g_ZAG[i]])
1813             p[g_ZAG[i]] = cast(jpgd_block_t)(p[g_ZAG[i]] * q[i]);
1814 
1815         row_block++;
1816 
1817         if (m_comps_in_scan == 1)
1818           block_x_mcu.ptr[component_id]++;
1819         else
1820         {
1821           if (++block_x_mcu_ofs == m_comp_h_samp.ptr[component_id])
1822           {
1823             block_x_mcu_ofs = 0;
1824 
1825             if (++block_y_mcu_ofs == m_comp_v_samp.ptr[component_id])
1826             {
1827               block_y_mcu_ofs = 0;
1828 
1829               block_x_mcu.ptr[component_id] += m_comp_h_samp.ptr[component_id];
1830             }
1831           }
1832         }
1833       }
1834 
1835       if (m_freq_domain_chroma_upsample)
1836         transform_mcu_expand(mcu_row);
1837       else
1838         transform_mcu(mcu_row);
1839     }
1840 
1841     if (m_comps_in_scan == 1)
1842       m_block_y_mcu.ptr[m_comp_list.ptr[0]]++;
1843     else
1844     {
1845       for (component_num = 0; component_num < m_comps_in_scan; component_num++)
1846       {
1847         component_id = m_comp_list.ptr[component_num];
1848 
1849         m_block_y_mcu.ptr[component_id] += m_comp_v_samp.ptr[component_id];
1850       }
1851     }
1852   }
1853 
1854   // Restart interval processing.
1855   void process_restart () {
1856     int i;
1857     int c = 0;
1858 
1859     // Align to a byte boundry
1860     // FIXME: Is this really necessary? get_bits_no_markers() never reads in markers!
1861     //get_bits_no_markers(m_bits_left & 7);
1862 
1863     // Let's scan a little bit to find the marker, but not _too_ far.
1864     // 1536 is a "fudge factor" that determines how much to scan.
1865     for (i = 1536; i > 0; i--)
1866       if (get_char() == 0xFF)
1867         break;
1868 
1869     if (i == 0)
1870       stop_decoding(JPGD_BAD_RESTART_MARKER);
1871 
1872     for ( ; i > 0; i--)
1873       if ((c = get_char()) != 0xFF)
1874         break;
1875 
1876     if (i == 0)
1877       stop_decoding(JPGD_BAD_RESTART_MARKER);
1878 
1879     // Is it the expected marker? If not, something bad happened.
1880     if (c != (m_next_restart_num + M_RST0))
1881       stop_decoding(JPGD_BAD_RESTART_MARKER);
1882 
1883     // Reset each component's DC prediction values.
1884     memset(&m_last_dc_val, 0, m_comps_in_frame * uint.sizeof);
1885 
1886     m_eob_run = 0;
1887 
1888     m_restarts_left = m_restart_interval;
1889 
1890     m_next_restart_num = (m_next_restart_num + 1) & 7;
1891 
1892     // Get the bit buffer going again...
1893 
1894     m_bits_left = 16;
1895     get_bits_no_markers(16);
1896     get_bits_no_markers(16);
1897   }
1898 
1899   static int dequantize_ac (int c, int q) { pragma(inline, true); c *= q; return c; }
1900 
1901   // Decodes and dequantizes the next row of coefficients.
1902   void decode_next_row () {
1903     int row_block = 0;
1904 
1905     for (int mcu_row = 0; mcu_row < m_mcus_per_row; mcu_row++)
1906     {
1907       if ((m_restart_interval) && (m_restarts_left == 0))
1908         process_restart();
1909 
1910       jpgd_block_t* p = m_pMCU_coefficients;
1911       for (int mcu_block = 0; mcu_block < m_blocks_per_mcu; mcu_block++, p += 64)
1912       {
1913         int component_id = m_mcu_org.ptr[mcu_block];
1914         jpgd_quant_t* q = m_quant.ptr[m_comp_quant.ptr[component_id]];
1915 
1916         int r, s;
1917         s = huff_decode(m_pHuff_tabs.ptr[m_comp_dc_tab.ptr[component_id]], r);
1918         s = JPGD_HUFF_EXTEND(r, s);
1919 
1920         m_last_dc_val.ptr[component_id] = (s += m_last_dc_val.ptr[component_id]);
1921 
1922         p[0] = cast(jpgd_block_t)(s * q[0]);
1923 
1924         int prev_num_set = m_mcu_block_max_zag.ptr[mcu_block];
1925 
1926         huff_tables *pH = m_pHuff_tabs.ptr[m_comp_ac_tab.ptr[component_id]];
1927 
1928         int k;
1929         for (k = 1; k < 64; k++)
1930         {
1931           int extra_bits;
1932           s = huff_decode(pH, extra_bits);
1933 
1934           r = s >> 4;
1935           s &= 15;
1936 
1937           if (s)
1938           {
1939             if (r)
1940             {
1941               if ((k + r) > 63)
1942                 stop_decoding(JPGD_DECODE_ERROR);
1943 
1944               if (k < prev_num_set)
1945               {
1946                 int n = JPGD_MIN(r, prev_num_set - k);
1947                 int kt = k;
1948                 while (n--)
1949                   p[g_ZAG[kt++]] = 0;
1950               }
1951 
1952               k += r;
1953             }
1954 
1955             s = JPGD_HUFF_EXTEND(extra_bits, s);
1956 
1957             assert(k < 64);
1958 
1959             p[g_ZAG[k]] = cast(jpgd_block_t)(dequantize_ac(s, q[k])); //s * q[k];
1960           }
1961           else
1962           {
1963             if (r == 15)
1964             {
1965               if ((k + 16) > 64)
1966                 stop_decoding(JPGD_DECODE_ERROR);
1967 
1968               if (k < prev_num_set)
1969               {
1970                 int n = JPGD_MIN(16, prev_num_set - k);
1971                 int kt = k;
1972                 while (n--)
1973                 {
1974                   assert(kt <= 63);
1975                   p[g_ZAG[kt++]] = 0;
1976                 }
1977               }
1978 
1979               k += 16 - 1; // - 1 because the loop counter is k
1980               assert(p[g_ZAG[k]] == 0);
1981             }
1982             else
1983               break;
1984           }
1985         }
1986 
1987         if (k < prev_num_set)
1988         {
1989           int kt = k;
1990           while (kt < prev_num_set)
1991             p[g_ZAG[kt++]] = 0;
1992         }
1993 
1994         m_mcu_block_max_zag.ptr[mcu_block] = k;
1995 
1996         row_block++;
1997       }
1998 
1999       if (m_freq_domain_chroma_upsample)
2000         transform_mcu_expand(mcu_row);
2001       else
2002         transform_mcu(mcu_row);
2003 
2004       m_restarts_left--;
2005     }
2006   }
2007 
2008   // YCbCr H1V1 (1x1:1:1, 3 m_blocks per MCU) to RGB
2009   void H1V1Convert () {
2010     int row = m_max_mcu_y_size - m_mcu_lines_left;
2011     ubyte *d = m_pScan_line_0;
2012     ubyte *s = m_pSample_buf + row * 8;
2013 
2014     for (int i = m_max_mcus_per_row; i > 0; i--)
2015     {
2016       for (int j = 0; j < 8; j++)
2017       {
2018         int y = s[j];
2019         int cb = s[64+j];
2020         int cr = s[128+j];
2021 
2022         d[0] = clamp(y + m_crr.ptr[cr]);
2023         d[1] = clamp(y + ((m_crg.ptr[cr] + m_cbg.ptr[cb]) >> 16));
2024         d[2] = clamp(y + m_cbb.ptr[cb]);
2025         d[3] = 255;
2026 
2027         d += 4;
2028       }
2029 
2030       s += 64*3;
2031     }
2032   }
2033 
2034   // YCbCr H2V1 (2x1:1:1, 4 m_blocks per MCU) to RGB
2035   void H2V1Convert () {
2036     int row = m_max_mcu_y_size - m_mcu_lines_left;
2037     ubyte *d0 = m_pScan_line_0;
2038     ubyte *y = m_pSample_buf + row * 8;
2039     ubyte *c = m_pSample_buf + 2*64 + row * 8;
2040 
2041     for (int i = m_max_mcus_per_row; i > 0; i--)
2042     {
2043       for (int l = 0; l < 2; l++)
2044       {
2045         for (int j = 0; j < 4; j++)
2046         {
2047           int cb = c[0];
2048           int cr = c[64];
2049 
2050           int rc = m_crr.ptr[cr];
2051           int gc = ((m_crg.ptr[cr] + m_cbg.ptr[cb]) >> 16);
2052           int bc = m_cbb.ptr[cb];
2053 
2054           int yy = y[j<<1];
2055           d0[0] = clamp(yy+rc);
2056           d0[1] = clamp(yy+gc);
2057           d0[2] = clamp(yy+bc);
2058           d0[3] = 255;
2059 
2060           yy = y[(j<<1)+1];
2061           d0[4] = clamp(yy+rc);
2062           d0[5] = clamp(yy+gc);
2063           d0[6] = clamp(yy+bc);
2064           d0[7] = 255;
2065 
2066           d0 += 8;
2067 
2068           c++;
2069         }
2070         y += 64;
2071       }
2072 
2073       y += 64*4 - 64*2;
2074       c += 64*4 - 8;
2075     }
2076   }
2077 
2078   // YCbCr H2V1 (1x2:1:1, 4 m_blocks per MCU) to RGB
2079   void H1V2Convert () {
2080     int row = m_max_mcu_y_size - m_mcu_lines_left;
2081     ubyte *d0 = m_pScan_line_0;
2082     ubyte *d1 = m_pScan_line_1;
2083     ubyte *y;
2084     ubyte *c;
2085 
2086     if (row < 8)
2087       y = m_pSample_buf + row * 8;
2088     else
2089       y = m_pSample_buf + 64*1 + (row & 7) * 8;
2090 
2091     c = m_pSample_buf + 64*2 + (row >> 1) * 8;
2092 
2093     for (int i = m_max_mcus_per_row; i > 0; i--)
2094     {
2095       for (int j = 0; j < 8; j++)
2096       {
2097         int cb = c[0+j];
2098         int cr = c[64+j];
2099 
2100         int rc = m_crr.ptr[cr];
2101         int gc = ((m_crg.ptr[cr] + m_cbg.ptr[cb]) >> 16);
2102         int bc = m_cbb.ptr[cb];
2103 
2104         int yy = y[j];
2105         d0[0] = clamp(yy+rc);
2106         d0[1] = clamp(yy+gc);
2107         d0[2] = clamp(yy+bc);
2108         d0[3] = 255;
2109 
2110         yy = y[8+j];
2111         d1[0] = clamp(yy+rc);
2112         d1[1] = clamp(yy+gc);
2113         d1[2] = clamp(yy+bc);
2114         d1[3] = 255;
2115 
2116         d0 += 4;
2117         d1 += 4;
2118       }
2119 
2120       y += 64*4;
2121       c += 64*4;
2122     }
2123   }
2124 
2125   // YCbCr H2V2 (2x2:1:1, 6 m_blocks per MCU) to RGB
2126   void H2V2Convert () {
2127     int row = m_max_mcu_y_size - m_mcu_lines_left;
2128     ubyte *d0 = m_pScan_line_0;
2129     ubyte *d1 = m_pScan_line_1;
2130     ubyte *y;
2131     ubyte *c;
2132 
2133     if (row < 8)
2134       y = m_pSample_buf + row * 8;
2135     else
2136       y = m_pSample_buf + 64*2 + (row & 7) * 8;
2137 
2138     c = m_pSample_buf + 64*4 + (row >> 1) * 8;
2139 
2140     for (int i = m_max_mcus_per_row; i > 0; i--)
2141     {
2142       for (int l = 0; l < 2; l++)
2143       {
2144         for (int j = 0; j < 8; j += 2)
2145         {
2146           int cb = c[0];
2147           int cr = c[64];
2148 
2149           int rc = m_crr.ptr[cr];
2150           int gc = ((m_crg.ptr[cr] + m_cbg.ptr[cb]) >> 16);
2151           int bc = m_cbb.ptr[cb];
2152 
2153           int yy = y[j];
2154           d0[0] = clamp(yy+rc);
2155           d0[1] = clamp(yy+gc);
2156           d0[2] = clamp(yy+bc);
2157           d0[3] = 255;
2158 
2159           yy = y[j+1];
2160           d0[4] = clamp(yy+rc);
2161           d0[5] = clamp(yy+gc);
2162           d0[6] = clamp(yy+bc);
2163           d0[7] = 255;
2164 
2165           yy = y[j+8];
2166           d1[0] = clamp(yy+rc);
2167           d1[1] = clamp(yy+gc);
2168           d1[2] = clamp(yy+bc);
2169           d1[3] = 255;
2170 
2171           yy = y[j+8+1];
2172           d1[4] = clamp(yy+rc);
2173           d1[5] = clamp(yy+gc);
2174           d1[6] = clamp(yy+bc);
2175           d1[7] = 255;
2176 
2177           d0 += 8;
2178           d1 += 8;
2179 
2180           c++;
2181         }
2182         y += 64;
2183       }
2184 
2185       y += 64*6 - 64*2;
2186       c += 64*6 - 8;
2187     }
2188   }
2189 
2190   // Y (1 block per MCU) to 8-bit grayscale
2191   void gray_convert () {
2192     int row = m_max_mcu_y_size - m_mcu_lines_left;
2193     ubyte *d = m_pScan_line_0;
2194     ubyte *s = m_pSample_buf + row * 8;
2195 
2196     for (int i = m_max_mcus_per_row; i > 0; i--)
2197     {
2198       *cast(uint*)d = *cast(uint*)s;
2199       *cast(uint*)(&d[4]) = *cast(uint*)(&s[4]);
2200 
2201       s += 64;
2202       d += 8;
2203     }
2204   }
2205 
2206   void expanded_convert () {
2207     int row = m_max_mcu_y_size - m_mcu_lines_left;
2208 
2209     ubyte* Py = m_pSample_buf + (row / 8) * 64 * m_comp_h_samp.ptr[0] + (row & 7) * 8;
2210 
2211     ubyte* d = m_pScan_line_0;
2212 
2213     for (int i = m_max_mcus_per_row; i > 0; i--)
2214     {
2215       for (int k = 0; k < m_max_mcu_x_size; k += 8)
2216       {
2217         immutable int Y_ofs = k * 8;
2218         immutable int Cb_ofs = Y_ofs + 64 * m_expanded_blocks_per_component;
2219         immutable int Cr_ofs = Y_ofs + 64 * m_expanded_blocks_per_component * 2;
2220         for (int j = 0; j < 8; j++)
2221         {
2222           int y = Py[Y_ofs + j];
2223           int cb = Py[Cb_ofs + j];
2224           int cr = Py[Cr_ofs + j];
2225 
2226           d[0] = clamp(y + m_crr.ptr[cr]);
2227           d[1] = clamp(y + ((m_crg.ptr[cr] + m_cbg.ptr[cb]) >> 16));
2228           d[2] = clamp(y + m_cbb.ptr[cb]);
2229           d[3] = 255;
2230 
2231           d += 4;
2232         }
2233       }
2234 
2235       Py += 64 * m_expanded_blocks_per_mcu;
2236     }
2237   }
2238 
2239   // Find end of image (EOI) marker, so we can return to the user the exact size of the input stream.
2240   void find_eoi () {
2241     if (!m_progressive_flag)
2242     {
2243       // Attempt to read the EOI marker.
2244       //get_bits_no_markers(m_bits_left & 7);
2245 
2246       // Prime the bit buffer
2247       m_bits_left = 16;
2248       get_bits(16);
2249       get_bits(16);
2250 
2251       // The next marker _should_ be EOI
2252       process_markers();
2253     }
2254 
2255     m_total_bytes_read -= m_in_buf_left;
2256   }
2257 
2258   // Creates the tables needed for efficient Huffman decoding.
2259   void make_huff_table (int index, huff_tables *pH) {
2260     int p, i, l, si;
2261     ubyte[257] huffsize;
2262     uint[257] huffcode;
2263     uint code;
2264     uint subtree;
2265     int code_size;
2266     int lastp;
2267     int nextfreeentry;
2268     int currententry;
2269 
2270     pH.ac_table = m_huff_ac.ptr[index] != 0;
2271 
2272     p = 0;
2273 
2274     for (l = 1; l <= 16; l++)
2275     {
2276       for (i = 1; i <= m_huff_num.ptr[index][l]; i++)
2277         huffsize.ptr[p++] = cast(ubyte)(l);
2278     }
2279 
2280     huffsize.ptr[p] = 0;
2281 
2282     lastp = p;
2283 
2284     code = 0;
2285     si = huffsize.ptr[0];
2286     p = 0;
2287 
2288     while (huffsize.ptr[p])
2289     {
2290       while (huffsize.ptr[p] == si)
2291       {
2292         huffcode.ptr[p++] = code;
2293         code++;
2294       }
2295 
2296       code <<= 1;
2297       si++;
2298     }
2299 
2300     memset(pH.look_up.ptr, 0, pH.look_up.sizeof);
2301     memset(pH.look_up2.ptr, 0, pH.look_up2.sizeof);
2302     memset(pH.tree.ptr, 0, pH.tree.sizeof);
2303     memset(pH.code_size.ptr, 0, pH.code_size.sizeof);
2304 
2305     nextfreeentry = -1;
2306 
2307     p = 0;
2308 
2309     while (p < lastp)
2310     {
2311       i = m_huff_val.ptr[index][p];
2312       code = huffcode.ptr[p];
2313       code_size = huffsize.ptr[p];
2314 
2315       pH.code_size.ptr[i] = cast(ubyte)(code_size);
2316 
2317       if (code_size <= 8)
2318       {
2319         code <<= (8 - code_size);
2320 
2321         for (l = 1 << (8 - code_size); l > 0; l--)
2322         {
2323           assert(i < 256);
2324 
2325           pH.look_up.ptr[code] = i;
2326 
2327           bool has_extrabits = false;
2328           int extra_bits = 0;
2329           int num_extra_bits = i & 15;
2330 
2331           int bits_to_fetch = code_size;
2332           if (num_extra_bits)
2333           {
2334             int total_codesize = code_size + num_extra_bits;
2335             if (total_codesize <= 8)
2336             {
2337               has_extrabits = true;
2338               extra_bits = ((1 << num_extra_bits) - 1) & (code >> (8 - total_codesize));
2339               assert(extra_bits <= 0x7FFF);
2340               bits_to_fetch += num_extra_bits;
2341             }
2342           }
2343 
2344           if (!has_extrabits)
2345             pH.look_up2.ptr[code] = i | (bits_to_fetch << 8);
2346           else
2347             pH.look_up2.ptr[code] = i | 0x8000 | (extra_bits << 16) | (bits_to_fetch << 8);
2348 
2349           code++;
2350         }
2351       }
2352       else
2353       {
2354         subtree = (code >> (code_size - 8)) & 0xFF;
2355 
2356         currententry = pH.look_up.ptr[subtree];
2357 
2358         if (currententry == 0)
2359         {
2360           pH.look_up.ptr[subtree] = currententry = nextfreeentry;
2361           pH.look_up2.ptr[subtree] = currententry = nextfreeentry;
2362 
2363           nextfreeentry -= 2;
2364         }
2365 
2366         code <<= (16 - (code_size - 8));
2367 
2368         for (l = code_size; l > 9; l--)
2369         {
2370           if ((code & 0x8000) == 0)
2371             currententry--;
2372 
2373           if (pH.tree.ptr[-currententry - 1] == 0)
2374           {
2375             pH.tree.ptr[-currententry - 1] = nextfreeentry;
2376 
2377             currententry = nextfreeentry;
2378 
2379             nextfreeentry -= 2;
2380           }
2381           else
2382             currententry = pH.tree.ptr[-currententry - 1];
2383 
2384           code <<= 1;
2385         }
2386 
2387         if ((code & 0x8000) == 0)
2388           currententry--;
2389 
2390         pH.tree.ptr[-currententry - 1] = i;
2391       }
2392 
2393       p++;
2394     }
2395   }
2396 
2397   // Verifies the quantization tables needed for this scan are available.
2398   void check_quant_tables () {
2399     for (int i = 0; i < m_comps_in_scan; i++)
2400       if (m_quant.ptr[m_comp_quant.ptr[m_comp_list.ptr[i]]] == null)
2401         stop_decoding(JPGD_UNDEFINED_QUANT_TABLE);
2402   }
2403 
2404   // Verifies that all the Huffman tables needed for this scan are available.
2405   void check_huff_tables () {
2406     for (int i = 0; i < m_comps_in_scan; i++)
2407     {
2408       if ((m_spectral_start == 0) && (m_huff_num.ptr[m_comp_dc_tab.ptr[m_comp_list.ptr[i]]] == null))
2409         stop_decoding(JPGD_UNDEFINED_HUFF_TABLE);
2410 
2411       if ((m_spectral_end > 0) && (m_huff_num.ptr[m_comp_ac_tab.ptr[m_comp_list.ptr[i]]] == null))
2412         stop_decoding(JPGD_UNDEFINED_HUFF_TABLE);
2413     }
2414 
2415     for (int i = 0; i < JPGD_MAX_HUFF_TABLES; i++)
2416       if (m_huff_num.ptr[i])
2417       {
2418         if (!m_pHuff_tabs.ptr[i])
2419           m_pHuff_tabs.ptr[i] = cast(huff_tables*)alloc(huff_tables.sizeof);
2420 
2421         make_huff_table(i, m_pHuff_tabs.ptr[i]);
2422       }
2423   }
2424 
2425   // Determines the component order inside each MCU.
2426   // Also calcs how many MCU's are on each row, etc.
// Fills m_comp_h_blocks/m_comp_v_blocks for every frame component, then sets
// m_mcus_per_row, m_mcus_per_col, m_blocks_per_mcu and m_mcu_org for the
// components listed in m_comp_list.
void calc_mcu_block_order () {
  // Largest horizontal/vertical sampling factor over all frame components.
  int max_h_samp = 0, max_v_samp = 0;

  for (int comp = 0; comp < m_comps_in_frame; ++comp)
  {
    immutable int h = m_comp_h_samp.ptr[comp];
    immutable int v = m_comp_v_samp.ptr[comp];

    if (h > max_h_samp) max_h_samp = h;
    if (v > max_v_samp) max_v_samp = v;
  }

  // Component size in 8x8 blocks: scale the image size by the component's
  // share of the maximum sampling factor (rounding up), then round up to
  // whole blocks.
  for (int comp = 0; comp < m_comps_in_frame; ++comp)
  {
    immutable int scaled_w = ((m_image_x_size * m_comp_h_samp.ptr[comp]) + (max_h_samp - 1)) / max_h_samp;
    immutable int scaled_h = ((m_image_y_size * m_comp_v_samp.ptr[comp]) + (max_v_samp - 1)) / max_v_samp;

    m_comp_h_blocks.ptr[comp] = (scaled_w + 7) / 8;
    m_comp_v_blocks.ptr[comp] = (scaled_h + 7) / 8;
  }

  if (m_comps_in_scan == 1)
  {
    // Single-component scan: one block per MCU, so the MCU grid is the
    // component's own block grid.
    immutable int only = m_comp_list.ptr[0];

    m_mcus_per_row = m_comp_h_blocks.ptr[only];
    m_mcus_per_col = m_comp_v_blocks.ptr[only];

    m_mcu_org.ptr[0] = only;
    m_blocks_per_mcu = 1;
    return;
  }

  // Multi-component scan: an MCU spans max_h_samp x max_v_samp blocks.
  m_mcus_per_row = (((m_image_x_size + 7) / 8) + (max_h_samp - 1)) / max_h_samp;
  m_mcus_per_col = (((m_image_y_size + 7) / 8) + (max_v_samp - 1)) / max_v_samp;

  m_blocks_per_mcu = 0;

  for (int scan_idx = 0; scan_idx < m_comps_in_scan; ++scan_idx)
  {
    immutable int comp = m_comp_list.ptr[scan_idx];

    // Each component contributes h_samp * v_samp consecutive MCU slots.
    for (int left = m_comp_h_samp.ptr[comp] * m_comp_v_samp.ptr[comp]; left != 0; --left)
      m_mcu_org.ptr[m_blocks_per_mcu++] = comp;
  }
}
2480 
2481   // Starts a new scan.
// Returns false when locate_sos_marker() reports failure; otherwise resets the
// per-scan decoder state and returns true.
int init_scan () {
  if (!locate_sos_marker()) return false;

  // Derive the MCU layout for this scan, then validate the tables it uses.
  calc_mcu_block_order();
  check_huff_tables();
  check_quant_tables();

  // DC prediction and the end-of-band run both start from zero in every scan.
  memset(m_last_dc_val.ptr, 0, m_comps_in_frame * uint.sizeof);
  m_eob_run = 0;

  if (m_restart_interval)
  {
    m_restarts_left = m_restart_interval;
    m_next_restart_num = 0;
  }

  fix_in_buffer();

  return true;
}
2506 
2507   // Starts a frame. Determines if the number of components or sampling factors
2508   // are supported.
// Accepts 1-component frames sampled 1x1 and 3-component frames whose second
// and third components are 1x1 and whose first component is 1x1, 2x1, 1x2 or
// 2x2; anything else stops decoding. Then sizes and allocates the per-frame
// buffers.
void init_frame () {
  if (m_comps_in_frame == 1)
  {
    immutable bool is_1x1 = (m_comp_h_samp.ptr[0] == 1) && (m_comp_v_samp.ptr[0] == 1);
    if (!is_1x1)
      stop_decoding(JPGD_UNSUPPORTED_SAMP_FACTORS);

    m_scan_type = JPGD_GRAYSCALE;
    m_max_blocks_per_mcu = 1;
    m_max_mcu_x_size = 8;
    m_max_mcu_y_size = 8;
  }
  else if (m_comps_in_frame == 3)
  {
    // Components 1 and 2 must both be sampled 1x1.
    bool chroma_is_1x1 = true;
    for (int c = 1; c <= 2; ++c)
    {
      if ((m_comp_h_samp.ptr[c] != 1) || (m_comp_v_samp.ptr[c] != 1))
        chroma_is_1x1 = false;
    }
    if (!chroma_is_1x1)
      stop_decoding(JPGD_UNSUPPORTED_SAMP_FACTORS);

    immutable int luma_h = m_comp_h_samp.ptr[0];
    immutable int luma_v = m_comp_v_samp.ptr[0];
    immutable bool luma_supported = ((luma_h == 1) || (luma_h == 2)) && ((luma_v == 1) || (luma_v == 2));

    if (luma_supported)
    {
      if (luma_v == 1)
        m_scan_type = (luma_h == 1) ? JPGD_YH1V1 : JPGD_YH2V1;
      else
        m_scan_type = (luma_h == 1) ? JPGD_YH1V2 : JPGD_YH2V2;

      // luma_h*luma_v blocks for component 0 plus one block each for
      // components 1 and 2: 3, 4, 4 or 6 blocks per MCU.
      m_max_blocks_per_mcu = luma_h * luma_v + 2;
      m_max_mcu_x_size = 8 * luma_h;
      m_max_mcu_y_size = 8 * luma_v;
    }
    else
      stop_decoding(JPGD_UNSUPPORTED_SAMP_FACTORS);
  }
  else
    stop_decoding(JPGD_UNSUPPORTED_COLORSPACE);

  // Image size in MCUs, rounded up.
  m_max_mcus_per_row = (m_image_x_size + (m_max_mcu_x_size - 1)) / m_max_mcu_x_size;
  m_max_mcus_per_col = (m_image_y_size + (m_max_mcu_y_size - 1)) / m_max_mcu_y_size;

  // These values are for the *destination* pixels: after conversion.
  m_dest_bytes_per_pixel = (m_scan_type == JPGD_GRAYSCALE) ? 1 : 4;

  // Width is rounded up to a multiple of 16 pixels (and masked to 16 bits).
  m_dest_bytes_per_scan_line = ((m_image_x_size + 15) & 0xFFF0) * m_dest_bytes_per_pixel;

  m_real_dest_bytes_per_scan_line = (m_image_x_size * m_dest_bytes_per_pixel);

  // The second scan line buffer is only allocated for the two scan types
  // with a vertical luma factor of 2.
  m_pScan_line_0 = cast(ubyte*)alloc(m_dest_bytes_per_scan_line, true);
  if ((m_scan_type == JPGD_YH1V2) || (m_scan_type == JPGD_YH2V2))
    m_pScan_line_1 = cast(ubyte*)alloc(m_dest_bytes_per_scan_line, true);

  m_max_blocks_per_row = m_max_mcus_per_row * m_max_blocks_per_mcu;

  // Should never happen
  if (m_max_blocks_per_row > JPGD_MAX_BLOCKS_PER_ROW)
    stop_decoding(JPGD_ASSERTION_ERROR);

  // Allocate the coefficient buffer, enough for one MCU
  m_pMCU_coefficients = cast(jpgd_block_t*)alloc(m_max_blocks_per_mcu * 64 * jpgd_block_t.sizeof);

  for (int blk = 0; blk < m_max_blocks_per_mcu; ++blk)
    m_mcu_block_max_zag.ptr[blk] = 64;

  m_expanded_blocks_per_component = m_comp_h_samp.ptr[0] * m_comp_v_samp.ptr[0];
  m_expanded_blocks_per_mcu = m_expanded_blocks_per_component * m_comps_in_frame;
  m_expanded_blocks_per_row = m_max_mcus_per_row * m_expanded_blocks_per_mcu;

  // Freq. domain chroma upsampling is only supported for H2V2 subsampling factor (the most common one I've seen).
  m_freq_domain_chroma_upsample = false;
  version(JPGD_SUPPORT_FREQ_DOMAIN_UPSAMPLING) {
    m_freq_domain_chroma_upsample = (m_expanded_blocks_per_mcu == 4*3);
  }

  // The sample buffer holds 64 bytes per block of one MCU row.
  m_pSample_buf = cast(ubyte*)alloc((m_freq_domain_chroma_upsample ? m_expanded_blocks_per_row : m_max_blocks_per_row) * 64);

  m_total_lines_left = m_image_y_size;
  m_mcu_lines_left = 0;

  create_look_ups();
}
2613 
2614   // The coeff_buf series of methods originally stored the coefficients
2615   // into a "virtual" file which was located in EMS, XMS, or a disk file. A cache
2616   // was used to make this process more efficient. Now, we can store the entire
2617   // thing in RAM.
// Allocates a coeff_buf descriptor plus backing storage for a grid of
// block_num_x by block_num_y blocks, each block_len_x * block_len_y
// coefficients of type jpgd_block_t. Returns the descriptor.
coeff_buf* coeff_buf_open(int block_num_x, int block_num_y, int block_len_x, int block_len_y) {
  auto buf = cast(coeff_buf*)alloc(coeff_buf.sizeof);

  buf.block_num_x = block_num_x;
  buf.block_num_y = block_num_y;
  buf.block_len_x = block_len_x;
  buf.block_len_y = block_len_y;

  // Size of one block in bytes.
  buf.block_size = (block_len_x * block_len_y) * cast(int)(jpgd_block_t.sizeof);

  // NOTE(review): the `true` argument presumably asks alloc() for zeroed memory; confirm against alloc().
  immutable total_bytes = buf.block_size * block_num_x * block_num_y;
  buf.pData = cast(ubyte*)alloc(total_bytes, true);

  return buf;
}
2629 
// Returns a pointer to the first coefficient of block (block_x, block_y) in
// cb; blocks are stored row by row, block_num_x blocks per row.
jpgd_block_t* coeff_buf_getp (coeff_buf *cb, int block_x, int block_y) {
  // Only the upper bounds are checked, and only in builds that keep asserts.
  assert((block_x < cb.block_num_x) && (block_y < cb.block_num_y));

  auto row_stride = cb.block_size * cb.block_num_x;
  ubyte* row_start = cb.pData + block_y * row_stride;
  return cast(jpgd_block_t*)(row_start + block_x * cb.block_size);
}
2634 
// The following methods decode the various types of blocks encountered
// in progressively encoded images.
// First DC pass for one block: decodes the DC difference, adds it to the
// component's running DC value and stores the sum, shifted left by
// m_successive_low, as coefficient 0.
static void decode_block_dc_first (ref jpeg_decoder pD, int component_id, int block_x, int block_y) {
  jpgd_block_t* coeffs = pD.coeff_buf_getp(pD.m_dc_coeffs.ptr[component_id], block_x, block_y);

  // The Huffman symbol is the bit length of the difference; 0 means no change.
  int dc = pD.huff_decode(pD.m_pHuff_tabs.ptr[pD.m_comp_dc_tab.ptr[component_id]]);
  if (dc != 0)
  {
    immutable int extra_bits = pD.get_bits_no_markers(dc);
    dc = JPGD_HUFF_EXTEND(extra_bits, dc);
  }

  dc += pD.m_last_dc_val.ptr[component_id];
  pD.m_last_dc_val.ptr[component_id] = dc;

  coeffs[0] = cast(jpgd_block_t)(dc << pD.m_successive_low);
}
2651 
// DC refinement pass for one block: reads one bit and, when it is set, ORs
// bit m_successive_low into coefficient 0.
static void decode_block_dc_refine (ref jpeg_decoder pD, int component_id, int block_x, int block_y) {
  if (!pD.get_bits_no_markers(1))
    return;

  jpgd_block_t* coeffs = pD.coeff_buf_getp(pD.m_dc_coeffs.ptr[component_id], block_x, block_y);
  coeffs[0] |= (1 << pD.m_successive_low);
}
2660 
// First AC pass for one block: decodes run/size symbols for the coefficients
// m_spectral_start..m_spectral_end and stores each value shifted left by
// m_successive_low. A pending end-of-band run skips the block entirely.
static void decode_block_ac_first (ref jpeg_decoder pD, int component_id, int block_x, int block_y) {
  if (pD.m_eob_run)
  {
    // This block is covered by an earlier end-of-band run.
    pD.m_eob_run--;
    return;
  }

  jpgd_block_t* coeffs = pD.coeff_buf_getp(pD.m_ac_coeffs.ptr[component_id], block_x, block_y);

  for (int k = pD.m_spectral_start; k <= pD.m_spectral_end; k++)
  {
    immutable int sym = pD.huff_decode(pD.m_pHuff_tabs.ptr[pD.m_comp_ac_tab.ptr[component_id]]);
    immutable int run = sym >> 4;    // high nibble
    immutable int nbits = sym & 15;  // low nibble

    if (nbits != 0)
    {
      // `run` zero coefficients, then one coefficient of `nbits` bits.
      k += run;
      if (k > 63)
        pD.stop_decoding(JPGD_DECODE_ERROR);

      immutable int raw = pD.get_bits_no_markers(nbits);
      immutable int value = JPGD_HUFF_EXTEND(raw, nbits);

      coeffs[g_ZAG[k]] = cast(jpgd_block_t)(value << pD.m_successive_low);
      continue;
    }

    if (run == 15)
    {
      // Skip 15 positions here; the loop increment supplies the 16th.
      k += 15;
      if (k > 63)
        pD.stop_decoding(JPGD_DECODE_ERROR);
      continue;
    }

    // End-of-band run: 2^run plus `run` extra bits, minus this block.
    pD.m_eob_run = 1 << run;

    if (run)
      pD.m_eob_run += pD.get_bits_no_markers(run);

    pD.m_eob_run--;

    break;
  }
}
2710 
// AC refinement pass for one block: already-nonzero coefficients in
// m_spectral_start..m_spectral_end may receive one correction bit each, and
// newly nonzero coefficients are set to +/-(1 << m_successive_low).
static void decode_block_ac_refine (ref jpeg_decoder pD, int component_id, int block_x, int block_y) {
  immutable int plus_bit = 1 << pD.m_successive_low;
  immutable int minus_bit = (-1) << pD.m_successive_low;
  jpgd_block_t* coeffs = pD.coeff_buf_getp(pD.m_ac_coeffs.ptr[component_id], block_x, block_y);

  assert(pD.m_spectral_end <= 63);

  // Reads one correction bit for a nonzero coefficient. When the bit is set
  // and the coefficient does not have the current bit yet, the coefficient is
  // moved away from zero by one step of the current bit.
  void refine_nonzero (jpgd_block_t* coef) {
    if (!pD.get_bits_no_markers(1))
      return;
    if ((*coef & plus_bit) != 0)
      return;
    *coef = cast(jpgd_block_t)(*coef + ((*coef >= 0) ? plus_bit : minus_bit));
  }

  int k = pD.m_spectral_start;

  if (pD.m_eob_run == 0)
  {
    for ( ; k <= pD.m_spectral_end; k++)
    {
      immutable int sym = pD.huff_decode(pD.m_pHuff_tabs.ptr[pD.m_comp_ac_tab.ptr[component_id]]);

      int zeros_to_skip = sym >> 4;
      int new_value = sym & 15;  // 0 = no new coefficient follows this run

      if (new_value != 0)
      {
        // The only valid size here is 1.
        if (new_value != 1)
          pD.stop_decoding(JPGD_DECODE_ERROR);

        new_value = pD.get_bits_no_markers(1) ? plus_bit : minus_bit;
      }
      else if (zeros_to_skip != 15)
      {
        // End-of-band run; the rest of this block is handled below.
        pD.m_eob_run = 1 << zeros_to_skip;

        if (zeros_to_skip)
          pD.m_eob_run += pD.get_bits_no_markers(zeros_to_skip);

        break;
      }

      // Advance over `zeros_to_skip` zero coefficients, refining every
      // nonzero coefficient passed on the way.
      do
      {
        jpgd_block_t* coef = coeffs + g_ZAG[k & 63];

        if (*coef != 0)
          refine_nonzero(coef);
        else if (--zeros_to_skip < 0)
          break;

        k++;

      } while (k <= pD.m_spectral_end);

      if ((new_value != 0) && (k < 64))
        coeffs[g_ZAG[k]] = cast(jpgd_block_t)(new_value);
    }
  }

  if (pD.m_eob_run > 0)
  {
    // Inside an end-of-band run only the nonzero coefficients are refined.
    for ( ; k <= pD.m_spectral_end; k++)
    {
      jpgd_block_t* coef = coeffs + g_ZAG[k & 63]; // logical AND to shut up static code analysis

      if (*coef != 0)
        refine_nonzero(coef);
    }

    pD.m_eob_run--;
  }
}
2811 
2812   // Decode a scan in a progressively encoded image.
// Calls decode_block_func once for every block of every MCU in the current
// scan, passing the component id and the block's x/y position.
void decode_scan (pDecode_block_func decode_block_func) {
  int[JPGD_MAX_COMPONENTS] first_block_x;  // per component: x of the current MCU's first block
  int[JPGD_MAX_COMPONENTS] first_block_y;  // per component: y of the current MCU row's first block

  first_block_y[] = 0;

  for (int mcu_y = 0; mcu_y < m_mcus_per_col; ++mcu_y)
  {
    first_block_x[] = 0;

    for (int mcu_x = 0; mcu_x < m_mcus_per_row; ++mcu_x)
    {
      // Position of the next block inside the current component's MCU tile.
      int ofs_x = 0, ofs_y = 0;

      if ((m_restart_interval) && (m_restarts_left == 0))
        process_restart();

      for (int blk = 0; blk < m_blocks_per_mcu; ++blk)
      {
        immutable int comp = m_mcu_org.ptr[blk];

        decode_block_func(this, comp, first_block_x.ptr[comp] + ofs_x, first_block_y.ptr[comp] + ofs_y);

        if (m_comps_in_scan == 1)
        {
          first_block_x.ptr[comp]++;
          continue;
        }

        // Walk the component's h_samp x v_samp tile row by row; once the
        // tile is complete, move the component on to the next MCU.
        if (++ofs_x != m_comp_h_samp.ptr[comp])
          continue;
        ofs_x = 0;

        if (++ofs_y != m_comp_v_samp.ptr[comp])
          continue;
        ofs_y = 0;

        first_block_x.ptr[comp] += m_comp_h_samp.ptr[comp];
      }

      m_restarts_left--;
    }

    if (m_comps_in_scan == 1)
    {
      first_block_y.ptr[m_comp_list.ptr[0]]++;
    }
    else
    {
      for (int scan_idx = 0; scan_idx < m_comps_in_scan; ++scan_idx)
      {
        immutable int comp = m_comp_list.ptr[scan_idx];
        first_block_y.ptr[comp] += m_comp_v_samp.ptr[comp];
      }
    }
  }
}
2871 
2872   // Decode a progressively encoded image.
// Allocates DC and AC coefficient buffers for every component, then decodes
// scans until init_scan() returns false. Afterwards the scan description is
// reset to cover all frame components.
void init_progressive () {
  if (m_comps_in_frame == 4)
    stop_decoding(JPGD_UNSUPPORTED_COLORSPACE);

  // Allocate the coefficient buffers.
  for (int comp = 0; comp < m_comps_in_frame; ++comp)
  {
    immutable int blocks_x = m_max_mcus_per_row * m_comp_h_samp.ptr[comp];
    immutable int blocks_y = m_max_mcus_per_col * m_comp_v_samp.ptr[comp];

    m_dc_coeffs.ptr[comp] = coeff_buf_open(blocks_x, blocks_y, 1, 1);
    m_ac_coeffs.ptr[comp] = coeff_buf_open(blocks_x, blocks_y, 8, 8);
  }

  while (init_scan())
  {
    immutable bool dc_only_scan = (m_spectral_start == 0);
    immutable bool refinement_scan = (m_successive_high != 0);

    if ((m_spectral_start > m_spectral_end) || (m_spectral_end > 63))
      stop_decoding(JPGD_BAD_SOS_SPECTRAL);

    if (dc_only_scan)
    {
      // A scan that starts at coefficient 0 must also end there.
      if (m_spectral_end)
        stop_decoding(JPGD_BAD_SOS_SPECTRAL);
    }
    else if (m_comps_in_scan != 1)  /* AC scans can only contain one component */
      stop_decoding(JPGD_BAD_SOS_SPECTRAL);

    // A refinement scan must lower the bit position by exactly one.
    if ((refinement_scan) && (m_successive_low != m_successive_high - 1))
      stop_decoding(JPGD_BAD_SOS_SUCCESSIVE);

    pDecode_block_func block_decoder;
    if (dc_only_scan)
      block_decoder = refinement_scan ? &decode_block_dc_refine : &decode_block_dc_first;
    else
      block_decoder = refinement_scan ? &decode_block_ac_refine : &decode_block_ac_first;

    decode_scan(block_decoder);

    m_bits_left = 16;
    get_bits(16);
    get_bits(16);
  }

  // Reset the scan description to all frame components, in order.
  m_comps_in_scan = m_comps_in_frame;

  for (int comp = 0; comp < m_comps_in_frame; ++comp)
    m_comp_list.ptr[comp] = comp;

  calc_mcu_block_order();
}
2940 
// Starts the scan of a non-progressive image; stops decoding with
// JPGD_UNEXPECTED_MARKER when init_scan() fails.
void init_sequential () {
  immutable bool scan_started = (init_scan() != 0);
  if (!scan_started)
    stop_decoding(JPGD_UNEXPECTED_MARKER);
}
2945 
// Runs init_frame(), then init_progressive() or init_sequential() depending
// on m_progressive_flag.
void decode_start () {
  init_frame();

  if (!m_progressive_flag)
  {
    init_sequential();
    return;
  }

  init_progressive();
}
2954 
// Initialises the decoder with the read callback `rfn`, then runs
// locate_sof_marker().
void decode_init (JpegStreamReadFunc rfn) {
  initit(rfn);

  locate_sof_marker();
}
2959 }
2960 
2961 
2962 // ////////////////////////////////////////////////////////////////////////// //
2963 /// read JPEG image header, determine dimensions and number of components.
2964 /// return `false` if image is not JPEG (i hope).
/// Params: rfn = read callback; a null callback makes the function return `false`.
/// The `out` parameters are only assigned when the function returns `true`.
public bool detect_jpeg_image_from_stream (scope JpegStreamReadFunc rfn, out int width, out int height, out int actual_comps) {
  if (rfn is null) return false;

  auto decoder = jpeg_decoder(rfn);
  version(jpegd_test) { import core.stdc.stdio : printf; printf("%u bytes read.\n", cast(uint)decoder.total_bytes_read); }

  immutable bool header_ok = (decoder.error_code == JPGD_SUCCESS);
  if (header_ok) {
    width = decoder.width;
    height = decoder.height;
    actual_comps = decoder.num_components;
  }
  return header_ok;
}
2975 
2976 // ////////////////////////////////////////////////////////////////////////// //
2977 /// read JPEG image header, determine dimensions and number of components.
2978 /// return `false` if image is not JPEG (i hope).
/// Params: buf = the complete encoded image; it is only read, never modified.
/// Returns: whatever `detect_jpeg_image_from_stream` returns for a reader that serves `buf`.
public bool detect_jpeg_image_from_memory (const(void)[] buf, out int width, out int height, out int actual_comps) {
  size_t bufpos; // number of bytes of `buf` already handed to the decoder
  auto src = cast(const(ubyte)*)buf.ptr;

  return detect_jpeg_image_from_stream(
    delegate int (void* pBuf, int max_bytes_to_read, bool *pEOF_flag) {
      import core.stdc.string : memcpy;
      if (bufpos >= buf.length) {
        *pEOF_flag = true;
        return 0;
      }
      if (max_bytes_to_read <= 0) return 0;
      // Never copy more than what is left. The read position has to advance
      // with every call: otherwise this clamp and the EOF test above would
      // always see the whole buffer and memcpy would run past its end.
      immutable size_t remaining = buf.length-bufpos;
      immutable size_t wanted = cast(size_t)max_bytes_to_read;
      immutable size_t count = (remaining < wanted ? remaining : wanted);
      memcpy(pBuf, src+bufpos, count);
      bufpos += count;
      return cast(int)count; // count <= max_bytes_to_read, so it fits in an int
    },
    width, height, actual_comps);
}
2998 
2999 
3000 // ////////////////////////////////////////////////////////////////////////// //
3001 /// decompress JPEG image, what else?
/// you can specify the required number of color components in `req_comps` (1 for grayscale, 3 for RGB or 4 for RGBA), or leave it at -1 to use the image's own component count.
/// Returns: `width*height*req_comps` bytes of tightly packed pixel rows, or `null` when
/// `rfn` is null, `req_comps` is not -1, 1, 3 or 4, or decoding or allocation fails.
/// The buffer is obtained from `jpgd_malloc` (presumably to be released with `jpgd_free`; confirm).
public ubyte[] decompress_jpeg_image_from_stream(scope JpegStreamReadFunc rfn, out int width, out int height, out int actual_comps, int req_comps=-1) {
  import core.stdc.string : memcpy;

  if (rfn is null) return null;

  switch (req_comps) {
    case -1, 1, 3, 4: break;
    default: return null;
  }

  auto decoder = jpeg_decoder(rfn);
  if (decoder.error_code != JPGD_SUCCESS) return null;
  version(jpegd_test) scope(exit) { import core.stdc.stdio : printf; printf("%u bytes read.\n", cast(uint)decoder.total_bytes_read); }

  immutable int img_w = decoder.width;
  immutable int img_h = decoder.height;
  width = img_w;
  height = img_h;
  actual_comps = decoder.num_components;
  if (req_comps < 0) req_comps = decoder.num_components;

  if (decoder.begin_decoding() != JPGD_SUCCESS) return null;

  // Bytes per output row.
  immutable int out_stride = img_w*req_comps;

  ubyte* pixels = cast(ubyte*)jpgd_malloc(out_stride*img_h);
  if (pixels is null) return null;

  for (int y = 0; y < img_h; ++y) {
    const(ubyte)* line;
    uint line_len;
    if (decoder.decode(cast(void**)&line, &line_len) != JPGD_SUCCESS) {
      jpgd_free(pixels);
      return null;
    }

    ubyte* dst = pixels+y*out_stride;
    immutable int src_comps = decoder.num_components;

    // Decoded lines carry 1 byte per pixel for 1-component images and are
    // read 4 bytes per pixel for 3-component images, so these two
    // combinations can be copied verbatim.
    immutable bool same_layout = (req_comps == 1 && src_comps == 1) || (req_comps == 4 && src_comps == 3);
    if (same_layout) {
      memcpy(dst, line, out_stride);
      continue;
    }

    if (src_comps == 1) {
      // Grayscale source: replicate the value into R, G, B; any request
      // other than 3 components also gets an opaque fourth byte.
      immutable bool with_alpha = (req_comps != 3);
      for (int x = 0; x < img_w; ++x) {
        immutable ubyte luma = line[x];
        dst[0] = luma;
        dst[1] = luma;
        dst[2] = luma;
        if (with_alpha) {
          dst[3] = 255;
          dst += 4;
        } else {
          dst += 3;
        }
      }
    } else if (src_comps == 3) {
      if (req_comps == 1) {
        // Weighted sum of R, G, B in 16.16 fixed point, rounded.
        immutable int YR = 19595, YG = 38470, YB = 7471;
        for (int x = 0; x < img_w; ++x) {
          const(ubyte)* px = line+x*4;
          immutable int r = px[0];
          immutable int g = px[1];
          immutable int b = px[2];
          *dst++ = cast(ubyte)((r * YR + g * YG + b * YB + 32768) >> 16);
        }
      } else {
        // Keep the first three bytes of every 4-byte source pixel.
        for (int x = 0; x < img_w; ++x) {
          const(ubyte)* px = line+x*4;
          dst[0] = px[0];
          dst[1] = px[1];
          dst[2] = px[2];
          dst += 3;
        }
      }
    }
  }

  return pixels[0..out_stride*img_h];
}
3082 
3083 
3084 // ////////////////////////////////////////////////////////////////////////// //
3085 /// decompress JPEG image from memory buffer.
/// you can specify the required number of color components in `req_comps` (1 for grayscale, 3 for RGB or 4 for RGBA), or leave it at -1 to use the image's own component count.
/// Params: buf = the complete encoded image; it is only read, never modified.
/// Returns: whatever `decompress_jpeg_image_from_stream` returns for a reader that serves `buf`.
public ubyte[] decompress_jpeg_image_from_memory(const(void)[] buf, out int width, out int height, out int actual_comps, int req_comps=-1) {
  size_t bufpos; // number of bytes of `buf` already handed to the decoder
  auto src = cast(const(ubyte)*)buf.ptr;

  return decompress_jpeg_image_from_stream(
    delegate int (void* pBuf, int max_bytes_to_read, bool *pEOF_flag) {
      import core.stdc.string : memcpy;
      if (bufpos >= buf.length) {
        *pEOF_flag = true;
        return 0;
      }
      if (max_bytes_to_read <= 0) return 0;
      // Never copy more than what is left. The read position has to advance
      // with every call: otherwise this clamp and the EOF test above would
      // always see the whole buffer and memcpy would run past its end.
      immutable size_t remaining = buf.length-bufpos;
      immutable size_t wanted = cast(size_t)max_bytes_to_read;
      immutable size_t count = (remaining < wanted ? remaining : wanted);
      memcpy(pBuf, src+bufpos, count);
      bufpos += count;
      return cast(int)count; // count <= max_bytes_to_read, so it fits in an int
    },
    width, height, actual_comps, req_comps);
}