1 // jpgd.h - C++ class for JPEG decompression.
2 // Rich Geldreich <richgel99@gmail.com>
3 // Alex Evans: Linear memory allocator (taken from jpge.h).
4 // v1.04, May. 19, 2012: Code tweaks to fix VS2008 static code analysis warnings (all looked harmless)
5 // D translation by Ketmar // Invisible Vector
6 //
7 // This is free and unencumbered software released into the public domain.
8 //
9 // Anyone is free to copy, modify, publish, use, compile, sell, or
10 // distribute this software, either in source code form or as a compiled
11 // binary, for any purpose, commercial or non-commercial, and by any
12 // means.
13 //
14 // In jurisdictions that recognize copyright laws, the author or authors
15 // of this software dedicate any and all copyright interest in the
16 // software to the public domain. We make this dedication for the benefit
17 // of the public at large and to the detriment of our heirs and
18 // successors. We intend this dedication to be an overt act of
19 // relinquishment in perpetuity of all present and future rights to this
20 // software under copyright law.
21 //
22 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
23 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
24 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
25 // IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
26 // OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
27 // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
28 // OTHER DEALINGS IN THE SOFTWARE.
29 //
30 // For more information, please refer to <http://unlicense.org/>
31 //
32 // Supports progressive and baseline sequential JPEG image files, and the most common chroma subsampling factors: Y, H1V1, H2V1, H1V2, and H2V2.
33 //
34 // Chroma upsampling quality: H2V2 is upsampled in the frequency domain, H2V1 and H1V2 are upsampled using point sampling.
35 // Chroma upsampling reference: "Fast Scheme for Image Size Change in the Compressed Domain"
36 // http://vision.ai.uiuc.edu/~dugad/research/dct/index.html
37 /**
38  * Loads a JPEG image from a memory buffer or a file.
39  * req_comps can be 1 (grayscale), 3 (RGB), or 4 (RGBA).
 * On return, width/height will be set to the image's dimensions, and actual_comps will be set to either 1 (grayscale) or 3 (RGB).
 * Requesting an 8 or 32bpp image is currently a little faster than 24bpp because the jpeg_decoder class itself currently always unpacks to either 8 or 32bpp.
42  */
43 /// JPEG loader.
44 module dplug.graphics.jpegload;
45 
46 nothrow:
47 @nogc:
48 
49 // arsd.color stripped down
/// Minimal RGBA image holder (arsd.color's TrueColorImage, stripped down).
///
/// The pixel buffer is not owned: the constructor stores the slice it is
/// given, and nothing in this class allocates or frees it.
class TrueColorImage
{
nothrow:
@nogc:
	//ubyte[] data; // stored as rgba quads, upper left to right to bottom
	/// Wrapper around the pixel storage.
	struct Data {
		ubyte[] bytes; /// the data as rgba bytes. Stored left to right, top to bottom, no padding.
		// the union is no good because the length of the struct is wrong!
	}

	/// Pixel storage.
	Data imageData;
	/// Shortcut for `imageData.bytes`.
	alias imageData.bytes data;

	int _width;
	int _height;

	/// Image width in pixels, as passed to the constructor.
	int width() const { return _width; }
	/// Image height in pixels, as passed to the constructor.
	int height() const { return _height; }

	/// Creates with existing data. The data slice is stored here, not copied.
	/// Params:
	///   w = width in pixels (must not be negative)
	///   h = height in pixels (must not be negative)
	///   data = pixel bytes; its length must be exactly `w * h * 4`
	this(int w, int h, ubyte[] data) {
		assert(w >= 0 && h >= 0);
		// Do the size computation in size_t: `w * h * 4` in int overflows for
		// large images and would make this check fail spuriously.
		assert(data.length == cast(size_t)w * cast(size_t)h * 4);
		_width = w;
		_height = h;
		imageData.bytes = data;
	}
}
81 
82 
83 // Set to 1 to enable freq. domain chroma upsampling on images using H2V2 subsampling (0=faster nearest neighbor sampling).
84 // This is slower, but results in higher quality on images with highly saturated colors.
85 //version = JPGD_SUPPORT_FREQ_DOMAIN_UPSAMPLING;
86 
/// Input stream interface.
/// This delegate is called when the internal input buffer is empty.
/// Params:
///   pBuf = input buffer to fill
///   max_bytes_to_read = maximum number of bytes that can be written to pBuf
///   pEOF_flag = set `*pEOF_flag` to true if at end of stream (no more bytes remaining)
/// Returns: -1 on error, otherwise the number of bytes actually written to the buffer (which may be 0).
/// Note: This delegate will be called in a loop until you set `*pEOF_flag` to true or the internal buffer is full.
alias JpegStreamReadFunc = int delegate (void* pBuf, int max_bytes_to_read, bool* pEOF_flag);
96 
97 
98 // ////////////////////////////////////////////////////////////////////////// //
99 private:
/// Allocates `nSize` bytes with the C runtime's `malloc`.
/// Returns: the pointer `malloc` returned, unchanged.
void *jpgd_malloc (size_t nSize) {
  import core.stdc.stdlib : malloc;
  void* mem = malloc(nSize);
  return mem;
}
/// Releases a pointer with the C runtime's `free`. A null pointer is ignored.
void jpgd_free (void *p) {
  import core.stdc.stdlib : free;
  if (p is null) return;
  free(p);
}
102 
// Success/failure error codes.
// `jpgd_status` is a plain int; the constants below are members of an anonymous enum.
alias jpgd_status = int;
enum /*jpgd_status*/ {
  // Generic results.
  JPGD_SUCCESS = 0, JPGD_FAILED = -1, JPGD_DONE = 1,
  // Specific error codes. Numbering starts at -256; every following member is the previous one plus 1.
  JPGD_BAD_DHT_COUNTS = -256, JPGD_BAD_DHT_INDEX, JPGD_BAD_DHT_MARKER, JPGD_BAD_DQT_MARKER, JPGD_BAD_DQT_TABLE,
  JPGD_BAD_PRECISION, JPGD_BAD_HEIGHT, JPGD_BAD_WIDTH, JPGD_TOO_MANY_COMPONENTS,
  JPGD_BAD_SOF_LENGTH, JPGD_BAD_VARIABLE_MARKER, JPGD_BAD_DRI_LENGTH, JPGD_BAD_SOS_LENGTH,
  JPGD_BAD_SOS_COMP_ID, JPGD_W_EXTRA_BYTES_BEFORE_MARKER, JPGD_NO_ARITHMITIC_SUPPORT, JPGD_UNEXPECTED_MARKER,
  JPGD_NOT_JPEG, JPGD_UNSUPPORTED_MARKER, JPGD_BAD_DQT_LENGTH, JPGD_TOO_MANY_BLOCKS,
  JPGD_UNDEFINED_QUANT_TABLE, JPGD_UNDEFINED_HUFF_TABLE, JPGD_NOT_SINGLE_SCAN, JPGD_UNSUPPORTED_COLORSPACE,
  JPGD_UNSUPPORTED_SAMP_FACTORS, JPGD_DECODE_ERROR, JPGD_BAD_RESTART_MARKER, JPGD_ASSERTION_ERROR,
  JPGD_BAD_SOS_SPECTRAL, JPGD_BAD_SOS_SUCCESSIVE, JPGD_STREAM_READ, JPGD_NOTENOUGHMEM,
}
116 
// Decoder limits and buffer sizes. These are used as array dimensions by jpeg_decoder
// (e.g. JPGD_IN_BUF_SIZE sizes the input buffer, the JPGD_MAX_* values size the per-table
// and per-component arrays).
enum {
  JPGD_IN_BUF_SIZE = 8192, JPGD_MAX_BLOCKS_PER_MCU = 10, JPGD_MAX_HUFF_TABLES = 8, JPGD_MAX_QUANT_TABLES = 4,
  JPGD_MAX_COMPONENTS = 4, JPGD_MAX_COMPS_IN_SCAN = 4, JPGD_MAX_BLOCKS_PER_ROW = 8192, JPGD_MAX_HEIGHT = 16384, JPGD_MAX_WIDTH = 16384,
}
121 
// DCT coefficients are stored in this sequence.
// The table is a permutation of 0..63; presumably entry k is the position inside the
// 8x8 block (row*8 + column) of the k-th coefficient in zig-zag order -- confirm against users of g_ZAG.
static immutable int[64] g_ZAG = [  0,1,8,16,9,2,3,10,17,24,32,25,18,11,4,5,12,19,26,33,40,48,41,34,27,20,13,6,7,14,21,28,35,42,49,56,57,50,43,36,29,22,15,23,30,37,44,51,58,59,52,45,38,31,39,46,53,60,61,54,47,55,62,63 ];
124 
// JPEG marker codes. `JPEG_MARKER` is a plain int; the constants are members of an anonymous enum.
// Notes:
//  - M_ERROR (0x100) does not fit in a byte, so it cannot collide with a marker byte read from the stream.
//  - RST0 has the same value as M_RST0 (0xD0).
alias JPEG_MARKER = int;
enum /*JPEG_MARKER*/ {
  M_SOF0  = 0xC0, M_SOF1  = 0xC1, M_SOF2  = 0xC2, M_SOF3  = 0xC3, M_SOF5  = 0xC5, M_SOF6  = 0xC6, M_SOF7  = 0xC7, M_JPG   = 0xC8,
  M_SOF9  = 0xC9, M_SOF10 = 0xCA, M_SOF11 = 0xCB, M_SOF13 = 0xCD, M_SOF14 = 0xCE, M_SOF15 = 0xCF, M_DHT   = 0xC4, M_DAC   = 0xCC,
  M_RST0  = 0xD0, M_RST1  = 0xD1, M_RST2  = 0xD2, M_RST3  = 0xD3, M_RST4  = 0xD4, M_RST5  = 0xD5, M_RST6  = 0xD6, M_RST7  = 0xD7,
  M_SOI   = 0xD8, M_EOI   = 0xD9, M_SOS   = 0xDA, M_DQT   = 0xDB, M_DNL   = 0xDC, M_DRI   = 0xDD, M_DHP   = 0xDE, M_EXP   = 0xDF,
  M_APP0  = 0xE0, M_APP15 = 0xEF, M_JPG0  = 0xF0, M_JPG13 = 0xFD, M_COM   = 0xFE, M_TEM   = 0x01, M_ERROR = 0x100, RST0   = 0xD0,
}
133 
// Scan/subsampling types, numbered 0 (JPGD_GRAYSCALE) through 4 (JPGD_YH2V2).
// jpeg_decoder.decode() switches on these values to pick the colour conversion routine.
alias JPEG_SUBSAMPLING = int;
enum /*JPEG_SUBSAMPLING*/ { JPGD_GRAYSCALE = 0, JPGD_YH1V1, JPGD_YH2V1, JPGD_YH1V2, JPGD_YH2V2 };
136 
// Fixed-point parameters of the integer IDCT.
// CONST_BITS: number of fractional bits in the FIX_* multipliers below.
// PASS1_BITS: extra fractional bits kept in the output of the row pass (see Row.idct / Col.idct).
// SCALEDONE: the integer 1, used to build the rounding term in DESCALE().
enum CONST_BITS = 13;
enum PASS1_BITS = 2;
enum SCALEDONE = cast(int)1;

// FIX(x) is x scaled by 2^CONST_BITS (8192) and rounded to an integer, e.g. 0.298631336 * 8192 ~= 2446.
enum FIX_0_298631336 = cast(int)2446;  /* FIX(0.298631336) */
enum FIX_0_390180644 = cast(int)3196;  /* FIX(0.390180644) */
enum FIX_0_541196100 = cast(int)4433;  /* FIX(0.541196100) */
enum FIX_0_765366865 = cast(int)6270;  /* FIX(0.765366865) */
enum FIX_0_899976223 = cast(int)7373;  /* FIX(0.899976223) */
enum FIX_1_175875602 = cast(int)9633;  /* FIX(1.175875602) */
enum FIX_1_501321110 = cast(int)12299; /* FIX(1.501321110) */
enum FIX_1_847759065 = cast(int)15137; /* FIX(1.847759065) */
enum FIX_1_961570560 = cast(int)16069; /* FIX(1.961570560) */
enum FIX_2_053119869 = cast(int)16819; /* FIX(2.053119869) */
enum FIX_2_562915447 = cast(int)20995; /* FIX(2.562915447) */
enum FIX_3_072711026 = cast(int)25172; /* FIX(3.072711026) */
153 
/// Rounding right shift: adds `SCALEDONE << (n-1)` (half of 2^n) to `x`, then shifts right by `n` bits.
int DESCALE() (int x, int n) {
  pragma(inline, true);
  immutable int half = SCALEDONE << (n - 1);
  return (x + half) >> n;
}
/// Like DESCALE(), but also adds `128 << n` before shifting, so the result is offset by +128.
int DESCALE_ZEROSHIFT() (int x, int n) {
  pragma(inline, true);
  immutable int bias = 128 << n;
  immutable int half = SCALEDONE << (n - 1);
  return (x + bias + half) >> n;
}
/// Saturates `i` to the range 0..255: negative values give 0, values above 255 give 255.
ubyte CLAMP() (int i) {
  pragma(inline, true);
  if (i < 0) return 0;
  if (i > 255) return 255;
  return cast(ubyte)i;
}
157 
158 
// Compiler creates a fast path 1D IDCT for X non-zero columns
//
// Row pass of the 2D inverse DCT, specialised at compile time on NONZERO_COLS, the number of
// leading coefficients of the input row that are read (all others are treated as 0).
//
// idct(pTemp, pSrc):
//   pSrc  - one row of coefficients; only pSrc[0 .. NONZERO_COLS] is accessed
//   pTemp - receives 8 ints (not written at all when NONZERO_COLS == 0)
// Results are descaled by CONST_BITS-PASS1_BITS, i.e. they keep PASS1_BITS extra fractional bits.
struct Row(int NONZERO_COLS) {
pure nothrow @trusted @nogc:
  static void idct(int* pTemp, const(jpeg_decoder.jpgd_block_t)* pSrc) {
    static if (NONZERO_COLS == 0) {
      // nothing
      // (pTemp is left untouched; idct() relies on its temp array being zero-initialised)
    } else static if (NONZERO_COLS == 1) {
      // Only the first coefficient is used: every output of the row gets the same value.
      immutable int dcval = (pSrc[0] << PASS1_BITS);
      pTemp[0] = dcval;
      pTemp[1] = dcval;
      pTemp[2] = dcval;
      pTemp[3] = dcval;
      pTemp[4] = dcval;
      pTemp[5] = dcval;
      pTemp[6] = dcval;
      pTemp[7] = dcval;
    } else {
      // ACCESS_COL() will be optimized at compile time to either an array access, or 0.
      //#define ACCESS_COL(x) (((x) < NONZERO_COLS) ? (int)pSrc[x] : 0)
      // The template yields a code string that is pasted in with mixin().
      template ACCESS_COL(int x) {
        static if (x < NONZERO_COLS) enum ACCESS_COL = "cast(int)pSrc["~x.stringof~"]"; else enum ACCESS_COL = "0";
      }

      // Even part: coefficients 2 and 6, then 0 and 4.
      immutable int z2 = mixin(ACCESS_COL!2), z3 = mixin(ACCESS_COL!6);

      immutable int z1 = (z2 + z3)*FIX_0_541196100;
      immutable int tmp2 = z1 + z3*(-FIX_1_847759065);
      immutable int tmp3 = z1 + z2*FIX_0_765366865;

      immutable int tmp0 = (mixin(ACCESS_COL!0) + mixin(ACCESS_COL!4)) << CONST_BITS;
      immutable int tmp1 = (mixin(ACCESS_COL!0) - mixin(ACCESS_COL!4)) << CONST_BITS;

      immutable int tmp10 = tmp0 + tmp3, tmp13 = tmp0 - tmp3, tmp11 = tmp1 + tmp2, tmp12 = tmp1 - tmp2;

      // Odd part: coefficients 7, 5, 3 and 1.
      immutable int atmp0 = mixin(ACCESS_COL!7), atmp1 = mixin(ACCESS_COL!5), atmp2 = mixin(ACCESS_COL!3), atmp3 = mixin(ACCESS_COL!1);

      immutable int bz1 = atmp0 + atmp3, bz2 = atmp1 + atmp2, bz3 = atmp0 + atmp2, bz4 = atmp1 + atmp3;
      immutable int bz5 = (bz3 + bz4)*FIX_1_175875602;

      immutable int az1 = bz1*(-FIX_0_899976223);
      immutable int az2 = bz2*(-FIX_2_562915447);
      immutable int az3 = bz3*(-FIX_1_961570560) + bz5;
      immutable int az4 = bz4*(-FIX_0_390180644) + bz5;

      immutable int btmp0 = atmp0*FIX_0_298631336 + az1 + az3;
      immutable int btmp1 = atmp1*FIX_2_053119869 + az2 + az4;
      immutable int btmp2 = atmp2*FIX_3_072711026 + az2 + az3;
      immutable int btmp3 = atmp3*FIX_1_501321110 + az1 + az4;

      // Combine even and odd parts; outputs k and 7-k use the same pair with opposite sign.
      pTemp[0] = DESCALE(tmp10 + btmp3, CONST_BITS-PASS1_BITS);
      pTemp[7] = DESCALE(tmp10 - btmp3, CONST_BITS-PASS1_BITS);
      pTemp[1] = DESCALE(tmp11 + btmp2, CONST_BITS-PASS1_BITS);
      pTemp[6] = DESCALE(tmp11 - btmp2, CONST_BITS-PASS1_BITS);
      pTemp[2] = DESCALE(tmp12 + btmp1, CONST_BITS-PASS1_BITS);
      pTemp[5] = DESCALE(tmp12 - btmp1, CONST_BITS-PASS1_BITS);
      pTemp[3] = DESCALE(tmp13 + btmp0, CONST_BITS-PASS1_BITS);
      pTemp[4] = DESCALE(tmp13 - btmp0, CONST_BITS-PASS1_BITS);
    }
  }
}
219 
220 
// Compiler creates a fast path 1D IDCT for X non-zero rows
//
// Column pass of the 2D inverse DCT, specialised at compile time on NONZERO_ROWS, the number of
// leading rows of the intermediate array that are read (all others are treated as 0).
// NONZERO_ROWS must be at least 1.
//
// idct(pDst_ptr, pTemp):
//   pTemp    - top of one column of the intermediate array; rows are 8 ints apart
//   pDst_ptr - top of one column of the 8x8 output; rows are 8 bytes apart
// Each result is descaled, offset by +128 (DESCALE_ZEROSHIFT) and clamped to 0..255.
struct Col (int NONZERO_ROWS) {
pure nothrow @trusted @nogc:
  static void idct(ubyte* pDst_ptr, const(int)* pTemp) {
    static assert(NONZERO_ROWS > 0);
    static if (NONZERO_ROWS == 1) {
      // Only the first row is used: the whole output column gets the same value.
      int dcval = DESCALE_ZEROSHIFT(pTemp[0], PASS1_BITS+3);
      immutable ubyte dcval_clamped = cast(ubyte)CLAMP(dcval);
      pDst_ptr[0*8] = dcval_clamped;
      pDst_ptr[1*8] = dcval_clamped;
      pDst_ptr[2*8] = dcval_clamped;
      pDst_ptr[3*8] = dcval_clamped;
      pDst_ptr[4*8] = dcval_clamped;
      pDst_ptr[5*8] = dcval_clamped;
      pDst_ptr[6*8] = dcval_clamped;
      pDst_ptr[7*8] = dcval_clamped;
    } else {
      // ACCESS_ROW() will be optimized at compile time to either an array access, or 0.
      //#define ACCESS_ROW(x) (((x) < NONZERO_ROWS) ? pTemp[x * 8] : 0)
      // The template yields a code string that is pasted in with mixin().
      template ACCESS_ROW(int x) {
        static if (x < NONZERO_ROWS) enum ACCESS_ROW = "pTemp["~(x*8).stringof~"]"; else enum ACCESS_ROW = "0";
      }

      // Even part: rows 2 and 6, then 0 and 4.
      immutable int z2 = mixin(ACCESS_ROW!2);
      immutable int z3 = mixin(ACCESS_ROW!6);

      immutable int z1 = (z2 + z3)*FIX_0_541196100;
      immutable int tmp2 = z1 + z3*(-FIX_1_847759065);
      immutable int tmp3 = z1 + z2*FIX_0_765366865;

      immutable int tmp0 = (mixin(ACCESS_ROW!0) + mixin(ACCESS_ROW!4)) << CONST_BITS;
      immutable int tmp1 = (mixin(ACCESS_ROW!0) - mixin(ACCESS_ROW!4)) << CONST_BITS;

      immutable int tmp10 = tmp0 + tmp3, tmp13 = tmp0 - tmp3, tmp11 = tmp1 + tmp2, tmp12 = tmp1 - tmp2;

      // Odd part: rows 7, 5, 3 and 1.
      immutable int atmp0 = mixin(ACCESS_ROW!7), atmp1 = mixin(ACCESS_ROW!5), atmp2 = mixin(ACCESS_ROW!3), atmp3 = mixin(ACCESS_ROW!1);

      immutable int bz1 = atmp0 + atmp3, bz2 = atmp1 + atmp2, bz3 = atmp0 + atmp2, bz4 = atmp1 + atmp3;
      immutable int bz5 = (bz3 + bz4)*FIX_1_175875602;

      immutable int az1 = bz1*(-FIX_0_899976223);
      immutable int az2 = bz2*(-FIX_2_562915447);
      immutable int az3 = bz3*(-FIX_1_961570560) + bz5;
      immutable int az4 = bz4*(-FIX_0_390180644) + bz5;

      immutable int btmp0 = atmp0*FIX_0_298631336 + az1 + az3;
      immutable int btmp1 = atmp1*FIX_2_053119869 + az2 + az4;
      immutable int btmp2 = atmp2*FIX_3_072711026 + az2 + az3;
      immutable int btmp3 = atmp3*FIX_1_501321110 + az1 + az4;

      // Combine even and odd parts; output rows k and 7-k use the same pair with opposite sign.
      // The shift removes CONST_BITS, the PASS1_BITS kept by the row pass, and 3 more bits.
      int i = DESCALE_ZEROSHIFT(tmp10 + btmp3, CONST_BITS+PASS1_BITS+3);
      pDst_ptr[8*0] = cast(ubyte)CLAMP(i);

      i = DESCALE_ZEROSHIFT(tmp10 - btmp3, CONST_BITS+PASS1_BITS+3);
      pDst_ptr[8*7] = cast(ubyte)CLAMP(i);

      i = DESCALE_ZEROSHIFT(tmp11 + btmp2, CONST_BITS+PASS1_BITS+3);
      pDst_ptr[8*1] = cast(ubyte)CLAMP(i);

      i = DESCALE_ZEROSHIFT(tmp11 - btmp2, CONST_BITS+PASS1_BITS+3);
      pDst_ptr[8*6] = cast(ubyte)CLAMP(i);

      i = DESCALE_ZEROSHIFT(tmp12 + btmp1, CONST_BITS+PASS1_BITS+3);
      pDst_ptr[8*2] = cast(ubyte)CLAMP(i);

      i = DESCALE_ZEROSHIFT(tmp12 - btmp1, CONST_BITS+PASS1_BITS+3);
      pDst_ptr[8*5] = cast(ubyte)CLAMP(i);

      i = DESCALE_ZEROSHIFT(tmp13 + btmp0, CONST_BITS+PASS1_BITS+3);
      pDst_ptr[8*3] = cast(ubyte)CLAMP(i);

      i = DESCALE_ZEROSHIFT(tmp13 - btmp0, CONST_BITS+PASS1_BITS+3);
      pDst_ptr[8*4] = cast(ubyte)CLAMP(i);
    }
  }
}
297 
298 
// Lookup table used by idct() to pick the Row!(N) specialisation.
// It holds 64 groups of 8 entries; group (block_max_zag - 1) gives, for each of the 8 rows of
// the block, the number of leading columns to feed to the row pass (0..8).
static immutable ubyte[512] s_idct_row_table = [
  1,0,0,0,0,0,0,0, 2,0,0,0,0,0,0,0, 2,1,0,0,0,0,0,0, 2,1,1,0,0,0,0,0, 2,2,1,0,0,0,0,0, 3,2,1,0,0,0,0,0, 4,2,1,0,0,0,0,0, 4,3,1,0,0,0,0,0,
  4,3,2,0,0,0,0,0, 4,3,2,1,0,0,0,0, 4,3,2,1,1,0,0,0, 4,3,2,2,1,0,0,0, 4,3,3,2,1,0,0,0, 4,4,3,2,1,0,0,0, 5,4,3,2,1,0,0,0, 6,4,3,2,1,0,0,0,
  6,5,3,2,1,0,0,0, 6,5,4,2,1,0,0,0, 6,5,4,3,1,0,0,0, 6,5,4,3,2,0,0,0, 6,5,4,3,2,1,0,0, 6,5,4,3,2,1,1,0, 6,5,4,3,2,2,1,0, 6,5,4,3,3,2,1,0,
  6,5,4,4,3,2,1,0, 6,5,5,4,3,2,1,0, 6,6,5,4,3,2,1,0, 7,6,5,4,3,2,1,0, 8,6,5,4,3,2,1,0, 8,7,5,4,3,2,1,0, 8,7,6,4,3,2,1,0, 8,7,6,5,3,2,1,0,
  8,7,6,5,4,2,1,0, 8,7,6,5,4,3,1,0, 8,7,6,5,4,3,2,0, 8,7,6,5,4,3,2,1, 8,7,6,5,4,3,2,2, 8,7,6,5,4,3,3,2, 8,7,6,5,4,4,3,2, 8,7,6,5,5,4,3,2,
  8,7,6,6,5,4,3,2, 8,7,7,6,5,4,3,2, 8,8,7,6,5,4,3,2, 8,8,8,6,5,4,3,2, 8,8,8,7,5,4,3,2, 8,8,8,7,6,4,3,2, 8,8,8,7,6,5,3,2, 8,8,8,7,6,5,4,2,
  8,8,8,7,6,5,4,3, 8,8,8,7,6,5,4,4, 8,8,8,7,6,5,5,4, 8,8,8,7,6,6,5,4, 8,8,8,7,7,6,5,4, 8,8,8,8,7,6,5,4, 8,8,8,8,8,6,5,4, 8,8,8,8,8,7,5,4,
  8,8,8,8,8,7,6,4, 8,8,8,8,8,7,6,5, 8,8,8,8,8,7,6,6, 8,8,8,8,8,7,7,6, 8,8,8,8,8,8,7,6, 8,8,8,8,8,8,8,6, 8,8,8,8,8,8,8,7, 8,8,8,8,8,8,8,8,
];

// Lookup table used by idct() to pick the Col!(N) specialisation:
// entry (block_max_zag - 1) is the number of leading rows to feed to the column pass (1..8).
static immutable ubyte[64] s_idct_col_table = [ 1, 1, 2, 3, 3, 3, 3, 3, 3, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8 ];
311 
/// 8x8 inverse DCT of one coefficient block into 64 output bytes.
/// Params:
///   pSrc_ptr = the 64 coefficients of the block, 8 per row
///   pDst_ptr = receives 64 bytes, 8 per row
///   block_max_zag = value in 1..64 used to index the s_idct_row_table / s_idct_col_table
///                   lookup tables, which select how many columns/rows each pass processes
void idct() (const(jpeg_decoder.jpgd_block_t)* pSrc_ptr, ubyte* pDst_ptr, int block_max_zag) {
  assert(block_max_zag >= 1);
  assert(block_max_zag <= 64);

  // Shortcut: only the first coefficient is used, so all 64 output bytes are the same value.
  if (block_max_zag <= 1) {
    immutable int dc = ((pSrc_ptr[0] + 4) >> 3) + 128;
    pDst_ptr[0 .. 64] = CLAMP(dc);
    return;
  }

  // Intermediate results of the row pass (zero-initialised; Row!(0) leaves its row untouched).
  int[64] workspace;

  // Row pass: the table says how many leading columns of each row to process.
  const(ubyte)* colCounts = &s_idct_row_table.ptr[(block_max_zag - 1) * 8];
  foreach (int row; 0 .. 8) {
    const(jpeg_decoder.jpgd_block_t)* rowIn = pSrc_ptr + row * 8;
    int* rowOut = workspace.ptr + row * 8;
    switch (colCounts[row]) {
      case 0: Row!(0).idct(rowOut, rowIn); break;
      case 1: Row!(1).idct(rowOut, rowIn); break;
      case 2: Row!(2).idct(rowOut, rowIn); break;
      case 3: Row!(3).idct(rowOut, rowIn); break;
      case 4: Row!(4).idct(rowOut, rowIn); break;
      case 5: Row!(5).idct(rowOut, rowIn); break;
      case 6: Row!(6).idct(rowOut, rowIn); break;
      case 7: Row!(7).idct(rowOut, rowIn); break;
      case 8: Row!(8).idct(rowOut, rowIn); break;
      default: assert(0);
    }
  }

  // Column pass: the same row count is used for all 8 columns.
  immutable int nonzero_rows = s_idct_col_table.ptr[block_max_zag - 1];
  foreach (int col; 0 .. 8) {
    const(int)* colIn = workspace.ptr + col;
    ubyte* colOut = pDst_ptr + col;
    switch (nonzero_rows) {
      case 1: Col!(1).idct(colOut, colIn); break;
      case 2: Col!(2).idct(colOut, colIn); break;
      case 3: Col!(3).idct(colOut, colIn); break;
      case 4: Col!(4).idct(colOut, colIn); break;
      case 5: Col!(5).idct(colOut, colIn); break;
      case 6: Col!(6).idct(colOut, colIn); break;
      case 7: Col!(7).idct(colOut, colIn); break;
      case 8: Col!(8).idct(colOut, colIn); break;
      default: assert(0);
    }
  }
}
381 
/// Inverse DCT that only uses the top-left 4x4 coefficients of the block.
/// The row pass runs on the first 4 rows with Row!(4); the column pass runs on all
/// 8 columns with Col!(4). Writes 64 bytes (8 per row) to `pDst_ptr`.
void idct_4x4() (const(jpeg_decoder.jpgd_block_t)* pSrc_ptr, ubyte* pDst_ptr) {
  int[64] workspace;

  // Row pass over the first four rows only.
  foreach (int row; 0 .. 4) {
    Row!(4).idct(workspace.ptr + row * 8, pSrc_ptr + row * 8);
  }

  // Column pass over every column; Col!(4) reads rows 0..3 of the workspace.
  foreach (int col; 0 .. 8) {
    Col!(4).idct(pDst_ptr + col, workspace.ptr + col);
  }
}
402 
403 
404 // ////////////////////////////////////////////////////////////////////////// //
405 struct jpeg_decoder {
406 nothrow:
407 @nogc:
408 
private import core.stdc.string : memcpy, memset;
410 private:
411   static auto JPGD_MIN(T) (T a, T b) pure nothrow @safe @nogc { pragma(inline, true); return (a < b ? a : b); }
412   static auto JPGD_MAX(T) (T a, T b) pure nothrow @safe @nogc { pragma(inline, true); return (a > b ? a : b); }
413 
  // Element type of the quantization tables (see m_quant).
  alias jpgd_quant_t = short;
  // Element type of a coefficient block; this is what Row.idct, idct() and idct_4x4() read.
  alias jpgd_block_t = short;
  // Signature of a per-block decode routine: receives the decoder by reference plus three ints.
  // NOTE(review): the meaning of the three ints is not visible in this part of the file.
  alias pDecode_block_func = void function (ref jpeg_decoder, int, int, int);
417 
  // Decoding tables for one Huffman table, as consumed by the huff_decode() overloads.
  static struct huff_tables {
    bool ac_table; // presumably true for an AC table -- not used in the visible code, confirm
    uint[256] look_up;  // indexed by the top 8 bits of the bit buffer; a negative entry (as int) means "continue in `tree`"
    uint[256] look_up2; // like look_up, but a non-negative entry packs: symbol (bits 0..7), bit count (bits 8..12), flag 0x8000, extra bits (bits 16 and up)
    ubyte[256] code_size; // code length in bits, indexed by symbol
    uint[512] tree; // traversal table for codes not resolved by the 8-bit lookup; indexed with a negated value
  }
425 
  // Descriptor of a block-organised coefficient buffer (referenced by m_dc_coeffs / m_ac_coeffs).
  // NOTE(review): the code that fills and reads this struct is outside the visible part of the
  // file; the field notes below are inferred from the names only -- confirm.
  static struct coeff_buf {
    ubyte* pData;                 // presumably the start of the storage
    int block_num_x, block_num_y; // presumably the number of blocks in each direction
    int block_len_x, block_len_y; // presumably the dimensions of one block
    int block_size;               // presumably the size of one block
  }
432 
  // Node of the singly linked list headed by m_pMem_blocks.
  // NOTE(review): the allocator that uses it is outside the visible part of the file; m_data
  // looks like the first byte of a payload that extends past the end of the struct -- confirm.
  static struct mem_block {
    mem_block* m_pNext;  // next node in the list
    size_t m_used_count; // presumably the number of payload bytes already handed out
    size_t m_size;       // presumably the payload capacity
    char[1] m_data;      // start of the payload
  }
439 
440   mem_block* m_pMem_blocks;
441   int m_image_x_size;
442   int m_image_y_size;
443   JpegStreamReadFunc readfn;
444   int m_progressive_flag;
445   ubyte[JPGD_MAX_HUFF_TABLES] m_huff_ac;
446   ubyte*[JPGD_MAX_HUFF_TABLES] m_huff_num;      // pointer to number of Huffman codes per bit size
447   ubyte*[JPGD_MAX_HUFF_TABLES] m_huff_val;      // pointer to Huffman codes per bit size
448   jpgd_quant_t*[JPGD_MAX_QUANT_TABLES] m_quant; // pointer to quantization tables
449   int m_scan_type;                              // Gray, Yh1v1, Yh1v2, Yh2v1, Yh2v2 (CMYK111, CMYK4114 no longer supported)
450   int m_comps_in_frame;                         // # of components in frame
451   int[JPGD_MAX_COMPONENTS] m_comp_h_samp;       // component's horizontal sampling factor
452   int[JPGD_MAX_COMPONENTS] m_comp_v_samp;       // component's vertical sampling factor
453   int[JPGD_MAX_COMPONENTS] m_comp_quant;        // component's quantization table selector
454   int[JPGD_MAX_COMPONENTS] m_comp_ident;        // component's ID
455   int[JPGD_MAX_COMPONENTS] m_comp_h_blocks;
456   int[JPGD_MAX_COMPONENTS] m_comp_v_blocks;
457   int m_comps_in_scan;                          // # of components in scan
458   int[JPGD_MAX_COMPS_IN_SCAN] m_comp_list;      // components in this scan
459   int[JPGD_MAX_COMPONENTS] m_comp_dc_tab;       // component's DC Huffman coding table selector
460   int[JPGD_MAX_COMPONENTS] m_comp_ac_tab;       // component's AC Huffman coding table selector
461   int m_spectral_start;                         // spectral selection start
462   int m_spectral_end;                           // spectral selection end
463   int m_successive_low;                         // successive approximation low
464   int m_successive_high;                        // successive approximation high
465   int m_max_mcu_x_size;                         // MCU's max. X size in pixels
466   int m_max_mcu_y_size;                         // MCU's max. Y size in pixels
467   int m_blocks_per_mcu;
468   int m_max_blocks_per_row;
469   int m_mcus_per_row, m_mcus_per_col;
470   int[JPGD_MAX_BLOCKS_PER_MCU] m_mcu_org;
471   int m_total_lines_left;                       // total # lines left in image
472   int m_mcu_lines_left;                         // total # lines left in this MCU
473   int m_real_dest_bytes_per_scan_line;
474   int m_dest_bytes_per_scan_line;               // rounded up
475   int m_dest_bytes_per_pixel;                   // 4 (RGB) or 1 (Y)
476   huff_tables*[JPGD_MAX_HUFF_TABLES] m_pHuff_tabs;
477   coeff_buf*[JPGD_MAX_COMPONENTS] m_dc_coeffs;
478   coeff_buf*[JPGD_MAX_COMPONENTS] m_ac_coeffs;
479   int m_eob_run;
480   int[JPGD_MAX_COMPONENTS] m_block_y_mcu;
481   ubyte* m_pIn_buf_ofs;
482   int m_in_buf_left;
483   int m_tem_flag;
484   bool m_eof_flag;
485   ubyte[128] m_in_buf_pad_start;
486   ubyte[JPGD_IN_BUF_SIZE+128] m_in_buf;
487   ubyte[128] m_in_buf_pad_end;
488   int m_bits_left;
489   uint m_bit_buf;
490   int m_restart_interval;
491   int m_restarts_left;
492   int m_next_restart_num;
493   int m_max_mcus_per_row;
494   int m_max_blocks_per_mcu;
495   int m_expanded_blocks_per_mcu;
496   int m_expanded_blocks_per_row;
497   int m_expanded_blocks_per_component;
498   bool m_freq_domain_chroma_upsample;
499   int m_max_mcus_per_col;
500   uint[JPGD_MAX_COMPONENTS] m_last_dc_val;
501   jpgd_block_t* m_pMCU_coefficients;
502   int[JPGD_MAX_BLOCKS_PER_MCU] m_mcu_block_max_zag;
503   ubyte* m_pSample_buf;
504   int[256] m_crr;
505   int[256] m_cbb;
506   int[256] m_crg;
507   int[256] m_cbg;
508   ubyte* m_pScan_line_0;
509   ubyte* m_pScan_line_1;
510   jpgd_status m_error_code;
511   bool m_ready_flag;
512   int m_total_bytes_read;
513 
514 public:
  // Inspect `error_code` after constructing to determine if the stream is valid or not. You may look at the `width`, `height`, etc.
  // methods after the constructor is called. You may then either destruct the object, or begin decoding the image by calling begin_decoding(), then decode() on each scanline.
  // The constructor itself only forwards the read delegate `rfn` to decode_init().
  this (JpegStreamReadFunc rfn) { decode_init(rfn); }

  // The destructor calls free_all_blocks().
  ~this () { free_all_blocks(); }

  // Postblit is disabled, so a decoder cannot be copied.
  @disable this (this); // no copies
522 
523   // Call this method after constructing the object to begin decompression.
524   // If JPGD_SUCCESS is returned you may then call decode() on each scanline.
525   int begin_decoding () {
526     if (m_ready_flag) return JPGD_SUCCESS;
527     if (m_error_code) return JPGD_FAILED;
528 
529     decode_start();
530     m_ready_flag = true;
531     return JPGD_SUCCESS;
532   }
533 
534   // Returns the next scan line.
535   // For grayscale images, pScan_line will point to a buffer containing 8-bit pixels (`bytes_per_pixel` will return 1).
536   // Otherwise, it will always point to a buffer containing 32-bit RGBA pixels (A will always be 255, and `bytes_per_pixel` will return 4).
537   // Returns JPGD_SUCCESS if a scan line has been returned.
538   // Returns JPGD_DONE if all scan lines have been returned.
539   // Returns JPGD_FAILED if an error occurred. Inspect `error_code` for a more info.
540   int decode (/*const void** */void** pScan_line, uint* pScan_line_len) {
541     if (m_error_code || !m_ready_flag) return JPGD_FAILED;
542     if (m_total_lines_left == 0) return JPGD_DONE;
543 
544       if (m_mcu_lines_left == 0) {
545         if (m_progressive_flag) load_next_row(); else decode_next_row();
546         // Find the EOI marker if that was the last row.
547         if (m_total_lines_left <= m_max_mcu_y_size) find_eoi();
548         m_mcu_lines_left = m_max_mcu_y_size;
549       }
550       if (m_freq_domain_chroma_upsample) {
551         expanded_convert();
552         *pScan_line = m_pScan_line_0;
553       } else {
554         switch (m_scan_type) {
555           case JPGD_YH2V2:
556             if ((m_mcu_lines_left & 1) == 0) {
557               H2V2Convert();
558               *pScan_line = m_pScan_line_0;
559             } else {
560               *pScan_line = m_pScan_line_1;
561             }
562             break;
563           case JPGD_YH2V1:
564             H2V1Convert();
565             *pScan_line = m_pScan_line_0;
566             break;
567           case JPGD_YH1V2:
568             if ((m_mcu_lines_left & 1) == 0) {
569               H1V2Convert();
570               *pScan_line = m_pScan_line_0;
571             } else {
572               *pScan_line = m_pScan_line_1;
573             }
574             break;
575           case JPGD_YH1V1:
576             H1V1Convert();
577             *pScan_line = m_pScan_line_0;
578             break;
579           case JPGD_GRAYSCALE:
580             gray_convert();
581             *pScan_line = m_pScan_line_0;
582             break;
583           default:
584         }
585       }
586       *pScan_line_len = m_real_dest_bytes_per_scan_line;
587       --m_mcu_lines_left;
588       --m_total_lines_left;
589       return JPGD_SUCCESS;
590   }
591 
592   @property const pure nothrow @safe @nogc {
593     jpgd_status error_code () { pragma(inline, true); return m_error_code; }
594 
595     int width () { pragma(inline, true); return m_image_x_size; }
596     int height () { pragma(inline, true); return m_image_y_size; }
597 
598     int num_components () { pragma(inline, true); return m_comps_in_frame; }
599 
600     int bytes_per_pixel () { pragma(inline, true); return m_dest_bytes_per_pixel; }
601     int bytes_per_scan_line () { pragma(inline, true); return m_image_x_size * bytes_per_pixel(); }
602 
603     // Returns the total number of bytes actually consumed by the decoder (which should equal the actual size of the JPEG file).
604     int total_bytes_read () { pragma(inline, true); return m_total_bytes_read; }
605   }
606 
607 private:
608   // Retrieve one character from the input stream.
609   uint get_char () {
610     // Any bytes remaining in buffer?
611     if (!m_in_buf_left) {
612       // Try to get more bytes.
613       prep_in_buffer();
614       // Still nothing to get?
615       if (!m_in_buf_left) {
616         // Pad the end of the stream with 0xFF 0xD9 (EOI marker)
617         int t = m_tem_flag;
618         m_tem_flag ^= 1;
619         return (t ? 0xD9 : 0xFF);
620       }
621     }
622     uint c = *m_pIn_buf_ofs++;
623     --m_in_buf_left;
624     return c;
625   }
626 
627   // Same as previous method, except can indicate if the character is a pad character or not.
628   uint get_char (bool* pPadding_flag) {
629     if (!m_in_buf_left) {
630       prep_in_buffer();
631       if (!m_in_buf_left) {
632         *pPadding_flag = true;
633         int t = m_tem_flag;
634         m_tem_flag ^= 1;
635         return (t ? 0xD9 : 0xFF);
636       }
637     }
638     *pPadding_flag = false;
639     uint c = *m_pIn_buf_ofs++;
640     --m_in_buf_left;
641     return c;
642   }
643 
644   // Inserts a previously retrieved character back into the input buffer.
645   void stuff_char (ubyte q) {
646     *(--m_pIn_buf_ofs) = q;
647     m_in_buf_left++;
648   }
649 
650   // Retrieves one character from the input stream, but does not read past markers. Will continue to return 0xFF when a marker is encountered.
651   ubyte get_octet () {
652     bool padding_flag;
653     int c = get_char(&padding_flag);
654     if (c == 0xFF) {
655       if (padding_flag) return 0xFF;
656       c = get_char(&padding_flag);
657       if (padding_flag) { stuff_char(0xFF); return 0xFF; }
658       if (c == 0x00) return 0xFF;
659       stuff_char(cast(ubyte)(c));
660       stuff_char(0xFF);
661       return 0xFF;
662     }
663     return cast(ubyte)(c);
664   }
665 
  // Retrieves a variable number of bits from the input stream. Does not recognize markers.
  // Returns the top `num_bits` bits of m_bit_buf (0 when num_bits is 0) and then consumes them.
  // When the count of buffered bits drops to zero or below, two more bytes are read with get_char().
  uint get_bits (int num_bits) {
    if (!num_bits) return 0;
    // The result is taken from the top of the bit buffer before anything is shifted.
    uint i = m_bit_buf >> (32 - num_bits);
    if ((m_bits_left -= num_bits) <= 0) {
      // num_bits + (m_bits_left - num_bits) is the old m_bits_left: shift by the bits that were still buffered.
      m_bit_buf <<= (num_bits += m_bits_left);
      uint c1 = get_char();
      uint c2 = get_char();
      // Put the two new bytes into the low 16 bits.
      m_bit_buf = (m_bit_buf & 0xFFFF0000) | (c1 << 8) | c2;
      // Shift by the part of the request that the old bits could not cover (-m_bits_left >= 0 here).
      m_bit_buf <<= -m_bits_left;
      m_bits_left += 16;
      assert(m_bits_left >= 0);
    } else {
      m_bit_buf <<= num_bits;
    }
    return i;
  }
683 
  // Retrieves a variable number of bits from the input stream. Markers will not be read into the input bit buffer. Instead, an infinite number of all 1's will be returned when a marker is encountered.
  // Returns the top `num_bits` bits of m_bit_buf (0 when num_bits is 0) and then consumes them.
  uint get_bits_no_markers (int num_bits) {
    if (!num_bits) return 0;
    // The result is taken from the top of the bit buffer before anything is shifted.
    uint i = m_bit_buf >> (32 - num_bits);
    if ((m_bits_left -= num_bits) <= 0) {
      // Shift by the bits that were still buffered (num_bits becomes the old m_bits_left).
      m_bit_buf <<= (num_bits += m_bits_left);
      if (m_in_buf_left < 2 || m_pIn_buf_ofs[0] == 0xFF || m_pIn_buf_ofs[1] == 0xFF) {
        // Slow path: fewer than 2 bytes buffered, or one of the next two is 0xFF -- let get_octet() deal with it.
        uint c1 = get_octet();
        uint c2 = get_octet();
        m_bit_buf |= (c1 << 8) | c2;
      } else {
        // Fast path: take the next two bytes straight from the input buffer.
        m_bit_buf |= (cast(uint)m_pIn_buf_ofs[0] << 8) | m_pIn_buf_ofs[1];
        m_in_buf_left -= 2;
        m_pIn_buf_ofs += 2;
      }
      // Shift by the part of the request that the old bits could not cover (-m_bits_left >= 0 here).
      m_bit_buf <<= -m_bits_left;
      m_bits_left += 16;
      assert(m_bits_left >= 0);
    } else {
      m_bit_buf <<= num_bits;
    }
    return i;
  }
707 
708   // Decodes a Huffman encoded symbol.
709   int huff_decode (huff_tables *pH) {
710     int symbol;
711     // Check first 8-bits: do we have a complete symbol?
712     if ((symbol = pH.look_up.ptr[m_bit_buf >> 24]) < 0) {
713       // Decode more bits, use a tree traversal to find symbol.
714       int ofs = 23;
715       do {
716         symbol = pH.tree.ptr[-cast(int)(symbol + ((m_bit_buf >> ofs) & 1))];
717         --ofs;
718       } while (symbol < 0);
719       get_bits_no_markers(8 + (23 - ofs));
720     } else {
721       get_bits_no_markers(pH.code_size.ptr[symbol]);
722     }
723     return symbol;
724   }
725 
  // Decodes a Huffman encoded symbol.
  // Variant that also fetches the extra bits that follow the code and stores them in `extra_bits`.
  // It uses pH.look_up2, whose non-negative entries pack several fields:
  //   bits 0..7  - the symbol (its low 4 bits are used as the number of extra bits)
  //   bits 8..12 - number of bits to consume
  //   0x8000     - set when the extra bits are already stored in the entry (bits 16 and up)
  int huff_decode (huff_tables *pH, ref int extra_bits) {
    int symbol;
    // Check first 8-bits: do we have a complete symbol?
    if ((symbol = pH.look_up2.ptr[m_bit_buf >> 24]) < 0) {
      // Use a tree traversal to find symbol.
      int ofs = 23;
      do {
        symbol = pH.tree.ptr[-cast(int)(symbol + ((m_bit_buf >> ofs) & 1))];
        --ofs;
      } while (symbol < 0);
      // Consume the code (8 lookup bits plus one bit per traversal step), then read the extra bits.
      get_bits_no_markers(8 + (23 - ofs));
      extra_bits = get_bits_no_markers(symbol & 0xF);
    } else {
      // Sanity check: the packed bit count must equal the code size, plus the extra bits when they are included.
      assert(((symbol >> 8) & 31) == pH.code_size.ptr[symbol & 255] + ((symbol & 0x8000) ? (symbol & 15) : 0));
      if (symbol & 0x8000) {
        // Code and extra bits were both inside the 8 lookup bits: consume them and take the stored value.
        get_bits_no_markers((symbol >> 8) & 31);
        extra_bits = symbol >> 16;
      } else {
        int code_size = (symbol >> 8) & 31;
        int num_extra_bits = symbol & 0xF;
        int bits = code_size + num_extra_bits;
        if (bits <= (m_bits_left + 16)) {
          // Fetch code and extra bits in a single call and keep only the extra bits.
          extra_bits = get_bits_no_markers(bits) & ((1 << num_extra_bits) - 1);
        } else {
          // Otherwise fetch them with two separate calls.
          get_bits_no_markers(code_size);
          extra_bits = get_bits_no_markers(num_extra_bits);
        }
      }
      // Strip the packed fields, leaving only the symbol.
      symbol &= 0xFF;
    }
    return symbol;
  }
759 
760   // Tables and macro used to fully decode the DPCM differences.
761   static immutable int[16] s_extend_test = [ 0, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, 0x0080, 0x0100, 0x0200, 0x0400, 0x0800, 0x1000, 0x2000, 0x4000 ];
762   static immutable int[16] s_extend_offset = [ 0, ((-1)<<1) + 1, ((-1)<<2) + 1, ((-1)<<3) + 1, ((-1)<<4) + 1, ((-1)<<5) + 1, ((-1)<<6) + 1, ((-1)<<7) + 1, ((-1)<<8) + 1, ((-1)<<9) + 1, ((-1)<<10) + 1, ((-1)<<11) + 1, ((-1)<<12) + 1, ((-1)<<13) + 1, ((-1)<<14) + 1, ((-1)<<15) + 1 ];
763   static immutable int[18] s_extend_mask = [ 0, (1<<0), (1<<1), (1<<2), (1<<3), (1<<4), (1<<5), (1<<6), (1<<7), (1<<8), (1<<9), (1<<10), (1<<11), (1<<12), (1<<13), (1<<14), (1<<15), (1<<16) ];
764   // The logical AND's in this macro are to shut up static code analysis (aren't really necessary - couldn't find another way to do this)
765   //#define JPGD_HUFF_EXTEND(x, s) (((x) < s_extend_test[s & 15]) ? ((x) + s_extend_offset[s & 15]) : (x))
766   static JPGD_HUFF_EXTEND (int x, int s) nothrow @trusted @nogc { pragma(inline, true); return (((x) < s_extend_test.ptr[s & 15]) ? ((x) + s_extend_offset.ptr[s & 15]) : (x)); }
767 
  // Clamps a value between 0-255.
  // `clamp` is just another name for CLAMP, which is declared outside this section; the hand-written version is kept below for reference only
  // (presumably CLAMP behaves the same way -- confirm against its definition).
  //static ubyte clamp (int i) { if (cast(uint)(i) > 255) i = (((~i) >> 31) & 0xFF); return cast(ubyte)(i); }
  alias clamp = CLAMP;
771 
772   static struct DCT_Upsample {
773   static:
774     static struct Matrix44 {
775     pure nothrow @trusted @nogc:
776       alias Element_Type = int;
777       enum { NUM_ROWS = 4, NUM_COLS = 4 }
778 
779       Element_Type[NUM_COLS][NUM_ROWS] v;
780 
781       this() (in auto ref Matrix44 m) {
782         foreach (immutable r; 0..NUM_ROWS) v[r][] = m.v[r][];
783       }
784 
785       //@property int rows () const { pragma(inline, true); return NUM_ROWS; }
786       //@property int cols () const { pragma(inline, true); return NUM_COLS; }
787 
788       ref inout(Element_Type) at (int r, int c) inout { pragma(inline, true); return v.ptr[r].ptr[c]; }
789 
790       ref Matrix44 opOpAssign(string op:"+") (in auto ref Matrix44 a) {
791         foreach (int r; 0..NUM_ROWS) {
792           at(r, 0) += a.at(r, 0);
793           at(r, 1) += a.at(r, 1);
794           at(r, 2) += a.at(r, 2);
795           at(r, 3) += a.at(r, 3);
796         }
797         return this;
798       }
799 
800       ref Matrix44 opOpAssign(string op:"-") (in auto ref Matrix44 a) {
801         foreach (int r; 0..NUM_ROWS) {
802           at(r, 0) -= a.at(r, 0);
803           at(r, 1) -= a.at(r, 1);
804           at(r, 2) -= a.at(r, 2);
805           at(r, 3) -= a.at(r, 3);
806         }
807         return this;
808       }
809 
810       Matrix44 opBinary(string op:"+") (in auto ref Matrix44 b) const {
811         alias a = this;
812         Matrix44 ret;
813         foreach (int r; 0..NUM_ROWS) {
814           ret.at(r, 0) = a.at(r, 0) + b.at(r, 0);
815           ret.at(r, 1) = a.at(r, 1) + b.at(r, 1);
816           ret.at(r, 2) = a.at(r, 2) + b.at(r, 2);
817           ret.at(r, 3) = a.at(r, 3) + b.at(r, 3);
818         }
819         return ret;
820       }
821 
822       Matrix44 opBinary(string op:"-") (in auto ref Matrix44 b) const {
823         alias a = this;
824         Matrix44 ret;
825         foreach (int r; 0..NUM_ROWS) {
826           ret.at(r, 0) = a.at(r, 0) - b.at(r, 0);
827           ret.at(r, 1) = a.at(r, 1) - b.at(r, 1);
828           ret.at(r, 2) = a.at(r, 2) - b.at(r, 2);
829           ret.at(r, 3) = a.at(r, 3) - b.at(r, 3);
830         }
831         return ret;
832       }
833 
834       static void add_and_store() (jpgd_block_t* pDst, in auto ref Matrix44 a, in auto ref Matrix44 b) {
835         foreach (int r; 0..4) {
836           pDst[0*8 + r] = cast(jpgd_block_t)(a.at(r, 0) + b.at(r, 0));
837           pDst[1*8 + r] = cast(jpgd_block_t)(a.at(r, 1) + b.at(r, 1));
838           pDst[2*8 + r] = cast(jpgd_block_t)(a.at(r, 2) + b.at(r, 2));
839           pDst[3*8 + r] = cast(jpgd_block_t)(a.at(r, 3) + b.at(r, 3));
840         }
841       }
842 
843       static void sub_and_store() (jpgd_block_t* pDst, in auto ref Matrix44 a, in auto ref Matrix44 b) {
844         foreach (int r; 0..4) {
845           pDst[0*8 + r] = cast(jpgd_block_t)(a.at(r, 0) - b.at(r, 0));
846           pDst[1*8 + r] = cast(jpgd_block_t)(a.at(r, 1) - b.at(r, 1));
847           pDst[2*8 + r] = cast(jpgd_block_t)(a.at(r, 2) - b.at(r, 2));
848           pDst[3*8 + r] = cast(jpgd_block_t)(a.at(r, 3) - b.at(r, 3));
849         }
850       }
851     }
852 
    // Fixed-point format used by the calc routines below: FRACT_BITS fractional bits, so SCALE represents 1.0.
    enum FRACT_BITS = 10;
    enum SCALE = 1 << FRACT_BITS;

    alias Temp_Type = int;

    // Scales a fixed-point value back down: half of SCALE is added before the right shift, so the result is rounded rather than truncated.
    static int D(T) (T i) { pragma(inline, true); return (((i) + (SCALE >> 1)) >> FRACT_BITS); }
    // Compile-time conversion of a float constant to fixed point: multiplied by SCALE, 0.5 added, then cast to int.
    enum F(float i) = (cast(int)((i) * SCALE + 0.5f));
860 
    // NUM_ROWS/NUM_COLS = # of non-zero rows/cols in input matrix
    // calc() fills the 4x4 matrices P and Q from the block at pSrc using fixed-point constants (see F and D).
    static struct P_Q(int NUM_ROWS, int NUM_COLS) {
      static void calc (ref Matrix44 P, ref Matrix44 Q, const(jpgd_block_t)* pSrc) {
        //auto AT (int c, int r) nothrow @trusted @nogc { return (c >= NUM_COLS || r >= NUM_ROWS ? 0 : pSrc[c+r*8]); }
        // AT!(c, r) yields source text for a string mixin: the literal "0" when (c, r) lies outside the NUM_COLS x NUM_ROWS region,
        // otherwise an access to pSrc[c + r*8]. The decision is made at compile time, so out-of-region terms never read pSrc.
        template AT(int c, int r) {
          static if (c >= NUM_COLS || r >= NUM_ROWS) enum AT = "0"; else enum AT = "pSrc["~c.stringof~"+"~r.stringof~"*8]";
        }
        // 4x8 = 4x8 times 8x8, matrix 0 is constant
        // First stage: X0ij temporaries. Groups X00j and X02j copy source columns 0 and 4; X01j and X03j are weighted sums of source columns 1, 3, 5 and 7.
        immutable Temp_Type X000 = mixin(AT!(0, 0));
        immutable Temp_Type X001 = mixin(AT!(0, 1));
        immutable Temp_Type X002 = mixin(AT!(0, 2));
        immutable Temp_Type X003 = mixin(AT!(0, 3));
        immutable Temp_Type X004 = mixin(AT!(0, 4));
        immutable Temp_Type X005 = mixin(AT!(0, 5));
        immutable Temp_Type X006 = mixin(AT!(0, 6));
        immutable Temp_Type X007 = mixin(AT!(0, 7));
        immutable Temp_Type X010 = D(F!(0.415735f) * mixin(AT!(1, 0)) + F!(0.791065f) * mixin(AT!(3, 0)) + F!(-0.352443f) * mixin(AT!(5, 0)) + F!(0.277785f) * mixin(AT!(7, 0)));
        immutable Temp_Type X011 = D(F!(0.415735f) * mixin(AT!(1, 1)) + F!(0.791065f) * mixin(AT!(3, 1)) + F!(-0.352443f) * mixin(AT!(5, 1)) + F!(0.277785f) * mixin(AT!(7, 1)));
        immutable Temp_Type X012 = D(F!(0.415735f) * mixin(AT!(1, 2)) + F!(0.791065f) * mixin(AT!(3, 2)) + F!(-0.352443f) * mixin(AT!(5, 2)) + F!(0.277785f) * mixin(AT!(7, 2)));
        immutable Temp_Type X013 = D(F!(0.415735f) * mixin(AT!(1, 3)) + F!(0.791065f) * mixin(AT!(3, 3)) + F!(-0.352443f) * mixin(AT!(5, 3)) + F!(0.277785f) * mixin(AT!(7, 3)));
        immutable Temp_Type X014 = D(F!(0.415735f) * mixin(AT!(1, 4)) + F!(0.791065f) * mixin(AT!(3, 4)) + F!(-0.352443f) * mixin(AT!(5, 4)) + F!(0.277785f) * mixin(AT!(7, 4)));
        immutable Temp_Type X015 = D(F!(0.415735f) * mixin(AT!(1, 5)) + F!(0.791065f) * mixin(AT!(3, 5)) + F!(-0.352443f) * mixin(AT!(5, 5)) + F!(0.277785f) * mixin(AT!(7, 5)));
        immutable Temp_Type X016 = D(F!(0.415735f) * mixin(AT!(1, 6)) + F!(0.791065f) * mixin(AT!(3, 6)) + F!(-0.352443f) * mixin(AT!(5, 6)) + F!(0.277785f) * mixin(AT!(7, 6)));
        immutable Temp_Type X017 = D(F!(0.415735f) * mixin(AT!(1, 7)) + F!(0.791065f) * mixin(AT!(3, 7)) + F!(-0.352443f) * mixin(AT!(5, 7)) + F!(0.277785f) * mixin(AT!(7, 7)));
        immutable Temp_Type X020 = mixin(AT!(4, 0));
        immutable Temp_Type X021 = mixin(AT!(4, 1));
        immutable Temp_Type X022 = mixin(AT!(4, 2));
        immutable Temp_Type X023 = mixin(AT!(4, 3));
        immutable Temp_Type X024 = mixin(AT!(4, 4));
        immutable Temp_Type X025 = mixin(AT!(4, 5));
        immutable Temp_Type X026 = mixin(AT!(4, 6));
        immutable Temp_Type X027 = mixin(AT!(4, 7));
        immutable Temp_Type X030 = D(F!(0.022887f) * mixin(AT!(1, 0)) + F!(-0.097545f) * mixin(AT!(3, 0)) + F!(0.490393f) * mixin(AT!(5, 0)) + F!(0.865723f) * mixin(AT!(7, 0)));
        immutable Temp_Type X031 = D(F!(0.022887f) * mixin(AT!(1, 1)) + F!(-0.097545f) * mixin(AT!(3, 1)) + F!(0.490393f) * mixin(AT!(5, 1)) + F!(0.865723f) * mixin(AT!(7, 1)));
        immutable Temp_Type X032 = D(F!(0.022887f) * mixin(AT!(1, 2)) + F!(-0.097545f) * mixin(AT!(3, 2)) + F!(0.490393f) * mixin(AT!(5, 2)) + F!(0.865723f) * mixin(AT!(7, 2)));
        immutable Temp_Type X033 = D(F!(0.022887f) * mixin(AT!(1, 3)) + F!(-0.097545f) * mixin(AT!(3, 3)) + F!(0.490393f) * mixin(AT!(5, 3)) + F!(0.865723f) * mixin(AT!(7, 3)));
        immutable Temp_Type X034 = D(F!(0.022887f) * mixin(AT!(1, 4)) + F!(-0.097545f) * mixin(AT!(3, 4)) + F!(0.490393f) * mixin(AT!(5, 4)) + F!(0.865723f) * mixin(AT!(7, 4)));
        immutable Temp_Type X035 = D(F!(0.022887f) * mixin(AT!(1, 5)) + F!(-0.097545f) * mixin(AT!(3, 5)) + F!(0.490393f) * mixin(AT!(5, 5)) + F!(0.865723f) * mixin(AT!(7, 5)));
        immutable Temp_Type X036 = D(F!(0.022887f) * mixin(AT!(1, 6)) + F!(-0.097545f) * mixin(AT!(3, 6)) + F!(0.490393f) * mixin(AT!(5, 6)) + F!(0.865723f) * mixin(AT!(7, 6)));
        immutable Temp_Type X037 = D(F!(0.022887f) * mixin(AT!(1, 7)) + F!(-0.097545f) * mixin(AT!(3, 7)) + F!(0.490393f) * mixin(AT!(5, 7)) + F!(0.865723f) * mixin(AT!(7, 7)));

        // 4x4 = 4x8 times 8x4, matrix 1 is constant
        // Second stage for P: even columns copy temporaries 0 and 4 of each group, odd columns are weighted sums of temporaries 1, 3, 5 and 7.
        P.at(0, 0) = X000;
        P.at(0, 1) = D(X001 * F!(0.415735f) + X003 * F!(0.791065f) + X005 * F!(-0.352443f) + X007 * F!(0.277785f));
        P.at(0, 2) = X004;
        P.at(0, 3) = D(X001 * F!(0.022887f) + X003 * F!(-0.097545f) + X005 * F!(0.490393f) + X007 * F!(0.865723f));
        P.at(1, 0) = X010;
        P.at(1, 1) = D(X011 * F!(0.415735f) + X013 * F!(0.791065f) + X015 * F!(-0.352443f) + X017 * F!(0.277785f));
        P.at(1, 2) = X014;
        P.at(1, 3) = D(X011 * F!(0.022887f) + X013 * F!(-0.097545f) + X015 * F!(0.490393f) + X017 * F!(0.865723f));
        P.at(2, 0) = X020;
        P.at(2, 1) = D(X021 * F!(0.415735f) + X023 * F!(0.791065f) + X025 * F!(-0.352443f) + X027 * F!(0.277785f));
        P.at(2, 2) = X024;
        P.at(2, 3) = D(X021 * F!(0.022887f) + X023 * F!(-0.097545f) + X025 * F!(0.490393f) + X027 * F!(0.865723f));
        P.at(3, 0) = X030;
        P.at(3, 1) = D(X031 * F!(0.415735f) + X033 * F!(0.791065f) + X035 * F!(-0.352443f) + X037 * F!(0.277785f));
        P.at(3, 2) = X034;
        P.at(3, 3) = D(X031 * F!(0.022887f) + X033 * F!(-0.097545f) + X035 * F!(0.490393f) + X037 * F!(0.865723f));
        // 40 muls 24 adds

        // 4x4 = 4x8 times 8x4, matrix 1 is constant
        // Second stage for Q: even columns are weighted sums of temporaries 1, 3, 5 and 7, odd columns copy temporaries 2 and 6.
        Q.at(0, 0) = D(X001 * F!(0.906127f) + X003 * F!(-0.318190f) + X005 * F!(0.212608f) + X007 * F!(-0.180240f));
        Q.at(0, 1) = X002;
        Q.at(0, 2) = D(X001 * F!(-0.074658f) + X003 * F!(0.513280f) + X005 * F!(0.768178f) + X007 * F!(-0.375330f));
        Q.at(0, 3) = X006;
        Q.at(1, 0) = D(X011 * F!(0.906127f) + X013 * F!(-0.318190f) + X015 * F!(0.212608f) + X017 * F!(-0.180240f));
        Q.at(1, 1) = X012;
        Q.at(1, 2) = D(X011 * F!(-0.074658f) + X013 * F!(0.513280f) + X015 * F!(0.768178f) + X017 * F!(-0.375330f));
        Q.at(1, 3) = X016;
        Q.at(2, 0) = D(X021 * F!(0.906127f) + X023 * F!(-0.318190f) + X025 * F!(0.212608f) + X027 * F!(-0.180240f));
        Q.at(2, 1) = X022;
        Q.at(2, 2) = D(X021 * F!(-0.074658f) + X023 * F!(0.513280f) + X025 * F!(0.768178f) + X027 * F!(-0.375330f));
        Q.at(2, 3) = X026;
        Q.at(3, 0) = D(X031 * F!(0.906127f) + X033 * F!(-0.318190f) + X035 * F!(0.212608f) + X037 * F!(-0.180240f));
        Q.at(3, 1) = X032;
        Q.at(3, 2) = D(X031 * F!(-0.074658f) + X033 * F!(0.513280f) + X035 * F!(0.768178f) + X037 * F!(-0.375330f));
        Q.at(3, 3) = X036;
        // 40 muls 24 adds
      }
    }
941 
    // Companion of P_Q: calc() fills the 4x4 matrices R and S from the block at pSrc using fixed-point constants (see F and D).
    // NUM_ROWS/NUM_COLS bound the region of the source that is actually read; everything outside it is taken as zero.
    static struct R_S(int NUM_ROWS, int NUM_COLS) {
      static void calc(ref Matrix44 R, ref Matrix44 S, const(jpgd_block_t)* pSrc) {
        //auto AT (int c, int r) nothrow @trusted @nogc { return (c >= NUM_COLS || r >= NUM_ROWS ? 0 : pSrc[c+r*8]); }
        // AT!(c, r) yields source text for a string mixin: the literal "0" when (c, r) lies outside the NUM_COLS x NUM_ROWS region,
        // otherwise an access to pSrc[c + r*8]. The decision is made at compile time, so out-of-region terms never read pSrc.
        template AT(int c, int r) {
          static if (c >= NUM_COLS || r >= NUM_ROWS) enum AT = "0"; else enum AT = "pSrc["~c.stringof~"+"~r.stringof~"*8]";
        }
        // 4x8 = 4x8 times 8x8, matrix 0 is constant
        // First stage: X1ij temporaries. Groups X10j and X12j are weighted sums of source columns 1, 3, 5 and 7; X11j and X13j copy source columns 2 and 6.
        immutable Temp_Type X100 = D(F!(0.906127f) * mixin(AT!(1, 0)) + F!(-0.318190f) * mixin(AT!(3, 0)) + F!(0.212608f) * mixin(AT!(5, 0)) + F!(-0.180240f) * mixin(AT!(7, 0)));
        immutable Temp_Type X101 = D(F!(0.906127f) * mixin(AT!(1, 1)) + F!(-0.318190f) * mixin(AT!(3, 1)) + F!(0.212608f) * mixin(AT!(5, 1)) + F!(-0.180240f) * mixin(AT!(7, 1)));
        immutable Temp_Type X102 = D(F!(0.906127f) * mixin(AT!(1, 2)) + F!(-0.318190f) * mixin(AT!(3, 2)) + F!(0.212608f) * mixin(AT!(5, 2)) + F!(-0.180240f) * mixin(AT!(7, 2)));
        immutable Temp_Type X103 = D(F!(0.906127f) * mixin(AT!(1, 3)) + F!(-0.318190f) * mixin(AT!(3, 3)) + F!(0.212608f) * mixin(AT!(5, 3)) + F!(-0.180240f) * mixin(AT!(7, 3)));
        immutable Temp_Type X104 = D(F!(0.906127f) * mixin(AT!(1, 4)) + F!(-0.318190f) * mixin(AT!(3, 4)) + F!(0.212608f) * mixin(AT!(5, 4)) + F!(-0.180240f) * mixin(AT!(7, 4)));
        immutable Temp_Type X105 = D(F!(0.906127f) * mixin(AT!(1, 5)) + F!(-0.318190f) * mixin(AT!(3, 5)) + F!(0.212608f) * mixin(AT!(5, 5)) + F!(-0.180240f) * mixin(AT!(7, 5)));
        immutable Temp_Type X106 = D(F!(0.906127f) * mixin(AT!(1, 6)) + F!(-0.318190f) * mixin(AT!(3, 6)) + F!(0.212608f) * mixin(AT!(5, 6)) + F!(-0.180240f) * mixin(AT!(7, 6)));
        immutable Temp_Type X107 = D(F!(0.906127f) * mixin(AT!(1, 7)) + F!(-0.318190f) * mixin(AT!(3, 7)) + F!(0.212608f) * mixin(AT!(5, 7)) + F!(-0.180240f) * mixin(AT!(7, 7)));
        immutable Temp_Type X110 = mixin(AT!(2, 0));
        immutable Temp_Type X111 = mixin(AT!(2, 1));
        immutable Temp_Type X112 = mixin(AT!(2, 2));
        immutable Temp_Type X113 = mixin(AT!(2, 3));
        immutable Temp_Type X114 = mixin(AT!(2, 4));
        immutable Temp_Type X115 = mixin(AT!(2, 5));
        immutable Temp_Type X116 = mixin(AT!(2, 6));
        immutable Temp_Type X117 = mixin(AT!(2, 7));
        immutable Temp_Type X120 = D(F!(-0.074658f) * mixin(AT!(1, 0)) + F!(0.513280f) * mixin(AT!(3, 0)) + F!(0.768178f) * mixin(AT!(5, 0)) + F!(-0.375330f) * mixin(AT!(7, 0)));
        immutable Temp_Type X121 = D(F!(-0.074658f) * mixin(AT!(1, 1)) + F!(0.513280f) * mixin(AT!(3, 1)) + F!(0.768178f) * mixin(AT!(5, 1)) + F!(-0.375330f) * mixin(AT!(7, 1)));
        immutable Temp_Type X122 = D(F!(-0.074658f) * mixin(AT!(1, 2)) + F!(0.513280f) * mixin(AT!(3, 2)) + F!(0.768178f) * mixin(AT!(5, 2)) + F!(-0.375330f) * mixin(AT!(7, 2)));
        immutable Temp_Type X123 = D(F!(-0.074658f) * mixin(AT!(1, 3)) + F!(0.513280f) * mixin(AT!(3, 3)) + F!(0.768178f) * mixin(AT!(5, 3)) + F!(-0.375330f) * mixin(AT!(7, 3)));
        immutable Temp_Type X124 = D(F!(-0.074658f) * mixin(AT!(1, 4)) + F!(0.513280f) * mixin(AT!(3, 4)) + F!(0.768178f) * mixin(AT!(5, 4)) + F!(-0.375330f) * mixin(AT!(7, 4)));
        immutable Temp_Type X125 = D(F!(-0.074658f) * mixin(AT!(1, 5)) + F!(0.513280f) * mixin(AT!(3, 5)) + F!(0.768178f) * mixin(AT!(5, 5)) + F!(-0.375330f) * mixin(AT!(7, 5)));
        immutable Temp_Type X126 = D(F!(-0.074658f) * mixin(AT!(1, 6)) + F!(0.513280f) * mixin(AT!(3, 6)) + F!(0.768178f) * mixin(AT!(5, 6)) + F!(-0.375330f) * mixin(AT!(7, 6)));
        immutable Temp_Type X127 = D(F!(-0.074658f) * mixin(AT!(1, 7)) + F!(0.513280f) * mixin(AT!(3, 7)) + F!(0.768178f) * mixin(AT!(5, 7)) + F!(-0.375330f) * mixin(AT!(7, 7)));
        immutable Temp_Type X130 = mixin(AT!(6, 0));
        immutable Temp_Type X131 = mixin(AT!(6, 1));
        immutable Temp_Type X132 = mixin(AT!(6, 2));
        immutable Temp_Type X133 = mixin(AT!(6, 3));
        immutable Temp_Type X134 = mixin(AT!(6, 4));
        immutable Temp_Type X135 = mixin(AT!(6, 5));
        immutable Temp_Type X136 = mixin(AT!(6, 6));
        immutable Temp_Type X137 = mixin(AT!(6, 7));
        // 80 muls 48 adds

        // 4x4 = 4x8 times 8x4, matrix 1 is constant
        // Second stage for R: even columns copy temporaries 0 and 4 of each group, odd columns are weighted sums of temporaries 1, 3, 5 and 7.
        R.at(0, 0) = X100;
        R.at(0, 1) = D(X101 * F!(0.415735f) + X103 * F!(0.791065f) + X105 * F!(-0.352443f) + X107 * F!(0.277785f));
        R.at(0, 2) = X104;
        R.at(0, 3) = D(X101 * F!(0.022887f) + X103 * F!(-0.097545f) + X105 * F!(0.490393f) + X107 * F!(0.865723f));
        R.at(1, 0) = X110;
        R.at(1, 1) = D(X111 * F!(0.415735f) + X113 * F!(0.791065f) + X115 * F!(-0.352443f) + X117 * F!(0.277785f));
        R.at(1, 2) = X114;
        R.at(1, 3) = D(X111 * F!(0.022887f) + X113 * F!(-0.097545f) + X115 * F!(0.490393f) + X117 * F!(0.865723f));
        R.at(2, 0) = X120;
        R.at(2, 1) = D(X121 * F!(0.415735f) + X123 * F!(0.791065f) + X125 * F!(-0.352443f) + X127 * F!(0.277785f));
        R.at(2, 2) = X124;
        R.at(2, 3) = D(X121 * F!(0.022887f) + X123 * F!(-0.097545f) + X125 * F!(0.490393f) + X127 * F!(0.865723f));
        R.at(3, 0) = X130;
        R.at(3, 1) = D(X131 * F!(0.415735f) + X133 * F!(0.791065f) + X135 * F!(-0.352443f) + X137 * F!(0.277785f));
        R.at(3, 2) = X134;
        R.at(3, 3) = D(X131 * F!(0.022887f) + X133 * F!(-0.097545f) + X135 * F!(0.490393f) + X137 * F!(0.865723f));
        // 40 muls 24 adds
        // 4x4 = 4x8 times 8x4, matrix 1 is constant
        // Second stage for S: even columns are weighted sums of temporaries 1, 3, 5 and 7, odd columns copy temporaries 2 and 6.
        S.at(0, 0) = D(X101 * F!(0.906127f) + X103 * F!(-0.318190f) + X105 * F!(0.212608f) + X107 * F!(-0.180240f));
        S.at(0, 1) = X102;
        S.at(0, 2) = D(X101 * F!(-0.074658f) + X103 * F!(0.513280f) + X105 * F!(0.768178f) + X107 * F!(-0.375330f));
        S.at(0, 3) = X106;
        S.at(1, 0) = D(X111 * F!(0.906127f) + X113 * F!(-0.318190f) + X115 * F!(0.212608f) + X117 * F!(-0.180240f));
        S.at(1, 1) = X112;
        S.at(1, 2) = D(X111 * F!(-0.074658f) + X113 * F!(0.513280f) + X115 * F!(0.768178f) + X117 * F!(-0.375330f));
        S.at(1, 3) = X116;
        S.at(2, 0) = D(X121 * F!(0.906127f) + X123 * F!(-0.318190f) + X125 * F!(0.212608f) + X127 * F!(-0.180240f));
        S.at(2, 1) = X122;
        S.at(2, 2) = D(X121 * F!(-0.074658f) + X123 * F!(0.513280f) + X125 * F!(0.768178f) + X127 * F!(-0.375330f));
        S.at(2, 3) = X126;
        S.at(3, 0) = D(X131 * F!(0.906127f) + X133 * F!(-0.318190f) + X135 * F!(0.212608f) + X137 * F!(-0.180240f));
        S.at(3, 1) = X132;
        S.at(3, 2) = D(X131 * F!(-0.074658f) + X133 * F!(0.513280f) + X135 * F!(0.768178f) + X137 * F!(-0.375330f));
        S.at(3, 3) = X136;
        // 40 muls 24 adds
      }
    }
1021   } // end namespace DCT_Upsample
1022 
1023   // Unconditionally frees all allocated m_blocks.
1024   void free_all_blocks () {
1025     //m_pStream = null;
1026     readfn = null;
1027     for (mem_block *b = m_pMem_blocks; b; ) {
1028       mem_block* n = b.m_pNext;
1029       jpgd_free(b);
1030       b = n;
1031     }
1032     m_pMem_blocks = null;
1033   }
1034 
  // This method handles all errors. It will never return.
  // It records `status` in m_error_code, releases all allocator blocks (free_all_blocks also clears the read callback),
  // and then stops via assert(false). In D, assert(false) is kept even in release builds, where it halts the program
  // instead of throwing, so control never comes back to the caller.
  // The longjmp of the original C++ code is left commented out below.
  /*JPGD_NORETURN*/ void stop_decoding (jpgd_status status) {
    m_error_code = status;
    free_all_blocks();
    //longjmp(m_jmp_state, status);
    assert(false, "jpeg decoding error");
  }
1043 
1044   void* alloc (size_t nSize, bool zero=false) {
1045     nSize = (JPGD_MAX(nSize, 1) + 3) & ~3;
1046     char *rv = null;
1047     for (mem_block *b = m_pMem_blocks; b; b = b.m_pNext)
1048     {
1049       if ((b.m_used_count + nSize) <= b.m_size)
1050       {
1051         rv = b.m_data.ptr + b.m_used_count;
1052         b.m_used_count += nSize;
1053         break;
1054       }
1055     }
1056     if (!rv)
1057     {
1058       int capacity = cast(int) JPGD_MAX(32768 - 256, (nSize + 2047) & ~2047);
1059       mem_block *b = cast(mem_block*)jpgd_malloc(mem_block.sizeof + capacity);
1060       if (!b) { stop_decoding(JPGD_NOTENOUGHMEM); }
1061       b.m_pNext = m_pMem_blocks; m_pMem_blocks = b;
1062       b.m_used_count = nSize;
1063       b.m_size = capacity;
1064       rv = b.m_data.ptr;
1065     }
1066     if (zero) memset(rv, 0, nSize);
1067     return rv;
1068   }
1069 
1070   void word_clear (void *p, ushort c, uint n) {
1071     ubyte *pD = cast(ubyte*)p;
1072     immutable ubyte l = c & 0xFF, h = (c >> 8) & 0xFF;
1073     while (n)
1074     {
1075       pD[0] = l; pD[1] = h; pD += 2;
1076       n--;
1077     }
1078   }
1079 
1080   // Refill the input buffer.
1081   // This method will sit in a loop until (A) the buffer is full or (B)
1082   // the stream's read() method reports and end of file condition.
1083   void prep_in_buffer () {
1084     m_in_buf_left = 0;
1085     m_pIn_buf_ofs = m_in_buf.ptr;
1086 
1087     if (m_eof_flag)
1088       return;
1089 
1090     do
1091     {
1092       int bytes_read = readfn(m_in_buf.ptr + m_in_buf_left, JPGD_IN_BUF_SIZE - m_in_buf_left, &m_eof_flag);
1093       if (bytes_read == -1)
1094         stop_decoding(JPGD_STREAM_READ);
1095 
1096       m_in_buf_left += bytes_read;
1097     } while ((m_in_buf_left < JPGD_IN_BUF_SIZE) && (!m_eof_flag));
1098 
1099     m_total_bytes_read += m_in_buf_left;
1100 
1101     // Pad the end of the block with M_EOI (prevents the decompressor from going off the rails if the stream is invalid).
1102     // (This dates way back to when this decompressor was written in C/asm, and the all-asm Huffman decoder did some fancy things to increase perf.)
1103     word_clear(m_pIn_buf_ofs + m_in_buf_left, 0xD9FF, 64);
1104   }
1105 
1106   // Read a Huffman code table.
1107   void read_dht_marker () {
1108     int i, index, count;
1109     ubyte[17] huff_num;
1110     ubyte[256] huff_val;
1111 
1112     uint num_left = get_bits(16);
1113 
1114     if (num_left < 2)
1115       stop_decoding(JPGD_BAD_DHT_MARKER);
1116 
1117     num_left -= 2;
1118 
1119     while (num_left)
1120     {
1121       index = get_bits(8);
1122 
1123       huff_num.ptr[0] = 0;
1124 
1125       count = 0;
1126 
1127       for (i = 1; i <= 16; i++)
1128       {
1129         huff_num.ptr[i] = cast(ubyte)(get_bits(8));
1130         count += huff_num.ptr[i];
1131       }
1132 
1133       if (count > 255)
1134         stop_decoding(JPGD_BAD_DHT_COUNTS);
1135 
1136       for (i = 0; i < count; i++)
1137         huff_val.ptr[i] = cast(ubyte)(get_bits(8));
1138 
1139       i = 1 + 16 + count;
1140 
1141       if (num_left < cast(uint)i)
1142         stop_decoding(JPGD_BAD_DHT_MARKER);
1143 
1144       num_left -= i;
1145 
1146       if ((index & 0x10) > 0x10)
1147         stop_decoding(JPGD_BAD_DHT_INDEX);
1148 
1149       index = (index & 0x0F) + ((index & 0x10) >> 4) * (JPGD_MAX_HUFF_TABLES >> 1);
1150 
1151       if (index >= JPGD_MAX_HUFF_TABLES)
1152         stop_decoding(JPGD_BAD_DHT_INDEX);
1153 
1154       if (!m_huff_num.ptr[index])
1155         m_huff_num.ptr[index] = cast(ubyte*)alloc(17);
1156 
1157       if (!m_huff_val.ptr[index])
1158         m_huff_val.ptr[index] = cast(ubyte*)alloc(256);
1159 
1160       m_huff_ac.ptr[index] = (index & 0x10) != 0;
1161       memcpy(m_huff_num.ptr[index], huff_num.ptr, 17);
1162       memcpy(m_huff_val.ptr[index], huff_val.ptr, 256);
1163     }
1164   }
1165 
1166   // Read a quantization table.
1167   void read_dqt_marker () {
1168     int n, i, prec;
1169     uint num_left;
1170     uint temp;
1171 
1172     num_left = get_bits(16);
1173 
1174     if (num_left < 2)
1175       stop_decoding(JPGD_BAD_DQT_MARKER);
1176 
1177     num_left -= 2;
1178 
1179     while (num_left)
1180     {
1181       n = get_bits(8);
1182       prec = n >> 4;
1183       n &= 0x0F;
1184 
1185       if (n >= JPGD_MAX_QUANT_TABLES)
1186         stop_decoding(JPGD_BAD_DQT_TABLE);
1187 
1188       if (!m_quant.ptr[n])
1189         m_quant.ptr[n] = cast(jpgd_quant_t*)alloc(64 * jpgd_quant_t.sizeof);
1190 
1191       // read quantization entries, in zag order
1192       for (i = 0; i < 64; i++)
1193       {
1194         temp = get_bits(8);
1195 
1196         if (prec)
1197           temp = (temp << 8) + get_bits(8);
1198 
1199         m_quant.ptr[n][i] = cast(jpgd_quant_t)(temp);
1200       }
1201 
1202       i = 64 + 1;
1203 
1204       if (prec)
1205         i += 64;
1206 
1207       if (num_left < cast(uint)i)
1208         stop_decoding(JPGD_BAD_DQT_LENGTH);
1209 
1210       num_left -= i;
1211     }
1212   }
1213 
1214   // Read the start of frame (SOF) marker.
1215   void read_sof_marker () {
1216     int i;
1217     uint num_left;
1218 
1219     num_left = get_bits(16);
1220 
1221     if (get_bits(8) != 8)   /* precision: sorry, only 8-bit precision is supported right now */
1222       stop_decoding(JPGD_BAD_PRECISION);
1223 
1224     m_image_y_size = get_bits(16);
1225 
1226     if ((m_image_y_size < 1) || (m_image_y_size > JPGD_MAX_HEIGHT))
1227       stop_decoding(JPGD_BAD_HEIGHT);
1228 
1229     m_image_x_size = get_bits(16);
1230 
1231     if ((m_image_x_size < 1) || (m_image_x_size > JPGD_MAX_WIDTH))
1232       stop_decoding(JPGD_BAD_WIDTH);
1233 
1234     m_comps_in_frame = get_bits(8);
1235 
1236     if (m_comps_in_frame > JPGD_MAX_COMPONENTS)
1237       stop_decoding(JPGD_TOO_MANY_COMPONENTS);
1238 
1239     if (num_left != cast(uint)(m_comps_in_frame * 3 + 8))
1240       stop_decoding(JPGD_BAD_SOF_LENGTH);
1241 
1242     for (i = 0; i < m_comps_in_frame; i++)
1243     {
1244       m_comp_ident.ptr[i]  = get_bits(8);
1245       m_comp_h_samp.ptr[i] = get_bits(4);
1246       m_comp_v_samp.ptr[i] = get_bits(4);
1247       m_comp_quant.ptr[i]  = get_bits(8);
1248     }
1249   }
1250 
1251   // Used to skip unrecognized markers.
1252   void skip_variable_marker () {
1253     uint num_left;
1254 
1255     num_left = get_bits(16);
1256 
1257     if (num_left < 2)
1258       stop_decoding(JPGD_BAD_VARIABLE_MARKER);
1259 
1260     num_left -= 2;
1261 
1262     while (num_left)
1263     {
1264       get_bits(8);
1265       num_left--;
1266     }
1267   }
1268 
1269   // Read a define restart interval (DRI) marker.
1270   void read_dri_marker () {
1271     if (get_bits(16) != 4)
1272       stop_decoding(JPGD_BAD_DRI_LENGTH);
1273 
1274     m_restart_interval = get_bits(16);
1275   }
1276 
1277   // Read a start of scan (SOS) marker.
1278   void read_sos_marker () {
1279     uint num_left;
1280     int i, ci, n, c, cc;
1281 
1282     num_left = get_bits(16);
1283 
1284     n = get_bits(8);
1285 
1286     m_comps_in_scan = n;
1287 
1288     num_left -= 3;
1289 
1290     if ( (num_left != cast(uint)(n * 2 + 3)) || (n < 1) || (n > JPGD_MAX_COMPS_IN_SCAN) )
1291       stop_decoding(JPGD_BAD_SOS_LENGTH);
1292 
1293     for (i = 0; i < n; i++)
1294     {
1295       cc = get_bits(8);
1296       c = get_bits(8);
1297       num_left -= 2;
1298 
1299       for (ci = 0; ci < m_comps_in_frame; ci++)
1300         if (cc == m_comp_ident.ptr[ci])
1301           break;
1302 
1303       if (ci >= m_comps_in_frame)
1304         stop_decoding(JPGD_BAD_SOS_COMP_ID);
1305 
1306       m_comp_list.ptr[i]    = ci;
1307       m_comp_dc_tab.ptr[ci] = (c >> 4) & 15;
1308       m_comp_ac_tab.ptr[ci] = (c & 15) + (JPGD_MAX_HUFF_TABLES >> 1);
1309     }
1310 
1311     m_spectral_start  = get_bits(8);
1312     m_spectral_end    = get_bits(8);
1313     m_successive_high = get_bits(4);
1314     m_successive_low  = get_bits(4);
1315 
1316     if (!m_progressive_flag)
1317     {
1318       m_spectral_start = 0;
1319       m_spectral_end = 63;
1320     }
1321 
1322     num_left -= 3;
1323 
1324     /* read past whatever is num_left */
1325     while (num_left)
1326     {
1327       get_bits(8);
1328       num_left--;
1329     }
1330   }
1331 
1332   // Finds the next marker.
1333   int next_marker () {
1334     uint c, bytes;
1335 
1336     bytes = 0;
1337 
1338     do
1339     {
1340       do
1341       {
1342         bytes++;
1343         c = get_bits(8);
1344       } while (c != 0xFF);
1345 
1346       do
1347       {
1348         c = get_bits(8);
1349       } while (c == 0xFF);
1350 
1351     } while (c == 0);
1352 
1353     // If bytes > 0 here, there where extra bytes before the marker (not good).
1354 
1355     return c;
1356   }
1357 
1358   // Process markers. Returns when an SOFx, SOI, EOI, or SOS marker is
1359   // encountered.
1360   int process_markers () {
1361     int c;
1362 
1363     for ( ; ; ) {
1364       c = next_marker();
1365 
1366       switch (c)
1367       {
1368         case M_SOF0:
1369         case M_SOF1:
1370         case M_SOF2:
1371         case M_SOF3:
1372         case M_SOF5:
1373         case M_SOF6:
1374         case M_SOF7:
1375         //case M_JPG:
1376         case M_SOF9:
1377         case M_SOF10:
1378         case M_SOF11:
1379         case M_SOF13:
1380         case M_SOF14:
1381         case M_SOF15:
1382         case M_SOI:
1383         case M_EOI:
1384         case M_SOS:
1385           return c;
1386         case M_DHT:
1387           read_dht_marker();
1388           break;
1389         // No arithmitic support - dumb patents!
1390         case M_DAC:
1391           stop_decoding(JPGD_NO_ARITHMITIC_SUPPORT);
1392           break;
1393         case M_DQT:
1394           read_dqt_marker();
1395           break;
1396         case M_DRI:
1397           read_dri_marker();
1398           break;
1399         //case M_APP0:  /* no need to read the JFIF marker */
1400 
1401         case M_JPG:
1402         case M_RST0:    /* no parameters */
1403         case M_RST1:
1404         case M_RST2:
1405         case M_RST3:
1406         case M_RST4:
1407         case M_RST5:
1408         case M_RST6:
1409         case M_RST7:
1410         case M_TEM:
1411           stop_decoding(JPGD_UNEXPECTED_MARKER);
1412           break;
1413         default:    /* must be DNL, DHP, EXP, APPn, JPGn, COM, or RESn or APP0 */
1414           skip_variable_marker();
1415           break;
1416       }
1417     }
1418   }
1419 
1420   // Finds the start of image (SOI) marker.
1421   // This code is rather defensive: it only checks the first 512 bytes to avoid
1422   // false positives.
1423   void locate_soi_marker () {
1424     uint lastchar, thischar;
1425     uint bytesleft;
1426 
1427     lastchar = get_bits(8);
1428 
1429     thischar = get_bits(8);
1430 
1431     /* ok if it's a normal JPEG file without a special header */
1432 
1433     if ((lastchar == 0xFF) && (thischar == M_SOI))
1434       return;
1435 
1436     bytesleft = 4096; //512;
1437 
1438     for ( ; ; )
1439     {
1440       if (--bytesleft == 0)
1441         stop_decoding(JPGD_NOT_JPEG);
1442 
1443       lastchar = thischar;
1444 
1445       thischar = get_bits(8);
1446 
1447       if (lastchar == 0xFF)
1448       {
1449         if (thischar == M_SOI)
1450           break;
1451         else if (thischar == M_EOI) // get_bits will keep returning M_EOI if we read past the end
1452           stop_decoding(JPGD_NOT_JPEG);
1453       }
1454     }
1455 
1456     // Check the next character after marker: if it's not 0xFF, it can't be the start of the next marker, so the file is bad.
1457     thischar = (m_bit_buf >> 24) & 0xFF;
1458 
1459     if (thischar != 0xFF)
1460       stop_decoding(JPGD_NOT_JPEG);
1461   }
1462 
1463   // Find a start of frame (SOF) marker.
1464   void locate_sof_marker () {
1465     locate_soi_marker();
1466 
1467     int c = process_markers();
1468 
1469     switch (c)
1470     {
1471       case M_SOF2:
1472         m_progressive_flag = true;
1473         goto case;
1474       case M_SOF0:  /* baseline DCT */
1475       case M_SOF1:  /* extended sequential DCT */
1476         read_sof_marker();
1477         break;
1478       case M_SOF9:  /* Arithmitic coding */
1479         stop_decoding(JPGD_NO_ARITHMITIC_SUPPORT);
1480         break;
1481       default:
1482         stop_decoding(JPGD_UNSUPPORTED_MARKER);
1483         break;
1484     }
1485   }
1486 
1487   // Find a start of scan (SOS) marker.
1488   int locate_sos_marker () {
1489     int c;
1490 
1491     c = process_markers();
1492 
1493     if (c == M_EOI)
1494       return false;
1495     else if (c != M_SOS)
1496       stop_decoding(JPGD_UNEXPECTED_MARKER);
1497 
1498     read_sos_marker();
1499 
1500     return true;
1501   }
1502 
1503   // Reset everything to default/uninitialized state.
1504   void initit (JpegStreamReadFunc rfn) {
1505     m_pMem_blocks = null;
1506     m_error_code = JPGD_SUCCESS;
1507     m_ready_flag = false;
1508     m_image_x_size = m_image_y_size = 0;
1509     readfn = rfn;
1510     m_progressive_flag = false;
1511 
1512     memset(m_huff_ac.ptr, 0, m_huff_ac.sizeof);
1513     memset(m_huff_num.ptr, 0, m_huff_num.sizeof);
1514     memset(m_huff_val.ptr, 0, m_huff_val.sizeof);
1515     memset(m_quant.ptr, 0, m_quant.sizeof);
1516 
1517     m_scan_type = 0;
1518     m_comps_in_frame = 0;
1519 
1520     memset(m_comp_h_samp.ptr, 0, m_comp_h_samp.sizeof);
1521     memset(m_comp_v_samp.ptr, 0, m_comp_v_samp.sizeof);
1522     memset(m_comp_quant.ptr, 0, m_comp_quant.sizeof);
1523     memset(m_comp_ident.ptr, 0, m_comp_ident.sizeof);
1524     memset(m_comp_h_blocks.ptr, 0, m_comp_h_blocks.sizeof);
1525     memset(m_comp_v_blocks.ptr, 0, m_comp_v_blocks.sizeof);
1526 
1527     m_comps_in_scan = 0;
1528     memset(m_comp_list.ptr, 0, m_comp_list.sizeof);
1529     memset(m_comp_dc_tab.ptr, 0, m_comp_dc_tab.sizeof);
1530     memset(m_comp_ac_tab.ptr, 0, m_comp_ac_tab.sizeof);
1531 
1532     m_spectral_start = 0;
1533     m_spectral_end = 0;
1534     m_successive_low = 0;
1535     m_successive_high = 0;
1536     m_max_mcu_x_size = 0;
1537     m_max_mcu_y_size = 0;
1538     m_blocks_per_mcu = 0;
1539     m_max_blocks_per_row = 0;
1540     m_mcus_per_row = 0;
1541     m_mcus_per_col = 0;
1542     m_expanded_blocks_per_component = 0;
1543     m_expanded_blocks_per_mcu = 0;
1544     m_expanded_blocks_per_row = 0;
1545     m_freq_domain_chroma_upsample = false;
1546 
1547     memset(m_mcu_org.ptr, 0, m_mcu_org.sizeof);
1548 
1549     m_total_lines_left = 0;
1550     m_mcu_lines_left = 0;
1551     m_real_dest_bytes_per_scan_line = 0;
1552     m_dest_bytes_per_scan_line = 0;
1553     m_dest_bytes_per_pixel = 0;
1554 
1555     memset(m_pHuff_tabs.ptr, 0, m_pHuff_tabs.sizeof);
1556 
1557     memset(m_dc_coeffs.ptr, 0, m_dc_coeffs.sizeof);
1558     memset(m_ac_coeffs.ptr, 0, m_ac_coeffs.sizeof);
1559     memset(m_block_y_mcu.ptr, 0, m_block_y_mcu.sizeof);
1560 
1561     m_eob_run = 0;
1562 
1563     memset(m_block_y_mcu.ptr, 0, m_block_y_mcu.sizeof);
1564 
1565     m_pIn_buf_ofs = m_in_buf.ptr;
1566     m_in_buf_left = 0;
1567     m_eof_flag = false;
1568     m_tem_flag = 0;
1569 
1570     memset(m_in_buf_pad_start.ptr, 0, m_in_buf_pad_start.sizeof);
1571     memset(m_in_buf.ptr, 0, m_in_buf.sizeof);
1572     memset(m_in_buf_pad_end.ptr, 0, m_in_buf_pad_end.sizeof);
1573 
1574     m_restart_interval = 0;
1575     m_restarts_left    = 0;
1576     m_next_restart_num = 0;
1577 
1578     m_max_mcus_per_row = 0;
1579     m_max_blocks_per_mcu = 0;
1580     m_max_mcus_per_col = 0;
1581 
1582     memset(m_last_dc_val.ptr, 0, m_last_dc_val.sizeof);
1583     m_pMCU_coefficients = null;
1584     m_pSample_buf = null;
1585 
1586     m_total_bytes_read = 0;
1587 
1588     m_pScan_line_0 = null;
1589     m_pScan_line_1 = null;
1590 
1591     // Ready the input buffer.
1592     prep_in_buffer();
1593 
1594     // Prime the bit buffer.
1595     m_bits_left = 16;
1596     m_bit_buf = 0;
1597 
1598     get_bits(16);
1599     get_bits(16);
1600 
1601     for (int i = 0; i < JPGD_MAX_BLOCKS_PER_MCU; i++)
1602       m_mcu_block_max_zag.ptr[i] = 64;
1603   }
1604 
  // Fixed-point helpers for the YCbCr -> RGB tables filled in by create_look_ups().
  // SCALEBITS is the number of fractional bits of the fixed-point format.
  enum SCALEBITS = 16;
  // One half in SCALEBITS fixed point; adding it before shifting right by
  // SCALEBITS rounds instead of truncating.
  enum ONE_HALF = (cast(int) 1 << (SCALEBITS-1));
  // Compile-time conversion of a float constant to SCALEBITS fixed point
  // (scaled by 2^SCALEBITS, with 0.5 added before the cast to int).
  enum FIX(float x) = (cast(int)((x) * (1L<<SCALEBITS) + 0.5f));
1608 
1609   // Create a few tables that allow us to quickly convert YCbCr to RGB.
1610   void create_look_ups () {
1611     for (int i = 0; i <= 255; i++)
1612     {
1613       int k = i - 128;
1614       m_crr.ptr[i] = ( FIX!(1.40200f)  * k + ONE_HALF) >> SCALEBITS;
1615       m_cbb.ptr[i] = ( FIX!(1.77200f)  * k + ONE_HALF) >> SCALEBITS;
1616       m_crg.ptr[i] = (-FIX!(0.71414f)) * k;
1617       m_cbg.ptr[i] = (-FIX!(0.34414f)) * k + ONE_HALF;
1618     }
1619   }
1620 
1621   // This method throws back into the stream any bytes that where read
1622   // into the bit buffer during initial marker scanning.
1623   void fix_in_buffer () {
1624     // In case any 0xFF's where pulled into the buffer during marker scanning.
1625     assert((m_bits_left & 7) == 0);
1626 
1627     if (m_bits_left == 16)
1628       stuff_char(cast(ubyte)(m_bit_buf & 0xFF));
1629 
1630     if (m_bits_left >= 8)
1631       stuff_char(cast(ubyte)((m_bit_buf >> 8) & 0xFF));
1632 
1633     stuff_char(cast(ubyte)((m_bit_buf >> 16) & 0xFF));
1634     stuff_char(cast(ubyte)((m_bit_buf >> 24) & 0xFF));
1635 
1636     m_bits_left = 16;
1637     get_bits_no_markers(16);
1638     get_bits_no_markers(16);
1639   }
1640 
1641   void transform_mcu (int mcu_row) {
1642     jpgd_block_t* pSrc_ptr = m_pMCU_coefficients;
1643     ubyte* pDst_ptr = m_pSample_buf + mcu_row * m_blocks_per_mcu * 64;
1644 
1645     for (int mcu_block = 0; mcu_block < m_blocks_per_mcu; mcu_block++)
1646     {
1647       idct(pSrc_ptr, pDst_ptr, m_mcu_block_max_zag.ptr[mcu_block]);
1648       pSrc_ptr += 64;
1649       pDst_ptr += 64;
1650     }
1651   }
1652 
  // Lookup used by transform_mcu_expand(): indexed by (max zig-zag count - 1)
  // of a chroma block. Each entry is encoded as a*16 + b, and the pair (a, b)
  // selects the DCT_Upsample.P_Q!(a, b) / R_S!(a, b) specialization.
  // NOTE(review): a and b presumably bound the non-zero rows/columns of the
  // block - confirm against DCT_Upsample.
  static immutable ubyte[64] s_max_rc = [
    17, 18, 34, 50, 50, 51, 52, 52, 52, 68, 84, 84, 84, 84, 85, 86, 86, 86, 86, 86,
    102, 118, 118, 118, 118, 118, 118, 119, 120, 120, 120, 120, 120, 120, 120, 136,
    136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136,
    136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136
  ];
1659 
  // Inverse-transforms one MCU when chroma is upsampled in the frequency domain.
  // The first m_expanded_blocks_per_component blocks get a plain idct(); each
  // of the following two blocks is expanded into four output blocks through
  // DCT_Upsample and idct_4x4(). Output goes to m_pSample_buf at the slot of
  // MCU number mcu_row (m_expanded_blocks_per_mcu * 64 bytes per MCU).
  void transform_mcu_expand (int mcu_row) {
    jpgd_block_t* pSrc_ptr = m_pMCU_coefficients;
    ubyte* pDst_ptr = m_pSample_buf + mcu_row * m_expanded_blocks_per_mcu * 64;

    // Y IDCT
    int mcu_block;
    for (mcu_block = 0; mcu_block < m_expanded_blocks_per_component; mcu_block++)
    {
      idct(pSrc_ptr, pDst_ptr, m_mcu_block_max_zag.ptr[mcu_block]);
      pSrc_ptr += 64;
      pDst_ptr += 64;
    }

    // Chroma IDCT, with upsampling
    jpgd_block_t[64] temp_block;

    // Two chroma blocks follow the luma blocks; mcu_block keeps counting from
    // where the loop above stopped.
    for (int i = 0; i < 2; i++)
    {
      DCT_Upsample.Matrix44 P, Q, R, S;

      assert(m_mcu_block_max_zag.ptr[mcu_block] >= 1);
      assert(m_mcu_block_max_zag.ptr[mcu_block] <= 64);

      int max_zag = m_mcu_block_max_zag.ptr[mcu_block++] - 1;
      if (max_zag <= 0) max_zag = 0; // should never happen, only here to shut up static analysis
      // s_max_rc encodes the template arguments as a*16 + b; pick the matching
      // P_Q/R_S specialization to fill P, Q, R and S from the coefficients.
      switch (s_max_rc.ptr[max_zag])
      {
      case 1*16+1:
        DCT_Upsample.P_Q!(1, 1).calc(P, Q, pSrc_ptr);
        DCT_Upsample.R_S!(1, 1).calc(R, S, pSrc_ptr);
        break;
      case 1*16+2:
        DCT_Upsample.P_Q!(1, 2).calc(P, Q, pSrc_ptr);
        DCT_Upsample.R_S!(1, 2).calc(R, S, pSrc_ptr);
        break;
      case 2*16+2:
        DCT_Upsample.P_Q!(2, 2).calc(P, Q, pSrc_ptr);
        DCT_Upsample.R_S!(2, 2).calc(R, S, pSrc_ptr);
        break;
      case 3*16+2:
        DCT_Upsample.P_Q!(3, 2).calc(P, Q, pSrc_ptr);
        DCT_Upsample.R_S!(3, 2).calc(R, S, pSrc_ptr);
        break;
      case 3*16+3:
        DCT_Upsample.P_Q!(3, 3).calc(P, Q, pSrc_ptr);
        DCT_Upsample.R_S!(3, 3).calc(R, S, pSrc_ptr);
        break;
      case 3*16+4:
        DCT_Upsample.P_Q!(3, 4).calc(P, Q, pSrc_ptr);
        DCT_Upsample.R_S!(3, 4).calc(R, S, pSrc_ptr);
        break;
      case 4*16+4:
        DCT_Upsample.P_Q!(4, 4).calc(P, Q, pSrc_ptr);
        DCT_Upsample.R_S!(4, 4).calc(R, S, pSrc_ptr);
        break;
      case 5*16+4:
        DCT_Upsample.P_Q!(5, 4).calc(P, Q, pSrc_ptr);
        DCT_Upsample.R_S!(5, 4).calc(R, S, pSrc_ptr);
        break;
      case 5*16+5:
        DCT_Upsample.P_Q!(5, 5).calc(P, Q, pSrc_ptr);
        DCT_Upsample.R_S!(5, 5).calc(R, S, pSrc_ptr);
        break;
      case 5*16+6:
        DCT_Upsample.P_Q!(5, 6).calc(P, Q, pSrc_ptr);
        DCT_Upsample.R_S!(5, 6).calc(R, S, pSrc_ptr);
        break;
      case 6*16+6:
        DCT_Upsample.P_Q!(6, 6).calc(P, Q, pSrc_ptr);
        DCT_Upsample.R_S!(6, 6).calc(R, S, pSrc_ptr);
        break;
      case 7*16+6:
        DCT_Upsample.P_Q!(7, 6).calc(P, Q, pSrc_ptr);
        DCT_Upsample.R_S!(7, 6).calc(R, S, pSrc_ptr);
        break;
      case 7*16+7:
        DCT_Upsample.P_Q!(7, 7).calc(P, Q, pSrc_ptr);
        DCT_Upsample.R_S!(7, 7).calc(R, S, pSrc_ptr);
        break;
      case 7*16+8:
        DCT_Upsample.P_Q!(7, 8).calc(P, Q, pSrc_ptr);
        DCT_Upsample.R_S!(7, 8).calc(R, S, pSrc_ptr);
        break;
      case 8*16+8:
        DCT_Upsample.P_Q!(8, 8).calc(P, Q, pSrc_ptr);
        DCT_Upsample.R_S!(8, 8).calc(R, S, pSrc_ptr);
        break;
      default:
        assert(false);
      }

      // a = P+Q, b = P-Q, c = R+S, d = R-S. P and R are reused in place to
      // hold the differences, so b and d simply point at them.
      auto a = DCT_Upsample.Matrix44(P + Q);
      P -= Q;
      DCT_Upsample.Matrix44* b = &P;
      auto c = DCT_Upsample.Matrix44(R + S);
      R -= S;
      DCT_Upsample.Matrix44* d = &R;

      // Four output blocks per chroma block, in order: a+c, a-c, b+d, b-d.
      DCT_Upsample.Matrix44.add_and_store(temp_block.ptr, a, c);
      idct_4x4(temp_block.ptr, pDst_ptr);
      pDst_ptr += 64;

      DCT_Upsample.Matrix44.sub_and_store(temp_block.ptr, a, c);
      idct_4x4(temp_block.ptr, pDst_ptr);
      pDst_ptr += 64;

      DCT_Upsample.Matrix44.add_and_store(temp_block.ptr, *b, *d);
      idct_4x4(temp_block.ptr, pDst_ptr);
      pDst_ptr += 64;

      DCT_Upsample.Matrix44.sub_and_store(temp_block.ptr, *b, *d);
      idct_4x4(temp_block.ptr, pDst_ptr);
      pDst_ptr += 64;

      // Advance to the next chroma block's coefficients.
      pSrc_ptr += 64;
    }
  }
1777 
1778   // Loads and dequantizes the next row of (already decoded) coefficients.
1779   // Progressive images only.
1780   void load_next_row () {
1781     int i;
1782     jpgd_block_t *p;
1783     jpgd_quant_t *q;
1784     int mcu_row, mcu_block, row_block = 0;
1785     int component_num, component_id;
1786     int[JPGD_MAX_COMPONENTS] block_x_mcu;
1787 
1788     memset(block_x_mcu.ptr, 0, JPGD_MAX_COMPONENTS * int.sizeof);
1789 
1790     for (mcu_row = 0; mcu_row < m_mcus_per_row; mcu_row++)
1791     {
1792       int block_x_mcu_ofs = 0, block_y_mcu_ofs = 0;
1793 
1794       for (mcu_block = 0; mcu_block < m_blocks_per_mcu; mcu_block++)
1795       {
1796         component_id = m_mcu_org.ptr[mcu_block];
1797         q = m_quant.ptr[m_comp_quant.ptr[component_id]];
1798 
1799         p = m_pMCU_coefficients + 64 * mcu_block;
1800 
1801         jpgd_block_t* pAC = coeff_buf_getp(m_ac_coeffs.ptr[component_id], block_x_mcu.ptr[component_id] + block_x_mcu_ofs, m_block_y_mcu.ptr[component_id] + block_y_mcu_ofs);
1802         jpgd_block_t* pDC = coeff_buf_getp(m_dc_coeffs.ptr[component_id], block_x_mcu.ptr[component_id] + block_x_mcu_ofs, m_block_y_mcu.ptr[component_id] + block_y_mcu_ofs);
1803         p[0] = pDC[0];
1804         memcpy(&p[1], &pAC[1], 63 * jpgd_block_t.sizeof);
1805 
1806         for (i = 63; i > 0; i--)
1807           if (p[g_ZAG[i]])
1808             break;
1809 
1810         m_mcu_block_max_zag.ptr[mcu_block] = i + 1;
1811 
1812         for ( ; i >= 0; i--)
1813           if (p[g_ZAG[i]])
1814             p[g_ZAG[i]] = cast(jpgd_block_t)(p[g_ZAG[i]] * q[i]);
1815 
1816         row_block++;
1817 
1818         if (m_comps_in_scan == 1)
1819           block_x_mcu.ptr[component_id]++;
1820         else
1821         {
1822           if (++block_x_mcu_ofs == m_comp_h_samp.ptr[component_id])
1823           {
1824             block_x_mcu_ofs = 0;
1825 
1826             if (++block_y_mcu_ofs == m_comp_v_samp.ptr[component_id])
1827             {
1828               block_y_mcu_ofs = 0;
1829 
1830               block_x_mcu.ptr[component_id] += m_comp_h_samp.ptr[component_id];
1831             }
1832           }
1833         }
1834       }
1835 
1836       if (m_freq_domain_chroma_upsample)
1837         transform_mcu_expand(mcu_row);
1838       else
1839         transform_mcu(mcu_row);
1840     }
1841 
1842     if (m_comps_in_scan == 1)
1843       m_block_y_mcu.ptr[m_comp_list.ptr[0]]++;
1844     else
1845     {
1846       for (component_num = 0; component_num < m_comps_in_scan; component_num++)
1847       {
1848         component_id = m_comp_list.ptr[component_num];
1849 
1850         m_block_y_mcu.ptr[component_id] += m_comp_v_samp.ptr[component_id];
1851       }
1852     }
1853   }
1854 
1855   // Restart interval processing.
1856   void process_restart () {
1857     int i;
1858     int c = 0;
1859 
1860     // Align to a byte boundry
1861     // FIXME: Is this really necessary? get_bits_no_markers() never reads in markers!
1862     //get_bits_no_markers(m_bits_left & 7);
1863 
1864     // Let's scan a little bit to find the marker, but not _too_ far.
1865     // 1536 is a "fudge factor" that determines how much to scan.
1866     for (i = 1536; i > 0; i--)
1867       if (get_char() == 0xFF)
1868         break;
1869 
1870     if (i == 0)
1871       stop_decoding(JPGD_BAD_RESTART_MARKER);
1872 
1873     for ( ; i > 0; i--)
1874       if ((c = get_char()) != 0xFF)
1875         break;
1876 
1877     if (i == 0)
1878       stop_decoding(JPGD_BAD_RESTART_MARKER);
1879 
1880     // Is it the expected marker? If not, something bad happened.
1881     if (c != (m_next_restart_num + M_RST0))
1882       stop_decoding(JPGD_BAD_RESTART_MARKER);
1883 
1884     // Reset each component's DC prediction values.
1885     memset(&m_last_dc_val, 0, m_comps_in_frame * uint.sizeof);
1886 
1887     m_eob_run = 0;
1888 
1889     m_restarts_left = m_restart_interval;
1890 
1891     m_next_restart_num = (m_next_restart_num + 1) & 7;
1892 
1893     // Get the bit buffer going again...
1894 
1895     m_bits_left = 16;
1896     get_bits_no_markers(16);
1897     get_bits_no_markers(16);
1898   }
1899 
1900   static int dequantize_ac (int c, int q) { pragma(inline, true); c *= q; return c; }
1901 
  // Decodes and dequantizes the next row of coefficients.
  // For every MCU in the row: handles a pending restart, Huffman-decodes each
  // block into m_pMCU_coefficients (DC via the per-component predictor, AC as
  // run/size pairs), then inverse-transforms the MCU.
  // Malformed runs stop decoding with JPGD_DECODE_ERROR.
  void decode_next_row () {
    int row_block = 0;

    for (int mcu_row = 0; mcu_row < m_mcus_per_row; mcu_row++)
    {
      // A restart marker is due once the interval counter has run out.
      if ((m_restart_interval) && (m_restarts_left == 0))
        process_restart();

      jpgd_block_t* p = m_pMCU_coefficients;
      for (int mcu_block = 0; mcu_block < m_blocks_per_mcu; mcu_block++, p += 64)
      {
        int component_id = m_mcu_org.ptr[mcu_block];
        jpgd_quant_t* q = m_quant.ptr[m_comp_quant.ptr[component_id]];

        // DC coefficient: decoded difference added to the previous DC value
        // of the same component.
        int r, s;
        s = huff_decode(m_pHuff_tabs.ptr[m_comp_dc_tab.ptr[component_id]], r);
        s = JPGD_HUFF_EXTEND(r, s);

        m_last_dc_val.ptr[component_id] = (s += m_last_dc_val.ptr[component_id]);

        p[0] = cast(jpgd_block_t)(s * q[0]);

        // Number of zig-zag positions the previous use of this block slot may
        // have left non-zero; only those need explicit clearing.
        int prev_num_set = m_mcu_block_max_zag.ptr[mcu_block];

        huff_tables *pH = m_pHuff_tabs.ptr[m_comp_ac_tab.ptr[component_id]];

        int k;
        for (k = 1; k < 64; k++)
        {
          int extra_bits;
          s = huff_decode(pH, extra_bits);

          // High nibble = zero run length, low nibble = coefficient size.
          r = s >> 4;
          s &= 15;

          if (s)
          {
            if (r)
            {
              if ((k + r) > 63)
                stop_decoding(JPGD_DECODE_ERROR);

              // Clear skipped positions that may still hold stale values.
              if (k < prev_num_set)
              {
                int n = JPGD_MIN(r, prev_num_set - k);
                int kt = k;
                while (n--)
                  p[g_ZAG[kt++]] = 0;
              }

              k += r;
            }

            s = JPGD_HUFF_EXTEND(extra_bits, s);

            assert(k < 64);

            p[g_ZAG[k]] = cast(jpgd_block_t)(dequantize_ac(s, q[k])); //s * q[k];
          }
          else
          {
            // Size 0 with run 15 means a run of 16 zeros; any other run with
            // size 0 ends the block.
            if (r == 15)
            {
              if ((k + 16) > 64)
                stop_decoding(JPGD_DECODE_ERROR);

              if (k < prev_num_set)
              {
                int n = JPGD_MIN(16, prev_num_set - k);
                int kt = k;
                while (n--)
                {
                  assert(kt <= 63);
                  p[g_ZAG[kt++]] = 0;
                }
              }

              k += 16 - 1; // - 1 because the loop counter is k
              assert(p[g_ZAG[k]] == 0);
            }
            else
              break;
          }
        }

        // Clear whatever the previous block left set beyond the last decoded position.
        if (k < prev_num_set)
        {
          int kt = k;
          while (kt < prev_num_set)
            p[g_ZAG[kt++]] = 0;
        }

        m_mcu_block_max_zag.ptr[mcu_block] = k;

        row_block++;
      }

      if (m_freq_domain_chroma_upsample)
        transform_mcu_expand(mcu_row);
      else
        transform_mcu(mcu_row);

      m_restarts_left--;
    }
  }
2008 
2009   // YCbCr H1V1 (1x1:1:1, 3 m_blocks per MCU) to RGB
2010   void H1V1Convert () {
2011     int row = m_max_mcu_y_size - m_mcu_lines_left;
2012     ubyte *d = m_pScan_line_0;
2013     ubyte *s = m_pSample_buf + row * 8;
2014 
2015     for (int i = m_max_mcus_per_row; i > 0; i--)
2016     {
2017       for (int j = 0; j < 8; j++)
2018       {
2019         int y = s[j];
2020         int cb = s[64+j];
2021         int cr = s[128+j];
2022 
2023         d[0] = clamp(y + m_crr.ptr[cr]);
2024         d[1] = clamp(y + ((m_crg.ptr[cr] + m_cbg.ptr[cb]) >> 16));
2025         d[2] = clamp(y + m_cbb.ptr[cb]);
2026         d[3] = 255;
2027 
2028         d += 4;
2029       }
2030 
2031       s += 64*3;
2032     }
2033   }
2034 
2035   // YCbCr H2V1 (2x1:1:1, 4 m_blocks per MCU) to RGB
2036   void H2V1Convert () {
2037     int row = m_max_mcu_y_size - m_mcu_lines_left;
2038     ubyte *d0 = m_pScan_line_0;
2039     ubyte *y = m_pSample_buf + row * 8;
2040     ubyte *c = m_pSample_buf + 2*64 + row * 8;
2041 
2042     for (int i = m_max_mcus_per_row; i > 0; i--)
2043     {
2044       for (int l = 0; l < 2; l++)
2045       {
2046         for (int j = 0; j < 4; j++)
2047         {
2048           int cb = c[0];
2049           int cr = c[64];
2050 
2051           int rc = m_crr.ptr[cr];
2052           int gc = ((m_crg.ptr[cr] + m_cbg.ptr[cb]) >> 16);
2053           int bc = m_cbb.ptr[cb];
2054 
2055           int yy = y[j<<1];
2056           d0[0] = clamp(yy+rc);
2057           d0[1] = clamp(yy+gc);
2058           d0[2] = clamp(yy+bc);
2059           d0[3] = 255;
2060 
2061           yy = y[(j<<1)+1];
2062           d0[4] = clamp(yy+rc);
2063           d0[5] = clamp(yy+gc);
2064           d0[6] = clamp(yy+bc);
2065           d0[7] = 255;
2066 
2067           d0 += 8;
2068 
2069           c++;
2070         }
2071         y += 64;
2072       }
2073 
2074       y += 64*4 - 64*2;
2075       c += 64*4 - 8;
2076     }
2077   }
2078 
2079   // YCbCr H2V1 (1x2:1:1, 4 m_blocks per MCU) to RGB
2080   void H1V2Convert () {
2081     int row = m_max_mcu_y_size - m_mcu_lines_left;
2082     ubyte *d0 = m_pScan_line_0;
2083     ubyte *d1 = m_pScan_line_1;
2084     ubyte *y;
2085     ubyte *c;
2086 
2087     if (row < 8)
2088       y = m_pSample_buf + row * 8;
2089     else
2090       y = m_pSample_buf + 64*1 + (row & 7) * 8;
2091 
2092     c = m_pSample_buf + 64*2 + (row >> 1) * 8;
2093 
2094     for (int i = m_max_mcus_per_row; i > 0; i--)
2095     {
2096       for (int j = 0; j < 8; j++)
2097       {
2098         int cb = c[0+j];
2099         int cr = c[64+j];
2100 
2101         int rc = m_crr.ptr[cr];
2102         int gc = ((m_crg.ptr[cr] + m_cbg.ptr[cb]) >> 16);
2103         int bc = m_cbb.ptr[cb];
2104 
2105         int yy = y[j];
2106         d0[0] = clamp(yy+rc);
2107         d0[1] = clamp(yy+gc);
2108         d0[2] = clamp(yy+bc);
2109         d0[3] = 255;
2110 
2111         yy = y[8+j];
2112         d1[0] = clamp(yy+rc);
2113         d1[1] = clamp(yy+gc);
2114         d1[2] = clamp(yy+bc);
2115         d1[3] = 255;
2116 
2117         d0 += 4;
2118         d1 += 4;
2119       }
2120 
2121       y += 64*4;
2122       c += 64*4;
2123     }
2124   }
2125 
2126   // YCbCr H2V2 (2x2:1:1, 6 m_blocks per MCU) to RGB
2127   void H2V2Convert () {
2128     int row = m_max_mcu_y_size - m_mcu_lines_left;
2129     ubyte *d0 = m_pScan_line_0;
2130     ubyte *d1 = m_pScan_line_1;
2131     ubyte *y;
2132     ubyte *c;
2133 
2134     if (row < 8)
2135       y = m_pSample_buf + row * 8;
2136     else
2137       y = m_pSample_buf + 64*2 + (row & 7) * 8;
2138 
2139     c = m_pSample_buf + 64*4 + (row >> 1) * 8;
2140 
2141     for (int i = m_max_mcus_per_row; i > 0; i--)
2142     {
2143       for (int l = 0; l < 2; l++)
2144       {
2145         for (int j = 0; j < 8; j += 2)
2146         {
2147           int cb = c[0];
2148           int cr = c[64];
2149 
2150           int rc = m_crr.ptr[cr];
2151           int gc = ((m_crg.ptr[cr] + m_cbg.ptr[cb]) >> 16);
2152           int bc = m_cbb.ptr[cb];
2153 
2154           int yy = y[j];
2155           d0[0] = clamp(yy+rc);
2156           d0[1] = clamp(yy+gc);
2157           d0[2] = clamp(yy+bc);
2158           d0[3] = 255;
2159 
2160           yy = y[j+1];
2161           d0[4] = clamp(yy+rc);
2162           d0[5] = clamp(yy+gc);
2163           d0[6] = clamp(yy+bc);
2164           d0[7] = 255;
2165 
2166           yy = y[j+8];
2167           d1[0] = clamp(yy+rc);
2168           d1[1] = clamp(yy+gc);
2169           d1[2] = clamp(yy+bc);
2170           d1[3] = 255;
2171 
2172           yy = y[j+8+1];
2173           d1[4] = clamp(yy+rc);
2174           d1[5] = clamp(yy+gc);
2175           d1[6] = clamp(yy+bc);
2176           d1[7] = 255;
2177 
2178           d0 += 8;
2179           d1 += 8;
2180 
2181           c++;
2182         }
2183         y += 64;
2184       }
2185 
2186       y += 64*6 - 64*2;
2187       c += 64*6 - 8;
2188     }
2189   }
2190 
2191   // Y (1 block per MCU) to 8-bit grayscale
2192   void gray_convert () {
2193     int row = m_max_mcu_y_size - m_mcu_lines_left;
2194     ubyte *d = m_pScan_line_0;
2195     ubyte *s = m_pSample_buf + row * 8;
2196 
2197     for (int i = m_max_mcus_per_row; i > 0; i--)
2198     {
2199       *cast(uint*)d = *cast(uint*)s;
2200       *cast(uint*)(&d[4]) = *cast(uint*)(&s[4]);
2201 
2202       s += 64;
2203       d += 8;
2204     }
2205   }
2206 
2207   void expanded_convert () {
2208     int row = m_max_mcu_y_size - m_mcu_lines_left;
2209 
2210     ubyte* Py = m_pSample_buf + (row / 8) * 64 * m_comp_h_samp.ptr[0] + (row & 7) * 8;
2211 
2212     ubyte* d = m_pScan_line_0;
2213 
2214     for (int i = m_max_mcus_per_row; i > 0; i--)
2215     {
2216       for (int k = 0; k < m_max_mcu_x_size; k += 8)
2217       {
2218         immutable int Y_ofs = k * 8;
2219         immutable int Cb_ofs = Y_ofs + 64 * m_expanded_blocks_per_component;
2220         immutable int Cr_ofs = Y_ofs + 64 * m_expanded_blocks_per_component * 2;
2221         for (int j = 0; j < 8; j++)
2222         {
2223           int y = Py[Y_ofs + j];
2224           int cb = Py[Cb_ofs + j];
2225           int cr = Py[Cr_ofs + j];
2226 
2227           d[0] = clamp(y + m_crr.ptr[cr]);
2228           d[1] = clamp(y + ((m_crg.ptr[cr] + m_cbg.ptr[cb]) >> 16));
2229           d[2] = clamp(y + m_cbb.ptr[cb]);
2230           d[3] = 255;
2231 
2232           d += 4;
2233         }
2234       }
2235 
2236       Py += 64 * m_expanded_blocks_per_mcu;
2237     }
2238   }
2239 
2240   // Find end of image (EOI) marker, so we can return to the user the exact size of the input stream.
2241   void find_eoi () {
2242     if (!m_progressive_flag)
2243     {
2244       // Attempt to read the EOI marker.
2245       //get_bits_no_markers(m_bits_left & 7);
2246 
2247       // Prime the bit buffer
2248       m_bits_left = 16;
2249       get_bits(16);
2250       get_bits(16);
2251 
2252       // The next marker _should_ be EOI
2253       process_markers();
2254     }
2255 
2256     m_total_bytes_read -= m_in_buf_left;
2257   }
2258 
  // Creates the tables needed for efficient Huffman decoding.
  // Params:
  //   index = which table slot of m_huff_num / m_huff_val / m_huff_ac to read
  //   pH    = destination; its look_up, look_up2, tree and code_size arrays
  //           are cleared and rebuilt
  // Codes of up to 8 bits are resolved through the 256-entry look_up tables;
  // longer codes go through a negative look_up entry into pH.tree.
  void make_huff_table (int index, huff_tables *pH) {
    int p, i, l, si;
    ubyte[257] huffsize;
    uint[257] huffcode;
    uint code;
    uint subtree;
    int code_size;
    int lastp;
    int nextfreeentry;
    int currententry;

    pH.ac_table = m_huff_ac.ptr[index] != 0;

    p = 0;

    // Expand the per-length code counts into a list of code sizes, one entry
    // per symbol, terminated by 0.
    for (l = 1; l <= 16; l++)
    {
      for (i = 1; i <= m_huff_num.ptr[index][l]; i++)
        huffsize.ptr[p++] = cast(ubyte)(l);
    }

    huffsize.ptr[p] = 0;

    lastp = p;

    code = 0;
    si = huffsize.ptr[0];
    p = 0;

    // Assign code values: consecutive within one length, shifted left by one
    // bit each time the length grows.
    while (huffsize.ptr[p])
    {
      while (huffsize.ptr[p] == si)
      {
        huffcode.ptr[p++] = code;
        code++;
      }

      code <<= 1;
      si++;
    }

    memset(pH.look_up.ptr, 0, pH.look_up.sizeof);
    memset(pH.look_up2.ptr, 0, pH.look_up2.sizeof);
    memset(pH.tree.ptr, 0, pH.tree.sizeof);
    memset(pH.code_size.ptr, 0, pH.code_size.sizeof);

    // Tree nodes are identified by negative numbers and handed out in pairs.
    nextfreeentry = -1;

    p = 0;

    while (p < lastp)
    {
      i = m_huff_val.ptr[index][p];
      code = huffcode.ptr[p];
      code_size = huffsize.ptr[p];

      pH.code_size.ptr[i] = cast(ubyte)(code_size);

      if (code_size <= 8)
      {
        // Short code: fill every 8-bit pattern that starts with this code.
        code <<= (8 - code_size);

        for (l = 1 << (8 - code_size); l > 0; l--)
        {
          assert(i < 256);

          pH.look_up.ptr[code] = i;

          bool has_extrabits = false;
          int extra_bits = 0;
          int num_extra_bits = i & 15;

          // If the code plus the symbol's extra bits (low nibble of the
          // symbol) fit into the 8-bit pattern, pre-extract those extra bits.
          int bits_to_fetch = code_size;
          if (num_extra_bits)
          {
            int total_codesize = code_size + num_extra_bits;
            if (total_codesize <= 8)
            {
              has_extrabits = true;
              extra_bits = ((1 << num_extra_bits) - 1) & (code >> (8 - total_codesize));
              assert(extra_bits <= 0x7FFF);
              bits_to_fetch += num_extra_bits;
            }
          }

          // look_up2 packing: symbol in bits 0..7, bits_to_fetch shifted left
          // by 8, flag 0x8000 when extra bits are included, and the extra
          // bits themselves shifted left by 16.
          if (!has_extrabits)
            pH.look_up2.ptr[code] = i | (bits_to_fetch << 8);
          else
            pH.look_up2.ptr[code] = i | 0x8000 | (extra_bits << 16) | (bits_to_fetch << 8);

          code++;
        }
      }
      else
      {
        // Long code: the first 8 bits select (or create) a tree root.
        subtree = (code >> (code_size - 8)) & 0xFF;

        currententry = pH.look_up.ptr[subtree];

        if (currententry == 0)
        {
          pH.look_up.ptr[subtree] = currententry = nextfreeentry;
          pH.look_up2.ptr[subtree] = currententry = nextfreeentry;

          nextfreeentry -= 2;
        }

        // Move the remaining code bits up so the next one sits at 0x8000.
        code <<= (16 - (code_size - 8));

        // Walk the remaining bits except the last, creating nodes on demand;
        // a 0 bit selects the second slot of the node pair.
        for (l = code_size; l > 9; l--)
        {
          if ((code & 0x8000) == 0)
            currententry--;

          if (pH.tree.ptr[-currententry - 1] == 0)
          {
            pH.tree.ptr[-currententry - 1] = nextfreeentry;

            currententry = nextfreeentry;

            nextfreeentry -= 2;
          }
          else
            currententry = pH.tree.ptr[-currententry - 1];

          code <<= 1;
        }

        // The last bit selects the slot that stores the symbol itself.
        if ((code & 0x8000) == 0)
          currententry--;

        pH.tree.ptr[-currententry - 1] = i;
      }

      p++;
    }
  }
2397 
2398   // Verifies the quantization tables needed for this scan are available.
2399   void check_quant_tables () {
2400     for (int i = 0; i < m_comps_in_scan; i++)
2401       if (m_quant.ptr[m_comp_quant.ptr[m_comp_list.ptr[i]]] == null)
2402         stop_decoding(JPGD_UNDEFINED_QUANT_TABLE);
2403   }
2404 
2405   // Verifies that all the Huffman tables needed for this scan are available.
2406   void check_huff_tables () {
2407     for (int i = 0; i < m_comps_in_scan; i++)
2408     {
2409       if ((m_spectral_start == 0) && (m_huff_num.ptr[m_comp_dc_tab.ptr[m_comp_list.ptr[i]]] == null))
2410         stop_decoding(JPGD_UNDEFINED_HUFF_TABLE);
2411 
2412       if ((m_spectral_end > 0) && (m_huff_num.ptr[m_comp_ac_tab.ptr[m_comp_list.ptr[i]]] == null))
2413         stop_decoding(JPGD_UNDEFINED_HUFF_TABLE);
2414     }
2415 
2416     for (int i = 0; i < JPGD_MAX_HUFF_TABLES; i++)
2417       if (m_huff_num.ptr[i])
2418       {
2419         if (!m_pHuff_tabs.ptr[i])
2420           m_pHuff_tabs.ptr[i] = cast(huff_tables*)alloc(huff_tables.sizeof);
2421 
2422         make_huff_table(i, m_pHuff_tabs.ptr[i]);
2423       }
2424   }
2425 
2426   // Determines the component order inside each MCU.
2427   // Also calcs how many MCU's are on each row, etc.
2428   void calc_mcu_block_order () {
2429     int component_num, component_id;
2430     int max_h_samp = 0, max_v_samp = 0;
2431 
2432     for (component_id = 0; component_id < m_comps_in_frame; component_id++)
2433     {
2434       if (m_comp_h_samp.ptr[component_id] > max_h_samp)
2435         max_h_samp = m_comp_h_samp.ptr[component_id];
2436 
2437       if (m_comp_v_samp.ptr[component_id] > max_v_samp)
2438         max_v_samp = m_comp_v_samp.ptr[component_id];
2439     }
2440 
2441     for (component_id = 0; component_id < m_comps_in_frame; component_id++)
2442     {
2443       m_comp_h_blocks.ptr[component_id] = ((((m_image_x_size * m_comp_h_samp.ptr[component_id]) + (max_h_samp - 1)) / max_h_samp) + 7) / 8;
2444       m_comp_v_blocks.ptr[component_id] = ((((m_image_y_size * m_comp_v_samp.ptr[component_id]) + (max_v_samp - 1)) / max_v_samp) + 7) / 8;
2445     }
2446 
2447     if (m_comps_in_scan == 1)
2448     {
2449       m_mcus_per_row = m_comp_h_blocks.ptr[m_comp_list.ptr[0]];
2450       m_mcus_per_col = m_comp_v_blocks.ptr[m_comp_list.ptr[0]];
2451     }
2452     else
2453     {
2454       m_mcus_per_row = (((m_image_x_size + 7) / 8) + (max_h_samp - 1)) / max_h_samp;
2455       m_mcus_per_col = (((m_image_y_size + 7) / 8) + (max_v_samp - 1)) / max_v_samp;
2456     }
2457 
2458     if (m_comps_in_scan == 1)
2459     {
2460       m_mcu_org.ptr[0] = m_comp_list.ptr[0];
2461 
2462       m_blocks_per_mcu = 1;
2463     }
2464     else
2465     {
2466       m_blocks_per_mcu = 0;
2467 
2468       for (component_num = 0; component_num < m_comps_in_scan; component_num++)
2469       {
2470         int num_blocks;
2471 
2472         component_id = m_comp_list.ptr[component_num];
2473 
2474         num_blocks = m_comp_h_samp.ptr[component_id] * m_comp_v_samp.ptr[component_id];
2475 
2476         while (num_blocks--)
2477           m_mcu_org.ptr[m_blocks_per_mcu++] = component_id;
2478       }
2479     }
2480   }
2481 
2482   // Starts a new scan.
2483   int init_scan () {
2484     if (!locate_sos_marker())
2485       return false;
2486 
2487     calc_mcu_block_order();
2488 
2489     check_huff_tables();
2490 
2491     check_quant_tables();
2492 
2493     memset(m_last_dc_val.ptr, 0, m_comps_in_frame * uint.sizeof);
2494 
2495     m_eob_run = 0;
2496 
2497     if (m_restart_interval)
2498     {
2499       m_restarts_left = m_restart_interval;
2500       m_next_restart_num = 0;
2501     }
2502 
2503     fix_in_buffer();
2504 
2505     return true;
2506   }
2507 
2508   // Starts a frame. Determines if the number of components or sampling factors
2509   // are supported.
2510   void init_frame () {
2511     int i;
2512 
2513     if (m_comps_in_frame == 1)
2514     {
2515       if ((m_comp_h_samp.ptr[0] != 1) || (m_comp_v_samp.ptr[0] != 1))
2516         stop_decoding(JPGD_UNSUPPORTED_SAMP_FACTORS);
2517 
2518       m_scan_type = JPGD_GRAYSCALE;
2519       m_max_blocks_per_mcu = 1;
2520       m_max_mcu_x_size = 8;
2521       m_max_mcu_y_size = 8;
2522     }
2523     else if (m_comps_in_frame == 3)
2524     {
2525       if ( ((m_comp_h_samp.ptr[1] != 1) || (m_comp_v_samp.ptr[1] != 1)) ||
2526            ((m_comp_h_samp.ptr[2] != 1) || (m_comp_v_samp.ptr[2] != 1)) )
2527         stop_decoding(JPGD_UNSUPPORTED_SAMP_FACTORS);
2528 
2529       if ((m_comp_h_samp.ptr[0] == 1) && (m_comp_v_samp.ptr[0] == 1))
2530       {
2531         m_scan_type = JPGD_YH1V1;
2532 
2533         m_max_blocks_per_mcu = 3;
2534         m_max_mcu_x_size = 8;
2535         m_max_mcu_y_size = 8;
2536       }
2537       else if ((m_comp_h_samp.ptr[0] == 2) && (m_comp_v_samp.ptr[0] == 1))
2538       {
2539         m_scan_type = JPGD_YH2V1;
2540         m_max_blocks_per_mcu = 4;
2541         m_max_mcu_x_size = 16;
2542         m_max_mcu_y_size = 8;
2543       }
2544       else if ((m_comp_h_samp.ptr[0] == 1) && (m_comp_v_samp.ptr[0] == 2))
2545       {
2546         m_scan_type = JPGD_YH1V2;
2547         m_max_blocks_per_mcu = 4;
2548         m_max_mcu_x_size = 8;
2549         m_max_mcu_y_size = 16;
2550       }
2551       else if ((m_comp_h_samp.ptr[0] == 2) && (m_comp_v_samp.ptr[0] == 2))
2552       {
2553         m_scan_type = JPGD_YH2V2;
2554         m_max_blocks_per_mcu = 6;
2555         m_max_mcu_x_size = 16;
2556         m_max_mcu_y_size = 16;
2557       }
2558       else
2559         stop_decoding(JPGD_UNSUPPORTED_SAMP_FACTORS);
2560     }
2561     else
2562       stop_decoding(JPGD_UNSUPPORTED_COLORSPACE);
2563 
2564     m_max_mcus_per_row = (m_image_x_size + (m_max_mcu_x_size - 1)) / m_max_mcu_x_size;
2565     m_max_mcus_per_col = (m_image_y_size + (m_max_mcu_y_size - 1)) / m_max_mcu_y_size;
2566 
2567     // These values are for the *destination* pixels: after conversion.
2568     if (m_scan_type == JPGD_GRAYSCALE)
2569       m_dest_bytes_per_pixel = 1;
2570     else
2571       m_dest_bytes_per_pixel = 4;
2572 
2573     m_dest_bytes_per_scan_line = ((m_image_x_size + 15) & 0xFFF0) * m_dest_bytes_per_pixel;
2574 
2575     m_real_dest_bytes_per_scan_line = (m_image_x_size * m_dest_bytes_per_pixel);
2576 
2577     // Initialize two scan line buffers.
2578     m_pScan_line_0 = cast(ubyte*)alloc(m_dest_bytes_per_scan_line, true);
2579     if ((m_scan_type == JPGD_YH1V2) || (m_scan_type == JPGD_YH2V2))
2580       m_pScan_line_1 = cast(ubyte*)alloc(m_dest_bytes_per_scan_line, true);
2581 
2582     m_max_blocks_per_row = m_max_mcus_per_row * m_max_blocks_per_mcu;
2583 
2584     // Should never happen
2585     if (m_max_blocks_per_row > JPGD_MAX_BLOCKS_PER_ROW)
2586       stop_decoding(JPGD_ASSERTION_ERROR);
2587 
2588     // Allocate the coefficient buffer, enough for one MCU
2589     m_pMCU_coefficients = cast(jpgd_block_t*)alloc(m_max_blocks_per_mcu * 64 * jpgd_block_t.sizeof);
2590 
2591     for (i = 0; i < m_max_blocks_per_mcu; i++)
2592       m_mcu_block_max_zag.ptr[i] = 64;
2593 
2594     m_expanded_blocks_per_component = m_comp_h_samp.ptr[0] * m_comp_v_samp.ptr[0];
2595     m_expanded_blocks_per_mcu = m_expanded_blocks_per_component * m_comps_in_frame;
2596     m_expanded_blocks_per_row = m_max_mcus_per_row * m_expanded_blocks_per_mcu;
2597     // Freq. domain chroma upsampling is only supported for H2V2 subsampling factor (the most common one I've seen).
2598     m_freq_domain_chroma_upsample = false;
2599     version(JPGD_SUPPORT_FREQ_DOMAIN_UPSAMPLING) {
2600       m_freq_domain_chroma_upsample = (m_expanded_blocks_per_mcu == 4*3);
2601     }
2602 
2603     if (m_freq_domain_chroma_upsample)
2604       m_pSample_buf = cast(ubyte*)alloc(m_expanded_blocks_per_row * 64);
2605     else
2606       m_pSample_buf = cast(ubyte*)alloc(m_max_blocks_per_row * 64);
2607 
2608     m_total_lines_left = m_image_y_size;
2609 
2610     m_mcu_lines_left = 0;
2611 
2612     create_look_ups();
2613   }
2614 
2615   // The coeff_buf series of methods originally stored the coefficients
2616   // into a "virtual" file which was located in EMS, XMS, or a disk file. A cache
2617   // was used to make this process more efficient. Now, we can store the entire
2618   // thing in RAM.
2619   coeff_buf* coeff_buf_open(int block_num_x, int block_num_y, int block_len_x, int block_len_y) {
2620     coeff_buf* cb = cast(coeff_buf*)alloc(coeff_buf.sizeof);
2621 
2622     cb.block_num_x = block_num_x;
2623     cb.block_num_y = block_num_y;
2624     cb.block_len_x = block_len_x;
2625     cb.block_len_y = block_len_y;
2626     cb.block_size = (block_len_x * block_len_y) * cast(int)(jpgd_block_t.sizeof);
2627     cb.pData = cast(ubyte*)alloc(cb.block_size * block_num_x * block_num_y, true);
2628     return cb;
2629   }
2630 
2631   jpgd_block_t* coeff_buf_getp (coeff_buf *cb, int block_x, int block_y) {
2632     assert((block_x < cb.block_num_x) && (block_y < cb.block_num_y));
2633     return cast(jpgd_block_t*)(cb.pData + block_x * cb.block_size + block_y * (cb.block_size * cb.block_num_x));
2634   }
2635 
  // The following methods decode the various types of blocks encountered
  // in progressively encoded images.
2638   static void decode_block_dc_first (ref jpeg_decoder pD, int component_id, int block_x, int block_y) {
2639     int s, r;
2640     jpgd_block_t *p = pD.coeff_buf_getp(pD.m_dc_coeffs.ptr[component_id], block_x, block_y);
2641 
2642     if ((s = pD.huff_decode(pD.m_pHuff_tabs.ptr[pD.m_comp_dc_tab.ptr[component_id]])) != 0)
2643     {
2644       r = pD.get_bits_no_markers(s);
2645       s = JPGD_HUFF_EXTEND(r, s);
2646     }
2647 
2648     pD.m_last_dc_val.ptr[component_id] = (s += pD.m_last_dc_val.ptr[component_id]);
2649 
2650     p[0] = cast(jpgd_block_t)(s << pD.m_successive_low);
2651   }
2652 
2653   static void decode_block_dc_refine (ref jpeg_decoder pD, int component_id, int block_x, int block_y) {
2654     if (pD.get_bits_no_markers(1))
2655     {
2656       jpgd_block_t *p = pD.coeff_buf_getp(pD.m_dc_coeffs.ptr[component_id], block_x, block_y);
2657 
2658       p[0] |= (1 << pD.m_successive_low);
2659     }
2660   }
2661 
2662   static void decode_block_ac_first (ref jpeg_decoder pD, int component_id, int block_x, int block_y) {
2663     int k, s, r;
2664 
2665     if (pD.m_eob_run)
2666     {
2667       pD.m_eob_run--;
2668       return;
2669     }
2670 
2671     jpgd_block_t *p = pD.coeff_buf_getp(pD.m_ac_coeffs.ptr[component_id], block_x, block_y);
2672 
2673     for (k = pD.m_spectral_start; k <= pD.m_spectral_end; k++)
2674     {
2675       s = pD.huff_decode(pD.m_pHuff_tabs.ptr[pD.m_comp_ac_tab.ptr[component_id]]);
2676 
2677       r = s >> 4;
2678       s &= 15;
2679 
2680       if (s)
2681       {
2682         if ((k += r) > 63)
2683           pD.stop_decoding(JPGD_DECODE_ERROR);
2684 
2685         r = pD.get_bits_no_markers(s);
2686         s = JPGD_HUFF_EXTEND(r, s);
2687 
2688         p[g_ZAG[k]] = cast(jpgd_block_t)(s << pD.m_successive_low);
2689       }
2690       else
2691       {
2692         if (r == 15)
2693         {
2694           if ((k += 15) > 63)
2695             pD.stop_decoding(JPGD_DECODE_ERROR);
2696         }
2697         else
2698         {
2699           pD.m_eob_run = 1 << r;
2700 
2701           if (r)
2702             pD.m_eob_run += pD.get_bits_no_markers(r);
2703 
2704           pD.m_eob_run--;
2705 
2706           break;
2707         }
2708       }
2709     }
2710   }
2711 
  static void decode_block_ac_refine (ref jpeg_decoder pD, int component_id, int block_x, int block_y) {
    // AC refinement scan for one block: adds one more bit of precision (bit
    // m_successive_low) to coefficients that are already nonzero, and places newly
    // nonzero coefficients (+/- that bit) in the band [m_spectral_start, m_spectral_end].
    int s, k, r;
    // p1 is the correction added to non-negative coefficients, m1 the one added to negative ones.
    int p1 = 1 << pD.m_successive_low;
    int m1 = (-1) << pD.m_successive_low;
    jpgd_block_t *p = pD.coeff_buf_getp(pD.m_ac_coeffs.ptr[component_id], block_x, block_y);

    assert(pD.m_spectral_end <= 63);

    k = pD.m_spectral_start;

    // Not inside an end-of-band run: decode run/size symbols for this block.
    if (pD.m_eob_run == 0)
    {
      for ( ; k <= pD.m_spectral_end; k++)
      {
        s = pD.huff_decode(pD.m_pHuff_tabs.ptr[pD.m_comp_ac_tab.ptr[component_id]]);

        // Upper nibble: run count; lower nibble: size.
        r = s >> 4;
        s &= 15;

        if (s)
        {
          // In a refinement scan a new coefficient can only have size 1.
          if (s != 1)
            pD.stop_decoding(JPGD_DECODE_ERROR);

          // One extra bit selects the value of the new coefficient: p1 or m1.
          if (pD.get_bits_no_markers(1))
            s = p1;
          else
            s = m1;
        }
        else
        {
          // Size 0 with run != 15 starts an end-of-band run: (1 << r) plus r extra bits.
          // The remaining coefficients of this block are handled by the EOB loop below.
          if (r != 15)
          {
            pD.m_eob_run = 1 << r;

            if (r)
              pD.m_eob_run += pD.get_bits_no_markers(r);

            break;
          }
        }

        // Advance k: coefficients that are already nonzero each consume one correction
        // bit; coefficients that are still zero count down the run `r`. The walk stops
        // at the zero coefficient reached once the run is exhausted (or at the band end).
        do
        {
          jpgd_block_t *this_coef = p + g_ZAG[k & 63];

          if (*this_coef != 0)
          {
            if (pD.get_bits_no_markers(1))
            {
              // Apply the correction only if this bit is not already set.
              if ((*this_coef & p1) == 0)
              {
                if (*this_coef >= 0)
                  *this_coef = cast(jpgd_block_t)(*this_coef + p1);
                else
                  *this_coef = cast(jpgd_block_t)(*this_coef + m1);
              }
            }
          }
          else
          {
            if (--r < 0)
              break;
          }

          k++;

        } while (k <= pD.m_spectral_end);

        // Store the newly nonzero coefficient (if any) at the position reached.
        if ((s) && (k < 64))
        {
          p[g_ZAG[k]] = cast(jpgd_block_t)(s);
        }
      }
    }

    // Inside an end-of-band run (possibly started just above): no new coefficients,
    // but every already-nonzero coefficient left in the band still gets a correction bit.
    if (pD.m_eob_run > 0)
    {
      for ( ; k <= pD.m_spectral_end; k++)
      {
        jpgd_block_t *this_coef = p + g_ZAG[k & 63]; // logical AND to shut up static code analysis

        if (*this_coef != 0)
        {
          if (pD.get_bits_no_markers(1))
          {
            if ((*this_coef & p1) == 0)
            {
              if (*this_coef >= 0)
                *this_coef = cast(jpgd_block_t)(*this_coef + p1);
              else
                *this_coef = cast(jpgd_block_t)(*this_coef + m1);
            }
          }
        }
      }

      // This block consumed one entry of the run.
      pD.m_eob_run--;
    }
  }
2812 
2813   // Decode a scan in a progressively encoded image.
  void decode_scan (pDecode_block_func decode_block_func) {
    // Walks every MCU of the current scan and calls `decode_block_func` once per block,
    // passing the component id and the block's x/y position within that component.
    int mcu_row, mcu_col, mcu_block;
    // Per-component block coordinates of the MCU currently being decoded.
    int[JPGD_MAX_COMPONENTS] block_x_mcu;
    int[JPGD_MAX_COMPONENTS] m_block_y_mcu;

    memset(m_block_y_mcu.ptr, 0, m_block_y_mcu.sizeof);

    // Note: despite the names, `mcu_col` runs over m_mcus_per_col (the vertical MCU
    // position) and `mcu_row` runs over m_mcus_per_row (the horizontal position).
    for (mcu_col = 0; mcu_col < m_mcus_per_col; mcu_col++)
    {
      int component_num, component_id;

      // x positions restart at 0 for each new line of MCUs.
      memset(block_x_mcu.ptr, 0, block_x_mcu.sizeof);

      for (mcu_row = 0; mcu_row < m_mcus_per_row; mcu_row++)
      {
        // Offsets of the current block inside the MCU.
        int block_x_mcu_ofs = 0, block_y_mcu_ofs = 0;

        // Handle a restart once the restart counter has run out.
        if ((m_restart_interval) && (m_restarts_left == 0))
          process_restart();

        for (mcu_block = 0; mcu_block < m_blocks_per_mcu; mcu_block++)
        {
          component_id = m_mcu_org.ptr[mcu_block];

          decode_block_func(this, component_id, block_x_mcu.ptr[component_id] + block_x_mcu_ofs, m_block_y_mcu.ptr[component_id] + block_y_mcu_ofs);

          if (m_comps_in_scan == 1)
            // Single-component scan: one block per MCU, simply step to the next block.
            block_x_mcu.ptr[component_id]++;
          else
          {
            // Multi-component scan: step through the component's h_samp x v_samp
            // blocks inside the MCU; after the last one, reset the offsets and move
            // the component's x position to the next MCU.
            if (++block_x_mcu_ofs == m_comp_h_samp.ptr[component_id])
            {
              block_x_mcu_ofs = 0;

              if (++block_y_mcu_ofs == m_comp_v_samp.ptr[component_id])
              {
                block_y_mcu_ofs = 0;
                block_x_mcu.ptr[component_id] += m_comp_h_samp.ptr[component_id];
              }
            }
          }
        }

        // Decremented for every MCU, whether or not a restart interval is set.
        m_restarts_left--;
      }

      // Advance the y position of every component in the scan to the next line of MCUs.
      if (m_comps_in_scan == 1)
        m_block_y_mcu.ptr[m_comp_list.ptr[0]]++;
      else
      {
        for (component_num = 0; component_num < m_comps_in_scan; component_num++)
        {
          component_id = m_comp_list.ptr[component_num];
          m_block_y_mcu.ptr[component_id] += m_comp_v_samp.ptr[component_id];
        }
      }
    }
  }
2872 
2873   // Decode a progressively encoded image.
2874   void init_progressive () {
2875     int i;
2876 
2877     if (m_comps_in_frame == 4)
2878       stop_decoding(JPGD_UNSUPPORTED_COLORSPACE);
2879 
2880     // Allocate the coefficient buffers.
2881     for (i = 0; i < m_comps_in_frame; i++)
2882     {
2883       m_dc_coeffs.ptr[i] = coeff_buf_open(m_max_mcus_per_row * m_comp_h_samp.ptr[i], m_max_mcus_per_col * m_comp_v_samp.ptr[i], 1, 1);
2884       m_ac_coeffs.ptr[i] = coeff_buf_open(m_max_mcus_per_row * m_comp_h_samp.ptr[i], m_max_mcus_per_col * m_comp_v_samp.ptr[i], 8, 8);
2885     }
2886 
2887     for ( ; ; )
2888     {
2889       int dc_only_scan, refinement_scan;
2890       pDecode_block_func decode_block_func;
2891 
2892       if (!init_scan())
2893         break;
2894 
2895       dc_only_scan = (m_spectral_start == 0);
2896       refinement_scan = (m_successive_high != 0);
2897 
2898       if ((m_spectral_start > m_spectral_end) || (m_spectral_end > 63))
2899         stop_decoding(JPGD_BAD_SOS_SPECTRAL);
2900 
2901       if (dc_only_scan)
2902       {
2903         if (m_spectral_end)
2904           stop_decoding(JPGD_BAD_SOS_SPECTRAL);
2905       }
2906       else if (m_comps_in_scan != 1)  /* AC scans can only contain one component */
2907         stop_decoding(JPGD_BAD_SOS_SPECTRAL);
2908 
2909       if ((refinement_scan) && (m_successive_low != m_successive_high - 1))
2910         stop_decoding(JPGD_BAD_SOS_SUCCESSIVE);
2911 
2912       if (dc_only_scan)
2913       {
2914         if (refinement_scan)
2915           decode_block_func = &decode_block_dc_refine;
2916         else
2917           decode_block_func = &decode_block_dc_first;
2918       }
2919       else
2920       {
2921         if (refinement_scan)
2922           decode_block_func = &decode_block_ac_refine;
2923         else
2924           decode_block_func = &decode_block_ac_first;
2925       }
2926 
2927       decode_scan(decode_block_func);
2928 
2929       m_bits_left = 16;
2930       get_bits(16);
2931       get_bits(16);
2932     }
2933 
2934     m_comps_in_scan = m_comps_in_frame;
2935 
2936     for (i = 0; i < m_comps_in_frame; i++)
2937       m_comp_list.ptr[i] = i;
2938 
2939     calc_mcu_block_order();
2940   }
2941 
2942   void init_sequential () {
2943     if (!init_scan())
2944       stop_decoding(JPGD_UNEXPECTED_MARKER);
2945   }
2946 
2947   void decode_start () {
2948     init_frame();
2949 
2950     if (m_progressive_flag)
2951       init_progressive();
2952     else
2953       init_sequential();
2954   }
2955 
  void decode_init (JpegStreamReadFunc rfn) {
    // Sets the decoder up for the stream reader `rfn` via initit(), then calls
    // locate_sof_marker() (presumably reads the stream up to the frame header; confirm
    // against those functions, which are defined elsewhere in this file).
    initit(rfn);
    locate_sof_marker();
  }
2960 }
2961 
2962 
2963 // ////////////////////////////////////////////////////////////////////////// //
2964 /// read JPEG image header, determine dimensions and number of components.
2965 /// return `false` if image is not JPEG (i hope).
public bool detect_jpeg_image_from_stream (scope JpegStreamReadFunc rfn, out int width, out int height, out int actual_comps) {
  // Without a reader there is nothing to probe.
  if (rfn is null) {
    return false;
  }

  // Constructing the decoder is what inspects the stream; its error code tells
  // whether that succeeded.
  auto probe = jpeg_decoder(rfn);
  version(jpegd_test) { import core.stdc.stdio : printf; printf("%u bytes read.\n", cast(uint)probe.total_bytes_read); }

  immutable bool recognized = (probe.error_code == JPGD_SUCCESS);
  if (recognized) {
    // Outputs are only filled in on success; otherwise they keep their `out` defaults.
    width = probe.width;
    height = probe.height;
    actual_comps = probe.num_components;
  }
  return recognized;
}
2976 
2977 // ////////////////////////////////////////////////////////////////////////// //
2978 /// read JPEG image header, determine dimensions and number of components.
2979 /// return `false` if image is not JPEG (i hope).
public bool detect_jpeg_image_from_memory (const(void)[] buf, out int width, out int height, out int actual_comps) {
  // Same as detect_jpeg_image_from_stream(), but the data is served from `buf`
  // through a reader delegate that hands out consecutive chunks of the buffer.
  size_t bufpos; // number of bytes of `buf` already handed to the decoder
  auto b = cast(const(ubyte)*)buf.ptr;

  return detect_jpeg_image_from_stream(
    delegate int (void* pBuf, int max_bytes_to_read, bool *pEOF_flag) {
      import core.stdc.string : memcpy;
      // Everything consumed: report end of data.
      if (bufpos >= buf.length) {
        *pEOF_flag = true;
        return 0;
      }
      // Never copy more than what is left in the buffer.
      if (buf.length-bufpos < max_bytes_to_read) max_bytes_to_read = cast(int)(buf.length-bufpos);
      memcpy(pBuf, b+bufpos, max_bytes_to_read);
      // Advance the cursor; without this the end-of-data check above could never
      // trigger and later reads would run past the end of `buf`.
      bufpos += max_bytes_to_read;
      return max_bytes_to_read;
    },
    width, height, actual_comps);
}
2999 
3000 
3001 // ////////////////////////////////////////////////////////////////////////// //
3002 /// decompress JPEG image, what else?
3003 /// you can specify required color components in `req_comps` (3 for RGB or 4 for RGBA), or leave it as is to use image value.
public ubyte[] decompress_jpeg_image_from_stream(scope JpegStreamReadFunc rfn, out int width, out int height, out int actual_comps, int req_comps=-1) {
  // Decodes the whole image read through `rfn` into a tightly packed pixel buffer
  // with `req_comps` bytes per pixel (1, 3 or 4; -1 means "as many as the image has").
  // On success `width`, `height` and `actual_comps` describe the source image and the
  // returned slice covers memory obtained from jpgd_malloc(). Returns null when `rfn`
  // is null, `req_comps` is not an accepted value, decoding fails, or the allocation fails.
  import core.stdc.string : memcpy;

  if (rfn is null) return null;
  if (req_comps != -1 && req_comps != 1 && req_comps != 3 && req_comps != 4) return null;

  auto decoder = jpeg_decoder(rfn);
  if (decoder.error_code != JPGD_SUCCESS) return null;
  version(jpegd_test) scope(exit) { import core.stdc.stdio : printf; printf("%u bytes read.\n", cast(uint)decoder.total_bytes_read); }

  immutable int image_width = decoder.width;
  immutable int image_height = decoder.height;
  width = image_width;
  height = image_height;
  actual_comps = decoder.num_components;
  if (req_comps < 0) req_comps = decoder.num_components;

  if (decoder.begin_decoding() != JPGD_SUCCESS) return null;

  // Bytes per destination row; the total size is computed in size_t so that large
  // images do not overflow 32-bit int arithmetic.
  immutable int dst_bpl = image_width*req_comps;
  immutable size_t total_bytes = cast(size_t)dst_bpl*cast(size_t)image_height;

  ubyte* pImage_data = cast(ubyte*)jpgd_malloc(total_bytes);
  if (pImage_data is null) return null;
  auto idata = pImage_data[0..total_bytes];

  for (int y = 0; y < image_height; ++y) {
    const(ubyte)* pScan_line;
    uint scan_line_len;
    if (decoder.decode(/*(const void**)*/cast(void**)&pScan_line, &scan_line_len) != JPGD_SUCCESS) {
      // Do not leak the partially filled image when a scan line fails to decode.
      jpgd_free(pImage_data);
      return null;
    }

    ubyte* pDst = pImage_data+cast(size_t)y*dst_bpl;

    if ((req_comps == 1 && decoder.num_components == 1) || (req_comps == 4 && decoder.num_components == 3)) {
      // The decoder's scan line already has the requested layout
      // (1 byte/pixel for one component, 4 bytes/pixel for three): copy it verbatim.
      memcpy(pDst, pScan_line, dst_bpl);
    } else if (decoder.num_components == 1) {
      // Single-component source expanded to 3 or 4 bytes per pixel.
      if (req_comps == 3) {
        for (int x = 0; x < image_width; ++x) {
          ubyte luma = pScan_line[x];
          pDst[0] = luma;
          pDst[1] = luma;
          pDst[2] = luma;
          pDst += 3;
        }
      } else {
        for (int x = 0; x < image_width; ++x) {
          ubyte luma = pScan_line[x];
          pDst[0] = luma;
          pDst[1] = luma;
          pDst[2] = luma;
          pDst[3] = 255;
          pDst += 4;
        }
      }
    } else if (decoder.num_components == 3) {
      // Three-component source: scan lines carry 4 bytes per pixel.
      if (req_comps == 1) {
        // Weighted sum of the three channels in 16.16 fixed point, with rounding.
        immutable int YR = 19595, YG = 38470, YB = 7471;
        for (int x = 0; x < image_width; ++x) {
          int r = pScan_line[x*4+0];
          int g = pScan_line[x*4+1];
          int b = pScan_line[x*4+2];
          *pDst++ = cast(ubyte)((r * YR + g * YG + b * YB + 32768) >> 16);
        }
      } else {
        // Drop the fourth byte of every source pixel.
        for (int x = 0; x < image_width; ++x) {
          pDst[0] = pScan_line[x*4+0];
          pDst[1] = pScan_line[x*4+1];
          pDst[2] = pScan_line[x*4+2];
          pDst += 3;
        }
      }
    }
  }

  return idata;
}
3083 
3084 
3085 // ////////////////////////////////////////////////////////////////////////// //
3086 /// decompress JPEG image from memory buffer.
3087 /// you can specify required color components in `req_comps` (3 for RGB or 4 for RGBA), or leave it as is to use image value.
public ubyte[] decompress_jpeg_image_from_memory(const(void)[] buf, out int width, out int height, out int actual_comps, int req_comps=-1) {
  // Same as decompress_jpeg_image_from_stream(), but the data is served from `buf`
  // through a reader delegate that hands out consecutive chunks of the buffer.
  size_t bufpos; // number of bytes of `buf` already handed to the decoder
  auto b = cast(const(ubyte)*)buf.ptr;

  return decompress_jpeg_image_from_stream(
    delegate int (void* pBuf, int max_bytes_to_read, bool *pEOF_flag) {
      import core.stdc.string : memcpy;
      // Everything consumed: report end of data.
      if (bufpos >= buf.length) {
        *pEOF_flag = true;
        return 0;
      }
      // Never copy more than what is left in the buffer.
      if (buf.length-bufpos < max_bytes_to_read) max_bytes_to_read = cast(int)(buf.length-bufpos);
      memcpy(pBuf, b+bufpos, max_bytes_to_read);
      // Advance the cursor; without this the end-of-data check above could never
      // trigger and later reads would run past the end of `buf`.
      bufpos += max_bytes_to_read;
      return max_bytes_to_read;
    },
    width, height, actual_comps, req_comps);
}