1 /**
2 String build code, plus no-locale float parsing functions.
3 
4 Copyright: Guillaume Piolat, 2022.
5 License:   $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
6 */
7 
module dplug.core.string;
9 
10 import core.stdc.stdlib;
11 import core.stdc.string;
12 import core.stdc.stdarg;
13 import dplug.core.vec;
14 
15 
16 nothrow @nogc:
17 
/// Builds an owning `String` holding a copy of the characters of `s`.
/// Params:
///     s = Characters to copy; may be empty or `null`.
/// Returns: A new `String` constructed from `s`.
String makeString(const(char)[] s)
{
    String built = String(s);
    return built;
}
23 
/// For now, just a string builder that owns its memory.
/// Dplug `String`, designed to ease the usage of all the C string functions,
/// allow appending, etc.
/// `String` always owns its memory, and can return it as a D slice.
/// Characters are stored in a `Vec!char`; the struct is not copyable
/// (postblit is disabled).
/// FUTURE: use realloc to be able to size down.
///         Capacity to be a slice into existing memory and not own.
///         Capacity to disown memory (implies: stop using Vec)
/// QUESTION: should String just be a managed slice!T instead? Like Go slices.
struct String
{
public:
nothrow @nogc:

    /// Constructs a string holding the single character `ch`.
    this(char ch)
    {
        this ~= ch;
    }

    /// Constructs a string holding a copy of the characters of `s`.
    this(const(char)[] s)
    {
        this ~= s;
    }

    /// Nothing to do explicitly here: the storage is held by the `_chars` member.
    ~this()
    {
    }

    @disable this(this);

    /// Sets as empty/null string.
    void makeEmpty()
    {
        _chars.clearContents();
    }

    /// Pointer to first character in the string, or `null`.
    inout(char)* ptr() inout return
    {
        return _chars.ptr;
    }

    /// Length in bytes of the string.
    size_t length() const
    {
        return _chars.length;
    }

    /// Converts to a D string, sliced into the `String` memory.
    /// Returns: `null` when the string is empty, else a slice of all characters.
    inout(char)[] asSlice() inout return
    {
        size_t len = length();
        if (len == 0)
            return null;
        return _chars[0..len];
    }

    /// Returns: Whole content of the string in one slice.
    inout(char)[] opSlice() inout return
    {
        return asSlice();
    }

    /// Returns: A slice of the characters in `i1 .. i2`.
    inout(char)[] opSlice(size_t i1, size_t i2) inout
    {
        return _chars[i1 .. i2];
    }

    /// Replaces the content with the single character `x`.
    void opAssign(T : char)(T x)
    {
        makeEmpty();
        this ~= x;
    }

    /// Replaces the content with a copy of the characters of `x`.
    void opAssign(T : const(char)[])(T x)
    {
        makeEmpty();
        this ~= x;
    }

    /// Replaces the content with a copy of the characters of `x`.
    void opAssign(T : String)(T x)
    {
        makeEmpty();
        this ~= x;
    }

    // <Appending>

    /// Append a character to the string. This invalidates pointers to characters
    /// returned before.
    void opOpAssign(string op)(char x) if (op == "~")
    {
        _chars.pushBack(x);
    }

    /// Append characters to the string.
    /// `str` is allowed to be a slice of this very string (eg. `s ~= s[]`).
    void opOpAssign(string op)(const(char)[] str) if (op == "~")
    {
        size_t len = str.length;
        if (len == 0)
            return;

        const(char)* own = _chars.ptr;
        size_t ownLen = _chars.length;

        if (own !is null && str.ptr >= own && str.ptr < own + ownLen)
        {
            // `str` points into our own storage. Appending may move that storage,
            // which would leave `str` dangling, so read the source characters back
            // through `_chars` by index instead of through `str`.
            size_t start = cast(size_t)(str.ptr - own);
            for (size_t n = 0; n < len; ++n)
            {
                char ch = _chars[start + n];
                _chars.pushBack(ch);
            }
        }
        else
        {
            for (size_t n = 0; n < len; ++n)
                _chars.pushBack(str[n]);
        }
    }

    /// Append the characters of another `String` to the string.
    /// Appending a `String` to itself is supported.
    void opOpAssign(string op)(ref const(String) str) if (op == "~")
    {
        this ~= str.asSlice();
    }

    /// Append a zero-terminated string to the string.
    /// Name is explicit, because it should be rare and overload conflict.
    /// A `null` pointer appends nothing. `str` must not point into this `String`.
    void appendZeroTerminatedString(const(char)* str)
    {
        if (str is null)
            return;
        while(*str != '\0')
            _chars.pushBack(*str++);
    }

    // </Appending>

    // <Comparison>

    /// Returns: `true` if the string has the same length and characters as `s`.
    /// An empty `String` compares equal to both `""` and `null`.
    bool opEquals(const(char)[] s)
    {
        size_t lenS = s.length;
        size_t lenT = this.length;
        if (lenS != lenT)
            return false;
        for (size_t n = 0; n < lenS; ++n)
        {
            if (s[n] != _chars[n])
                return false;
        }
        return true;
    }

    /// Returns: `true` if both strings hold the same characters.
    bool opEquals(ref const(String) str)
    {
        return this.asSlice() == str.asSlice();
    }

    // </Comparison>

private:

    // FUTURE

    /*alias Flags = int;
    enum : Flags
    {
        owned          = 1, /// String data is currently owned (C's malloc/free), not borrowed.
        zeroTerminated = 2, /// String data is currently zero-terminated.
    }

    Flags _flags = 0;
    */

    /// Character storage.
    Vec!char _chars;

    /// Same effect as `makeEmpty`.
    void clearContents()
    {
        _chars.clearContents();
    }
}
183 
// Null and .ptr: empty and null D strings compare equal, and `String` must too.
unittest
{
    string unset;
    string empty = "";
    string nil = null;

    // Reference behaviour of built-in D strings.
    assert(empty == unset);
    assert(nil == unset);
    assert(empty == nil);
    assert(empty !is nil);
    assert(empty.length == 0);
    assert(nil.length == 0);
    assert(empty.ptr !is null);

    // Must preserve semantics from D strings.
    String fromUnset = unset;
    String fromEmpty = empty;
    String fromNil = nil;
    assert(fromEmpty == fromUnset);
    assert(fromNil == fromUnset);
    assert(fromEmpty == fromNil);
}
207 
// Basic appending, then emptying.
unittest
{
    String greeting = "Hello,";
    greeting ~= " world!";
    assert(greeting == "Hello, world!");

    // Once emptied, the string compares equal to null and has no length.
    greeting.makeEmpty();
    assert(greeting == null);
    assert(greeting.length == 0);
}
218 
/// `strtod` replacement that does not depend on the C locale.
/// Params:
///     s = Must be a zero-terminated string.
///     p = If non-null, receives the position where parsing stopped.
/// Returns: The parsed value, or 0.0 when the underlying parser reports an error.
/// Note that this code is duplicated in wren-port, to avoid a dependency on dplug:core there.
public double strtod_nolocale(const(char)* s, const(char)** p)
{
    const(char)* stop;
    bool failed = false;
    const double parsed = stb__clex_parse_number_literal(s, &stop, &failed, true);

    if (p !is null)
        *p = stop;

    return failed ? 0.0 : parsed;
}
unittest
{
    // Numbers followed by unrelated characters: both parsers must stop at the same place.
    string[8] trailing = ["0x123lol", "+0x1.921fb54442d18p+0001()", "0,", "-0.0,,,,", "0.65,stuff", "1.64587okokok", "-1.0e+9HELLO", "1.1454e-25f#STUFF"];
    foreach (input; trailing)
    {
        const(char)* endC, endOurs;
        double viaC = strtod(input.ptr, &endC); // in unittest, no program tampering the C locale
        double viaOurs = strtod_nolocale(input.ptr, &endOurs);
        // To debug:
        //import core.stdc.stdio;
        //debug printf("parsing \"%s\" %lg %lg %p %p\n", input.ptr, viaC, viaOurs, endC, endOurs);
        assert(endC == endOurs);
    }
}
246 
/// String to integer parsing that does not depend on the C locale.
/// Params:
///     s = Must be a zero-terminated string. A `null` pointer is reported as an error.
///     mustConsumeEntireInput = If true, it is an error when parsing stops before the end of `s`.
///     err = Optional. If non-null, receives `true` on error and `false` on success.
/// Returns: The parsed integer, or 0 on error.
/// Note: unlike with `convertStringToDouble`, the string "4.7" will parse to just 4. Replaces %d in scanf-like functions.
/// Only parse correctly from -2147483648 to 2147483647.
/// Larger values are clamped to this -2147483648 to 2147483647 range.
public int convertStringToInteger(const(char)* s, 
                                  bool mustConsumeEntireInput,
                                  bool* err) pure nothrow @nogc
{
    bool failed = true;
    int result = 0;

    if (s !is null)
    {
        const(char)* stop;
        bool parseFailed = false;
        const double parsed = stb__clex_parse_number_literal(s, &stop, &parseFailed, false);

        // Leftover input only matters when the caller asked for full consumption.
        if (!parseFailed && !(mustConsumeEntireInput && stop != s + strlen(s)))
        {
            result = cast(int) parsed;

            // The parser clamps to the int range, so the conversion must be lossless.
            const double roundTrip = result;
            assert(roundTrip == parsed);
            failed = false;
        }
    }

    if (err !is null)
        *err = failed;
    return result;
}
unittest
{
    bool failed;

    // Surrounding whitespace is accepted, the fractional part is not parsed.
    assert(convertStringToInteger(" 4.7\n", false, &failed) == 4);
    assert(!failed);

    // Out-of-range values clamp, exponents are not interpreted.
    assert(convertStringToInteger("-2147483649", false, &failed) == -2147483648);
    assert(convertStringToInteger("1e30", false, &failed) == 1);
    assert(convertStringToInteger("-0", false, &failed) == 0);
    assert(convertStringToInteger("10000000000", false, &failed) == 2147483647);
}
303 
304 
/// String to floating-point parsing that does not depend on the C locale.
/// Params:
///     s = Must be a zero-terminated string. A `null` pointer is reported as an error.
///     mustConsumeEntireInput = If true, it is an error when parsing stops before the end of `s`.
///     err = Optional. If non-null, receives `true` on error and `false` on success.
/// Returns: The parsed value, or 0.0 on error.
public double convertStringToDouble(const(char)* s, 
                                    bool mustConsumeEntireInput,
                                    bool* err) pure nothrow @nogc
{
    bool failed = true;
    double result = 0.0;

    if (s !is null)
    {
        const(char)* stop;
        bool parseFailed = false;
        const double parsed = stb__clex_parse_number_literal(s, &stop, &parseFailed, true);

        // Leftover input only matters when the caller asked for full consumption.
        if (!parseFailed && !(mustConsumeEntireInput && stop != s + strlen(s)))
        {
            result = parsed;
            failed = false;
        }
    }

    if (err !is null)
        *err = failed;
    return result;
}
343  
unittest
{
    //import core.stdc.stdio;
    import std.math.operations;

    // Well-formed inputs, and the values they must parse to.
    string[9] s = ["14", "0x123", "+0x1.921fb54442d18p+0001", "0", "-0.0", "   \n\t\n\f\r 0.65", "1.64587", "-1.0e+9", "1.1454e-25"]; 
    double[9] correct = [14, 0x123, +0x1.921fb54442d18p+0001, 0.0, -0.0, 0.65L, 1.64587, -1e9, 1.1454e-25f];

    // Same numbers, each followed by characters that are not part of the number.
    string[9] sPartial = ["14top", "0x123lol", "+0x1.921fb54442d18p+0001()", "0,", "-0.0,,,,", "   \n\t\n\f\r 0.65,stuff", "1.64587okokok", "-1.0e+9HELLO", "1.1454e-25f#STUFF"]; 

    foreach (n; 0 .. s.length)
    {
        /*
        // Check vs scanf
        double sa;
        if (sscanf(s[n].ptr, "%lf", &sa) == 1)
        {
            debug printf("scanf finds %lg\n", sa);
        }
        else
            debug printf("scanf no parse\n");
        */

        bool err;
        double a = convertStringToDouble(s[n].ptr, true, &err);
        //import std.stdio;
        //debug writeln(a, " correct is ", correct[n]);
        assert(!err);
        assert( isClose(a, correct[n], 0.0001) );

        bool err2;
        double b = convertStringToDouble(s[n].ptr, false, &err2);
        assert(!err2);
        assert(b == a); // same parse

        //debug printf("%lf\n", a);

        convertStringToDouble(s[n].ptr, true, null); // should run without error pointer
    }

    foreach (n; 0 .. sPartial.length)
    {
        // Trailing characters are an error when the whole input must be consumed...
        bool err;
        double a = convertStringToDouble(sPartial[n].ptr, true, &err);
        assert(err);
        assert(a == 0.0);

        // ...and are ignored otherwise, yielding the leading number.
        bool err2;
        double b = convertStringToDouble(sPartial[n].ptr, false, &err2);
        assert(!err2);
        assert( isClose(b, correct[n], 0.0001) );
    }
}
383 
/// Parses a decimal or hexadecimal number literal at the start of a zero-terminated string.
///
/// Accepted form: optional leading whitespace, optional `+`/`-` sign, optional `0x`/`0X`
/// prefix, digits. When `allowFloat` is true, a `.` fraction and an exponent
/// (`e`/`E` in decimal, `p`/`P` as a power of two in hexadecimal) are parsed too.
/// An exponent marker that is not followed by at least one digit is not consumed.
///
/// Params:
///     p = Zero-terminated input.
///     q = If non-null, receives the position where parsing stopped. When no number
///         could be parsed, this is the start of the input.
///     err = If non-null, receives `true` when no digit was found, `false` otherwise.
///     allowFloat = If false, only the integer part is parsed and the result is clamped
///                  to the `int.min` .. `int.max` range.
/// Returns: The parsed value, or 0 when no number could be parsed.
private double stb__clex_parse_number_literal(const(char)* p, 
                                              const(char)**q, 
                                              bool* err,
                                              bool allowFloat) pure nothrow @nogc
{
    // Value of `ch` as a digit in the given radix (10 or 16), or -1 if it is not one.
    static int digitValue(char ch, int radix) pure nothrow @nogc
    {
        if (ch >= '0' && ch <= '9')
            return ch - '0';
        if (radix == 16)
        {
            if (ch >= 'a' && ch <= 'f')
                return 10 + (ch - 'a');
            if (ch >= 'A' && ch <= 'F')
                return 10 + (ch - 'A');
        }
        return -1;
    }

    const(char)* s = p; // start of input, reported through `q` when nothing parses
    double value = 0;
    int base = 10;
    bool negative = false;
    size_t numDigits = 0; // mantissa digits seen (integer + fractional part)

    // Skip leading whitespace, like scanf and strtod do (the C `isspace` set).
    while (*p == ' ' || *p == '\t' || *p == '\n' || *p == '\v' || *p == '\f' || *p == '\r')
        p += 1;

    if (*p == '-') 
    {
        negative = true;
        p += 1;
    } 
    else if (*p == '+') 
    {
        p += 1;
    }

    // Position just after the '0' of a "0x" prefix, to fall back on if no hex digit follows.
    const(char)* afterZero = null;
    if (*p == '0' && (p[1] == 'x' || p[1] == 'X'))
    {
        base = 16;
        afterZero = p + 1;
        p += 2;
    }

    // Integer part.
    for (;;) 
    {
        int d = digitValue(*p, base);
        if (d < 0)
            break;
        value = value*base + d;
        ++p;
        ++numDigits;
    }

    // Fractional part.
    if (allowFloat && *p == '.')
    {
        double scale = 1, addend = 0;
        ++p;
        for (;;)
        {
            int d = digitValue(*p, base);
            if (d < 0)
                break;
            // Stop accumulating before `scale` overflows to infinity, which would
            // turn the fraction into 0 or NaN. Remaining digits are still consumed.
            if (scale <= double.max / base)
            {
                addend = addend*base + d;
                scale *= base;
            }
            ++p;
            ++numDigits;
        }
        value += addend / scale;
    }

    bool parseExponent = allowFloat;

    if (numDigits == 0)
    {
        if (afterZero is null)
        {
            // Not a number at all: nothing is consumed.
            if (q) *q = s;
            if (err) *err = true;
            return 0;
        }

        // "0x" without any hex digit: the literal is the lone "0", parsing stops at the 'x'.
        p = afterZero;
        value = 0;
        parseExponent = false;
    }

    if (parseExponent)
    {
        bool hasMarker;
        if (base == 16)
        {
            // exponent required for hex float literal, else it's an integer literal like 0x123
            hasMarker = (*p == 'p' || *p == 'P');
        }
        else
            hasMarker = (*p == 'e' || *p == 'E');

        if (hasMarker)
        {
            const(char)* e = p + 1;
            bool negativeExponent = (*e == '-');
            if (*e == '-' || *e == '+')
                ++e;

            // Only accept the exponent if it has at least one digit, else leave the
            // marker unconsumed.
            if (*e >= '0' && *e <= '9')
            {
                uint exponent2 = 0;
                while (*e >= '0' && *e <= '9')
                {
                    // Saturate instead of wrapping around: past this point the power
                    // is infinite for both bases anyway.
                    if (exponent2 < 100_000)
                        exponent2 = exponent2*10 + (*e - '0');
                    ++e;
                }
                p = e;

                // A zero mantissa stays zero; skipping avoids 0 * infinity = NaN.
                if (value != 0)
                {
                    double power = stb__clex_pow(base == 16 ? 2 : 10, exponent2);
                    if (negativeExponent)
                        value /= power;
                    else
                        value *= power;
                }
            }
        }
    }

    if (q) *q = p;
    if (err) *err = false; // seen no error

    if (negative)
        value = -value;

    if (!allowFloat)
    {
        // clamp to the int range (value is already integral here)
        if (value > int.max) value = int.max;
        if (value < int.min) value = int.min;
    }
    return value;
}
500 
/// Raises `base` to a non-negative integer power by repeated squaring.
/// Params:
///     base = Number to raise.
///     exponent = Power to raise it to; 0 yields 1.
/// Returns: `base` multiplied by itself `exponent` times.
private double stb__clex_pow(double base, uint exponent) pure nothrow @nogc
{
    double result = 1;
    double factor = base;
    uint bits = exponent;

    // Walk the exponent bit by bit, lowest first: `factor` holds base^(2^k) at step k.
    while (bits != 0)
    {
        if ((bits & 1) != 0)
            result *= factor;
        factor *= factor;
        bits >>= 1;
    }
    return result;
}