dplug.core.string source code

1 /**
2 String build code, plus no-locale float parsing functions.
3 
4 Copyright: Guillaume Piolat, 2022.
5 License:   $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
6 */
7 
8 module dplug.core..string;
9 
10 import core.stdc.stdlib;
11 import core.stdc.string;
12 import core.stdc.stdarg;
13 import dplug.core.vec;
14 
15 
16 nothrow @nogc:
17 
/// Builds a `String` that holds its own copy of the characters of the D slice `s`.
String makeString(const(char)[] s)
{
    String built = s;
    return built;
}
23 
/// Dplug `String`: for now, a string builder that owns its memory.
/// It is designed to ease the usage of the C string functions, to allow appending, etc.
/// `String` always owns its memory, and can expose its content as a D slice.
/// Copying is disabled (`@disable this(this)`).
/// FUTURE: use realloc to be able to size down.
///         Capacity to be a slice into existing memory and not own.
///         Capacity to disown memory (implies: stop using Vec)
/// QUESTION: should String just be a managed slice!T instead? Like Go slices.
struct String
{
public:
nothrow @nogc:

    /// Builds a string made of the single character `ch`.
    this(char ch)
    {
        this ~= ch;
    }

    /// Builds a string holding a copy of the characters of `s`.
    this(const(char)[] s)
    {
        this ~= s;
    }

    ~this()
    {
    }

    @disable this(this);

    /// Sets as empty/null string.
    void makeEmpty()
    {
        _chars.clearContents();
    }

    /// Pointer to first character in the string, or `null`.
    inout(char)* ptr() inout return
    {
        return _chars.ptr;
    }

    /// Length in bytes of the string.
    size_t length() const
    {
        return _chars.length;
    }

    /// Converts to a D string, sliced into the `String` memory.
    /// Returns: `null` if the string is empty.
    inout(char)[] asSlice() inout return
    {
        size_t len = length();
        if (len == 0)
            return null;
        return _chars[0..len];
    }

    /// Returns: Whole content of the string in one slice.
    inout(char)[] opSlice() inout return
    {
        return asSlice();
    }

    /// Returns: A slice of the string, from byte index `i1` (included) to `i2` (excluded).
    inout(char)[] opSlice(size_t i1, size_t i2) inout return
    {
        return _chars[i1 .. i2];
    }

    /// Replaces the content with the single character `x`.
    void opAssign(T : char)(T x)
    {
        makeEmpty();
        this ~= x;
    }

    /// Replaces the content with a copy of the characters of `x`.
    void opAssign(T : const(char)[])(T x)
    {
        makeEmpty();
        this ~= x;
    }

    /// Replaces the content with a copy of the characters of `x`.
    void opAssign(T : String)(T x)
    {
        makeEmpty();
        this ~= x;
    }

    // <Appending>

    /// Append a character to the string. This invalidates pointers to characters
    /// returned before.
    void opOpAssign(string op)(char x) if (op == "~")
    {
        _chars.pushBack(x);
    }

    /// Append characters to the string. This invalidates pointers to characters
    /// returned before.
    /// `str` is allowed to be a slice of this very `String` (eg. `s ~= s[]`).
    void opOpAssign(string op)(const(char)[] str) if (op == "~")
    {
        size_t len = str.length;
        if (len == 0)
            return;

        const(char)* own = _chars.ptr;
        if (own !is null && str.ptr >= own && str.ptr < own + _chars.length)
        {
            // `str` points into our own storage. Since appending invalidates previously
            // returned pointers, read the source through indices instead of through `str`.
            size_t offset = str.ptr - own;
            for (size_t n = 0; n < len; ++n)
            {
                char c = _chars[offset + n];
                _chars.pushBack(c);
            }
        }
        else
        {
            for (size_t n = 0; n < len; ++n)
                _chars.pushBack(str[n]);
        }
    }

    /// Append the content of another `String` to the string.
    /// Appending a `String` to itself is supported.
    void opOpAssign(string op)(ref const(String) str) if (op == "~")
    {
        this ~= str.asSlice();
    }

    /// Append a zero-terminated string to the string.
    /// Name is explicit, because it should be rare and overload conflict.
    /// A `null` pointer is treated as an empty string.
    void appendZeroTerminatedString(const(char)* str)
    {
        if (str is null)
            return;
        while(*str != '\0')
            _chars.pushBack(*str++);
    }

    // </Appending>

    // <Comparison>

    /// Returns: `true` if the content is the same sequence of bytes as `s`.
    /// An empty `String` is equal to both `""` and `null`.
    bool opEquals(const(char)[] s) const
    {
        return this.asSlice() == s;
    }

    /// Returns: `true` if both strings hold the same sequence of bytes.
    bool opEquals(ref const(String) str) const
    {
        return this.asSlice() == str.asSlice();
    }

    // </Comparison>

private:

    // FUTURE

    /*alias Flags = int;
    enum : Flags
    {
        owned          = 1, /// String data is currently owned (C's malloc/free), not borrowed.
        zeroTerminated = 2, /// String data is currently zero-terminated.
    }

    Flags _flags = 0;
    */

    Vec!char _chars;

    void clearContents()
    {
        _chars.clearContents();
    }
}
183 
// Null vs empty: `String` has to compare like built-in D strings do.
unittest
{
    string unset;
    string empty = "";
    string nil = null;

    // Reference behaviour of D strings: equal by content, distinct by identity.
    assert(empty == unset);
    assert(nil == unset);
    assert(empty == nil);
    assert(empty !is nil);
    assert(empty.length == 0);
    assert(nil.length == 0);
    assert(empty.ptr !is null);

    // Must preserve semantics from D strings.
    String fromUnset = unset;
    String fromEmpty = empty;
    String fromNil = nil;
    assert(fromEmpty == fromUnset);
    assert(fromNil == fromUnset);
    assert(fromEmpty == fromNil);
}
207 
// Appending to a String, then emptying it.
unittest
{
    String greeting = "Hello,";
    greeting ~= " world!";
    assert(greeting == "Hello, world!");

    greeting.makeEmpty();
    assert(greeting == null);
    assert(greeting.length == 0);
}
218 
/// `strtod` replacement that does not depend on the C locale.
/// Params:
///     s = Must be a zero-terminated string.
///     p = If not `null`, receives the position where parsing stopped.
/// Returns: The parsed value, or 0.0 if the parser reported an error.
/// Note that this code is duplicated in wren-port, to avoid a dependency on dplug:core there.
public double strtod_nolocale(const(char)* s, const(char)** p)
{
    const(char)* stop;
    bool failed = false;
    double parsed = stb__clex_parse_number_literal(s, &stop, &failed, true);

    if (p !is null)
        *p = stop;

    return failed ? 0.0 : parsed;
}
unittest
{
    // Numbers followed by trailing garbage, plus the inf/nan spellings.
    string[18] inputs = 
    [
        "0x123lol", "+0x1.921fb54442d18p+0001()", "0,", "-0.0,,,,", 
        "0.65,stuff", "1.64587okokok", "-1.0e+9HELLO", "1.1454e-25f#STUFF",
        "+iNfu", "-infEXCESS", "infuh", "-infinity", 
        "+infinity", "+nan", "-nan", "nan",
        "INFINITY", "-NAN"
    ]; 

    foreach (input; inputs)
    {
        const(char)* endLibc, endOurs;
        double fromLibc = strtod(input.ptr, &endLibc); // in unittest, no program tampering the C locale
        double fromOurs = strtod_nolocale(input.ptr, &endOurs);
        //import core.stdc.stdio;
        //debug printf("parsing \"%s\" %lg %lg %p %p\n", input.ptr, fromLibc, fromOurs, endLibc, endOurs);

        // Both parsers must stop at the same character.
        assert(endLibc == endOurs);
    }
}
254 
/// Parses an integer from a string, independently of the C locale.
/// Params:
///     s = Must be a zero-terminated string. `null` is reported as an error.
///     mustConsumeEntireInput = if true, check that s is entirely consumed by parsing the number.
///     err = optional bool, set to `true` on error and to `false` on success.
/// Returns: The parsed integer, or 0 on error.
/// Note: unlike with `convertStringToDouble`, the string "4.7" will parse to just 4. Replaces %d in scanf-like functions.
/// Only parse correctly from -2147483648 to 2147483647.
/// Larger values are clamped to this -2147483648 to 2147483647 range.
public int convertStringToInteger(const(char)* s,
                                  bool mustConsumeEntireInput,
                                  bool* err) pure nothrow @nogc
{
    bool failed = (s is null);
    double parsed = 0;

    if (!failed)
    {
        const(char)* stop;
        parsed = stb__clex_parse_number_literal(s, &stop, &failed, false);

        // When asked to, reject input with anything left after the number.
        if (!failed && mustConsumeEntireInput)
            failed = (stop != s + strlen(s));
    }

    if (err !is null)
        *err = failed;

    if (failed)
        return 0;

    int asInt = cast(int) parsed;
    assert(asInt == parsed); // should have returned an integer that fits in a double, like the whole int.min to int.max range.
    return asInt;
}
unittest
{
    bool err;

    // The fractional part is not parsed, and not an error when partial input is allowed.
    int truncated = convertStringToInteger(" 4.7\n", false, &err);
    assert(truncated == 4);
    assert(!err);

    // Out-of-range values are clamped; exponents are not interpreted.
    assert(convertStringToInteger("-2147483649", false, &err) == -2147483648);
    assert(convertStringToInteger("1e30", false, &err) == 1);
    assert(convertStringToInteger("-0", false, &err) == 0);
    assert(convertStringToInteger("10000000000", false, &err) == 2147483647);
}
311 
312 
/// Parses a floating-point number from a string, independently of the C locale.
/// Params:
///     s = Must be a zero-terminated string. `null` is reported as an error.
///     mustConsumeEntireInput = if true, check that s is entirely consumed by parsing the number.
///     err = optional bool, set to `true` on error and to `false` on success.
/// Returns: The parsed value, or 0.0 on error.
public double convertStringToDouble(const(char)* s, 
                                    bool mustConsumeEntireInput,
                                    bool* err) pure nothrow @nogc
{
    bool failed = (s is null);
    double parsed = 0.0;

    if (!failed)
    {
        const(char)* stop;
        parsed = stb__clex_parse_number_literal(s, &stop, &failed, true);

        // When asked to, reject input with anything left after the number.
        if (!failed && mustConsumeEntireInput)
            failed = (stop != s + strlen(s));
    }

    if (err !is null)
        *err = failed;

    return failed ? 0.0 : parsed;
}
351  
unittest
{
    //import core.stdc.stdio;
    import std.math.operations;

    // Inputs that are entirely a number, and the value each one must parse to.
    string[9] s = ["14", "0x123", "+0x1.921fb54442d18p+0001", "0", "-0.0", "   \n\t\n\f\r 0.65", "1.64587", "-1.0e+9", "1.1454e-25"]; 
    double[9] correct = [14, 0x123, +0x1.921fb54442d18p+0001, 0.0, -0.0, 0.65L, 1.64587, -1e9, 1.1454e-25f];

    // Same numbers, each followed by characters that are not part of the number.
    string[9] sPartial = ["14top", "0x123lol", "+0x1.921fb54442d18p+0001()", "0,", "-0.0,,,,", "   \n\t\n\f\r 0.65,stuff", "1.64587okokok", "-1.0e+9HELLO", "1.1454e-25f#STUFF"]; 
    for (int n = 0; n < s.length; ++n)
    {
        /*
        // Check vs scanf
        double sa;
        if (sscanf(s[n].ptr, "%lf", &sa) == 1)
        {
            debug printf("scanf finds %lg\n", sa);
        }
        else
            debug printf("scanf no parse\n");
        */

        bool err;
        double a = convertStringToDouble(s[n].ptr, true, &err);
        //import std.stdio;
        //debug writeln(a, " correct is ", correct[n]);
        assert(!err);
        assert( isClose(a, correct[n], 0.0001) );

        bool err2;
        double b = convertStringToDouble(s[n].ptr, false, &err2);
        assert(!err2);
        assert(b == a); // same parse

        //debug printf("%lf\n", a);

        convertStringToDouble(s[n].ptr, true, null); // should run without error pointer
    }

    for (int n = 0; n < sPartial.length; ++n)
    {
        // Trailing characters are an error when the whole input must be consumed...
        bool errStrict;
        double strict = convertStringToDouble(sPartial[n].ptr, true, &errStrict);
        assert(errStrict);
        assert(strict == 0.0);

        // ...and are ignored otherwise: the leading number is returned.
        bool errLoose;
        double loose = convertStringToDouble(sPartial[n].ptr, false, &errLoose);
        assert(!errLoose);
        assert( isClose(loose, correct[n], 0.0001) );
    }
}
391 
/// Value of `c` as a digit in `base` (10 or 16), or -1 if `c` is not a digit of that base.
private int stb__clex_digit_value(char c, int base) pure nothrow @nogc
{
    if (c >= '0' && c <= '9')
        return c - '0';
    if (base == 16)
    {
        if (c >= 'a' && c <= 'f')
            return 10 + (c - 'a');
        if (c >= 'A' && c <= 'F')
            return 10 + (c - 'A');
    }
    return -1;
}

/// Parses the number literal at the start of a zero-terminated string, without
/// consulting the C locale.
///
/// Accepted syntax: optional leading whitespace, optional sign, then either
/// decimal digits, or `0x`/`0X` followed by hexadecimal digits. When `allowFloat`
/// is true, a fractional part, an exponent (`e`/`E` in decimal, `p`/`P` in
/// hexadecimal, followed by decimal digits) and the special values "inf",
/// "infinity" and "nan" (any letter case) are also accepted.
///
/// Params:
///     p = Zero-terminated input string.
///     q = If not `null`, receives the position of the first character that is not
///         part of the number. If no number could be parsed, receives `p` itself.
///     err = If not `null`, set to `true` when no number could be parsed, else to `false`.
///     allowFloat = If false, only an integer is parsed and the result is clamped
///                  to the `int.min` .. `int.max` range.
/// Returns: The parsed value, or 0.0 if no number could be parsed.
private double stb__clex_parse_number_literal(const(char)* p, 
                                              const(char)**q, 
                                              bool* err,
                                              bool allowFloat) pure nothrow @nogc
{
    const(char)* start = p;  // position reported in *q when nothing can be parsed
    double value = 0;
    int base = 10;
    bool negative = false;
    bool special = false;    // recognized "inf", "infinity" or "nan"
    bool gotDigits = false;  // consumed at least one mantissa digit

    // Skip leading whitespace, like scanf and strtod do.
    // These six characters are what `isspace` accepts in the "C" locale.
    while (*p == ' ' || *p == '\t' || *p == '\n' || *p == '\v' || *p == '\f' || *p == '\r')
        ++p;

    if (*p == '-') 
    {
        negative = true;
        ++p;
    } 
    else if (*p == '+') 
    {
        ++p;
    }

    // Issue #865, "-inf" was parsed as 0
    // libc can produce "infinity" as well as "inf"
    // %f specifier can produce "infinity", "inf", "nan"
    // %F specifier can produce "INFINITY", "INF", "NAN"
    // In practice, C libraries parse combination of uppercase and lowercase
    // Note: thanks to short-circuit evaluation, p[k+1] is only read when p[k] matched,
    // hence never past the terminating zero.
    if (allowFloat)
    {
        if (  (p[0] == 'i' || p[0] == 'I')
           && (p[1] == 'n' || p[1] == 'N')
           && (p[2] == 'f' || p[2] == 'F') )
        {
            value = double.infinity;
            special = true;
            p += 3;

            if (  (p[0] == 'i' || p[0] == 'I')
               && (p[1] == 'n' || p[1] == 'N')
               && (p[2] == 'i' || p[2] == 'I')
               && (p[3] == 't' || p[3] == 'T')
               && (p[4] == 'y' || p[4] == 'Y') )
                p += 5;
        }
        else if (  (p[0] == 'n' || p[0] == 'N')
                && (p[1] == 'a' || p[1] == 'A')
                && (p[2] == 'n' || p[2] == 'N') )
        {
            value = double.nan;
            special = true;
            p += 3;
        }
    }

    if (!special)
    {
        // "0x" only starts a hexadecimal literal if a hexadecimal digit follows
        // (possibly after the '.' of a hex float). Else the literal is just "0".
        if (p[0] == '0' && (p[1] == 'x' || p[1] == 'X'))
        {
            bool hexFollows = stb__clex_digit_value(p[2], 16) >= 0
                || (allowFloat && p[2] == '.' && stb__clex_digit_value(p[3], 16) >= 0);
            if (hexFollows)
            {
                base = 16;
                p += 2;
            }
        }

        // Integer part.
        for (;;)
        {
            int digit = stb__clex_digit_value(*p, base);
            if (digit < 0)
                break;
            value = value*base + digit;
            gotDigits = true;
            ++p;
        }

        // Fractional part. A '.' is only consumed if there is a digit on at
        // least one side of it, so that "." alone is not a number.
        if (allowFloat && *p == '.' 
            && (gotDigits || stb__clex_digit_value(p[1], base) >= 0))
        {
            double scale = 1, addend = 0;
            ++p;
            for (;;)
            {
                int digit = stb__clex_digit_value(*p, base);
                if (digit < 0)
                    break;
                addend = addend*base + digit;
                scale *= base;
                gotDigits = true;
                ++p;
            }
            value += addend / scale;
        }

        if (!gotDigits)
        {
            // Nothing that looks like a number: report it, and like strtod,
            // give back the start of the input as end position.
            if (q) *q = start;
            if (err) *err = true;
            return 0.0;
        }

        if (allowFloat)
        {
            // The exponent is optional. A hex literal without exponent is either
            // an integer like 0x123 or a fraction like 0x1.8
            bool exponentMarker;
            if (base == 16)
                exponentMarker = (*p == 'p' || *p == 'P');
            else
                exponentMarker = (*p == 'e' || *p == 'E');

            if (exponentMarker)
            {
                // Look ahead: the marker and sign are only consumed when at least
                // one exponent digit follows, so that "5em" stops before the 'e'.
                const(char)* e = p + 1;
                bool negativeExponent = false;
                if (*e == '-')
                {
                    negativeExponent = true;
                    ++e;
                }
                else if (*e == '+')
                {
                    ++e;
                }

                if (*e >= '0' && *e <= '9')
                {
                    uint exponent = 0;
                    while (*e >= '0' && *e <= '9')
                    {
                        // Saturate instead of wrapping around: past this point
                        // the power is infinite in a double anyway.
                        if (exponent < 100_000)
                            exponent = exponent*10 + (*e - '0');
                        ++e;
                    }
                    p = e;

                    // Zero stays zero whatever the exponent; scaling it by an
                    // overflowed (infinite) power would yield NaN.
                    if (value != 0)
                    {
                        double power = stb__clex_pow(base == 16 ? 2 : 10, exponent);
                        if (negativeExponent)
                            value /= power;
                        else
                            value *= power;
                    }
                }
            }
        }
    }

    if (q) *q = p;
    if (err) *err = false; // seen no error

    if (negative)
        value = -value;

    if (!allowFloat)
    {
        // Clamp to the int range. No rounding is needed, only digits were parsed.
        if (value > int.max) value = int.max;
        if (value < int.min) value = int.min;
    }    
    return value;
}

/// Computes `base` raised to the power `exponent`, by repeated squaring.
private double stb__clex_pow(double base, uint exponent) pure nothrow @nogc
{
    double value=1;
    for ( ; exponent; exponent >>= 1) {
        if (exponent & 1)
            value *= base;
        base *= base;
    }
    return value;
}