/**
    String build code, plus no-locale float parsing functions.

    Copyright: Guillaume Piolat, 2022.
    License:   $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
*/

module dplug.core.string;

import core.stdc.stdlib;
import core.stdc.string;
import core.stdc.stdarg;
import dplug.core.vec;


nothrow @nogc:

/// Create a `String` from a D `string`.
/// Params:
///     s = Characters to copy into the new `String`.
/// Returns: A `String` holding a copy of `s`.
String makeString(const(char)[] s)
{
    return String(s);
}

/// For now, just a string builder that owns its memory.
/// Dplug `String`, designed to ease the usage of all the C string functions,
/// allow appending, etc.
/// `String` always owns its memory, and can return it as a D slice.
/// FUTURE: use realloc to be able to size down.
///         Capacity to be a slice into existing memory and not own.
///         Capacity to disown memory (implies: stop using Vec)
/// QUESTION: should String just be a managed slice!T instead? Like Go slices.
struct String
{
public:
nothrow @nogc:

    /// Builds a one-character string.
    this(char ch)
    {
        this ~= ch;
    }

    /// Builds a string holding a copy of the characters of `s`.
    this(const(char)[] s)
    {
        this ~= s;
    }

    ~this()
    {
    }

    // Copies are forbidden; characters are stored in the `_chars` member.
    @disable this(this);

    /// Sets as empty/null string.
    void makeEmpty()
    {
        _chars.clearContents();
    }

    /// Pointer to first character in the string, or `null`.
    inout(char)* ptr() inout return
    {
        return _chars.ptr;
    }

    /// Length in bytes of the string.
    size_t length() const
    {
        return _chars.length;
    }

    /// Converts to a D string, sliced into the `String` memory.
    /// Returns: `null` if the string is empty.
    inout(char)[] asSlice() inout return
    {
        size_t len = length();
        if (len == 0)
            return null;
        return _chars[0..len];
    }

    /// Returns: Whole content of the string in one slice.
    inout(char)[] opSlice() inout return
    {
        return asSlice();
    }

    /// Returns: A slice of the string, from index `i1` (included) to `i2` (excluded).
    inout(char)[] opSlice(size_t i1, size_t i2) inout
    {
        return _chars[i1 .. i2];
    }

    /// Replaces the content with a single character.
    void opAssign(T : char)(T x)
    {
        makeEmpty();
        this ~= x;
    }

    /// Replaces the content with a copy of the characters in `x`.
    void opAssign(T : const(char)[])(T x)
    {
        makeEmpty();
        this ~= x;
    }

    /// Replaces the content with a copy of the characters of another `String`.
    void opAssign(T : String)(T x)
    {
        makeEmpty();
        this ~= x;
    }

    // <Appending>

    /// Append a character to the string. This invalidates pointers to characters
    /// returned before.
    void opOpAssign(string op)(char x) if (op == "~")
    {
        _chars.pushBack(x);
    }

    /// Append several characters to the string.
    void opOpAssign(string op)(const(char)[] str) if (op == "~")
    {
        size_t len = str.length;
        for (size_t n = 0; n < len; ++n)
            _chars.pushBack(str[n]);
    }

    /// Append the content of another `String` to the string.
    void opOpAssign(string op)(ref const(String) str) if (op == "~")
    {
        this ~= str.asSlice();
    }

    /// Append a zero-terminated string to the string.
    /// Name is explicit, because it should be rare and overload conflict.
    /// A `null` pointer is accepted and appends nothing.
    void appendZeroTerminatedString(const(char)* str)
    {
        if (str is null)
            return; // nothing to append, and dereferencing null would crash

        while(*str != '\0')
            _chars.pushBack(*str++);
    }

    /// Compares content with a D slice. An empty `String` equals both `""` and `null`.
    /// `const`, so that a `const(String)` can be compared too.
    bool opEquals(const(char)[] s) const
    {
        // Array equality compares lengths, then content.
        return asSlice() == s;
    }

    /// Compares content with another `String`.
    bool opEquals(ref const(String) str) const
    {
        return this.asSlice() == str.asSlice();
    }

    // </Appending>

private:

    // FUTURE

    /*alias Flags = int;
    enum : Flags
    {
        owned          = 1, /// String data is currently owned (C's malloc/free), not borrowed.
        zeroTerminated = 2, /// String data is currently zero-terminated.
    }

    Flags _flags = 0;
    */

    Vec!char _chars;

    void clearContents()
    {
        _chars.clearContents();
    }
}

// Null and .ptr
unittest
{
    string z;
    string a = "";
    string b = null;

    assert(a == z);
    assert(b == z);
    assert(a == b);
    assert(a !is b);
    assert(a.length == 0);
    assert(b.length == 0);
    assert(a.ptr !is null);

    // Must preserve semantics from D strings.
    String Z = z;
    String A = a;
    String B = b;
    assert(A == Z);
    assert(B == Z);
    assert(A == B);
}

// Basic appending.
unittest
{
    String s = "Hello,";
    s ~= " world!";
    assert(s == "Hello, world!");
    s.makeEmpty();
    assert(s == null);
    assert(s.length == 0);
}

// Appending C strings, and comparing through a const view.
unittest
{
    String s = "abc";
    s.appendZeroTerminatedString(null); // no-op
    s.appendZeroTerminatedString("def");
    assert(s == "abcdef");

    const(String)* view = &s;
    assert(*view == "abcdef");
    assert(*view == s);
}

/// strtod replacement, but without locale
///     s Must be a zero-terminated string.
///     p If not `null`, receives the pointer where parsing stopped.
/// Returns: The parsed value, or 0.0 if the parser reported an error.
/// Note that this code is duplicated in wren-port, to avoid a dependency on dplug:core there.
public double strtod_nolocale(const(char)* s, const(char)** p)
{
    bool strtod_err = false;
    const(char)* pend;
    double r = stb__clex_parse_number_literal(s, &pend, &strtod_err, true);
    if (p)
        *p = pend;
    if (strtod_err)
        r = 0.0;
    return r;
}
unittest
{
    string[18] sPartial =
    [
        "0x123lol", "+0x1.921fb54442d18p+0001()", "0,", "-0.0,,,,",
        "0.65,stuff", "1.64587okokok", "-1.0e+9HELLO", "1.1454e-25f#STUFF",
        "+iNfu", "-infEXCESS", "infuh", "-infinity",
        "+infinity", "+nan", "-nan", "nan",
        "INFINITY", "-NAN"
    ];

    for (int n = 0; n < sPartial.length; ++n)
    {
        const(char)* p1, p2;
        double r1 = strtod(sPartial[n].ptr, &p1); // in unittest, no program tampering the C locale
        double r2 = strtod_nolocale(sPartial[n].ptr, &p2);
        //import core.stdc.stdio;
        //debug printf("parsing \"%s\" %lg %lg %p %p\n", sPartial[n].ptr, r1, r2, p1, p2);
        assert(p1 == p2);
    }
}

/// C-locale independent string to integer parsing.
/// Params:
///     s = Zero-terminated input string. A `null` pointer is reported as an error.
///     mustConsumeEntireInput = When true, any character left after the number is an error.
///     err = Optional (may be `null`). Receives `true` on failure, `false` on success.
/// Returns: The parsed integer, or 0 on error.
/// Note: unlike with `convertStringToDouble`, the string "4.7" will parse to just 4. Replaces %d in scanf-like functions.
///       Only parses correctly from -2147483648 to 2147483647.
///       Values outside of that range are clamped to it.
public int convertStringToInteger(const(char)* s,
                                  bool mustConsumeEntireInput,
                                  bool* err) pure nothrow @nogc
{
    int result = 0;
    bool failed = true;

    if (s !is null)
    {
        const(char)* stop;
        bool parseError = false;
        double parsed = stb__clex_parse_number_literal(s, &stop, &parseError, false);

        if (!parseError)
        {
            // Characters are left over when parsing stopped before the terminating zero.
            bool leftover = mustConsumeEntireInput && (stop != s + strlen(s));
            if (!leftover)
            {
                result = cast(int) parsed;

                // The parser clamps to the int range in integer mode, so the
                // conversion to int is expected to be lossless.
                double roundTrip = result;
                assert(roundTrip == parsed);
                failed = false;
            }
        }
    }

    if (err)
        *err = failed;
    return result;
}
unittest
{
    bool err;
    assert(4 == convertStringToInteger(" 4.7\n", false, &err));
    assert(!err);

    assert(-2147483648 == convertStringToInteger("-2147483649", false, &err));
    assert( 1 == convertStringToInteger("1e30", false, &err));
    assert( 0 == convertStringToInteger("-0", false, &err));
    assert( 2147483647 == convertStringToInteger("10000000000", false, &err));
}


/// C-locale independent string to float parsing.
/// Params:
///     s = Must be a zero-terminated string.
///     mustConsumeEntireInput = if true, check that s is entirely consumed by parsing the number.
///     err = optional bool, receives `true` on failure and `false` on success. May be `null`.
/// Returns: The parsed value, or 0.0 on error.
public double convertStringToDouble(const(char)* s,
                                    bool mustConsumeEntireInput,
                                    bool* err) pure nothrow @nogc
{
    if (s is null)
    {
        if (err) *err = true;
        return 0.0;
    }

    const(char)* end;
    bool strtod_err = false;
    double r = stb__clex_parse_number_literal(s, &end, &strtod_err, true);

    if (strtod_err)
    {
        if (err) *err = true;
        return 0.0;
    }

    if (mustConsumeEntireInput)
    {
        size_t len = strlen(s);
        if (end != s + len)
        {
            if (err) *err = true; // did not consume whole string
            return 0.0;
        }
    }

    if (err) *err = false; // no error
    return r;
}

unittest
{
    //import core.stdc.stdio;
    import std.math.operations;

    string[9] s = ["14", "0x123", "+0x1.921fb54442d18p+0001", "0", "-0.0", " \n\t\n\f\r 0.65", "1.64587", "-1.0e+9", "1.1454e-25"];
    double[9] correct = [14, 0x123, +0x1.921fb54442d18p+0001, 0.0, -0.0, 0.65L, 1.64587, -1e9, 1.1454e-25f];

    string[9] sPartial = ["14top", "0x123lol", "+0x1.921fb54442d18p+0001()", "0,", "-0.0,,,,", " \n\t\n\f\r 0.65,stuff", "1.64587okokok", "-1.0e+9HELLO", "1.1454e-25f#STUFF"];
    for (int n = 0; n < s.length; ++n)
    {
        /*
        // Check vs scanf
        double sa;
        if (sscanf(s[n].ptr, "%lf", &sa) == 1)
        {
            debug printf("scanf finds %lg\n", sa);
        }
        else
            debug printf("scanf no parse\n");
        */

        bool err;
        double a = convertStringToDouble(s[n].ptr, true, &err);
        //import std.stdio;
        //debug writeln(a, " correct is ", correct[n]);
        assert(!err);
        assert( isClose(a, correct[n], 0.0001) );

        bool err2;
        double b = convertStringToDouble(s[n].ptr, false, &err2);
        assert(!err2);
        assert(b == a); // same parse

        //debug printf("%lf\n", a);

        convertStringToDouble(s[n].ptr, true, null); // should run without error pointer

        // Trailing garbage: fine for a partial parse, an error when the whole input must be consumed.
        bool err3;
        double c = convertStringToDouble(sPartial[n].ptr, false, &err3);
        assert(!err3);
        assert( isClose(c, correct[n], 0.0001) );

        bool err4;
        convertStringToDouble(sPartial[n].ptr, true, &err4);
        assert(err4);
    }
}

// Whitespace and exponent edge cases.
unittest
{
    bool err;

    // Vertical tab is whitespace too.
    assert(convertStringToDouble("\v1.5", true, &err) == 1.5);
    assert(!err);

    // An exponent marker without digits does not belong to the number.
    assert(convertStringToDouble("1e", false, &err) == 1.0);
    assert(!err);
    convertStringToDouble("1e", true, &err);
    assert(err);

    // Huge exponents saturate instead of wrapping around.
    assert(convertStringToDouble("1e4294967297", true, &err) == double.infinity);
    assert(!err);
}

/// Parses a number literal at the start of a zero-terminated string, independently
/// of the C locale.
///
/// Accepted, in order: leading whitespace, an optional `+` or `-` sign, then either
/// "inf"/"infinity"/"nan" in any letter case (only when `allowFloat`), or digits with an
/// optional "0x"/"0X" prefix for hexadecimal. When `allowFloat`, a fractional part and an
/// exponent (`e`/`E` in decimal, `p`/`P` power-of-two exponent in hexadecimal) are parsed too.
///
/// Params:
///     p = Zero-terminated input.
///     q = If not `null`, receives the pointer where parsing stopped.
///     err = If not `null`, receives the error flag. This function always writes `false`.
///     allowFloat = If false, only an integer is parsed, and the result is clamped to
///                  the `int.min` .. `int.max` range.
/// Returns: The parsed value.
///
/// NOTE(review): input without any digit (eg. "" or "abc") yields 0 with no error
/// reported. This is kept as is, since callers may depend on it.
private double stb__clex_parse_number_literal(const(char)* p,
                                              const(char)**q,
                                              bool* err,
                                              bool allowFloat) pure nothrow @nogc
{
    double value=0;
    int base=10;
    bool hasExponent = false;
    int signMantissa = 1;

    // Skip leading whitespace, like scanf and strtod do.
    // This is the C `isspace` set: space, \t, \n, \v, \f, \r.
    while (true)
    {
        char ch = *p;
        if (ch == ' ' || ch == '\t' || ch == '\n' || ch == '\v' || ch == '\f' || ch == '\r')
        {
            p += 1;
        }
        else
            break;
    }


    if (*p == '-')
    {
        signMantissa = -1;
        p += 1;
    }
    else if (*p == '+')
    {
        p += 1;
    }

    // Issue #865, "-inf" was parsed as 0
    // libc can produce "infinity" as well as "inf"
    // %f specifier can produce "infinity", "inf", "nan"
    // %F specifier can produce "INFINITY", "INF", "NAN"
    // In practice, C libraries parse combination of uppercase and lowercase
    if (allowFloat)
    {
        // Note: && short-circuits, so nothing is read past the terminating zero.
        if (  (p[0] == 'i' || p[0] == 'I')
           && (p[1] == 'n' || p[1] == 'N')
           && (p[2] == 'f' || p[2] == 'F') )
        {
            value = double.infinity;
            p += 3;

            // Optional "inity" suffix, for "infinity".
            if (  (p[0] == 'i' || p[0] == 'I')
               && (p[1] == 'n' || p[1] == 'N')
               && (p[2] == 'i' || p[2] == 'I')
               && (p[3] == 't' || p[3] == 'T')
               && (p[4] == 'y' || p[4] == 'Y') )
                p += 5;

            goto found_value;
        }

        if (  (p[0] == 'n' || p[0] == 'N')
           && (p[1] == 'a' || p[1] == 'A')
           && (p[2] == 'n' || p[2] == 'N') )
        {
            value = double.nan;
            p += 3;
            goto found_value;
        }
    }

    if (*p == '0')
    {
        if (p[1] == 'x' || p[1] == 'X')
        {
            base=16;
            p += 2;
        }
    }

    // Integer part.
    for (;;)
    {
        if (*p >= '0' && *p <= '9')
            value = value*base + (*p++ - '0');
        else if (base == 16 && *p >= 'a' && *p <= 'f')
            value = value*base + 10 + (*p++ - 'a');
        else if (base == 16 && *p >= 'A' && *p <= 'F')
            value = value*base + 10 + (*p++ - 'A');
        else
            break;
    }

    if (allowFloat)
    {
        if (*p == '.')
        {
            // Fractional part: digits are accumulated as an integer, then scaled
            // down by base^(number of digits).
            double pow, addend = 0;
            ++p;
            for (pow=1; ; pow*=base)
            {
                if (*p >= '0' && *p <= '9')
                    addend = addend*base + (*p++ - '0');
                else if (base == 16 && *p >= 'a' && *p <= 'f')
                    addend = addend*base + 10 + (*p++ - 'a');
                else if (base == 16 && *p >= 'A' && *p <= 'F')
                    addend = addend*base + 10 + (*p++ - 'A');
                else
                    break;
            }
            value += addend / pow;
        }
        if (base == 16) {
            // exponent required for hex float literal, else it's an integer literal like 0x123
            hasExponent = (*p == 'p' || *p == 'P');
        } else
            hasExponent = (*p == 'e' || *p == 'E');

        if (hasExponent)
        {
            // Look ahead without consuming: marker, optional sign, then at least one
            // digit. Else the marker is not part of the number ("1e", "2em"), which
            // is what strtod does too.
            const(char)* e = p + 1;
            bool negativeExponent = (*e == '-');
            if (*e == '-' || *e == '+')
                ++e;

            if (*e >= '0' && *e <= '9')
            {
                uint exponent2 = 0;
                while (*e >= '0' && *e <= '9')
                {
                    // Saturate instead of wrapping around: past this magnitude the
                    // power is infinite anyway.
                    if (exponent2 < 100_000)
                        exponent2 = exponent2*10 + (*e - '0');
                    ++e;
                }

                double power = (base == 16) ? stb__clex_pow(2, exponent2)
                                            : stb__clex_pow(10, exponent2);
                if (negativeExponent)
                    value /= power;
                else
                    value *= power;

                p = e; // exponent is valid, consume it
            }
        }
    }

found_value:

    if (q) *q = p;
    if (err) *err = false; // seen no error

    if (signMantissa < 0)
        value = -value;

    if (!allowFloat)
    {
        // clamp to the int range
        if (value > int.max) value = int.max;
        if (value < int.min) value = int.min;
    }
    return value;
}

/// Returns: `base` raised to the power `exponent`, computed by repeated squaring.
private double stb__clex_pow(double base, uint exponent) pure nothrow @nogc
{
    double value=1;
    for ( ; exponent; exponent >>= 1) {
        if (exponent & 1)
            value *= base;
        base *= base;
    }
    return value;
}