/**
    String build code, plus no-locale float parsing functions.

    Copyright: Guillaume Piolat, 2022.
    License:   $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
*/

module dplug.core.string;

import core.stdc.stdlib;
import core.stdc.string;
import core.stdc.stdarg;
import dplug.core.vec;


nothrow @nogc:

/// Create a `String` from a D `string`.
String makeString(const(char)[] s)
{
    return String(s);
}

/// For now, just a string builder that owns its memory.
/// Dplug `String`, designed to ease the usage of all the C string functions,
/// allow appending, etc.
/// `String` always owns its memory, and can return it as a D slice.
/// FUTURE: use realloc to be able to size down.
///         Capacity to be a slice into existing memory and not own.
///         Capacity to disown memory (implies: stop using Vec)
/// QUESTION: should String just be a managed slice!T instead? Like Go slices.
struct String
{
public:
nothrow @nogc:

    /// Builds a string holding the single character `ch`.
    this(char ch)
    {
        this ~= ch;
    }

    /// Builds a string holding the characters of `s`.
    this(const(char)[] s)
    {
        this ~= s;
    }

    ~this()
    {
    }

    @disable this(this);

    /// Sets as empty/null string.
    void makeEmpty()
    {
        _chars.clearContents();
    }

    /// Pointer to first character in the string, or `null`.
    inout(char)* ptr() inout return
    {
        return _chars.ptr;
    }

    /// Length in bytes of the string.
    size_t length() const
    {
        return _chars.length;
    }

    /// Converts to a D string, sliced into the `String` memory.
    /// Returns: `null` when the string is empty.
    inout(char)[] asSlice() inout return
    {
        size_t len = length();
        if (len == 0)
            return null;
        return _chars[0..len];
    }

    /// Returns: Whole content of the string in one slice.
    inout(char)[] opSlice() inout return
    {
        return asSlice();
    }

    /// Returns: A slice of the array, from index `i1` (included) to `i2` (excluded).
    inout(char)[] opSlice(size_t i1, size_t i2) inout
    {
        return _chars[i1 .. i2];
    }

    /// Replaces the content with the single character `x`.
    void opAssign(T : char)(T x)
    {
        makeEmpty();
        this ~= x;
    }

    /// Replaces the content with the characters of `x`.
    void opAssign(T : const(char)[])(T x)
    {
        makeEmpty();
        this ~= x;
    }

    /// Replaces the content with the characters of another `String`.
    void opAssign(T : String)(T x)
    {
        makeEmpty();
        this ~= x;
    }

    // <Appending>

    /// Append a character to the string. This invalidates pointers to characters
    /// returned before.
    void opOpAssign(string op)(char x) if (op == "~")
    {
        _chars.pushBack(x);
    }

    /// Append characters to the string.
    void opOpAssign(string op)(const(char)[] str) if (op == "~")
    {
        size_t len = str.length;
        for (size_t n = 0; n < len; ++n)
            _chars.pushBack(str[n]);
    }

    /// Append the characters of another `String` to the string.
    void opOpAssign(string op)(ref const(String) str) if (op == "~")
    {
        this ~= str.asSlice();
    }

    /// Append a zero-terminated string to the string.
    /// Name is explicit, because it should be rare and overload conflict.
    /// A `null` pointer is treated as an empty string.
    void appendZeroTerminatedString(const(char)* str)
    {
        if (str is null)
            return; // nothing to append, and dereferencing null would crash
        while(*str != '\0')
            _chars.pushBack(*str++);
    }

    // </Appending>

    // <Comparison>

    /// Compares the content with a D slice, character by character.
    /// `const` so that a `const String` can be compared too.
    bool opEquals(const(char)[] s) const
    {
        return asSlice() == s;
    }

    /// Compares the content with another `String`, character by character.
    bool opEquals(ref const(String) str) const
    {
        return asSlice() == str.asSlice();
    }

    // </Comparison>

private:

    // FUTURE

    /*alias Flags = int;
    enum : Flags
    {
        owned          = 1, /// String data is currently owned (C's malloc/free), not borrowed.
        zeroTerminated = 2, /// String data is currently zero-terminated.
    }

    Flags _flags = 0;
    */

    Vec!char _chars;

    void clearContents()
    {
        _chars.clearContents();
    }
}

// Null and .ptr
unittest
{
    string z;
    string a = "";
    string b = null;

    assert(a == z);
    assert(b == z);
    assert(a == b);
    assert(a !is b);
    assert(a.length == 0);
    assert(b.length == 0);
    assert(a.ptr !is null);

    // Must preserve semantics from D strings.
    String Z = z;
    String A = a;
    String B = b;
    assert(A == Z);
    assert(B == Z);
    assert(A == B);
}

// Basic appending.
unittest
{
    String s = "Hello,";
    s ~= " world!";
    assert(s == "Hello, world!");
    s.makeEmpty();
    assert(s == null);
    assert(s.length == 0);
}

// Zero-terminated appending and const comparison.
unittest
{
    String s = "abc";
    s.appendZeroTerminatedString(null); // no-op
    s.appendZeroTerminatedString("def");
    assert(s == "abcdef");

    const(String)* cs = &s;
    assert(*cs == "abcdef");
    assert(*cs == s);
}

/// strtod replacement, but without locale
///     s Must be a zero-terminated string.
///     p Optional; when non-null, receives the pointer where parsing stopped.
/// Note that this code is duplicated in wren-port, to avoid a dependency on dplug:core there.
public double strtod_nolocale(const(char)* s, const(char)** p)
{
    bool strtod_err = false;
    const(char)* pend;
    double r = stb__clex_parse_number_literal(s, &pend, &strtod_err, true);
    if (p)
        *p = pend;
    if (strtod_err)
        r = 0.0;
    return r;
}
unittest
{
    string[8] sPartial = ["0x123lol", "+0x1.921fb54442d18p+0001()", "0,", "-0.0,,,,", "0.65,stuff", "1.64587okokok", "-1.0e+9HELLO", "1.1454e-25f#STUFF"];
    for (int n = 0; n < 8; ++n)
    {
        const(char)* p1, p2;
        double r1 = strtod(sPartial[n].ptr, &p1); // in unittest, no program tampering the C locale
        double r2 = strtod_nolocale(sPartial[n].ptr, &p2);
        //import core.stdc.stdio;
        //debug printf("parsing \"%s\" %lg %lg %p %p\n", sPartial[n].ptr, r1, r2, p1, p2);
        assert(p1 == p2);
    }
}

/// C-locale independent string to integer parsing.
/// Params:
///     s Must be a zero-terminated string.
///     mustConsumeEntireInput if true, check that s is entirely consumed by parsing the number.
///     err: optional bool
/// Note: unlike with `convertStringToDouble`, the string "4.7" will parse to just 4. Replaces %d in scanf-like functions.
///       Only parse correctly from -2147483648 to 2147483647.
///       Larger values are clamped to this -2147483648 to 2147483647 range.
/// Returns: the parsed integer, or 0 when the input is rejected (in which case
///          `*err` is set to `true` if `err` is non-null).
public int convertStringToInteger(const(char)* s,
                                  bool mustConsumeEntireInput,
                                  bool* err) pure nothrow @nogc
{
    // Work out whether the input is rejected before reporting anything.
    bool rejected = (s is null);
    double parsed = 0;

    if (!rejected)
    {
        const(char)* stop;
        bool parseFailed = false;
        parsed = stb__clex_parse_number_literal(s, &stop, &parseFailed, false);

        // Either the parser complained, or trailing characters remain
        // although the caller asked for the whole input to be a number.
        rejected = parseFailed
                || (mustConsumeEntireInput && stop != s + strlen(s));
    }

    if (rejected)
    {
        if (err) *err = true;
        return 0;
    }

    if (err) *err = false; // no error

    // The parser clamps to the int range when floats are disallowed, so the
    // value must survive a round-trip through int.
    immutable int asInt = cast(int) parsed;
    assert(asInt == parsed);
    return asInt;
}
unittest
{
    bool err;
    assert(4 == convertStringToInteger(" 4.7\n", false, &err));
    assert(!err);

    assert(-2147483648 == convertStringToInteger("-2147483649", false, &err));
    assert(          1 == convertStringToInteger("1e30", false, &err));
    assert(          0 == convertStringToInteger("-0", false, &err));
    assert( 2147483647 == convertStringToInteger("10000000000", false, &err));
}


/// C-locale independent string to float parsing.
/// Params:
///     s Must be a zero-terminated string.
///     mustConsumeEntireInput if true, check that s is entirely consumed by parsing the number.
///     err: optional bool
/// Returns: the parsed value, or 0.0 when the input is rejected (in which case
///          `*err` is set to `true` if `err` is non-null).
public double convertStringToDouble(const(char)* s,
                                    bool mustConsumeEntireInput,
                                    bool* err) pure nothrow @nogc
{
    if (s is null)
    {
        if (err) *err = true;
        return 0.0;
    }

    const(char)* end;
    bool strtod_err = false;
    double r = stb__clex_parse_number_literal(s, &end, &strtod_err, true);

    if (strtod_err)
    {
        if (err) *err = true;
        return 0.0;
    }

    if (mustConsumeEntireInput)
    {
        size_t len = strlen(s);
        if (end != s + len)
        {
            if (err) *err = true; // did not consume whole string
            return 0.0;
        }
    }

    if (err) *err = false; // no error
    return r;
}

unittest
{
    //import core.stdc.stdio;
    import std.math.operations;

    string[9] s = ["14", "0x123", "+0x1.921fb54442d18p+0001", "0", "-0.0", " \n\t\n\f\r 0.65", "1.64587", "-1.0e+9", "1.1454e-25"];
    double[9] correct = [14, 0x123, +0x1.921fb54442d18p+0001, 0.0, -0.0, 0.65L, 1.64587, -1e9, 1.1454e-25f];

    string[9] sPartial = ["14top", "0x123lol", "+0x1.921fb54442d18p+0001()", "0,", "-0.0,,,,", " \n\t\n\f\r 0.65,stuff", "1.64587okokok", "-1.0e+9HELLO", "1.1454e-25f#STUFF"];
    for (int n = 0; n < s.length; ++n)
    {
        /*
        // Check vs scanf
        double sa;
        if (sscanf(s[n].ptr, "%lf", &sa) == 1)
        {
            debug printf("scanf finds %lg\n", sa);
        }
        else
            debug printf("scanf no parse\n");
        */

        bool err;
        double a = convertStringToDouble(s[n].ptr, true, &err);
        //import std.stdio;
        //debug writeln(a, " correct is ", correct[n]);
        assert(!err);
        assert( isClose(a, correct[n], 0.0001) );

        // Same number followed by garbage: a non-strict parse must yield the same value.
        bool err2;
        double b = convertStringToDouble(sPartial[n].ptr, false, &err2);
        assert(!err2);
        assert(b == a); // same parse

        //debug printf("%lf\n", a);

        convertStringToDouble(s[n].ptr, true, null); // should run without error pointer
    }
}

/// Parses a decimal or hexadecimal (`0x`/`0X` prefixed) number literal.
/// Params:
///     p          Zero-terminated input. Leading whitespace and one optional sign are skipped.
///     q          Optional; when non-null, receives the pointer where parsing stopped.
///     err        Optional; when non-null, always receives `false`: this parser
///                reports no error, and input without any digit yields 0.
///     allowFloat If true, also parse a fractional part and an exponent
///                (`e`/`E` in decimal, `p`/`P` in hexadecimal). If false, the
///                result is clamped to the `int.min` .. `int.max` range.
/// Returns: the parsed value.
private double stb__clex_parse_number_literal(const(char)* p,
                                              const(char)**q,
                                              bool* err,
                                              bool allowFloat) pure nothrow @nogc
{
    double value=0;
    int base=10;
    bool hasExponent = false;
    int signMantissa = 1;

    // Skip leading whitespace, like scanf and strtod do
    // (same character set as C's isspace, vertical tab included).
    while (true)
    {
        char ch = *p;
        if (ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n' || ch == '\f' || ch == '\v')
        {
            p += 1;
        }
        else
            break;
    }


    if (*p == '-')
    {
        signMantissa = -1;
        p += 1;
    }
    else if (*p == '+')
    {
        p += 1;
    }

    if (*p == '0')
    {
        if (p[1] == 'x' || p[1] == 'X')
        {
            base=16;
            p += 2;
        }
    }

    // Integer part.
    for (;;)
    {
        if (*p >= '0' && *p <= '9')
            value = value*base + (*p++ - '0');
        else if (base == 16 && *p >= 'a' && *p <= 'f')
            value = value*base + 10 + (*p++ - 'a');
        else if (base == 16 && *p >= 'A' && *p <= 'F')
            value = value*base + 10 + (*p++ - 'A');
        else
            break;
    }

    if (allowFloat)
    {
        if (*p == '.')
        {
            // Fractional part: accumulate the digits as an integer and divide
            // once by base^(number of digits).
            double scale, addend = 0;
            ++p;
            for (scale=1; ; scale*=base)
            {
                if (*p >= '0' && *p <= '9')
                    addend = addend*base + (*p++ - '0');
                else if (base == 16 && *p >= 'a' && *p <= 'f')
                    addend = addend*base + 10 + (*p++ - 'a');
                else if (base == 16 && *p >= 'A' && *p <= 'F')
                    addend = addend*base + 10 + (*p++ - 'A');
                else
                    break;
            }
            value += addend / scale;
        }

        bool isMarker;
        if (base == 16) {
            // exponent required for hex float literal, else it's an integer literal like 0x123
            isMarker = (*p == 'p' || *p == 'P');
        } else
            isMarker = (*p == 'e' || *p == 'E');

        if (isMarker)
        {
            // Like strtod, only accept the exponent when at least one digit
            // follows the marker (optionally after a sign). Otherwise the
            // marker is left unconsumed, so "1.5em" stops at 'e'.
            const(char)* digits = p + 1;
            if (*digits == '-' || *digits == '+')
                ++digits;
            hasExponent = (*digits >= '0' && *digits <= '9');
        }

        if (hasExponent)
        {
            int sign = p[1] == '-';
            uint exponent2 = 0;
            double power=1;
            ++p;
            if (*p == '-' || *p == '+')
                ++p;
            while (*p >= '0' && *p <= '9')
                exponent2 = exponent2*10 + (*p++ - '0');

            // Hex floats use a power-of-two exponent, decimal ones a power of ten.
            if (base == 16)
                power = stb__clex_pow(2, exponent2);
            else
                power = stb__clex_pow(10, exponent2);
            if (sign)
                value /= power;
            else
                value *= power;
        }
    }

    if (q) *q = p;
    if (err) *err = false; // seen no error

    if (signMantissa < 0)
        value = -value;

    if (!allowFloat)
    {
        // clamp to the int range
        if (value > int.max) value = int.max;
        if (value < int.min) value = int.min;
    }
    return value;
}

unittest
{
    bool err;

    // Vertical tab counts as leading whitespace.
    assert(convertStringToDouble("\v\t 2.5", true, &err) == 2.5);
    assert(!err);

    // A dangling exponent marker is not part of the number.
    string dangling = "1.5em";
    const(char)* end;
    assert(stb__clex_parse_number_literal(dangling.ptr, &end, null, true) == 1.5);
    assert(end == dangling.ptr + 3);

    convertStringToDouble("1.5e", true, &err);
    assert(err); // 'e' left unconsumed
}

/// Computes `base` raised to the non-negative integer power `exponent`,
/// by repeated squaring.
private double stb__clex_pow(double base, uint exponent) pure nothrow @nogc
{
    double value=1;
    for ( ; exponent; exponent >>= 1) {
        if (exponent & 1)
            value *= base;
        base *= base;
    }
    return value;
}