445 lines
9.3 KiB
D
445 lines
9.3 KiB
D
module core.arsd.utf_decoding;
|
|
|
|
|
|
import core.internal.utf : decode, toUTF8;

debug (apply) import core.stdc.stdio : printf;
|
|
|
|
/**********************************************/
|
|
/* 1 argument versions */
|
|
|
|
/**
|
|
Delegate type corresponding to transformed loop body
|
|
|
|
The parameter is a pointer to the current `char`, `wchar` or `dchar`
|
|
|
|
Returns: non-zero when a `break` statement is hit
|
|
*/
|
|
// Called once per produced character by the one-argument `_aApply` functions;
// a non-zero return value stops the iteration and is passed back to the caller.
extern (D) alias dg_t = int delegate(void* c);
|
|
|
|
// Note: dg is extern(D), but the _aApplyXX1() functions below are extern(C)
|
|
|
|
/**
|
|
Loop over a string while changing the UTF encoding
|
|
|
|
There are 6 combinations of conversions between `char`, `wchar`, and `dchar`,
|
|
and 2 variants of each of those (with and without a loop index).
|
|
|
|
The naming convention is as follows:
|
|
|
|
_aApply{c,d,w}{c,d,w}{1,2}
|
|
|
|
The first letter corresponds to the input string encoding, and the second letter corresponds to the target character type.
|
|
|
|
- c = `char`
|
|
- w = `wchar`
|
|
- d = `dchar`
|
|
|
|
The `1` variant only produces the character, the `2` variant also produces a loop index.
|
|
|
|
Examples:
|
|
---
|
|
void main()
|
|
{
|
|
string str;
|
|
wstring wstr;
|
|
dstring dstr;
|
|
|
|
foreach (dchar c; str) {}
|
|
// _aApplycd1
|
|
|
|
foreach (wchar c; dstr) {}
|
|
// _aApplydw1
|
|
|
|
foreach (i, wchar c; str) {}
|
|
// _aApplycw2
|
|
|
|
foreach (wchar w; wstr) {}
|
|
// no conversion
|
|
}
|
|
---
|
|
|
|
Params:
|
|
aa = input string
|
|
dg = foreach body transformed into a delegate, similar to `opApply`
|
|
|
|
Returns:
|
|
non-zero when the loop was exited through a `break`
|
|
*/
|
|
// UTF-8 -> dchar: walks `aa` and hands every decoded code point to `dg`.
//
// Params:
//   aa = UTF-8 input
//   dg = loop body; receives the address of a local `dchar`
// Returns: 0 when the whole input was consumed, otherwise the first non-zero
//          value returned by `dg`.
extern (C) int _aApplycd1(char[] aa, dg_t dg)
{
    int result;
    size_t len = aa.length;

    // `len` is a size_t, so it has to be printed with %zu (not %d)
    debug(apply) printf("_aApplycd1(), len = %zu\n", len);
    for (size_t i = 0; i < len; )
    {
        dchar d = aa[i];
        if (d & 0x80)
            d = decode(aa, i);  // non-ASCII lead byte: decode() is relied upon to advance i
        else
            ++i;                // 7-bit ASCII is used as-is
        result = dg(cast(void *)&d);
        if (result)
            break;
    }
    return result;
}
|
|
|
|
|
|
|
|
/// ditto
|
|
// UTF-16 -> dchar: walks `aa` and hands every decoded code point to `dg`.
//
// Params:
//   aa = UTF-16 input
//   dg = loop body; receives the address of a local `dchar`
// Returns: 0 when the whole input was consumed, otherwise the first non-zero
//          value returned by `dg`.
extern (C) int _aApplywd1(wchar[] aa, dg_t dg)
{
    int result;
    size_t len = aa.length;

    // `len` is a size_t, so it has to be printed with %zu (not %d)
    debug(apply) printf("_aApplywd1(), len = %zu\n", len);
    for (size_t i = 0; i < len; )
    {
        dchar d = aa[i];
        if (d >= 0xD800)
            d = decode(aa, i);  // possible surrogate: decode() is relied upon to advance i
        else
            ++i;                // units below 0xD800 are passed through unchanged
        result = dg(cast(void *)&d);
        if (result)
            break;
    }
    return result;
}
|
|
|
|
|
|
/// ditto
|
|
// UTF-8 -> wchar: walks `aa` and hands `dg` one UTF-16 code unit at a time.
// Code points above 0xFFFF are delivered as two calls (high, then low surrogate).
//
// Params:
//   aa = UTF-8 input
//   dg = loop body; receives the address of a local `wchar`
// Returns: 0 when the whole input was consumed, otherwise the first non-zero
//          value returned by `dg`.
extern (C) int _aApplycw1(char[] aa, dg_t dg)
{
    int result;
    size_t len = aa.length;

    // `len` is a size_t, so it has to be printed with %zu (not %d)
    debug(apply) printf("_aApplycw1(), len = %zu\n", len);
    for (size_t i = 0; i < len; )
    {
        wchar w = aa[i];
        if (w & 0x80)
        {
            // non-ASCII lead byte: decode() is relied upon to advance i
            dchar d = decode(aa, i);
            if (d <= 0xFFFF)
                w = cast(wchar) d;
            else
            {
                // high surrogate is delivered here, low surrogate by the
                // common dg() call at the bottom of the loop
                w = cast(wchar)((((d - 0x10000) >> 10) & 0x3FF) + 0xD800);
                result = dg(cast(void *)&w);
                if (result)
                    break;
                w = cast(wchar)(((d - 0x10000) & 0x3FF) + 0xDC00);
            }
        }
        else
            ++i;
        result = dg(cast(void *)&w);
        if (result)
            break;
    }
    return result;
}
|
|
|
|
|
|
/// ditto
|
|
// UTF-16 -> char: walks `aa` and hands `dg` one UTF-8 code unit at a time.
// A non-ASCII character results in one `dg` call per byte of its UTF-8 form.
//
// Params:
//   aa = UTF-16 input
//   dg = loop body; receives the address of a local `char`
// Returns: 0 when the whole input was consumed, otherwise the first non-zero
//          value returned by `dg`.
extern (C) int _aApplywc1(wchar[] aa, dg_t dg)
{
    int result;
    size_t len = aa.length;

    // `len` is a size_t, so it has to be printed with %zu (not %d)
    debug(apply) printf("_aApplywc1(), len = %zu\n", len);
    for (size_t i = 0; i < len; )
    {
        wchar w = aa[i];
        if (w & ~0x7F)
        {
            // scratch space handed to toUTF8(); left uninitialised on purpose
            char[4] buf = void;

            dchar d = decode(aa, i);    // decode() is relied upon to advance i
            auto b = toUTF8(buf, d);
            foreach (char c2; b)
            {
                result = dg(cast(void *)&c2);
                if (result)
                    return result;      // leaves both loops at once
            }
        }
        else
        {
            // 7-bit ASCII is used as-is
            char c = cast(char)w;
            ++i;
            result = dg(cast(void *)&c);
            if (result)
                break;
        }
    }
    return result;
}
|
|
|
|
|
|
/// ditto
|
|
// dchar -> char: walks `aa` and hands `dg` one UTF-8 code unit at a time.
// A non-ASCII code point results in one `dg` call per byte of its UTF-8 form.
//
// Params:
//   aa = UTF-32 input
//   dg = loop body; receives the address of a local `char`
// Returns: 0 when the whole input was consumed, otherwise the first non-zero
//          value returned by `dg`.
extern (C) int _aApplydc1(dchar[] aa, dg_t dg)
{
    int result;

    // `aa.length` is a size_t, so it has to be printed with %zu (not %d)
    debug(apply) printf("_aApplydc1(), len = %zu\n", aa.length);
    foreach (dchar d; aa)
    {
        if (d & ~0x7F)
        {
            // scratch space handed to toUTF8(); left uninitialised on purpose
            char[4] buf = void;

            auto b = toUTF8(buf, d);
            foreach (char c2; b)
            {
                result = dg(cast(void *)&c2);
                if (result)
                    return result;      // leaves both loops at once
            }
        }
        else
        {
            // 7-bit ASCII is used as-is
            char c = cast(char)d;
            result = dg(cast(void *)&c);
            if (result)
                break;
        }
    }
    return result;
}
|
|
|
|
|
|
/// ditto
|
|
// dchar -> wchar: walks `aa` and hands `dg` one UTF-16 code unit at a time.
// Code points above 0xFFFF are delivered as two calls (high, then low surrogate).
//
// Params:
//   aa = UTF-32 input
//   dg = loop body; receives the address of a local `wchar`
// Returns: 0 when the whole input was consumed, otherwise the first non-zero
//          value returned by `dg`.
extern (C) int _aApplydw1(dchar[] aa, dg_t dg)
{
    int result;

    // `aa.length` is a size_t, so it has to be printed with %zu (not %d)
    debug(apply) printf("_aApplydw1(), len = %zu\n", aa.length);
    foreach (dchar d; aa)
    {
        wchar w;

        if (d <= 0xFFFF)
            w = cast(wchar) d;
        else
        {
            // high surrogate is delivered here, low surrogate by the
            // common dg() call at the bottom of the loop
            w = cast(wchar)((((d - 0x10000) >> 10) & 0x3FF) + 0xD800);
            result = dg(cast(void *)&w);
            if (result)
                break;
            w = cast(wchar)(((d - 0x10000) & 0x3FF) + 0xDC00);
        }
        result = dg(cast(void *)&w);
        if (result)
            break;
    }
    return result;
}
|
|
|
|
|
|
/****************************************************************************/
|
|
/* 2 argument versions */
|
|
|
|
/**
|
|
Delegate type corresponding to transformed loop body
|
|
|
|
Parameters are pointers to a `size_t` loop index, and the current `char`, `wchar` or `dchar`.
|
|
|
|
Returns: non-zero when a `break` statement is hit
|
|
*/
|
|
// Called once per produced character by the two-argument `_aApply` functions;
// a non-zero return value stops the iteration and is passed back to the caller.
extern (D) alias dg2_t = int delegate(void* i, void* c);
|
|
|
|
// Note: dg is extern(D), but _aApplycd2() is extern(C)
|
|
|
|
/**
|
|
Variants of _aApplyXXX that include a loop index.
|
|
*/
|
|
// UTF-8 -> dchar with index: hands `dg` every decoded code point together
// with the offset in `aa` at which its encoding starts.
//
// Params:
//   aa = UTF-8 input
//   dg = loop body; receives the address of the loop index itself and the
//        address of a local `dchar`
// Returns: 0 when the whole input was consumed, otherwise the first non-zero
//          value returned by `dg`.
extern (C) int _aApplycd2(char[] aa, dg2_t dg)
{
    int result;
    size_t len = aa.length;

    // `len` is a size_t, so it has to be printed with %zu (not %d)
    debug(apply) printf("_aApplycd2(), len = %zu\n", len);

    size_t n;   // number of source code units consumed by the current character
    for (size_t i = 0; i < len; i += n)
    {
        dchar d = aa[i];
        if (d & 0x80)
        {
            // decode on a copy of the index so that `i` still refers to the
            // start of the sequence when dg is invoked
            n = i;
            d = decode(aa, n);
            n -= i;
        }
        else
            n = 1;
        result = dg(&i, cast(void *)&d);
        if (result)
            break;
    }
    return result;
}
|
|
|
|
/// ditto
|
|
// UTF-16 -> dchar with index: hands `dg` every decoded code point together
// with the offset in `aa` at which its encoding starts.
//
// Params:
//   aa = UTF-16 input
//   dg = loop body; receives the address of the loop index itself and the
//        address of a local `dchar`
// Returns: 0 when the whole input was consumed, otherwise the first non-zero
//          value returned by `dg`.
extern (C) int _aApplywd2(wchar[] aa, dg2_t dg)
{
    int result;
    size_t len = aa.length;

    // `len` is a size_t, so it has to be printed with %zu (not %d)
    debug(apply) printf("_aApplywd2(), len = %zu\n", len);

    size_t n;   // number of source code units consumed by the current character
    for (size_t i = 0; i < len; i += n)
    {
        dchar d = aa[i];
        if (d & ~0x7F)
        {
            // decode on a copy of the index so that `i` still refers to the
            // start of the sequence when dg is invoked
            n = i;
            d = decode(aa, n);
            n -= i;
        }
        else
            n = 1;
        result = dg(&i, cast(void *)&d);
        if (result)
            break;
    }
    return result;
}
|
|
|
|
/// ditto
|
|
// UTF-8 -> wchar with index: hands `dg` one UTF-16 code unit at a time,
// together with the offset in `aa` at which the source character starts.
// Both halves of a surrogate pair are reported with the same index.
//
// Params:
//   aa = UTF-8 input
//   dg = loop body; receives the address of the loop index itself and the
//        address of a local `wchar`
// Returns: 0 when the whole input was consumed, otherwise the first non-zero
//          value returned by `dg`.
extern (C) int _aApplycw2(char[] aa, dg2_t dg)
{
    int result;
    size_t len = aa.length;

    // `len` is a size_t, so it has to be printed with %zu (not %d)
    debug(apply) printf("_aApplycw2(), len = %zu\n", len);

    size_t n;   // number of source code units consumed by the current character
    for (size_t i = 0; i < len; i += n)
    {
        wchar w = aa[i];
        if (w & 0x80)
        {
            // decode on a copy of the index so that `i` still refers to the
            // start of the sequence when dg is invoked
            n = i;
            dchar d = decode(aa, n);
            n -= i;
            if (d <= 0xFFFF)
                w = cast(wchar) d;
            else
            {
                // high surrogate is delivered here, low surrogate by the
                // common dg() call at the bottom of the loop
                w = cast(wchar) ((((d - 0x10000) >> 10) & 0x3FF) + 0xD800);
                result = dg(&i, cast(void *)&w);
                if (result)
                    break;
                w = cast(wchar) (((d - 0x10000) & 0x3FF) + 0xDC00);
            }
        }
        else
            n = 1;
        result = dg(&i, cast(void *)&w);
        if (result)
            break;
    }
    return result;
}
|
|
|
|
|
|
/// ditto
|
|
// UTF-16 -> char with index: hands `dg` one UTF-8 code unit at a time,
// together with the offset in `aa` at which the source character starts.
// All bytes produced for one character are reported with the same index.
//
// Params:
//   aa = UTF-16 input
//   dg = loop body; receives the address of the loop index itself and the
//        address of a local `char`
// Returns: 0 when the whole input was consumed, otherwise the first non-zero
//          value returned by `dg`.
extern (C) int _aApplywc2(wchar[] aa, dg2_t dg)
{
    int result;
    size_t len = aa.length;

    // `len` is a size_t, so it has to be printed with %zu (not %d)
    debug(apply) printf("_aApplywc2(), len = %zu\n", len);

    size_t n;   // number of source code units consumed by the current character
    for (size_t i = 0; i < len; i += n)
    {
        wchar w = aa[i];
        if (w & ~0x7F)
        {
            // scratch space handed to toUTF8(); left uninitialised on purpose
            char[4] buf = void;

            // decode on a copy of the index so that `i` still refers to the
            // start of the sequence when dg is invoked
            n = i;
            dchar d = decode(aa, n);
            n -= i;
            auto b = toUTF8(buf, d);
            foreach (char c2; b)
            {
                result = dg(&i, cast(void *)&c2);
                if (result)
                    return result;      // leaves both loops at once
            }
        }
        else
        {
            // 7-bit ASCII is used as-is
            char c = cast(char)w;
            n = 1;
            result = dg(&i, cast(void *)&c);
            if (result)
                break;
        }
    }
    return result;
}
|
|
|
|
|
|
/// ditto
|
|
// dchar -> char with index: hands `dg` one UTF-8 code unit at a time,
// together with the index of the source `dchar` in `aa`.
// All bytes produced for one code point are reported with the same index.
//
// Params:
//   aa = UTF-32 input
//   dg = loop body; receives the address of the loop index itself and the
//        address of a local `char`
// Returns: 0 when the whole input was consumed, otherwise the first non-zero
//          value returned by `dg`.
extern (C) int _aApplydc2(dchar[] aa, dg2_t dg)
{
    int result;
    size_t len = aa.length;

    // `len` is a size_t, so it has to be printed with %zu (not %d)
    debug(apply) printf("_aApplydc2(), len = %zu\n", len);
    for (size_t i = 0; i < len; i++)
    {
        dchar d = aa[i];
        if (d & ~0x7F)
        {
            // scratch space handed to toUTF8(); left uninitialised on purpose
            char[4] buf = void;

            auto b = toUTF8(buf, d);
            foreach (char c2; b)
            {
                result = dg(&i, cast(void *)&c2);
                if (result)
                    return result;      // leaves both loops at once
            }
        }
        else
        {
            // 7-bit ASCII is used as-is
            char c = cast(char)d;
            result = dg(&i, cast(void *)&c);
            if (result)
                break;
        }
    }
    return result;
}
|
|
|
|
|
|
/// ditto
|
|
// dchar -> wchar with index: hands `dg` one UTF-16 code unit at a time,
// together with the index of the source `dchar` in `aa`.
// Both halves of a surrogate pair are reported with the same index.
//
// Params:
//   aa = UTF-32 input
//   dg = loop body; receives the address of a copy of the loop index and the
//        address of a local `wchar`
// Returns: 0 when the whole input was consumed, otherwise the first non-zero
//          value returned by `dg`.
extern (C) int _aApplydw2(dchar[] aa, dg2_t dg)
{
    int result;

    // `aa.length` is a size_t, so it has to be printed with %zu (not %d)
    debug(apply) printf("_aApplydw2(), len = %zu\n", aa.length);
    foreach (size_t i, dchar d; aa)
    {
        wchar w;
        auto j = i;     // dg gets the address of this copy, not of the foreach index

        if (d <= 0xFFFF)
            w = cast(wchar) d;
        else
        {
            // high surrogate is delivered here, low surrogate by the
            // common dg() call at the bottom of the loop
            w = cast(wchar) ((((d - 0x10000) >> 10) & 0x3FF) + 0xD800);
            result = dg(&j, cast(void *)&w);
            if (result)
                break;
            w = cast(wchar) (((d - 0x10000) & 0x3FF) + 0xDC00);
        }
        result = dg(&j, cast(void *)&w);
        if (result)
            break;
    }
    return result;
}
|