Commit 1761a222 authored by Michael Droettboom

Simplify handling of non-BMP characters

parent 46f64b5c
...@@ -79,20 +79,20 @@ EM_JS(int, __js2python, (int id), { ...@@ -79,20 +79,20 @@ EM_JS(int, __js2python, (int id), {
// to determine if it needs to be a 1-, 2- or 4-byte string, since // to determine if it needs to be a 1-, 2- or 4-byte string, since
// Python handles all 3. // Python handles all 3.
var max_code_point = 0; var max_code_point = 0;
var length = value.length;
for (var i = 0; i < value.length; i++) { for (var i = 0; i < value.length; i++) {
code_point = value.codePointAt(i); code_point = value.codePointAt(i);
max_code_point = Math.max(max_code_point, code_point); max_code_point = Math.max(max_code_point, code_point);
if (max_code_point > 0xffff) { if (code_point > 0xffff) {
// If we're dealing with UTF-16 surrogate pairs, convert the string // If we have a code point requiring UTF-16 surrogate pairs, the
// to an array of each of its characters, so we correctly count the // number of characters (codePoints) is less than value.length,
// number of characters. // so skip the next charCode and subtract 1 from the length.
value = Array.from(value[Symbol.iterator]()); i++;
// We can short circuit here -- we already know we need a 4-byte output. length--;
break;
} }
} }
var result = __js2python_allocate_string(value.length, max_code_point); var result = __js2python_allocate_string(length, max_code_point);
if (result == 0) { if (result == 0) {
return 0; return 0;
} }
...@@ -100,16 +100,20 @@ EM_JS(int, __js2python, (int id), { ...@@ -100,16 +100,20 @@ EM_JS(int, __js2python, (int id), {
var ptr = __js2python_get_ptr(result); var ptr = __js2python_get_ptr(result);
if (max_code_point > 0xffff) { if (max_code_point > 0xffff) {
ptr = ptr / 4; ptr = ptr / 4;
for (var i = 0; i < value.length; i++) { for (var i = 0, j = 0; j < length; i++, j++) {
Module.HEAPU32[ptr + i] = value[i].codePointAt(0); var code_point = value.codePointAt(i);
Module.HEAPU32[ptr + j] = code_point;
if (code_point > 0xffff) {
i++;
}
} }
} else if (max_code_point > 0xff) { } else if (max_code_point > 0xff) {
ptr = ptr / 2; ptr = ptr / 2;
for (var i = 0; i < value.length; i++) { for (var i = 0; i < length; i++) {
Module.HEAPU16[ptr + i] = value.codePointAt(i); Module.HEAPU16[ptr + i] = value.codePointAt(i);
} }
} else { } else {
for (var i = 0; i < value.length; i++) { for (var i = 0; i < length; i++) {
Module.HEAPU8[ptr + i] = value.codePointAt(i); Module.HEAPU8[ptr + i] = value.codePointAt(i);
} }
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment