You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
127 lines
3.5 KiB
127 lines
3.5 KiB
var ucs2 = require('./ucs2'); |
|
|
|
exports = { |
|
encode: function(str) { |
|
var codePoints = ucs2.decode(str); |
|
var byteArr = ''; |
|
for (var i = 0, len = codePoints.length; i < len; i++) { |
|
byteArr += encodeCodePoint(codePoints[i]); |
|
} |
|
return byteArr; |
|
}, |
|
decode: function(str, safe) { |
|
byteArr = ucs2.decode(str); |
|
byteIdx = 0; |
|
byteCount = byteArr.length; |
|
codePoint = 0; |
|
bytesSeen = 0; |
|
bytesNeeded = 0; |
|
lowerBoundary = 0x80; |
|
upperBoundary = 0xbf; |
|
var codePoints = []; |
|
var tmp; |
|
while ((tmp = decodeCodePoint(safe)) !== false) { |
|
codePoints.push(tmp); |
|
} |
|
return ucs2.encode(codePoints); |
|
} |
|
}; |
|
var fromCharCode = String.fromCharCode; |
|
function encodeCodePoint(codePoint) { |
|
if ((codePoint & 0xffffff80) === 0) { |
|
return fromCharCode(codePoint); |
|
} |
|
var ret = '', |
|
count, |
|
offset; |
|
|
|
if ((codePoint & 0xfffff800) === 0) { |
|
count = 1; |
|
offset = 0xc0; |
|
} else if ((codePoint & 0xffff0000) === 0) { |
|
count = 2; |
|
offset = 0xe0; |
|
} else if ((codePoint & 0xffe00000) == 0) { |
|
count = 3; |
|
offset = 0xf0; |
|
} |
|
ret += fromCharCode((codePoint >> (6 * count)) + offset); |
|
while (count > 0) { |
|
var tmp = codePoint >> (6 * (count - 1)); |
|
ret += fromCharCode(0x80 | (tmp & 0x3f)); |
|
count--; |
|
} |
|
return ret; |
|
} |
|
var byteArr, |
|
byteIdx, |
|
byteCount, |
|
codePoint, |
|
bytesSeen, |
|
bytesNeeded, |
|
lowerBoundary, |
|
upperBoundary; |
|
function decodeCodePoint(safe) { |
|
while (true) { |
|
if (byteIdx >= byteCount && bytesNeeded) { |
|
if (safe) return goBack(); |
|
throw new Error('Invalid byte index'); |
|
} |
|
if (byteIdx === byteCount) return false; |
|
var byte = byteArr[byteIdx]; |
|
byteIdx++; |
|
if (!bytesNeeded) { |
|
if ((byte & 0x80) === 0) { |
|
return byte; |
|
} |
|
|
|
if ((byte & 0xe0) === 0xc0) { |
|
bytesNeeded = 1; |
|
codePoint = byte & 0x1f; |
|
} else if ((byte & 0xf0) === 0xe0) { |
|
if (byte === 0xe0) lowerBoundary = 0xa0; |
|
if (byte === 0xed) upperBoundary = 0x9f; |
|
bytesNeeded = 2; |
|
codePoint = byte & 0xf; |
|
} else if ((byte & 0xf8) === 0xf0) { |
|
if (byte === 0xf0) lowerBoundary = 0x90; |
|
if (byte === 0xf4) upperBoundary = 0x8f; |
|
bytesNeeded = 3; |
|
codePoint = byte & 0x7; |
|
} else { |
|
if (safe) return goBack(); |
|
throw new Error('Invalid UTF-8 detected'); |
|
} |
|
continue; |
|
} |
|
if (byte < lowerBoundary || byte > upperBoundary) { |
|
if (safe) { |
|
byteIdx--; |
|
return goBack(); |
|
} |
|
throw new Error('Invalid continuation byte'); |
|
} |
|
lowerBoundary = 0x80; |
|
upperBoundary = 0xbf; |
|
codePoint = (codePoint << 6) | (byte & 0x3f); |
|
bytesSeen++; |
|
if (bytesSeen !== bytesNeeded) continue; |
|
var tmp = codePoint; |
|
codePoint = 0; |
|
bytesNeeded = 0; |
|
bytesSeen = 0; |
|
return tmp; |
|
} |
|
} |
|
function goBack() { |
|
var start = byteIdx - bytesSeen - 1; |
|
byteIdx = start + 1; |
|
codePoint = 0; |
|
bytesNeeded = 0; |
|
bytesSeen = 0; |
|
lowerBoundary = 0x80; |
|
upperBoundary = 0xbf; |
|
return byteArr[start]; |
|
} |
|
|
|
module.exports = exports;
|
|
|