diff --git a/lib/.index.js.swp b/lib/.index.js.swp new file mode 100644 index 0000000..757cbd6 Binary files /dev/null and b/lib/.index.js.swp differ diff --git a/lib/index.js b/lib/index.js index 836b1c6..f42eff8 100644 --- a/lib/index.js +++ b/lib/index.js @@ -72,12 +72,54 @@ exports.encodePacket = function (packet, supportsBinary, utf8encode, callback) { // data fragment is optional if (undefined !== packet.data) { - encoded += utf8encode ? utf8.encode(String(packet.data)) : String(packet.data); + if (utf8encode) { + encoded += utf8.encode(sanitizeLoneSurrogates(String(packet.data))); + } else { + encoded += String(packet.data); + } } return callback('' + encoded); }; +/** + * Sanitize a WTF-8 string, replacing lone surrogates with + * U+FFFD 'REPLACEMENT CHARACTER' + */ +function sanitizeLoneSurrogates(str) { + var i = 0; + var output = ''; + while (i < str.length) { + var code = str.charCodeAt(i); + if (code >= 0xd800 && code <= 0xdbff) { + // High surrogate -- expect next char to be a low surrogate + if (i + 1 < str.length) { + var nextCode = str.charCodeAt(i + 1); + if (nextCode >= 0xdc00 && nextCode <= 0xdfff) { + // Valid surrogate pair + output += str[i]; + output += str[i + 1]; + i++; + } else { + // High surrogate was not followed by a low surrogate + output += '\ufffd'; + } + } else { + // High surrogate was at the end of a string (and therefore + // not followed by a low surrogate) + output += '\ufffd'; + } + } else if (code >= 0xdc00 && code <= 0xdfff) { + // Low surrogate by itself -- invalid + output += '\ufffd'; + } else { + output += str[i]; + } + i++; + } + return output; +} + /** * Encode Buffer data */ diff --git a/test/node/index.js b/test/node/index.js index 84744d5..cd692f4 100644 --- a/test/node/index.js +++ b/test/node/index.js @@ -25,7 +25,7 @@ describe('parser', function() { var data = new Buffer(5); for (var i = 0; i < data.length; i++) { data[i] = i; } encode({ type: 'message', data: data }, function(encoded) { - expect(decode(encoded)).to.eql({ type: 'message', data: data }); + expect(decode(encoded)).to.eql({ type: 'message', data: data }); done(); }); }); @@ -130,6 +130,22 @@ describe('parser', function() { }); }); + it('should encode a string message with lone surrogates replaced', function(done) { + var data = String.fromCharCode(0xd800); + encode({ type: 'message', data: data }, null, true, function(encoded) { + expect(decode(encoded, null, true)).to.eql({ type: 'message', data: '\ufffd' }); + done(); + }); + }); + + it('should encode a string message with valid surrogate pairs', function(done) { + var data = String.fromCharCode(0xd800) + String.fromCharCode(0xdc00); + encode({ type: 'message', data: data }, null, true, function(encoded) { + expect(decode(encoded, null, true)).to.eql({ type: 'message', data: data }); + done(); + }); + }); + }); function areArraysEqual(x, y) {