@@ -1299,7 +1299,28 @@ - (NSString *)diff_text2:(NSMutableArray *)diffs;
12991299- (NSString  *)diff_toDelta : (NSMutableArray  *)diffs ;
13001300{
13011301  NSMutableString  *delta = [NSMutableString  string ];
1302+   UniChar lastEnd = 0 ;
13021303  for  (Diff *aDiff in diffs) {
1304+     if  (0  == [aDiff.text length ]) {
1305+         continue ;
1306+     }
1307+ 
1308+     UniChar thisTop = [aDiff.text characterAtIndex: 0 ];
1309+     UniChar thisEnd = [aDiff.text characterAtIndex: ([aDiff.text length ]-1 )];
1310+ 
1311+     if  (CFStringIsSurrogateHighCharacter (thisEnd)) {
1312+       lastEnd = thisEnd;
1313+       aDiff.text  = [aDiff.text substringToIndex: ([aDiff.text length ] - 1 )];
1314+     }
1315+ 
1316+     if  (0  != lastEnd && CFStringIsSurrogateHighCharacter (lastEnd) && CFStringIsSurrogateLowCharacter (thisTop)) {
1317+       aDiff.text  = [NSString  stringWithFormat: @" %C %@ "  , lastEnd, aDiff.text];
1318+     }
1319+ 
1320+     if  (0  == [aDiff.text length ]) {
1321+       continue ;
1322+     }
1323+ 
13031324    switch  (aDiff.operation ) {
13041325      case  DIFF_INSERT:
13051326        [delta appendFormat: @" +%@ \t "  , [[aDiff.text diff_stringByAddingPercentEscapesForEncodeUriCompatibility ]
@@ -1321,6 +1342,176 @@ - (NSString *)diff_toDelta:(NSMutableArray *)diffs;
13211342  return  delta;
13221343}
13231344
/**
 * Convert a single hexadecimal digit into its numeric value.
 *
 * @param c A UTF-16 code unit expected to be one of 0-9, A-F or a-f.
 * @return The value of the digit, in the range 0...15.
 * @throws NSException named "Invalid percent-encoded string" when `c`
 *         is not a hexadecimal digit.
 */
- (NSUInteger)diff_digit16:(unichar)c
{
    if (c >= '0' && c <= '9') {
        return (NSUInteger)(c - '0');
    }

    if (c >= 'A' && c <= 'F') {
        return (NSUInteger)(c - 'A') + 10;
    }

    if (c >= 'a' && c <= 'f') {
        return (NSUInteger)(c - 'a') + 10;
    }

    // %C (not %c) is the format specifier for a 16-bit unichar; %c would
    // truncate anything outside of ASCII and print the wrong character.
    [NSException raise:@"Invalid percent-encoded string" format:@"%C is not a hex digit", c];

    // Never reached at runtime because -raise:format: does not return, but
    // the compiler cannot know that and every path of a non-void method
    // needs a return value.
    return NSNotFound;
}
1368+ 
/**
 * Read one percent-escaped byte ("%XX") from `text`, starting at `index`.
 *
 * Performs its own bounds checking so that callers never have to rely on
 * NSRangeException to detect truncated input.
 *
 * @param text  The percent-encoded string being decoded.
 * @param index Position at which the '%' is expected.
 * @param byte  Receives the decoded value (0...255) on success.
 * @return YES when a '%' followed by two hexadecimal digits was found at
 *         `index`; NO when the escape is missing, truncated or not hex.
 */
static BOOL diff_readPercentEscapedByte(NSString *text, NSUInteger index, NSUInteger *byte)
{
    NSUInteger length = [text length];
    if (length < 3 || index > length - 3 || '%' != [text characterAtIndex:index]) {
        return NO;
    }

    NSUInteger value = 0;
    for (NSUInteger offset = 1; offset <= 2; offset++) {
        unichar c = [text characterAtIndex:(index + offset)];
        NSUInteger digit = 0;

        if (c >= '0' && c <= '9') {
            digit = (NSUInteger)(c - '0');
        } else if (c >= 'A' && c <= 'F') {
            digit = (NSUInteger)(c - 'A') + 10;
        } else if (c >= 'a' && c <= 'f') {
            digit = (NSUInteger)(c - 'a') + 10;
        } else {
            return NO;
        }

        value = (value << 4) | digit;
    }

    *byte = value;
    return YES;
}

/**
 * Decode a percent-encoded UTF-8 string into a string of UTF-16 code units.
 *
 * This is more permissive than `stringByRemovingPercentEncoding` because
 * that fails if the input represents invalid Unicode characters. However,
 * different diff-match-patch libraries may encode surrogate halves as if
 * they were valid Unicode code points. Instead of failing or corrupting the
 * output we decode them anyway; once the string is reconstructed from the
 * diffs the surrogate halves are re-joined and the text is valid Unicode
 * again.
 *
 * Characters that are not part of a percent escape are copied through
 * unchanged. Sequences of the form high-surrogate "(null)" low-surrogate
 * are repaired by dropping the "(null)".
 *
 * @param percentEncoded The percent-encoded text to decode.
 * @return The decoded string (empty for empty input), or nil when the input
 *         contains a truncated escape, a non-hex digit, an invalid UTF-8
 *         byte sequence or a four-byte sequence outside U+10000...U+10FFFF.
 */
- (NSString *)diff_decodeURIWithText:(NSString *)percentEncoded
{
    NSUInteger length = [percentEncoded length];
    if (0 == length) {
        return @"";
    }

    // Every escape consumes at least three input characters per output code
    // unit (twelve for a surrogate pair), so the output can never be longer
    // than the input. The buffer lives on the heap because the input length
    // is caller-controlled and a large delta must not overflow the stack.
    unichar *decoded = malloc(length * sizeof(unichar));
    if (NULL == decoded) {
        return nil;
    }

    NSUInteger input = 0;
    NSUInteger output = 0;
    BOOL valid = YES;

    while (valid && input < length) {
        unichar c = [percentEncoded characterAtIndex:input];

        // not special, so just copy it over
        if ('%' != c) {
            decoded[output++] = c;
            input += 1;
            continue;
        }

        NSUInteger leadByte = 0;
        if (!diff_readPercentEscapedByte(percentEncoded, input, &leadByte)) {
            valid = NO;
            break;
        }

        // The lead byte announces how many continuation bytes follow and
        // carries the most significant bits of the code point.
        NSUInteger continuationCount = 0;
        NSUInteger codePoint = 0;
        if ((leadByte & 0x80) == 0x00) {
            // 0xxx xxxx -> single byte
            continuationCount = 0;
            codePoint = leadByte;
        } else if ((leadByte & 0xE0) == 0xC0) {
            // 110x xxxx -> two bytes, 11 bits
            continuationCount = 1;
            codePoint = leadByte & 0x1F;
        } else if ((leadByte & 0xF0) == 0xE0) {
            // 1110 xxxx -> three bytes, 16 bits
            continuationCount = 2;
            codePoint = leadByte & 0x0F;
        } else if ((leadByte & 0xF8) == 0xF0) {
            // 1111 0xxx -> four bytes, 21 bits
            continuationCount = 3;
            codePoint = leadByte & 0x07;
        } else {
            // stray continuation byte or an invalid lead byte
            valid = NO;
            break;
        }

        // Continuation bytes have the bitmask 10xx xxxx and contribute
        // their low six bits each.
        for (NSUInteger k = 1; valid && k <= continuationCount; k++) {
            NSUInteger continuation = 0;
            if (!diff_readPercentEscapedByte(percentEncoded, input + (3 * k), &continuation) ||
                (continuation & 0xC0) != 0x80) {
                valid = NO;
            } else {
                codePoint = (codePoint << 6) | (continuation & 0x3F);
            }
        }

        if (!valid) {
            break;
        }

        if (continuationCount < 3) {
            // Fits in one UTF-16 code unit. Surrogate halves encoded as
            // three-byte sequences are deliberately passed through.
            decoded[output++] = (unichar)codePoint;
        } else if (codePoint >= 0x010000 && codePoint <= 0x10FFFF) {
            // Supplementary plane: split into a surrogate pair.
            codePoint -= 0x010000;
            decoded[output++] = (unichar)(0xD800 | ((codePoint >> 10) & 0x3FF));
            decoded[output++] = (unichar)(0xDC00 | (codePoint & 0x3FF));
        } else {
            valid = NO;
            break;
        }

        input += 3 * (continuationCount + 1);
    }

    NSString *result = valid ? [NSString stringWithCharacters:decoded length:output] : nil;
    free(decoded);

    if (nil == result) {
        return nil;
    }

    // some objective-c versions of the library produced patches with
    // (null) in the place where surrogates were split across diff
    // boundaries. if we leave those in we'll be stuck with a
    // high-surrogate (null) low-surrogate pattern that will break
    // deeper in the library or consuming application. we'll "fix"
    // these by dropping the (null) and re-joining the surrogate halves
    if (NSNotFound == [result rangeOfString:@"(null)"].location) {
        // nothing to repair, so skip compiling and running the regex
        return result;
    }

    NSRegularExpression *replacer = [NSRegularExpression
                                     regularExpressionWithPattern:@"([\\x{D800}-\\x{DBFF}])\\(null\\)([\\x{DC00}-\\x{DFFF}])"
                                     options:0
                                     error:nil];
    if (nil == replacer) {
        // Without the repair pattern the decoded text is still the best
        // available answer; returning nil here would report valid input
        // as malformed.
        return result;
    }

    return [replacer
            stringByReplacingMatchesInString:result
            options:0
            range:NSMakeRange(0, [result length])
            withTemplate:@"$1$2"];
}
1514+ 
13241515/* *
13251516 * Given the original text1, and an encoded NSString which describes the 
13261517 * operations required to transform text1 into text2, compute the full diff. 
@@ -1348,7 +1539,7 @@ - (NSMutableArray *)diff_fromDeltaWithText:(NSString *)text1
13481539    NSString  *param = [token substringFromIndex: 1 ];
13491540    switch  ([token characterAtIndex: 0 ]) {
13501541      case  ' +'  :
1351-         param = [param  diff_stringByReplacingPercentEscapesForEncodeUriCompatibility ];
1542+         param = [self   diff_decodeURIWithText: param ];
13521543        if  (param == nil ) {
13531544          if  (error != NULL ) {
13541545            errorDetail = [NSDictionary  dictionaryWithObjectsAndKeys: 
0 commit comments