@@ -88,6 +88,35 @@ diff_match_patch.Diff.prototype.toString = function() {
8888  return  this [ 0 ]  +  ','  +  this [ 1 ] ; 
8989} ; 
9090
/**
 * Determine whether the first UTF-16 code unit of a string is a high
 * (leading) surrogate.
 * @param {string} c String whose first code unit is examined.
 * @return {boolean} True if the first code unit lies in 0xD800-0xDBFF.
 *     False for an empty string or a null/undefined argument.
 */
diff_match_patch.prototype.isHighSurrogate = function(c) {
  // Indexing a string one position past its end yields undefined; report
  // "not a surrogate" for that instead of throwing on charCodeAt.
  if (c === undefined || c === null) {
    return false;
  }
  // charCodeAt(0) of '' is NaN, which fails both comparisons below.
  var v = c.charCodeAt(0);
  return v >= 0xD800 && v <= 0xDBFF;
};
95+ 
/**
 * Determine whether the first UTF-16 code unit of a string is a low
 * (trailing) surrogate.
 * @param {string} c String whose first code unit is examined.
 * @return {boolean} True if the first code unit lies in 0xDC00-0xDFFF.
 *     False for an empty string or a null/undefined argument.
 */
diff_match_patch.prototype.isLowSurrogate = function(c) {
  // Indexing a string one position past its end yields undefined; report
  // "not a surrogate" for that instead of throwing on charCodeAt.
  if (c === undefined || c === null) {
    return false;
  }
  // charCodeAt(0) of '' is NaN, which fails both comparisons below.
  var v = c.charCodeAt(0);
  return v >= 0xDC00 && v <= 0xDFFF;
};
100+ 
/**
 * Split a string into an array of substrings, one per element, where a
 * well-formed surrogate pair (high surrogate immediately followed by a low
 * surrogate) is kept together as a single two-code-unit element.
 * @param {string} str Text to split.
 * @return {!Array.<string>} Elements in original order. Unpaired surrogates
 *     are passed through unaltered as single-code-unit elements.
 */
diff_match_patch.prototype.scalarValues = function(str) {
  var length = str.length;
  var scalars = [];

  for (var i = 0; i < length; i++) {
    var scalar = str[i];

    // A high surrogate followed by a low surrogate forms one element; the
    // bounds check keeps the look-ahead inside the string.
    if (i < length - 1 && this.isHighSurrogate(scalar) &&
        this.isLowSurrogate(str[i + 1])) {
      scalar += str[i + 1];
      // Skip the low surrogate that was just consumed.
      i++;
    }

    scalars.push(scalar);
  }

  return scalars;
};
91120
92121/** 
93122 * Find the differences between two texts.  Simplifies the problem by stripping 
@@ -134,12 +163,18 @@ diff_match_patch.prototype.diff_main = function(text1, text2, opt_checklines,
134163
135164  // Trim off common prefix (speedup). 
136165  var  commonlength  =  this . diff_commonPrefix ( text1 ,  text2 ) ; 
166+   if  ( commonlength  >  0  &&  this . isHighSurrogate ( text1 [ commonlength  -  1 ] ) )  { 
167+     commonlength -- ; 
168+   } 
137169  var  commonprefix  =  text1 . substring ( 0 ,  commonlength ) ; 
138170  text1  =  text1 . substring ( commonlength ) ; 
139171  text2  =  text2 . substring ( commonlength ) ; 
140172
141173  // Trim off common suffix (speedup). 
142174  commonlength  =  this . diff_commonSuffix ( text1 ,  text2 ) ; 
175+   if  ( commonlength  >  0  &&  this . isLowSurrogate ( text1 [ text1 . length  -  commonlength ] ) )  { 
176+     commonlength -- ; 
177+   } 
143178  var  commonsuffix  =  text1 . substring ( text1 . length  -  commonlength ) ; 
144179  text1  =  text1 . substring ( 0 ,  text1 . length  -  commonlength ) ; 
145180  text2  =  text2 . substring ( 0 ,  text2 . length  -  commonlength ) ; 
@@ -187,13 +222,23 @@ diff_match_patch.prototype.diff_compute_ = function(text1, text2, checklines,
187222
188223  var  longtext  =  text1 . length  >  text2 . length  ? text1  : text2 ; 
189224  var  shorttext  =  text1 . length  >  text2 . length  ? text2  : text1 ; 
225+   var  shortlength  =  shorttext . length ; 
190226  var  i  =  longtext . indexOf ( shorttext ) ; 
191227  if  ( i  !=  - 1 )  { 
228+     // skip leading unpaired surrogate 
229+     if  ( this . isLowSurrogate ( longtext [ i ] ) )  { 
230+       shortlength -- ; 
231+       i ++ ; 
232+     } 
233+     // skip trailing unpaired surrogate 
234+     if  ( this . isHighSurrogate ( longtext [ i  +  shortlength ] ) )  { 
235+       shortlength -- ; 
236+     } 
192237    // Shorter text is inside the longer text (speedup). 
193238    diffs  =  [ new  diff_match_patch . Diff ( DIFF_INSERT ,  longtext . substring ( 0 ,  i ) ) , 
194239             new  diff_match_patch . Diff ( DIFF_EQUAL ,  shorttext ) , 
195240             new  diff_match_patch . Diff ( DIFF_INSERT , 
196-                  longtext . substring ( i  +  shorttext . length ) ) ] ; 
241+                  longtext . substring ( i  +  shortlength ) ) ] ; 
197242    // Swap insertions for deletions if diff is reversed. 
198243    if  ( text1 . length  >  text2 . length )  { 
199244      diffs [ 0 ] [ 0 ]  =  diffs [ 2 ] [ 0 ]  =  DIFF_DELETE ; 
@@ -439,6 +484,15 @@ diff_match_patch.prototype.diff_bisect_ = function(text1, text2, deadline) {
439484 */ 
440485diff_match_patch . prototype . diff_bisectSplit_  =  function ( text1 ,  text2 ,  x ,  y , 
441486    deadline )  { 
487+   // backup if we split a surrogate 
488+   if  ( 
489+       x  >  0  &&  x  <  text1 . length  &&  this . isLowSurrogate ( text1 [ x ] )  && 
490+       y  >  0  &&  y  <  text2 . length  &&  this . isLowSurrogate ( text2 [ y ] ) 
491+   )  { 
492+     x -- ; 
493+     y -- ; 
494+   } 
495+ 
442496  var  text1a  =  text1 . substring ( 0 ,  x ) ; 
443497  var  text2a  =  text2 . substring ( 0 ,  y ) ; 
444498  var  text1b  =  text1 . substring ( x ) ; 
@@ -569,6 +623,12 @@ diff_match_patch.prototype.diff_commonPrefix = function(text1, text2) {
569623    } 
570624    pointermid  =  Math . floor ( ( pointermax  -  pointermin )  /  2  +  pointermin ) ; 
571625  } 
626+ 
627+   // shorten the prefix if it splits a surrogate 
628+   if  ( pointermid  >  0  &&  this . isHighSurrogate ( text1 [ pointermid - 1 ] ) )  { 
629+     pointermid -- ; 
630+   } 
631+ 
572632  return  pointermid ; 
573633} ; 
574634
@@ -601,6 +661,12 @@ diff_match_patch.prototype.diff_commonSuffix = function(text1, text2) {
601661    } 
602662    pointermid  =  Math . floor ( ( pointermax  -  pointermin )  /  2  +  pointermin ) ; 
603663  } 
664+ 
665+   // shorten the suffix if it splits a surrogate 
666+   if  ( pointermid  <  length  -  1  &&  this . isLowSurrogate ( text1 [ pointermid ] ) )  { 
667+     pointermid ++ ; 
668+   } 
669+ 
604670  return  pointermid ; 
605671} ; 
606672
@@ -749,6 +815,24 @@ diff_match_patch.prototype.diff_halfMatch_ = function(text1, text2) {
749815    text1_b  =  hm [ 3 ] ; 
750816  } 
751817  var  mid_common  =  hm [ 4 ] ; 
818+ 
819+   // move forward to prevent splitting a surrogate pair 
820+   if  ( mid_common . length  >  0  &&  this . isLowSurrogate ( mid_common [ 0 ] ) )  { 
821+     text1_a  =  text1_a  +  mid_common [ 0 ] ; 
822+     text2_a  =  text2_a  +  mid_common [ 0 ] ; 
823+     mid_common  =  mid_common . substring ( 1 ) ; 
824+   } 
825+ 
826+   // back up to prevent splitting a surrogate pair 
827+   if  ( 
828+     text1_b . length  >  0  &&  this . isLowSurrogate ( text1_b [ 0 ] )  && 
829+     text2_b . length  >  0  &&  this . isLowSurrogate ( text2_b [ 0 ] ) 
830+   )  { 
831+     text1_b  =  mid_common [ mid_common . length  -  1 ]  +  text1_b ; 
832+     text2_b  =  mid_common [ mid_common . length  -  1 ]  +  text2_b ; 
833+     mid_common  =  mid_common . substring ( 0 ,  - 1 ) ; 
834+   } 
835+ 
752836  return  [ text1_a ,  text1_b ,  text2_a ,  text2_b ,  mid_common ] ; 
753837} ; 
754838
0 commit comments