@@ -47,15 +47,45 @@ class BucketizerSuite extends FunSuite with MLlibTestSparkContext {
4747    }
4848  }
4949
// Cross-checks Bucketizer.binarySearchForBuckets against the suite's linear-search helper:
// for 100 random values and 10 sorted random splits, both must yield the same bucket indices.
test(" Binary search correctness in contrast with linear search ") {
  val data = Array.fill(100)(Random.nextDouble())
  val splits = Array.fill(10)(Random.nextDouble()).sorted
  // The binary search receives the splits padded with Double.MinValue / Double.MaxValue,
  // whereas the linear-search helper is given the raw splits.
  val wrappedSplits = Array(Double.MinValue) ++ splits ++ Array(Double.MaxValue)
  val bsResult = Vectors.dense(
    data.map(x => Bucketizer.binarySearchForBuckets(wrappedSplits, x, true, true)))
  val lsResult = Vectors.dense(data.map(x => BucketizerSuite.linearSearchForBuckets(splits, x)))
  assert(bsResult ~== lsResult absTol 1e-5)
}
59+ 
// Feeds the split points themselves as feature values. With the splits padded by
// Double.MinValue / Double.MaxValue, the test expects the i-th sorted split (0-based)
// to be assigned bucket i + 1.
test(" Binary search of features at splits") {
  val splits = Array.fill(10)(Random.nextDouble()).sorted
  // Every feature value is exactly equal to one of the splits.
  val data = splits
  val expected = Vectors.dense(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0)
  val wrappedSplits = Array(Double.MinValue) ++ splits ++ Array(Double.MaxValue)
  val result = Vectors.dense(
    data.map(x => Bucketizer.binarySearchForBuckets(wrappedSplits, x, true, true)))
  assert(result ~== expected absTol 1e-5)
}
69+ 
// Ten values drawn with Random.nextDouble() are searched against the splits (-0.1, 1.1),
// padded with Double.MinValue / Double.MaxValue; the test expects bucket 1.0 for all of them.
test(" Binary search of features between splits") {
  val data = Array.fill(10)(Random.nextDouble())
  val splits = Array(-0.1, 1.1)
  val expected = Vectors.dense(Array.fill(10)(1.0))
  val wrappedSplits = Array(Double.MinValue) ++ splits ++ Array(Double.MaxValue)
  val result = Vectors.dense(
    data.map(x => Bucketizer.binarySearchForBuckets(wrappedSplits, x, true, true)))
  assert(result ~== expected absTol 1e-5)
}
79+ 
// Searches values lying outside the splits (0.0, 1.1): five random values shifted up by 1.1
// followed by five shifted down by 1.1. With the splits padded by Double.MinValue /
// Double.MaxValue, the test expects bucket 2.0 for the first five and bucket 0.0 for the rest.
test(" Binary search of features outside splits") {
  val data =
    Array.fill(5)(Random.nextDouble() + 1.1) ++ Array.fill(5)(Random.nextDouble() - 1.1)
  val splits = Array(0.0, 1.1)
  val expected = Vectors.dense(Array.fill(5)(2.0) ++ Array.fill(5)(0.0))
  val wrappedSplits = Array(Double.MinValue) ++ splits ++ Array(Double.MaxValue)
  val result = Vectors.dense(
    data.map(x => Bucketizer.binarySearchForBuckets(wrappedSplits, x, true, true)))
  assert(result ~== expected absTol 1e-5)
}
5989}
6090
6191private  object  BucketizerSuite  {
0 commit comments