@@ -47,15 +47,45 @@ class BucketizerSuite extends FunSuite with MLlibTestSparkContext {
4747 }
4848 }
4949
// Cross-checks Bucketizer.binarySearchForBuckets against
// BucketizerSuite.linearSearchForBuckets on 100 random features and 10 random,
// sorted split points. The binary search is given the splits wrapped with
// Double.MinValue / Double.MaxValue; the linear search gets the bare splits.
test(" Binary search correctness in contrast with linear search ") {
  val features = Array.fill(100)(Random.nextDouble())
  val innerSplits = Array.fill(10)(Random.nextDouble()).sorted
  val paddedSplits = Double.MinValue +: innerSplits :+ Double.MaxValue
  val binarySearchBuckets = Vectors.dense(features.map { feature =>
    Bucketizer.binarySearchForBuckets(paddedSplits, feature, true, true)
  })
  val linearSearchBuckets = Vectors.dense(features.map { feature =>
    BucketizerSuite.linearSearchForBuckets(innerSplits, feature)
  })
  assert(binarySearchBuckets ~== linearSearchBuckets absTol 1e-5)
}
59+
// Features that coincide exactly with the split points: once the splits are
// wrapped with Double.MinValue / Double.MaxValue, the i-th (0-based) split is
// expected to be assigned bucket i + 1, i.e. 1.0 through 10.0.
test(" Binary search of features at splits") {
  val innerSplits = Array.fill(10)(Random.nextDouble()).sorted
  val expectedBuckets = Vectors.dense(Array.tabulate(10)(i => i + 1.0))
  val paddedSplits = Double.MinValue +: innerSplits :+ Double.MaxValue
  val actualBuckets = Vectors.dense(innerSplits.map { feature =>
    Bucketizer.binarySearchForBuckets(paddedSplits, feature, true, true)
  })
  assert(actualBuckets ~== expectedBuckets absTol 1e-5)
}
69+
// Ten random features (Random.nextDouble() draws) searched against the two
// inner splits -0.1 and 1.1; every feature is expected to be assigned bucket 1.0.
test(" Binary search of features between splits") {
  val features = Array.fill(10)(Random.nextDouble())
  val paddedSplits = Array(Double.MinValue, -0.1, 1.1, Double.MaxValue)
  val expectedBuckets = Vectors.dense(Array.fill(10)(1.0))
  val actualBuckets = Vectors.dense(features.map { feature =>
    Bucketizer.binarySearchForBuckets(paddedSplits, feature, true, true)
  })
  assert(actualBuckets ~== expectedBuckets absTol 1e-5)
}
79+
// Features lying outside the inner splits 0.0 and 1.1: five draws shifted up by
// 1.1 are expected in bucket 2.0, and five draws shifted down by 1.1 are
// expected in bucket 0.0.
test(" Binary search of features outside splits") {
  val shiftedUp = Array.fill(5)(Random.nextDouble() + 1.1)
  val shiftedDown = Array.fill(5)(Random.nextDouble() - 1.1)
  val features = shiftedUp ++ shiftedDown
  val paddedSplits = Array(Double.MinValue, 0.0, 1.1, Double.MaxValue)
  val expectedBuckets = Vectors.dense(Array.fill(5)(2.0) ++ Array.fill(5)(0.0))
  val actualBuckets = Vectors.dense(features.map { feature =>
    Bucketizer.binarySearchForBuckets(paddedSplits, feature, true, true)
  })
  assert(actualBuckets ~== expectedBuckets absTol 1e-5)
}
5989}
6090
6191private object BucketizerSuite {
0 commit comments